- Introduced tools/heal.py to automatically dynamically identify and build missing structural concepts and entities by tracing contextual usages using litellm. - Add docs/automated-sync.md with cron/launchd orchestration best-practices. - Closes Issue #16 on Graph Integrity Constraints.
101 lines
3.1 KiB
Python
Executable File
101 lines
3.1 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
Graph Self-Healing Tool
|
|
|
|
Automatically retrieves "Missing Entity Pages" from the wiki and generates
|
|
comprehensive definition pages for them using the LLM.
|
|
It resolves broken entity links by scanning existing contexts where the entity is referenced.
|
|
|
|
Usage:
|
|
python tools/heal.py
|
|
"""
|
|
|
|
import os
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
try:
|
|
from litellm import completion
|
|
except ImportError:
|
|
print("Error: litellm not installed. Run: pip install litellm")
|
|
sys.exit(1)
|
|
|
|
# Ensure tools can be imported
|
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
|
|
|
from tools.lint import find_missing_entities, all_wiki_pages
|
|
|
|
REPO_ROOT = Path(__file__).parent.parent
|
|
WIKI_DIR = REPO_ROOT / "wiki"
|
|
ENTITIES_DIR = WIKI_DIR / "entities"
|
|
|
|
def call_llm(prompt: str, max_tokens: int = 1500) -> str:
|
|
# Use litellm standard environment variables
|
|
# e.g., GEMINI_API_KEY, ANTHROPIC_API_KEY, OPENAI_API_KEY
|
|
model = os.getenv("LLM_MODEL", "claude-3-5-haiku-latest") # default to fast model
|
|
|
|
response = completion(
|
|
model=model,
|
|
messages=[{"role": "user", "content": prompt}],
|
|
max_tokens=max_tokens
|
|
)
|
|
return response.choices[0].message.content
|
|
|
|
def search_sources(entity: str, pages: list[Path]) -> list[Path]:
|
|
"""Find up to 15 pages where this entity is mentioned natively."""
|
|
sources = []
|
|
for p in pages:
|
|
if "entities" not in str(p.parent) and "concepts" not in str(p.parent):
|
|
content = p.read_text(encoding="utf-8")
|
|
if entity.lower() in content.lower():
|
|
sources.append(p)
|
|
return sources[:15]
|
|
|
|
def heal_missing_entities():
|
|
pages = all_wiki_pages()
|
|
missing_entities = find_missing_entities(pages)
|
|
|
|
if not missing_entities:
|
|
print("Graph is fully connected. No missing entities found!")
|
|
return
|
|
|
|
ENTITIES_DIR.mkdir(exist_ok=True, parents=True)
|
|
print(f"Found {len(missing_entities)} missing entity nodes. Commencing auto-heal...")
|
|
|
|
for entity in missing_entities:
|
|
print(f"Healing entity page for: {entity}")
|
|
sources = search_sources(entity, pages)
|
|
|
|
context = ""
|
|
for s in sources:
|
|
context += f"\n\n### {s.name}\n{s.read_text(encoding='utf-8')[:800]}"
|
|
|
|
prompt = f"""You are filling a data gap in the Personal LLM Wiki.
|
|
Create an Entity definition page for "{entity}".
|
|
|
|
Here is how the entity appears in the current sources:
|
|
{context}
|
|
|
|
Format:
|
|
---
|
|
title: "{entity}"
|
|
type: entity
|
|
tags: []
|
|
sources: {[s.name for s in sources]}
|
|
---
|
|
|
|
# {entity}
|
|
|
|
Write a comprehensive paragraph defining what `{entity}` means in the context of this wiki, its main significance, and any actions or associations related to it.
|
|
"""
|
|
try:
|
|
result = call_llm(prompt)
|
|
out_path = ENTITIES_DIR / f"{entity}.md"
|
|
out_path.write_text(result, encoding="utf-8")
|
|
print(f" -> Saved to {out_path.relative_to(REPO_ROOT)}")
|
|
except Exception as e:
|
|
print(f" [!] Failed to generate {entity}: {e}")
|
|
|
|
if __name__ == "__main__":
|
|
heal_missing_entities()
|