Add LLM Wiki Agent — persistent LLM-maintained knowledge base

Replaces dual-agent demo with a full personal knowledge base system where Claude reads source documents and incrementally builds and maintains a structured, interlinked wiki of markdown pages.

- tools/ingest.py: reads a source, extracts knowledge, updates wiki pages
- tools/query.py: queries the wiki with Claude, optionally files answers back
- tools/lint.py: health-checks the wiki (orphans, contradictions, gaps)
- tools/build_graph.py: two-pass graph builder (wikilinks + Claude inference) with Louvain community detection and vis.js interactive HTML output
- CLAUDE.md: schema and workflow instructions for the LLM
- wiki/: starter index, log, and overview pages
- raw/, graph/: directory scaffolding

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-07 07:04:22 +05:30
parent b5ab57bc30
commit d12089aaaf
12 changed files with 1304 additions and 70 deletions
--- a/tools/query.py
+++ b/tools/query.py
@@ -0,0 +1,177 @@
+#!/usr/bin/env python3
+"""
+Query the LLM Wiki.
+
+Usage:
+    python tools/query.py "What are the main themes across all sources?"
+    python tools/query.py "How does ConceptA relate to ConceptB?" --save
+    python tools/query.py "Summarize everything about EntityName" --save syntheses/my-analysis.md
+
+Flags:
+    --save              Save the answer back into the wiki (prompts for filename)
+    --save <path>       Save to a specific wiki path
+"""
+
+import sys
+import re
+import json
+import argparse
+from pathlib import Path
+from datetime import date
+
+import anthropic
+
+# Filesystem layout, anchored two levels above this script (tools/query.py).
+REPO_ROOT = Path(__file__).parent.parent
+WIKI_DIR = REPO_ROOT.joinpath("wiki")  # root directory of the wiki pages
+INDEX_FILE = WIKI_DIR.joinpath("index.md")  # page catalogue read by query()
+LOG_FILE = WIKI_DIR.joinpath("log.md")  # activity log written by append_log()
+SCHEMA_FILE = REPO_ROOT.joinpath("CLAUDE.md")  # schema text included in the prompt
+
+
+def read_file(path: Path) -> str:
+    """Return the UTF-8 text of *path*, or "" when the path does not exist."""
+    if not path.exists():
+        return ""
+    return path.read_text(encoding="utf-8")
+
+
+def write_file(path: Path, content: str) -> None:
+    """Write *content* to *path* as UTF-8 and print a confirmation line.
+
+    Missing parent directories are created first. The confirmation shows the
+    path relative to the repository root when the file lives under it; for a
+    target outside the repository the path is printed as given, so reporting
+    can never raise after the file has already been written.
+    """
+    path.parent.mkdir(parents=True, exist_ok=True)
+    path.write_text(content, encoding="utf-8")
+    try:
+        shown = path.relative_to(REPO_ROOT)
+    except ValueError:
+        # relative_to() raises when path is not under REPO_ROOT (for example
+        # an absolute save path); the write succeeded, so still report it.
+        shown = path
+    print(f"  saved: {shown}")
+
+
+def find_relevant_pages(question: str, index_content: str) -> list[Path]:
+    """Pick wiki pages whose index title shares a keyword with the question.
+
+    Scans ``index_content`` for markdown links ``[title](href)`` and keeps a
+    link when any title word longer than three characters occurs, as a
+    case-insensitive substring, in ``question``. ``[[wikilinks]]`` are not
+    parsed here. ``overview.md`` is always placed first when it exists, so it
+    can never be dropped by the cap.
+
+    Returns:
+        At most 12 distinct paths, each an existing regular file located
+        inside ``WIKI_DIR``.
+    """
+    md_links = re.findall(r'\[([^\]]+)\]\(([^)]+)\)', index_content)
+    question_lower = question.lower()
+    wiki_root = WIKI_DIR.resolve()
+
+    relevant: list[Path] = []
+    overview = WIKI_DIR / "overview.md"
+    if overview.is_file():
+        relevant.append(overview)
+
+    for title, href in md_links:
+        # Trim punctuation/markup glued to a word so that "Transformers:" or
+        # "(attention)" can still match the question text.
+        words = (w.strip(".,:;!?()[]{}<>\"'`*_") for w in title.lower().split())
+        if not any(len(word) > 3 and word in question_lower for word in words):
+            continue
+        page = WIKI_DIR / href
+        # Skip pages already selected and anything that is not a regular file
+        # (a directory would fail when read later).
+        if page in relevant or not page.is_file():
+            continue
+        # Index hrefs are free-form markdown: ignore targets that resolve
+        # outside the wiki (absolute paths, "../" hops).
+        if not page.resolve().is_relative_to(wiki_root):
+            continue
+        relevant.append(page)
+    return relevant[:12]  # cap to avoid context overflow
+
+
+def append_log(entry: str):
+    """Insert *entry* at the top of the log file, above all earlier entries.
+
+    The stripped entry is followed by one blank line and then the log's
+    previous contents (empty when the log file does not exist yet).
+    """
+    previous = read_file(LOG_FILE)
+    updated = f"{entry.strip()}\n\n{previous}"
+    LOG_FILE.write_text(updated, encoding="utf-8")
+
+
+def _existing_wiki_pages(paths) -> list[Path]:
+    """Map model-suggested relative paths to distinct existing files inside the wiki."""
+    pages: list[Path] = []
+    if not isinstance(paths, list):
+        return pages
+    wiki_root = WIKI_DIR.resolve()
+    for item in paths:
+        if not isinstance(item, str) or not item.strip():
+            continue
+        page = WIKI_DIR / item.strip()
+        # Directories and missing files cannot be read; duplicates add nothing.
+        if page in pages or not page.is_file():
+            continue
+        # Never follow a suggested path out of the wiki directory.
+        if page.resolve().is_relative_to(wiki_root):
+            pages.append(page)
+    return pages
+
+
+def _select_pages_with_claude(client, question: str, index_content: str) -> list[Path]:
+    """Ask the model which indexed pages are relevant; return [] if the reply is unusable."""
+    selection_response = client.messages.create(
+        model="claude-haiku-4-5-20251001",
+        max_tokens=512,
+        messages=[{
+            "role": "user",
+            "content": f"Given this wiki index:\n\n{index_content}\n\nWhich pages are most relevant to answering: \"{question}\"\n\nReturn ONLY a JSON array of relative file paths (as listed in the index), e.g. [\"sources/foo.md\", \"concepts/Bar.md\"]. Maximum 10 pages."
+        }]
+    )
+    blocks = selection_response.content
+    raw = blocks[0].text.strip() if blocks else ""
+    # The model sometimes wraps the array in a ```json fence; strip it.
+    raw = re.sub(r"^```(?:json)?\s*", "", raw)
+    raw = re.sub(r"\s*```$", "", raw)
+    try:
+        paths = json.loads(raw)
+    except json.JSONDecodeError:
+        print("  could not parse page selection; keeping keyword matches")
+        return []
+    return _existing_wiki_pages(paths)
+
+
+def _save_answer(title: str, answer: str, save_path: str, today: str) -> None:
+    """Write the answer as a synthesis page and list it in the index."""
+    # json.dumps yields a double-quoted, escaped scalar, so quotes or
+    # backslashes in the question cannot break the YAML frontmatter.
+    frontmatter = (
+        "---\n"
+        f"title: {json.dumps(title[:80], ensure_ascii=False)}\n"
+        "type: synthesis\n"
+        "tags: []\n"
+        "sources: []\n"
+        f"last_updated: {today}\n"
+        "---\n"
+        "\n"
+    )
+    write_file(WIKI_DIR / save_path, frontmatter + answer)
+
+    # Square brackets in the label would terminate the markdown link early.
+    label = re.sub(r"[\[\]]", "", title[:60])
+    entry = f"- [{label}]({save_path}) — synthesis"
+    index_content = read_file(INDEX_FILE)
+    if entry not in index_content:
+        heading = re.search(r"^## Syntheses[ \t]*$", index_content, flags=re.MULTILINE)
+        if heading:
+            # Insert directly under the first "## Syntheses" heading only.
+            cut = heading.end()
+            index_content = index_content[:cut] + "\n" + entry + index_content[cut:]
+        else:
+            # No section yet: create one rather than silently skipping.
+            index_content = index_content.rstrip("\n") + f"\n\n## Syntheses\n{entry}\n"
+        INDEX_FILE.write_text(index_content, encoding="utf-8")
+    print(f"  indexed: {save_path}")
+
+
+def query(question: str, save_path: str | None = None):
+    """Answer *question* from the wiki and optionally file the answer back.
+
+    Steps: read the index, choose relevant pages (keyword match first, then a
+    model-based selection when that finds at most one page), send the pages
+    plus the schema to the model, print the answer, optionally save it as a
+    synthesis page listed in the index, and finally record the query in the
+    log.
+
+    Args:
+        question: Natural-language question to answer.
+        save_path: ``None`` to not save; ``""`` to prompt for a slug (saved as
+            ``syntheses/<slug>.md``); otherwise a path relative to the wiki
+            directory.
+
+    Exits with status 1 when the index is missing or empty.
+    """
+    today = date.today().isoformat()
+    # Single-line form of the question for the frontmatter, index and log.
+    title = " ".join(question.split())
+
+    # Step 1: Read index (before creating the client, so the hint below is
+    # shown even when no API credentials are configured).
+    index_content = read_file(INDEX_FILE)
+    if not index_content:
+        print("Wiki is empty. Ingest some sources first with: python tools/ingest.py <source>")
+        sys.exit(1)
+
+    client = anthropic.Anthropic()
+
+    # Step 2: Find relevant pages
+    relevant_pages = find_relevant_pages(question, index_content)
+
+    # With at most one keyword hit (typically just the overview), ask Claude
+    # to pick pages from the index and add them to what was already found.
+    if len(relevant_pages) <= 1:
+        print("  selecting relevant pages via Claude...")
+        selected = _select_pages_with_claude(client, question, index_content)
+        relevant_pages = relevant_pages + [p for p in selected if p not in relevant_pages]
+
+    # Step 3: Read relevant pages
+    sections = []
+    for page in relevant_pages:
+        try:
+            label = page.relative_to(REPO_ROOT)
+        except ValueError:
+            label = page
+        sections.append(f"\n\n### {label}\n{page.read_text(encoding='utf-8')}")
+    # Fall back to the raw index when no page could be selected.
+    pages_context = "".join(sections) or f"\n\n### wiki/index.md\n{index_content}"
+
+    schema = read_file(SCHEMA_FILE)
+
+    # Step 4: Synthesize answer
+    print(f"  synthesizing answer from {len(relevant_pages)} pages...")
+    response = client.messages.create(
+        model="claude-sonnet-4-6",
+        max_tokens=4096,
+        messages=[{
+            "role": "user",
+            "content": f"""You are querying an LLM Wiki to answer a question. Use the wiki pages below to synthesize a thorough answer. Cite sources using [[PageName]] wikilink syntax.
+
+Schema:
+{schema}
+
+Wiki pages:
+{pages_context}
+
+Question: {question}
+
+Write a well-structured markdown answer with headers, bullets, and [[wikilink]] citations. At the end, add a ## Sources section listing the pages you drew from.
+"""
+        }]
+    )
+
+    answer = response.content[0].text
+    print("\n" + "=" * 60)
+    print(answer)
+    print("=" * 60)
+
+    # Step 5: Optionally save answer
+    if save_path == "":
+        # Bare --save: prompt for a filename; no input (or EOF) skips the save.
+        try:
+            slug = input("\nSave as (slug, e.g. 'my-analysis'): ").strip()
+        except EOFError:
+            slug = ""
+        if slug:
+            save_path = f"syntheses/{slug}.md"
+        else:
+            print("Skipping save.")
+            save_path = None  # the query itself is still logged below
+
+    if save_path is not None:
+        _save_answer(title, answer, save_path, today)
+
+    # Append to log
+    append_log(f"## [{today}] query | {title[:80]}\n\nSynthesized answer from {len(relevant_pages)} pages." +
+               (f" Saved to {save_path}." if save_path else ""))
+
+
+if __name__ == "__main__":
+    # CLI entry point: one positional question plus an optional --save flag.
+    cli = argparse.ArgumentParser(description="Query the LLM Wiki")
+    cli.add_argument("question", help="Question to ask the wiki")
+    # nargs="?" lets --save appear bare (const "" makes query() prompt for a
+    # slug) or with an explicit path; when the flag is absent it stays None.
+    cli.add_argument(
+        "--save",
+        nargs="?",
+        const="",
+        default=None,
+        help="Save answer to wiki (optionally specify path)",
+    )
+    options = cli.parse_args()
+    query(options.question, options.save)