Add LLM Wiki Agent — persistent LLM-maintained knowledge base

Replaces dual-agent demo with a full personal knowledge base system
where Claude reads source documents and incrementally builds and
maintains a structured, interlinked wiki of markdown pages.

- tools/ingest.py: reads a source, extracts knowledge, updates wiki pages
- tools/query.py: queries the wiki with Claude, optionally files answers back
- tools/lint.py: health-checks the wiki (orphans, contradictions, gaps)
- tools/build_graph.py: two-pass graph builder (wikilinks + Claude inference)
  with Louvain community detection and vis.js interactive HTML output
- CLAUDE.md: schema and workflow instructions for the LLM
- wiki/: starter index, log, and overview pages
- raw/, graph/: directory scaffolding

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Anil Matcha
2026-04-07 07:04:22 +05:30
parent b5ab57bc30
commit d12089aaaf
12 changed files with 1304 additions and 70 deletions

422
tools/build_graph.py Normal file
View File

@@ -0,0 +1,422 @@
#!/usr/bin/env python3
"""
Build the knowledge graph from the wiki.
Usage:
python tools/build_graph.py # full rebuild
python tools/build_graph.py --no-infer # skip semantic inference (faster)
python tools/build_graph.py --open # open graph.html in browser after build
Outputs:
graph/graph.json — node/edge data (cached by SHA256)
graph/graph.html — interactive vis.js visualization
Edge types:
EXTRACTED — explicit [[wikilink]] in a page
INFERRED — Claude-detected implicit relationship
AMBIGUOUS — low-confidence inferred relationship
"""
import re
import json
import hashlib
import argparse
import webbrowser
from pathlib import Path
from datetime import date
import anthropic
try:
import networkx as nx
from networkx.algorithms import community as nx_community
HAS_NETWORKX = True
except ImportError:
HAS_NETWORKX = False
print("Warning: networkx not installed. Community detection disabled. Run: pip install networkx")
# Repository root: the parent of the directory that contains this script.
REPO_ROOT = Path(__file__).parent.parent
# Input: markdown wiki pages. Output: graph artifacts.
WIKI_DIR = REPO_ROOT / "wiki"
GRAPH_DIR = REPO_ROOT / "graph"
GRAPH_JSON = GRAPH_DIR / "graph.json"
GRAPH_HTML = GRAPH_DIR / "graph.html"
# JSON file read by load_cache() and written by save_cache().
CACHE_FILE = GRAPH_DIR / ".cache.json"
LOG_FILE = WIKI_DIR / "log.md"
SCHEMA_FILE = REPO_ROOT / "CLAUDE.md"
# Node type → color mapping
TYPE_COLORS = {
"source": "#4CAF50",
"entity": "#2196F3",
"concept": "#FF9800",
"synthesis": "#9C27B0",
"unknown": "#9E9E9E",
}
# Edge type → color mapping (keys match the edge "type" values documented in the module docstring).
EDGE_COLORS = {
"EXTRACTED": "#555555",
"INFERRED": "#FF5722",
"AMBIGUOUS": "#BDBDBD",
}
def read_file(path: Path) -> str:
    """Return the UTF-8 text of *path*, or an empty string when it does not exist."""
    if not path.exists():
        return ""
    return path.read_text(encoding="utf-8")
def sha256(text: str) -> str:
    """Return the full hex SHA-256 digest of *text* (encoded with the default UTF-8)."""
    hasher = hashlib.sha256()
    hasher.update(text.encode())
    return hasher.hexdigest()
def all_wiki_pages() -> list[Path]:
    """Return every markdown file under WIKI_DIR except index.md, log.md and lint-report.md."""
    skipped_names = ("index.md", "log.md", "lint-report.md")
    found = []
    for candidate in WIKI_DIR.rglob("*.md"):
        if candidate.name in skipped_names:
            continue
        found.append(candidate)
    return found
def extract_wikilinks(content: str) -> list[str]:
    """Return the distinct ``[[wikilink]]`` targets in *content*, in first-seen order.

    Duplicates are dropped. Order is kept deterministic (instead of going
    through a ``set``) so that the edge list written to graph.json does not
    reshuffle between otherwise identical builds.
    """
    return list(dict.fromkeys(re.findall(r'\[\[([^\]]+)\]\]', content)))
def extract_frontmatter_type(content: str) -> str:
    """Return the value of the first ``type:`` line in *content*, or "unknown".

    Surrounding single or double quotes are removed from the value.
    """
    found = re.search(r'^type:\s*(\S+)', content, re.MULTILINE)
    if found is None:
        return "unknown"
    return found.group(1).strip('"\'')
def page_id(path: Path) -> str:
    """Return the node id for a wiki page: its POSIX path relative to WIKI_DIR, without the extension.

    Only the final suffix is removed. A plain string replace of ".md" would
    also mangle directory or file names that merely contain ".md".

    Raises:
        ValueError: if *path* is not located under WIKI_DIR.
    """
    return path.relative_to(WIKI_DIR).with_suffix("").as_posix()
def load_cache() -> dict:
    """Load the cache written by save_cache().

    Returns an empty dict when the file is missing, unreadable, not valid
    UTF-8/JSON, or does not contain a JSON object. The cache is only an
    optimisation, so a damaged file must never abort a build.
    """
    if not CACHE_FILE.exists():
        return {}
    try:
        data = json.loads(CACHE_FILE.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        return {}
    # Callers use .get() on the result, so anything but a dict is unusable.
    return data if isinstance(data, dict) else {}
def save_cache(cache: dict):
    """Write *cache* to CACHE_FILE as indented JSON, creating GRAPH_DIR if needed."""
    GRAPH_DIR.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(cache, indent=2)
    CACHE_FILE.write_text(serialized)
def build_nodes(pages: list[Path]) -> list[dict]:
    """Build one graph node per wiki page.

    Each node carries the page id, a label (the front-matter ``title:`` when
    present, otherwise the file stem), the page type, the color for that type
    and the page path relative to the repository root.
    """

    def describe(page: Path) -> dict:
        text = read_file(page)
        kind = extract_frontmatter_type(text)
        titled = re.search(r'^title:\s*"?([^"\n]+)"?', text, re.MULTILINE)
        return {
            "id": page_id(page),
            "label": titled.group(1).strip() if titled else page.stem,
            "type": kind,
            "color": TYPE_COLORS.get(kind, TYPE_COLORS["unknown"]),
            "path": str(page.relative_to(REPO_ROOT)),
        }

    return [describe(page) for page in pages]
def build_extracted_edges(pages: list[Path]) -> list[dict]:
    """Pass 1: deterministic wikilink edges.

    A link resolves when its lower-cased text equals the lower-cased stem of a
    page. Self-links and repeated (source, target) pairs are skipped.
    """
    # Lower-cased stem -> page id; a later page with the same stem overwrites an earlier one.
    ids_by_stem = {}
    for page in pages:
        ids_by_stem[page.stem.lower()] = page_id(page)

    emitted = set()
    result = []
    for page in pages:
        text = read_file(page)
        source_id = page_id(page)
        for name in extract_wikilinks(text):
            target_id = ids_by_stem.get(name.lower())
            if not target_id or target_id == source_id:
                continue
            pair = (source_id, target_id)
            if pair in emitted:
                continue
            emitted.add(pair)
            result.append({
                "from": source_id,
                "to": target_id,
                "type": "EXTRACTED",
                "color": EDGE_COLORS["EXTRACTED"],
                "confidence": 1.0,
            })
    return result
def build_inferred_edges(pages: list[Path], existing_edges: list[dict], cache: dict) -> list[dict]:
    """Pass 2: Claude-inferred semantic relationships.

    Args:
        pages: all wiki pages in the graph.
        existing_edges: edges already found by pass 1 (used for prompt context
            and to avoid duplicating an explicit link).
        cache: mutable cache dict. ``cache[str(page)]`` holds the content hash
            of the page at the time it was last analysed, and
            ``cache["edges::" + str(page)]`` holds the edges inferred for it.

    Returns:
        The inferred edges for ALL pages: remembered edges for unchanged pages
        plus freshly inferred edges for changed ones. Without the remembered
        edges an incremental rebuild would silently drop every inferred edge
        belonging to a page that did not change.

    A page is only recorded in *cache* after its response was parsed
    successfully, so a malformed response is retried on the next build.
    """
    valid_ids = {page_id(p) for p in pages}
    existing_pairs = {(e["from"], e["to"]) for e in existing_edges}

    edges: list[dict] = []
    changed_pages: list[tuple[Path, str]] = []
    for p in pages:
        digest = sha256(read_file(p))
        remembered = cache.get(f"edges::{p}")
        if cache.get(str(p)) == digest and isinstance(remembered, list):
            # Unchanged page: reuse its edges, dropping any whose endpoint no longer exists.
            edges.extend(
                e for e in remembered
                if isinstance(e, dict) and e.get("from") in valid_ids and e.get("to") in valid_ids
            )
        else:
            # Changed page, or a cache written before edges were remembered.
            changed_pages.append((p, digest))

    if not changed_pages:
        print(" no changed pages — skipping semantic inference")
        return edges

    print(f" inferring relationships for {len(changed_pages)} changed pages...")
    client = anthropic.Anthropic()
    # Build a summary of existing nodes for context
    node_list = "\n".join(f"- {page_id(p)} ({extract_frontmatter_type(read_file(p))})" for p in pages)

    for p, digest in changed_pages:
        content = read_file(p)[:2000]  # truncate for context efficiency
        src = page_id(p)
        # Only the edges that really start at this page, capped to keep the prompt small.
        own_edges = [e for e in existing_edges if e["from"] == src][:30]
        existing_edge_summary = "\n".join(
            f"- {e['from']} -> {e['to']} (EXTRACTED)" for e in own_edges
        )
        response = client.messages.create(
            model="claude-haiku-4-5-20251001",
            max_tokens=1024,
            messages=[{
                "role": "user",
                "content": f"""Analyze this wiki page and identify implicit semantic relationships to other pages in the wiki.
Source page: {src}
Content:
{content}
All available pages:
{node_list}
Already-extracted edges from this page:
{existing_edge_summary}
Return ONLY a JSON array of NEW relationships not already captured by explicit wikilinks:
[
{{"to": "page-id", "relationship": "one-line description", "confidence": 0.0-1.0, "type": "INFERRED or AMBIGUOUS"}}
]
Rules:
- Only include pages from the available list above
- Confidence >= 0.7 → INFERRED, < 0.7 → AMBIGUOUS
- Do not repeat edges already in the extracted list
- Return empty array [] if no new relationships found
"""
            }]
        )
        blocks = response.content
        raw = (getattr(blocks[0], "text", "") or "").strip() if blocks else ""
        raw = re.sub(r"^```(?:json)?\s*", "", raw)
        raw = re.sub(r"\s*```$", "", raw)
        try:
            inferred = json.loads(raw)
        except (json.JSONDecodeError, TypeError):
            inferred = None
        if not isinstance(inferred, list):
            # Leave the cache untouched so this page is retried next time.
            print(f" warning: could not parse inferred relationships for {src}; will retry on next build")
            continue

        page_edges = []
        for rel in inferred:
            if not isinstance(rel, dict):
                continue
            target = rel.get("to")
            # The model may invent page ids, point a page at itself or repeat an explicit link.
            if not isinstance(target, str) or target not in valid_ids or target == src:
                continue
            if (src, target) in existing_pairs:
                continue
            try:
                confidence = float(rel.get("confidence", 0.7))
            except (TypeError, ValueError):
                confidence = 0.7
            confidence = min(max(confidence, 0.0), 1.0)
            edge_type = rel.get("type")
            if edge_type not in ("INFERRED", "AMBIGUOUS"):
                # Fall back to the rule stated in the prompt.
                edge_type = "INFERRED" if confidence >= 0.7 else "AMBIGUOUS"
            page_edges.append({
                "from": src,
                "to": target,
                "type": edge_type,
                "label": str(rel.get("relationship") or ""),
                "color": EDGE_COLORS[edge_type],
                "confidence": confidence,
            })

        cache[str(p)] = digest
        cache[f"edges::{p}"] = page_edges
        edges.extend(page_edges)
    return edges
def detect_communities(nodes: list[dict], edges: list[dict]) -> dict[str, int]:
    """Assign community IDs to nodes using Louvain algorithm.

    Returns a mapping of node id -> community index, or an empty dict when
    networkx is unavailable, the graph has no usable edges, or detection
    fails. Detection is best-effort: a failure is reported but never aborts
    the build.
    """
    if not HAS_NETWORKX:
        return {}
    G = nx.Graph()
    # Insert nodes in list order so the seeded algorithm sees the same graph on every run.
    for n in nodes:
        G.add_node(n["id"])
    for e in edges:
        # Skip edges whose endpoint is not a known page; add_edge would create phantom nodes.
        if e["from"] in G and e["to"] in G:
            G.add_edge(e["from"], e["to"])
    if G.number_of_edges() == 0:
        return {}
    try:
        communities = nx_community.louvain_communities(G, seed=42)
    except Exception as exc:  # best-effort boundary, e.g. a networkx build without Louvain
        print(f" warning: community detection failed: {exc}")
        return {}
    node_to_community = {}
    for i, comm in enumerate(communities):
        for node in comm:
            node_to_community[node] = i
    return node_to_community
# Palette for community coloring; build_graph() picks an entry by community id modulo the palette length.
COMMUNITY_COLORS = [
"#E91E63", "#00BCD4", "#8BC34A", "#FF5722", "#673AB7",
"#FFC107", "#009688", "#F44336", "#3F51B5", "#CDDC39",
]
def render_html(nodes: list[dict], edges: list[dict]) -> str:
    """Generate self-contained vis.js HTML.

    The node and edge lists are embedded as JSON literals inside a <script>
    element. Every "<" in that JSON is written as the escape \\u003c so that
    text taken from wiki pages (for example a title containing "</script>")
    cannot end the script element or inject markup. The JavaScript values
    are unchanged by the escape.
    """
    nodes_json = json.dumps(nodes, indent=2).replace("<", "\\u003c")
    edges_json = json.dumps(edges, indent=2).replace("<", "\\u003c")
    legend_items = "".join(
        f'<span style="background:{color};padding:3px 8px;margin:2px;border-radius:3px;font-size:12px">{t}</span>'
        for t, color in TYPE_COLORS.items() if t != "unknown"
    )
    return f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>LLM Wiki — Knowledge Graph</title>
<script src="https://unpkg.com/vis-network/standalone/umd/vis-network.min.js"></script>
<style>
body {{ margin: 0; background: #1a1a2e; font-family: sans-serif; color: #eee; }}
#graph {{ width: 100vw; height: 100vh; }}
#controls {{
position: fixed; top: 10px; left: 10px; background: rgba(0,0,0,0.7);
padding: 12px; border-radius: 8px; z-index: 10; max-width: 260px;
}}
#controls h3 {{ margin: 0 0 8px; font-size: 14px; }}
#search {{ width: 100%; padding: 4px; margin-bottom: 8px; background: #333; color: #eee; border: 1px solid #555; border-radius: 4px; }}
#info {{
position: fixed; bottom: 10px; left: 10px; background: rgba(0,0,0,0.8);
padding: 12px; border-radius: 8px; z-index: 10; max-width: 320px;
display: none;
}}
#stats {{ position: fixed; top: 10px; right: 10px; background: rgba(0,0,0,0.7); padding: 10px; border-radius: 8px; font-size: 12px; }}
</style>
</head>
<body>
<div id="controls">
<h3>LLM Wiki Graph</h3>
<input id="search" type="text" placeholder="Search nodes..." oninput="searchNodes(this.value)">
<div>{legend_items}</div>
<div style="margin-top:8px;font-size:11px;color:#aaa">
<span style="background:#555;padding:2px 6px;border-radius:3px;margin-right:4px">──</span> Explicit link<br>
<span style="background:#FF5722;padding:2px 6px;border-radius:3px;margin-right:4px">──</span> Inferred
</div>
</div>
<div id="graph"></div>
<div id="info">
<b id="info-title"></b><br>
<span id="info-type" style="font-size:12px;color:#aaa"></span><br>
<span id="info-path" style="font-size:11px;color:#666"></span>
</div>
<div id="stats"></div>
<script>
const nodes = new vis.DataSet({nodes_json});
const edges = new vis.DataSet({edges_json});
const container = document.getElementById("graph");
const network = new vis.Network(container, {{ nodes, edges }}, {{
nodes: {{
shape: "dot",
size: 12,
font: {{ color: "#eee", size: 13 }},
borderWidth: 2,
}},
edges: {{
width: 1.2,
smooth: {{ type: "continuous" }},
arrows: {{ to: {{ enabled: true, scaleFactor: 0.5 }} }},
}},
physics: {{
stabilization: {{ iterations: 150 }},
barnesHut: {{ gravitationalConstant: -8000, springLength: 120 }},
}},
interaction: {{ hover: true, tooltipDelay: 200 }},
}});
network.on("click", params => {{
if (params.nodes.length > 0) {{
const node = nodes.get(params.nodes[0]);
document.getElementById("info").style.display = "block";
document.getElementById("info-title").textContent = node.label;
document.getElementById("info-type").textContent = node.type;
document.getElementById("info-path").textContent = node.path;
}} else {{
document.getElementById("info").style.display = "none";
}}
}});
document.getElementById("stats").textContent =
`${{nodes.length}} nodes · ${{edges.length}} edges`;
function searchNodes(q) {{
const lower = q.toLowerCase();
nodes.forEach(n => {{
nodes.update({{ id: n.id, opacity: (!q || n.label.toLowerCase().includes(lower)) ? 1 : 0.15 }});
}});
}}
</script>
</body>
</html>"""
def append_log(entry: str):
    """Put *entry* (stripped) at the top of wiki/log.md, followed by a blank line and the previous log text."""
    log_path = WIKI_DIR / "log.md"
    previous = read_file(log_path)
    updated = f"{entry.strip()}\n\n{previous}"
    log_path.write_text(updated, encoding="utf-8")
def build_graph(infer: bool = True, open_browser: bool = False):
    """Rebuild graph/graph.json and graph/graph.html from the wiki pages.

    Args:
        infer: when True, run the Claude-based inference pass after the
            wikilink pass and persist the cache afterwards.
        open_browser: when True, open the generated HTML in the default browser.

    Both output files are written as UTF-8: the HTML declares that charset and
    contains non-ASCII characters, so relying on the platform default encoding
    can fail or produce mojibake.
    """
    pages = all_wiki_pages()
    today = date.today().isoformat()
    if not pages:
        print("Wiki is empty. Ingest some sources first.")
        return
    print(f"Building graph from {len(pages)} wiki pages...")
    GRAPH_DIR.mkdir(parents=True, exist_ok=True)
    cache = load_cache()

    # Pass 1: extracted edges
    print(" Pass 1: extracting wikilinks...")
    nodes = build_nodes(pages)
    edges = build_extracted_edges(pages)
    print(f"{len(edges)} extracted edges")

    # Pass 2: inferred edges
    if infer:
        print(" Pass 2: inferring semantic relationships...")
        inferred = build_inferred_edges(pages, edges, cache)
        edges.extend(inferred)
        print(f"{len(inferred)} inferred edges")
        save_cache(cache)

    # Community detection
    print(" Running Louvain community detection...")
    communities = detect_communities(nodes, edges)
    for node in nodes:
        comm_id = communities.get(node["id"], -1)
        if comm_id >= 0:
            node["color"] = COMMUNITY_COLORS[comm_id % len(COMMUNITY_COLORS)]
        # -1 marks a node without a detected community.
        node["group"] = comm_id

    # Save graph.json
    graph_data = {"nodes": nodes, "edges": edges, "built": today}
    GRAPH_JSON.write_text(json.dumps(graph_data, indent=2), encoding="utf-8")
    print(f" saved: graph/graph.json ({len(nodes)} nodes, {len(edges)} edges)")

    # Save graph.html
    html = render_html(nodes, edges)
    GRAPH_HTML.write_text(html, encoding="utf-8")
    print(" saved: graph/graph.html")

    extracted_count = sum(1 for e in edges if e["type"] == "EXTRACTED")
    inferred_count = sum(1 for e in edges if e["type"] == "INFERRED")
    append_log(
        f"## [{today}] graph | Knowledge graph rebuilt\n\n"
        f"{len(nodes)} nodes, {len(edges)} edges "
        f"({extracted_count} extracted, {inferred_count} inferred)."
    )

    if open_browser:
        # as_uri() produces a correctly quoted file:// URL on every platform.
        webbrowser.open(GRAPH_HTML.resolve().as_uri())
# Command-line entry point: parse the two flags and run the build.
if __name__ == "__main__":
    cli = argparse.ArgumentParser(description="Build LLM Wiki knowledge graph")
    cli.add_argument("--no-infer", action="store_true", help="Skip semantic inference (faster)")
    cli.add_argument("--open", action="store_true", help="Open graph.html in browser")
    options = cli.parse_args()
    build_graph(infer=not options.no_infer, open_browser=options.open)

195
tools/ingest.py Normal file
View File

@@ -0,0 +1,195 @@
#!/usr/bin/env python3
"""
Ingest a source document into the LLM Wiki.
Usage:
python tools/ingest.py <path-to-source>
python tools/ingest.py raw/articles/my-article.md
The LLM reads the source, extracts knowledge, and updates the wiki:
- Creates wiki/sources/<slug>.md
- Updates wiki/index.md
- Updates wiki/overview.md (if warranted)
- Creates/updates entity and concept pages
- Appends to wiki/log.md
- Flags contradictions
"""
import os
import sys
import json
import hashlib
import re
from pathlib import Path
from datetime import date
import anthropic
# Repository root: the parent of the directory that contains this script.
REPO_ROOT = Path(__file__).parent.parent
# Wiki files this script reads and rewrites.
WIKI_DIR = REPO_ROOT / "wiki"
LOG_FILE = WIKI_DIR / "log.md"
INDEX_FILE = WIKI_DIR / "index.md"
OVERVIEW_FILE = WIKI_DIR / "overview.md"
# Schema/conventions document that ingest() includes in the prompt.
SCHEMA_FILE = REPO_ROOT / "CLAUDE.md"
def sha256(text: str) -> str:
    """Return the first 16 hex characters of the SHA-256 digest of *text*."""
    full_digest = hashlib.sha256(text.encode()).hexdigest()
    return full_digest[:16]
def read_file(path: Path) -> str:
    """Read *path* as UTF-8 text; a missing file yields an empty string."""
    if path.exists():
        return path.read_text(encoding="utf-8")
    return ""
def write_file(path: Path, content: str):
    """Write *content* to *path* as UTF-8, creating parent directories, and print the repo-relative path."""
    parent_dir = path.parent
    parent_dir.mkdir(parents=True, exist_ok=True)
    path.write_text(content, encoding="utf-8")
    shown = path.relative_to(REPO_ROOT)
    print(f" wrote: {shown}")
def build_wiki_context() -> str:
    """Assemble the wiki excerpt that is sent along with a new source.

    Contains, when present, wiki/index.md, wiki/overview.md and the five most
    recently modified pages under wiki/sources. Sections are separated by a
    horizontal rule. Returns an empty string when none of them exist.
    """
    parts = []
    if INDEX_FILE.exists():
        parts.append(f"## wiki/index.md\n{read_file(INDEX_FILE)}")
    if OVERVIEW_FILE.exists():
        parts.append(f"## wiki/overview.md\n{read_file(OVERVIEW_FILE)}")
    # Include a few recent source pages for contradiction checking
    sources_dir = WIKI_DIR / "sources"
    if sources_dir.exists():
        recent = sorted(sources_dir.glob("*.md"), key=lambda p: p.stat().st_mtime, reverse=True)[:5]
        for p in recent:
            # Pages are written as UTF-8, so read them as UTF-8 rather than the platform default.
            parts.append(f"## {p.relative_to(REPO_ROOT)}\n{p.read_text(encoding='utf-8')}")
    return "\n\n---\n\n".join(parts)
def parse_json_from_response(text: str) -> dict:
    """Extract the JSON object from a model response.

    Markdown code fences around the payload are removed, then a single JSON
    value is decoded starting at the first "{". Decoding stops at the end of
    that value, so explanatory text after the object is ignored even when it
    contains braces.

    Raises:
        ValueError: if the response contains no "{" at all.
        json.JSONDecodeError: if the text starting at the first "{" is not
            valid JSON (a subclass of ValueError).
    """
    # Strip markdown code fences if present
    text = re.sub(r"^```(?:json)?\s*", "", text.strip())
    text = re.sub(r"\s*```$", "", text.strip())
    start = text.find("{")
    if start == -1:
        raise ValueError("No JSON object found in response")
    parsed, _end = json.JSONDecoder().raw_decode(text, start)
    return parsed
def update_index(new_entry: str, section: str = "Sources"):
    """Insert *new_entry* as the first line under the ``## <section>`` heading of the index.

    A fresh index skeleton is used when the index file is missing or empty.
    When the heading does not exist it is appended at the end together with
    the entry.

    The heading is matched as a whole line, so it is found even when it is the
    last line of the file without a trailing newline, a deeper heading such as
    ``### Sources`` is not mistaken for it, and only its first occurrence
    receives the entry.
    """
    content = read_file(INDEX_FILE)
    if not content:
        content = "# Wiki Index\n\n## Overview\n- [Overview](overview.md) — living synthesis\n\n## Sources\n\n## Entities\n\n## Concepts\n\n## Syntheses\n"
    section_header = f"## {section}"
    heading = re.search(rf"^{re.escape(section_header)}[ \t]*$", content, re.MULTILINE)
    if heading:
        # Everything after the heading starts with its newline, or is empty at end of file.
        rest = content[heading.end():] or "\n"
        content = content[:heading.end()] + "\n" + new_entry + rest
    else:
        content += f"\n{section_header}\n{new_entry}\n"
    write_file(INDEX_FILE, content)
def append_log(entry: str):
    """Put *entry* (stripped) at the top of the log file, above the previous log text."""
    previous = read_file(LOG_FILE)
    updated = f"{entry.strip()}\n\n{previous}"
    write_file(LOG_FILE, updated)
def _inside_wiki(relative) -> bool:
    """Return True when WIKI_DIR / relative names a location strictly inside WIKI_DIR."""
    if not isinstance(relative, str) or not relative.strip():
        return False
    root = WIKI_DIR.resolve()
    candidate = (root / relative).resolve()
    return candidate != root and candidate.is_relative_to(root)


def ingest(source_path: str):
    """Ingest one source document into the wiki.

    Sends the source, the schema and the current wiki context to Claude, then
    writes the returned source page, entity/concept pages and optional
    overview, adds the index entry and the log entry, and prints any reported
    contradictions.

    The model's reply is treated as untrusted input (it is derived from the
    source document): required fields are checked before anything is written,
    and every path it supplies must stay inside the wiki directory.

    Exits with status 1 when the source file is missing, the reply cannot be
    parsed, a required field is absent, or the slug points outside the wiki.
    """
    import tempfile  # local import: only needed for the debug dump below

    source = Path(source_path)
    if not source.exists():
        print(f"Error: file not found: {source_path}")
        sys.exit(1)
    source_content = source.read_text(encoding="utf-8")
    source_hash = sha256(source_content)
    today = date.today().isoformat()
    print(f"\nIngesting: {source.name} (hash: {source_hash})")
    wiki_context = build_wiki_context()
    schema = read_file(SCHEMA_FILE)
    client = anthropic.Anthropic()
    prompt = f"""You are maintaining an LLM Wiki. Process this source document and integrate its knowledge into the wiki.
Schema and conventions:
{schema}
Current wiki state (index + recent pages):
{wiki_context if wiki_context else "(wiki is empty — this is the first source)"}
New source to ingest (file: {source.relative_to(REPO_ROOT) if source.is_relative_to(REPO_ROOT) else source.name}):
=== SOURCE START ===
{source_content}
=== SOURCE END ===
Today's date: {today}
Return ONLY a valid JSON object with these fields (no markdown fences, no prose outside the JSON):
{{
"title": "Human-readable title for this source",
"slug": "kebab-case-slug-for-filename",
"source_page": "full markdown content for wiki/sources/<slug>.md — use the source page format from the schema",
"index_entry": "- [Title](sources/slug.md) — one-line summary",
"overview_update": "full updated content for wiki/overview.md, or null if no update needed",
"entity_pages": [
{{"path": "entities/EntityName.md", "content": "full markdown content"}}
],
"concept_pages": [
{{"path": "concepts/ConceptName.md", "content": "full markdown content"}}
],
"contradictions": ["describe any contradiction with existing wiki content, or empty list"],
"log_entry": "## [{today}] ingest | <title>\\n\\nAdded source. Key claims: ..."
}}
"""
    print(" calling Claude API...")
    response = client.messages.create(
        model="claude-sonnet-4-6",
        max_tokens=8192,
        messages=[{"role": "user", "content": prompt}],
    )
    # An empty reply falls through to the parse-error path below.
    raw = response.content[0].text if response.content else ""
    try:
        data = parse_json_from_response(raw)
    except (ValueError, json.JSONDecodeError) as e:
        debug_path = Path(tempfile.gettempdir()) / "ingest_debug.txt"
        print(f"Error parsing API response: {e}")
        debug_path.write_text(raw, encoding="utf-8")
        print(f"Raw response saved to {debug_path}")
        sys.exit(1)

    # Validate everything up front so a bad reply cannot leave the wiki half-updated.
    required = ("title", "slug", "source_page", "index_entry", "log_entry")
    missing = [key for key in required if not isinstance(data.get(key), str) or not data[key].strip()]
    if missing:
        print(f"Error: API response is missing required field(s): {', '.join(missing)}")
        sys.exit(1)
    slug = data["slug"]
    source_rel = f"sources/{slug}.md"
    if not _inside_wiki(source_rel):
        print(f"Error: refusing to write outside the wiki (slug: {slug!r})")
        sys.exit(1)

    # Write source page
    write_file(WIKI_DIR / source_rel, data["source_page"])

    # Write entity pages, then concept pages
    for key in ("entity_pages", "concept_pages"):
        for page in data.get(key) or []:
            rel = page.get("path") if isinstance(page, dict) else None
            body = page.get("content") if isinstance(page, dict) else None
            if not _inside_wiki(rel) or not isinstance(body, str):
                print(f" skipped {key} item with an invalid or unsafe path: {rel!r}")
                continue
            write_file(WIKI_DIR / rel, body)

    # Update overview
    overview_update = data.get("overview_update")
    if isinstance(overview_update, str) and overview_update:
        write_file(OVERVIEW_FILE, overview_update)

    # Update index
    update_index(data["index_entry"], section="Sources")

    # Append log
    append_log(data["log_entry"])

    # Report contradictions
    contradictions = data.get("contradictions") or []
    if contradictions:
        print("\n ⚠️ Contradictions detected:")
        for c in contradictions:
            print(f" - {c}")
    print(f"\nDone. Ingested: {data['title']}")
# Command-line entry point: the first positional argument is the source path.
if __name__ == "__main__":
    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: python tools/ingest.py <path-to-source>")
        sys.exit(1)
    ingest(cli_args[0])

203
tools/lint.py Normal file
View File

@@ -0,0 +1,203 @@
#!/usr/bin/env python3
"""
Lint the LLM Wiki for health issues.
Usage:
python tools/lint.py
python tools/lint.py --save # save lint report to wiki/lint-report.md
Checks:
- Orphan pages (no inbound wikilinks from other pages)
- Broken wikilinks (pointing to pages that don't exist)
- Missing entity pages (entities mentioned in 3+ pages but no page)
- Contradictions between pages
- Data gaps and suggested new sources
"""
import re
import sys
import argparse
from pathlib import Path
from collections import defaultdict
from datetime import date
import anthropic
# Repository root: the parent of the directory that contains this script.
REPO_ROOT = Path(__file__).parent.parent
WIKI_DIR = REPO_ROOT / "wiki"
# Log file that append_log() rewrites.
LOG_FILE = WIKI_DIR / "log.md"
SCHEMA_FILE = REPO_ROOT / "CLAUDE.md"
def read_file(path: Path) -> str:
    """Return the UTF-8 contents of *path*, or "" if the file is absent."""
    if not path.exists():
        return ""
    text = path.read_text(encoding="utf-8")
    return text
def all_wiki_pages() -> list[Path]:
    """List the markdown files under WIKI_DIR, leaving out index.md, log.md and lint-report.md."""
    housekeeping = ("index.md", "log.md", "lint-report.md")

    def is_content_page(candidate: Path) -> bool:
        return candidate.name not in housekeeping

    return list(filter(is_content_page, WIKI_DIR.rglob("*.md")))
def extract_wikilinks(content: str) -> list[str]:
    """Return the text of every ``[[wikilink]]`` in *content*, in order, duplicates included."""
    link_pattern = re.compile(r'\[\[([^\]]+)\]\]')
    return [found.group(1) for found in link_pattern.finditer(content)]
def page_name_to_path(name: str) -> list[Path]:
    """Try to resolve a [[WikiLink]] to a file path.

    Returns every wiki page whose file stem equals *name*, ignoring case.
    """
    wanted = name.lower()
    # An exact stem match is always a case-insensitive match too, so one comparison suffices.
    return [page for page in all_wiki_pages() if page.stem.lower() == wanted]
def find_orphans(pages: list[Path]) -> list[Path]:
    """Return the pages that no OTHER page links to.

    Links are resolved by case-insensitive file stem. A page linking to itself
    does not count as inbound. wiki/overview.md is never reported.

    The stem lookup is built once from *pages* instead of re-walking the wiki
    directory for every link.
    """
    by_stem: dict[str, list[Path]] = defaultdict(list)
    for p in pages:
        by_stem[p.stem.lower()].append(p)

    inbound: dict[Path, int] = defaultdict(int)
    for p in pages:
        for link in extract_wikilinks(read_file(p)):
            for target in by_stem.get(link.lower(), ()):
                if target != p:
                    inbound[target] += 1

    overview = WIKI_DIR / "overview.md"
    return [p for p in pages if inbound.get(p, 0) == 0 and p != overview]
def find_broken_links(pages: list[Path]) -> list[tuple[Path, str]]:
    """Return (page, link) for every wikilink in *pages* that matches no wiki page.

    A link matches when its text equals a page's file stem, ignoring case.
    The set of known stems is collected once up front; resolving each link
    separately would walk the whole wiki directory once per link.
    """
    known_stems = {p.stem.lower() for p in all_wiki_pages()}
    broken = []
    for p in pages:
        for link in extract_wikilinks(read_file(p)):
            if link.lower() not in known_stems:
                broken.append((p, link))
    return broken
def find_missing_entities(pages: list[Path]) -> list[str]:
    """Find entity-like names mentioned in 3+ pages but lacking their own page.

    A name is counted once per page that links to it, however often that page
    repeats the link, and spellings that differ only in case are counted
    together. The spelling that was seen first is the one reported.
    """
    existing_pages = {p.stem.lower() for p in pages}
    page_counts: dict[str, int] = defaultdict(int)
    first_spelling: dict[str, str] = {}
    for p in pages:
        seen_on_page = set()
        for link in extract_wikilinks(read_file(p)):
            key = link.lower()
            if key in existing_pages or key in seen_on_page:
                continue
            seen_on_page.add(key)
            first_spelling.setdefault(key, link)
            page_counts[key] += 1
    return [first_spelling[key] for key, count in page_counts.items() if count >= 3]
def run_lint():
"""Run the structural and semantic lint and return the report as markdown text.

Prints progress and the finished report. Returns an empty string, without
calling the API, when the wiki has no pages.
"""
pages = all_wiki_pages()
today = date.today().isoformat()
if not pages:
print("Wiki is empty. Nothing to lint.")
return ""
print(f"Linting {len(pages)} wiki pages...")
# Deterministic checks
orphans = find_orphans(pages)
broken = find_broken_links(pages)
missing_entities = find_missing_entities(pages)
print(f" orphans: {len(orphans)}")
print(f" broken links: {len(broken)}")
print(f" missing entity pages: {len(missing_entities)}")
# Build context for semantic checks (contradictions, gaps)
# Use a sample of pages to stay within context limits
# The sample is simply the first 20 pages in all_wiki_pages() order, each cut to 1500 characters.
sample = pages[:20]
pages_context = ""
for p in sample:
rel = p.relative_to(REPO_ROOT)
pages_context += f"\n\n### {rel}\n{read_file(p)[:1500]}" # truncate long pages
client = anthropic.Anthropic()
print(" running semantic lint via Claude API...")
response = client.messages.create(
model="claude-sonnet-4-6",
max_tokens=3000,
messages=[{
"role": "user",
"content": f"""You are linting an LLM Wiki. Review the pages below and identify:
1. Contradictions between pages (claims that conflict)
2. Stale content (summaries that newer sources have superseded)
3. Data gaps (important questions the wiki can't answer — suggest specific sources to find)
4. Concepts mentioned but lacking depth
Wiki pages (sample of {len(sample)} pages):
{pages_context}
Return a markdown lint report with these sections:
## Contradictions
## Stale Content
## Data Gaps & Suggested Sources
## Concepts Needing More Depth
Be specific — name the exact pages and claims involved.
"""
}]
)
# NOTE(review): assumes the reply has at least one content block with a .text attribute — confirm.
semantic_report = response.content[0].text
# Compose full report
report_lines = [
f"# Wiki Lint Report — {today}",
"",
f"Scanned {len(pages)} pages.",
"",
"## Structural Issues",
"",
]
# One subsection per structural finding that has results.
if orphans:
report_lines.append("### Orphan Pages (no inbound links)")
for p in orphans:
report_lines.append(f"- `{p.relative_to(REPO_ROOT)}`")
report_lines.append("")
if broken:
report_lines.append("### Broken Wikilinks")
for page, link in broken:
report_lines.append(f"- `{page.relative_to(REPO_ROOT)}` links to `[[{link}]]` — not found")
report_lines.append("")
if missing_entities:
report_lines.append("### Missing Entity Pages (mentioned 3+ times but no page)")
for name in missing_entities:
report_lines.append(f"- `[[{name}]]`")
report_lines.append("")
if not orphans and not broken and not missing_entities:
report_lines.append("No structural issues found.")
report_lines.append("")
# The model-written semantic section follows a horizontal rule.
report_lines.append("---")
report_lines.append("")
report_lines.append(semantic_report)
report = "\n".join(report_lines)
print("\n" + report)
return report
def append_log(entry: str):
    """Put *entry* (stripped) at the top of the log file, above the previous log text."""
    previous = read_file(LOG_FILE)
    updated = f"{entry.strip()}\n\n{previous}"
    LOG_FILE.write_text(updated, encoding="utf-8")
# Command-line entry point: run the lint and, with --save, persist the report.
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Lint the LLM Wiki")
parser.add_argument("--save", action="store_true", help="Save lint report to wiki/lint-report.md")
args = parser.parse_args()
report = run_lint()
# run_lint() returns "" for an empty wiki, in which case nothing is saved.
if args.save and report:
report_path = WIKI_DIR / "lint-report.md"
report_path.write_text(report, encoding="utf-8")
print(f"\nSaved: {report_path.relative_to(REPO_ROOT)}")
# NOTE(review): indentation is not visible in this view, so it is unclear whether the
# log entry below is written only when the report is saved or on every run — confirm.
today = date.today().isoformat()
append_log(f"## [{today}] lint | Wiki health check\n\nRan lint. See lint-report.md for details.")

177
tools/query.py Normal file
View File

@@ -0,0 +1,177 @@
#!/usr/bin/env python3
"""
Query the LLM Wiki.
Usage:
python tools/query.py "What are the main themes across all sources?"
python tools/query.py "How does ConceptA relate to ConceptB?" --save
python tools/query.py "Summarize everything about EntityName" --save syntheses/my-analysis.md
Flags:
--save Save the answer back into the wiki (prompts for filename)
--save <path> Save to a specific wiki path
"""
import sys
import re
import json
import argparse
from pathlib import Path
from datetime import date
import anthropic
# Repository root: the parent of the directory that contains this script.
REPO_ROOT = Path(__file__).parent.parent
WIKI_DIR = REPO_ROOT / "wiki"
# Index that query() reads to pick pages and updates when an answer is saved.
INDEX_FILE = WIKI_DIR / "index.md"
LOG_FILE = WIKI_DIR / "log.md"
# Schema/conventions document that query() includes in the answer prompt.
SCHEMA_FILE = REPO_ROOT / "CLAUDE.md"
def read_file(path: Path) -> str:
    """Return *path* decoded as UTF-8; return "" when there is no such file."""
    exists = path.exists()
    if exists:
        return path.read_text(encoding="utf-8")
    return ""
def write_file(path: Path, content: str):
    """Write *content* to *path* as UTF-8, creating parent directories, and print the repo-relative path."""
    parent_dir = path.parent
    parent_dir.mkdir(parents=True, exist_ok=True)
    path.write_text(content, encoding="utf-8")
    shown = path.relative_to(REPO_ROOT)
    print(f" saved: {shown}")
def find_relevant_pages(question: str, index_content: str) -> list[Path]:
    """Pick pages from the index whose link title shares a word with the question.

    Only markdown links (``[title](href)``) in *index_content* are considered.
    A link qualifies when any title word longer than three characters occurs
    as a substring of the lower-cased question and the linked file exists
    under WIKI_DIR. The overview page, when it exists, is placed first. At
    most 12 pages are returned.
    """
    question_lower = question.lower()
    matches = []
    for title, href in re.findall(r'\[([^\]]+)\]\(([^)]+)\)', index_content):
        keywords = [word for word in title.lower().split() if len(word) > 3]
        if not any(word in question_lower for word in keywords):
            continue
        candidate = WIKI_DIR / href
        if candidate.exists():
            matches.append(candidate)
    # Always include overview
    overview = WIKI_DIR / "overview.md"
    if overview.exists() and overview not in matches:
        matches.insert(0, overview)
    return matches[:12]  # cap to avoid context overflow
def append_log(entry: str):
    """Put *entry* (stripped) at the top of the log file, above the previous log text."""
    previous = read_file(LOG_FILE)
    updated = f"{entry.strip()}\n\n{previous}"
    LOG_FILE.write_text(updated, encoding="utf-8")
def query(question: str, save_path: str | None = None):
    """Answer *question* from the wiki and optionally file the answer back into it.

    Args:
        question: the question to answer.
        save_path: None to only print the answer; "" to prompt for a slug and
            save under syntheses/; any other value is a path relative to the
            wiki directory to save to.

    Pages are chosen by keyword match against the index. When that finds at
    most one page, Claude is asked to choose from the index instead. Paths
    returned by the model are only used when they are regular files inside the
    wiki directory. If none qualify, the keyword result is kept.

    Exits with status 1 when the index is missing or empty.
    """
    today = date.today().isoformat()
    client = anthropic.Anthropic()

    # Step 1: Read index
    index_content = read_file(INDEX_FILE)
    if not index_content:
        print("Wiki is empty. Ingest some sources first with: python tools/ingest.py <source>")
        sys.exit(1)

    # Step 2: Find relevant pages
    relevant_pages = find_relevant_pages(question, index_content)
    # If no keyword match, ask Claude to identify relevant pages from the index
    if not relevant_pages or len(relevant_pages) <= 1:
        print(" selecting relevant pages via Claude...")
        selection_response = client.messages.create(
            model="claude-haiku-4-5-20251001",
            max_tokens=512,
            messages=[{
                "role": "user",
                "content": f"Given this wiki index:\n\n{index_content}\n\nWhich pages are most relevant to answering: \"{question}\"\n\nReturn ONLY a JSON array of relative file paths (as listed in the index), e.g. [\"sources/foo.md\", \"concepts/Bar.md\"]. Maximum 10 pages."
            }]
        )
        blocks = selection_response.content
        raw = blocks[0].text.strip() if blocks else ""
        raw = re.sub(r"^```(?:json)?\s*", "", raw)
        raw = re.sub(r"\s*```$", "", raw)
        try:
            paths = json.loads(raw)
        except (json.JSONDecodeError, TypeError):
            paths = None
        if isinstance(paths, list):
            wiki_root = WIKI_DIR.resolve()
            selected = []
            for rel in paths:
                if not isinstance(rel, str):
                    continue
                candidate = WIKI_DIR / rel
                # Model output is untrusted: never read files outside the wiki.
                if candidate.is_file() and candidate.resolve().is_relative_to(wiki_root):
                    selected.append(candidate)
            if selected:
                relevant_pages = selected

    # Step 3: Read relevant pages
    pages_context = "".join(
        f"\n\n### {p.relative_to(REPO_ROOT)}\n{p.read_text(encoding='utf-8')}"
        for p in relevant_pages
    )
    if not pages_context:
        pages_context = f"\n\n### wiki/index.md\n{index_content}"
    schema = read_file(SCHEMA_FILE)

    # Step 4: Synthesize answer
    print(f" synthesizing answer from {len(relevant_pages)} pages...")
    response = client.messages.create(
        model="claude-sonnet-4-6",
        max_tokens=4096,
        messages=[{
            "role": "user",
            "content": f"""You are querying an LLM Wiki to answer a question. Use the wiki pages below to synthesize a thorough answer. Cite sources using [[PageName]] wikilink syntax.
Schema:
{schema}
Wiki pages:
{pages_context}
Question: {question}
Write a well-structured markdown answer with headers, bullets, and [[wikilink]] citations. At the end, add a ## Sources section listing the pages you drew from.
"""
        }]
    )
    answer = response.content[0].text if response.content else ""
    print("\n" + "=" * 60)
    print(answer)
    print("=" * 60)

    # Step 5: Optionally save answer
    if save_path is not None:
        if save_path == "":
            # Prompt for filename
            slug = input("\nSave as (slug, e.g. 'my-analysis'): ").strip()
            if not slug:
                print("Skipping save.")
                return
            save_path = f"syntheses/{slug}.md"
        full_save_path = WIKI_DIR / save_path
        # Keep the title a valid one-line double-quoted YAML scalar.
        one_line_question = " ".join(question.split())
        safe_title = one_line_question[:80].replace("\\", "\\\\").replace('"', "'")
        frontmatter = f"""---
title: "{safe_title}"
type: synthesis
tags: []
sources: []
last_updated: {today}
---
"""
        write_file(full_save_path, frontmatter + answer)

        # Update index
        index_content = read_file(INDEX_FILE)
        # Square brackets in the link text would end the markdown link early.
        link_text = one_line_question[:60].replace("[", "(").replace("]", ")")
        entry = f"- [{link_text}]({save_path}) — synthesis"
        heading = re.search(r"^## Syntheses[ \t]*$", index_content, re.MULTILINE)
        if heading:
            rest = index_content[heading.end():] or "\n"
            index_content = index_content[:heading.end()] + "\n" + entry + rest
        else:
            # No section yet: add it rather than silently dropping the entry.
            index_content = index_content.rstrip("\n") + f"\n\n## Syntheses\n{entry}\n"
        INDEX_FILE.write_text(index_content, encoding="utf-8")
        print(f" indexed: {save_path}")

    # Append to log
    append_log(f"## [{today}] query | {question[:80]}\n\nSynthesized answer from {len(relevant_pages)} pages." +
               (f" Saved to {save_path}." if save_path else ""))
# Command-line entry point: a required question plus an optional --save [path].
if __name__ == "__main__":
    cli = argparse.ArgumentParser(description="Query the LLM Wiki")
    cli.add_argument("question", help="Question to ask the wiki")
    cli.add_argument(
        "--save",
        nargs="?",
        const="",
        default=None,
        help="Save answer to wiki (optionally specify path)",
    )
    options = cli.parse_args()
    query(options.question, options.save)