192 lines
6.6 KiB
Python
192 lines
6.6 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Query the LLM Wiki.
|
|
|
|
Usage:
|
|
python tools/query.py "What are the main themes across all sources?"
|
|
python tools/query.py "How does ConceptA relate to ConceptB?" --save
|
|
python tools/query.py "Summarize everything about EntityName" --save syntheses/my-analysis.md
|
|
|
|
Flags:
|
|
--save Save the answer back into the wiki (prompts for filename)
|
|
--save <path> Save to a specific wiki path
|
|
"""
|
|
|
|
import sys
|
|
import re
|
|
import json
|
|
import argparse
|
|
from pathlib import Path
|
|
from datetime import date
|
|
|
|
import anthropic
|
|
|
|
# Repository root: two directory levels above this script file.
REPO_ROOT = Path(__file__).parent.parent

# Wiki directory and the two bookkeeping files kept inside it.
WIKI_DIR = REPO_ROOT.joinpath("wiki")
INDEX_FILE = WIKI_DIR.joinpath("index.md")
LOG_FILE = WIKI_DIR.joinpath("log.md")

# Schema document at the repository root; its text is embedded in the
# synthesis prompt.
SCHEMA_FILE = REPO_ROOT.joinpath("CLAUDE.md")
|
|
|
|
|
|
def read_file(path: Path) -> str:
    """Return the UTF-8 text of *path*, or an empty string if it does not exist."""
    if not path.exists():
        return ""
    return path.read_text(encoding="utf-8")
|
|
|
|
|
|
def write_file(path: Path, content: str):
    """Write *content* to *path* as UTF-8 and print where it was saved.

    Missing parent directories are created first.

    Args:
        path: Destination file.
        content: Text to write; any existing file is overwritten.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(content, encoding="utf-8")
    try:
        shown = path.relative_to(REPO_ROOT)
    except ValueError:
        # relative_to() raises when path is not under REPO_ROOT (for example
        # an absolute target). The file is already written at this point, so
        # report the path as given instead of crashing the caller.
        shown = path
    print(f" saved: {shown}")
|
|
|
|
|
|
# Runs of two or more consecutive non-ASCII characters (e.g. a CJK word
# embedded in a mixed-script title).
_NON_ASCII_RUN = re.compile(r'[^\x00-\x7F]{2,}')


def _title_matches(title: str, question_lower: str) -> bool:
    """Return True if a link title looks related to the lower-cased question."""
    title_lower = title.lower()

    # 1. Space-separated titles: any word longer than 3 characters that
    #    occurs in the question (substring test, not whole-word).
    if any(word in question_lower for word in title_lower.split() if len(word) > 3):
        return True

    # 2. The whole title appears in the question (covers short titles,
    #    e.g. two-character CJK names, that rule 1 skips).
    if len(title_lower) >= 2 and title_lower in question_lower:
        return True

    # 3. Any run of >= 2 non-ASCII characters from the title appears in
    #    the question.
    return any(chunk in question_lower for chunk in _NON_ASCII_RUN.findall(title_lower))


def find_relevant_pages(question: str, index_content: str) -> list[Path]:
    """Pick wiki pages linked from the index whose titles match the question.

    Only markdown links of the form ``[title](href)`` are considered; each
    href is taken relative to WIKI_DIR. ``overview.md`` is put first whenever
    it exists.

    Args:
        question: The user's question.
        index_content: Text of the wiki index.

    Returns:
        Up to 12 distinct existing files, in index order after the overview.
    """
    md_links = re.findall(r'\[([^\]]+)\]\(([^)]+)\)', index_content)
    question_lower = question.lower()
    relevant = []

    for title, href in md_links:
        if not _title_matches(title, question_lower):
            continue
        p = WIKI_DIR / href
        # is_file() rather than exists(): a link to a directory (such as
        # "sources/") exists but cannot be read as a page later on.
        if p.is_file() and p not in relevant:
            relevant.append(p)

    # Always include overview
    overview = WIKI_DIR / "overview.md"
    if overview.is_file() and overview not in relevant:
        relevant.insert(0, overview)

    return relevant[:12]  # cap to avoid context overflow
|
|
|
|
|
|
def append_log(entry: str):
    """Put *entry* at the top of the log file, above all earlier entries.

    The entry is stripped of surrounding whitespace and separated from the
    previous log text by one blank line. A missing log file is treated as
    empty.
    """
    previous = LOG_FILE.read_text(encoding="utf-8") if LOG_FILE.exists() else ""
    updated = f"{entry.strip()}\n\n{previous}"
    LOG_FILE.write_text(updated, encoding="utf-8")
|
|
|
|
|
|
# A markdown heading line (level 2 or deeper) reading exactly "Syntheses".
_SYNTHESES_HEADING = re.compile(r"^#{2,} Syntheses[ \t]*$", re.MULTILINE)


def _select_pages_via_claude(client, question: str, index_content: str) -> list[Path]:
    """Ask Claude which indexed pages are relevant and return the usable ones.

    The reply is expected to be a JSON array of paths relative to WIKI_DIR.
    Anything else (unparseable text, a non-list, non-string items, paths
    that are not regular files or that resolve outside WIKI_DIR) is ignored,
    so the result may be empty.
    """
    selection_response = client.messages.create(
        model="claude-haiku-4-5-20251001",
        max_tokens=512,
        messages=[{
            "role": "user",
            "content": f"Given this wiki index:\n\n{index_content}\n\nWhich pages are most relevant to answering: \"{question}\"\n\nReturn ONLY a JSON array of relative file paths (as listed in the index), e.g. [\"sources/foo.md\", \"concepts/Bar.md\"]. Maximum 10 pages."
        }]
    )
    raw = selection_response.content[0].text.strip()
    # Drop a surrounding ``` / ```json code fence if the model added one.
    raw = re.sub(r"^```(?:json)?\s*", "", raw)
    raw = re.sub(r"\s*```$", "", raw)

    try:
        paths = json.loads(raw)
    except json.JSONDecodeError:
        return []
    if not isinstance(paths, list):
        return []

    wiki_root = WIKI_DIR.resolve()
    pages = []
    for rel in paths:
        if not isinstance(rel, str) or not rel.strip():
            continue
        candidate = WIKI_DIR / rel
        # The paths come from model output: accept only regular files that
        # stay inside the wiki, so an absolute or "../" path is never read.
        if not candidate.is_file() or not candidate.resolve().is_relative_to(wiki_root):
            continue
        if candidate not in pages:
            pages.append(candidate)
    return pages[:10]  # the prompt asks for at most 10 pages


def _build_pages_context(pages: list[Path], index_content: str) -> str:
    """Concatenate the pages into prompt sections; fall back to the index if none."""
    sections = [
        f"\n\n### {p.relative_to(REPO_ROOT)}\n{p.read_text(encoding='utf-8')}"
        for p in pages
    ]
    if not sections:
        return f"\n\n### wiki/index.md\n{index_content}"
    return "".join(sections)


def _save_answer(question: str, answer: str, save_path: str, today: str) -> None:
    """Write the answer as a synthesis page and link it from the index."""
    full_save_path = WIKI_DIR / save_path
    # json.dumps yields a double-quoted, escaped string, which is also a valid
    # YAML double-quoted scalar; a raw '"' or '\' in the question would
    # otherwise corrupt the frontmatter.
    title = json.dumps(question[:80], ensure_ascii=False)
    frontmatter = f"""---
title: {title}
type: synthesis
tags: []
sources: []
last_updated: {today}
---

"""
    write_file(full_save_path, frontmatter + answer)

    # Update index
    index_content = read_file(INDEX_FILE)
    entry = f"- [{question[:60]}]({save_path}) — synthesis"
    heading = _SYNTHESES_HEADING.search(index_content)
    if heading:
        # Insert directly below the first Syntheses heading.
        cut = heading.end()
        index_content = f"{index_content[:cut]}\n{entry}{index_content[cut:]}"
    else:
        # No Syntheses section yet: add one at the end so the new page is
        # still reachable from the index.
        body = index_content.rstrip()
        separator = "\n\n" if body else ""
        index_content = f"{body}{separator}## Syntheses\n{entry}"
    if not index_content.endswith("\n"):
        index_content += "\n"
    INDEX_FILE.write_text(index_content, encoding="utf-8")
    print(f" indexed: {save_path}")


def query(question: str, save_path: str | None = None):
    """Answer *question* from the wiki, print it, and optionally save it.

    Pages are chosen by keyword match against the index; when that finds at
    most one page, Claude is asked to pick pages from the index and its
    choices are added to the keyword result. The chosen pages and the schema
    file are sent to Claude to synthesize a markdown answer. Every answered
    query is recorded in the log.

    Args:
        question: The question to answer.
        save_path: None to not save; "" to prompt for a slug (saved as
            ``syntheses/<slug>.md``); otherwise a path relative to WIKI_DIR.

    Raises:
        SystemExit: With status 1 when the index is missing or empty.
    """
    today = date.today().isoformat()

    # Step 1: Read index
    index_content = read_file(INDEX_FILE)
    if not index_content:
        print("Wiki is empty. Ingest some sources first with: python tools/ingest.py <source>")
        sys.exit(1)

    client = anthropic.Anthropic()

    # Step 2: Find relevant pages (regular files only, so each can be read)
    relevant_pages = [p for p in find_relevant_pages(question, index_content) if p.is_file()]

    # If the keyword match found little, ask Claude to identify pages from
    # the index. Its picks are merged in rather than replacing the list, so
    # an already-selected page (the overview) is not dropped.
    if len(relevant_pages) <= 1:
        print(" selecting relevant pages via Claude...")
        for page in _select_pages_via_claude(client, question, index_content):
            if page not in relevant_pages:
                relevant_pages.append(page)

    # Step 3: Read relevant pages
    pages_context = _build_pages_context(relevant_pages, index_content)
    schema = read_file(SCHEMA_FILE)

    # Step 4: Synthesize answer
    print(f" synthesizing answer from {len(relevant_pages)} pages...")
    response = client.messages.create(
        model="claude-sonnet-4-6",
        max_tokens=4096,
        messages=[{
            "role": "user",
            "content": f"""You are querying an LLM Wiki to answer a question. Use the wiki pages below to synthesize a thorough answer. Cite sources using [[PageName]] wikilink syntax.

Schema:
{schema}

Wiki pages:
{pages_context}

Question: {question}

Write a well-structured markdown answer with headers, bullets, and [[wikilink]] citations. At the end, add a ## Sources section listing the pages you drew from.
"""
        }]
    )

    answer = response.content[0].text
    print("\n" + "=" * 60)
    print(answer)
    print("=" * 60)

    # Step 5: Optionally save answer
    if save_path == "":
        # Prompt for filename; a closed stdin counts as "no slug".
        try:
            slug = input("\nSave as (slug, e.g. 'my-analysis'): ").strip()
        except EOFError:
            slug = ""
        if slug:
            save_path = f"syntheses/{slug}.md"
        else:
            print("Skipping save.")
            # Fall through so the query is still logged, just without a
            # saved path.
            save_path = None

    if save_path:
        _save_answer(question, answer, save_path, today)

    # Append to log
    append_log(f"## [{today}] query | {question[:80]}\n\nSynthesized answer from {len(relevant_pages)} pages." +
               (f" Saved to {save_path}." if save_path else ""))
|
|
|
|
|
|
def _main() -> None:
    """Parse the command line and run a single wiki query."""
    cli = argparse.ArgumentParser(description="Query the LLM Wiki")
    cli.add_argument("question", help="Question to ask the wiki")
    # --save with no value yields "" (prompt for a slug); omitted yields None.
    cli.add_argument(
        "--save",
        nargs="?",
        const="",
        default=None,
        help="Save answer to wiki (optionally specify path)",
    )
    opts = cli.parse_args()
    query(opts.question, opts.save)


if __name__ == "__main__":
    _main()
|