From 6868034554ad5c59d2d171637df591195a2e6f6a Mon Sep 17 00:00:00 2001
From: watsonk1998 <1515673657@qq.com>
Date: Tue, 14 Apr 2026 00:37:44 +0800
Subject: [PATCH] feat(tools): add self-healing graph utility and automated orchestration docs

- Introduced tools/heal.py to automatically identify and build missing structural concepts and entities by tracing contextual usages using litellm.
- Add docs/automated-sync.md with cron/launchd orchestration best-practices.
- Closes Issue #16 on Graph Integrity Constraints.
---
 docs/automated-sync.md | 101 +++++++++++++++++++++++++++++++++++++++++
 tools/heal.py          | 100 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 201 insertions(+)
 create mode 100644 docs/automated-sync.md
 create mode 100755 tools/heal.py

diff --git a/docs/automated-sync.md b/docs/automated-sync.md
new file mode 100644
index 0000000..fc7f06e
--- /dev/null
+++ b/docs/automated-sync.md
@@ -0,0 +1,101 @@
+# Automated Wiki Synchronization Guide
+
+Managing an LLM Wiki works best when it constantly reflects your background note-taking system. Instead of manually ingesting files every time you write something new, you can orchestrate an end-to-end automation pipeline.
+
+This guide outlines a production-grade cron/launchd strategy for local Mac/Linux environments.
+
+## The Two-Step Architecture
+
+LLM Wiki Agent ingestion is a two-step process:
+1. **Syncing to `raw/`**: Getting files from your personal vault/tools into the agent's staging area.
+2. **Batch Ingestion**: Triggering `tools/ingest.py` on the synchronized directories to synthesize and weave them into the graph.
+
+### Step 1: The Master Orchestrator Script
+
+Create a comprehensive shell script in your wiki root (`daily-automated-sync.sh`):
+
+```bash
+#!/usr/bin/env bash
+set -uo pipefail
+
+# Define variables
+LAB_DIR="$HOME/projects/active/personal-wiki-lab"
+LOG_FILE="$LAB_DIR/automation-cron.log"
+DATE=$(date "+%Y-%m-%d %H:%M:%S")
+
+echo "=====================================================" >> "$LOG_FILE"
+echo "[$DATE] Starting automated wiki synchronization..." >> "$LOG_FILE"
+
+cd "$LAB_DIR" || exit 1
+
+# 1. Run your personal Vault-to-Raw symlink script here
+# Example: ./sync-raw.sh >> "$LOG_FILE" 2>&1
+
+# 2. Trigger Litellm Batch Ingestion using LLM of your choice
+export LLM_MODEL="gemini/gemini-3-flash-preview"
+export GEMINI_API_KEY="AIzaSy..." # or export OPENAI_API_KEY
+
+echo "[$DATE] Batch ingesting markdown files..." >> "$LOG_FILE"
+find raw/ -type l -name "*.md" -o -type f -name "*.md" | \
+while IFS= read -r file; do  # IFS=/-r: keep whitespace and backslashes in file names intact
+    python3 tools/ingest.py "$file" >> "$LOG_FILE" 2>&1
+done
+
+# 3. Heal Graph Context (Auto-resolves broken semantic links)
+echo "[$DATE] Healing broken nodes..." >> "$LOG_FILE"
+python3 tools/heal.py >> "$LOG_FILE" 2>&1
+
+echo "[$(date "+%Y-%m-%d %H:%M:%S")] Automated sync completed." >> "$LOG_FILE"
+echo "=====================================================" >> "$LOG_FILE"
+```
+
+Don't forget to make it executable: `chmod +x daily-automated-sync.sh`.
+
+### Step 2: System Scheduler (macOS launchd)
+
+For macOS, `launchd` is significantly more robust than `cron`.
+
+Create a `.plist` file at `~/Library/LaunchAgents/com.personal-wiki-sync.plist`:
+
+```xml
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+    <key>Label</key>
+    <string>com.personal-wiki-sync</string>
+    <key>ProgramArguments</key>
+    <array>
+        <string>/bin/bash</string>
+        <string>/Users/your-username/projects/active/personal-wiki-lab/daily-automated-sync.sh</string>
+    </array>
+
+    <!-- Run every day at 02:00 -->
+    <key>StartCalendarInterval</key>
+    <dict>
+        <key>Hour</key>
+        <integer>2</integer>
+        <key>Minute</key>
+        <integer>0</integer>
+    </dict>
+
+    <!-- Whether to also run once when the agent is loaded (value assumed; adjust as needed) -->
+    <key>RunAtLoad</key>
+    <false/>
+
+    <!-- launchd-level output of the wrapper script itself -->
+    <key>StandardOutPath</key>
+    <string>/Users/your-username/projects/active/personal-wiki-lab/daemon.stdout.log</string>
+    <key>StandardErrorPath</key>
+    <string>/Users/your-username/projects/active/personal-wiki-lab/daemon.stderr.log</string>
+</dict>
+</plist>
+```
+
+Load the daemon:
+```bash
+launchctl load ~/Library/LaunchAgents/com.personal-wiki-sync.plist
+```
+
+### Self-Healing & Health Monitoring
+Since the automation runs silently at night, check `automation-cron.log` regularly: the script redirects the output of `tools/ingest.py` and `tools/heal.py` (including stderr) into that file, so API failures show up there, while `daemon.stderr.log` only captures errors from the wrapper script itself. The orchestrated script includes `tools/heal.py`, which is strongly recommended: it generates entity pages for entities that were referenced throughout your day but never given a page of their own.
diff --git a/tools/heal.py b/tools/heal.py
new file mode 100755
index 0000000..cf85a68
--- /dev/null
+++ b/tools/heal.py
@@ -0,0 +1,184 @@
+#!/usr/bin/env python3
+"""
+Graph Self-Healing Tool
+
+Automatically retrieves "Missing Entity Pages" from the wiki and generates
+comprehensive definition pages for them using the LLM.
+It resolves broken entity links by scanning existing contexts where the entity is referenced.
+
+Usage:
+    python tools/heal.py
+"""
+
+import os
+import re
+import sys
+from pathlib import Path
+
+try:
+    from litellm import completion
+except ImportError:
+    print("Error: litellm not installed. Run: pip install litellm")
+    sys.exit(1)
+
+# Ensure tools can be imported
+sys.path.insert(0, str(Path(__file__).parent.parent))
+
+from tools.lint import find_missing_entities, all_wiki_pages
+
+REPO_ROOT = Path(__file__).parent.parent
+WIKI_DIR = REPO_ROOT / "wiki"
+ENTITIES_DIR = WIKI_DIR / "entities"
+
+# Directory names whose pages are definitions themselves, not usage context.
+_DEFINITION_DIRS = frozenset({"entities", "concepts"})
+# Number of characters quoted from each source page in the prompt.
+_EXCERPT_CHARS = 800
+
+
+def call_llm(prompt: str, max_tokens: int = 1500) -> str:
+    """Send *prompt* as a single user message and return the reply text.
+
+    The model is taken from the LLM_MODEL environment variable. Credentials
+    are left to litellm's standard environment variables
+    (e.g., GEMINI_API_KEY, ANTHROPIC_API_KEY, OPENAI_API_KEY).
+
+    Raises:
+        ValueError: If the reply contains no text.
+    """
+    model = os.getenv("LLM_MODEL", "claude-3-5-haiku-latest")  # default to fast model
+
+    response = completion(
+        model=model,
+        messages=[{"role": "user", "content": prompt}],
+        max_tokens=max_tokens,
+    )
+    content = response.choices[0].message.content
+    if not content or not content.strip():
+        # An empty page would hide the broken link without defining anything.
+        raise ValueError(f"model {model!r} returned an empty response")
+    return content
+
+
+def _is_definition_page(page: Path) -> bool:
+    """Return True if *page* lives under an entities/ or concepts/ directory."""
+    try:
+        # Compare directory names inside the wiki only, so an ancestor path
+        # that merely contains the word "entities" does not exclude every page.
+        dirs = page.relative_to(WIKI_DIR).parts[:-1]
+    except ValueError:
+        dirs = (page.parent.name,)
+    return not _DEFINITION_DIRS.isdisjoint(dirs)
+
+
+def _excerpt(text: str, entity: str, width: int = _EXCERPT_CHARS) -> str:
+    """Return up to *width* characters of *text* around the first mention of *entity*."""
+    match = re.search(re.escape(entity), text, re.IGNORECASE)
+    if match is None:
+        return text[:width]
+    start = max(0, match.start() - width // 2)
+    return text[start:start + width]
+
+
+def _entity_page_path(entity: str) -> Path:
+    """Return the output path for *entity*, rejecting names that leave ENTITIES_DIR."""
+    out_path = ENTITIES_DIR / f"{entity}.md"
+    if out_path.resolve().parent != ENTITIES_DIR.resolve():
+        raise ValueError(f"unsafe entity name {entity!r}: path escapes {ENTITIES_DIR}")
+    return out_path
+
+
+def search_sources(entity: str, pages: list[Path], max_sources: int = 15) -> list[Path]:
+    """Find up to *max_sources* pages where this entity is mentioned natively.
+
+    Pages under entities/ or concepts/ are skipped. Matching is a
+    case-insensitive substring test; scanning stops once the limit is reached.
+    """
+    sources: list[Path] = []
+    if max_sources <= 0:
+        return sources
+    needle = entity.lower()
+    for p in pages:
+        if _is_definition_page(p):
+            continue
+        try:
+            content = p.read_text(encoding="utf-8")
+        except (OSError, UnicodeDecodeError) as e:
+            # One unreadable page should not abort the whole healing run.
+            print(f"  [!] Skipping unreadable page {p}: {e}")
+            continue
+        if needle in content.lower():
+            sources.append(p)
+            if len(sources) >= max_sources:
+                break
+    return sources
+
+
+def _heal_entity(entity: str, pages: list[Path]) -> Path:
+    """Generate and write the page for one entity; return the written path."""
+    out_path = _entity_page_path(entity)
+    if out_path.exists():
+        raise FileExistsError(f"{out_path} already exists; refusing to overwrite")
+
+    sources = search_sources(entity, pages)
+    context = "".join(
+        f"\n\n### {s.name}\n{_excerpt(s.read_text(encoding='utf-8'), entity)}"
+        for s in sources
+    )
+
+    prompt = f"""You are filling a data gap in the Personal LLM Wiki.
+Create an Entity definition page for "{entity}".
+
+Here is how the entity appears in the current sources:
+{context}
+
+Format:
+---
+title: "{entity}"
+type: entity
+tags: []
+sources: {[s.name for s in sources]}
+---
+
+# {entity}
+
+Write a comprehensive paragraph defining what `{entity}` means in the context of this wiki, its main significance, and any actions or associations related to it.
+"""
+    out_path.write_text(call_llm(prompt), encoding="utf-8")
+    return out_path
+
+
+def heal_missing_entities() -> int:
+    """Create an entity page for every missing entity reported by tools.lint.
+
+    Returns:
+        The number of entities that could not be healed.
+    """
+    pages = all_wiki_pages()
+    missing_entities = find_missing_entities(pages)
+
+    if not missing_entities:
+        print("Graph is fully connected. No missing entities found!")
+        return 0
+
+    ENTITIES_DIR.mkdir(exist_ok=True, parents=True)
+    print(f"Found {len(missing_entities)} missing entity nodes. Commencing auto-heal...")
+
+    failures = 0
+    for entity in missing_entities:
+        print(f"Healing entity page for: {entity}")
+        try:
+            out_path = _heal_entity(entity, pages)
+        except Exception as e:
+            # Batch boundary: report and keep going so one bad entity
+            # (API error, unsafe name, I/O failure) does not stop the rest.
+            failures += 1
+            print(f"  [!] Failed to generate {entity}: {e}")
+        else:
+            print(f"  -> Saved to {out_path.relative_to(REPO_ROOT)}")
+    return failures
+
+
+if __name__ == "__main__":
+    # A non-zero exit status lets a scheduler or wrapper script notice failures.
+    sys.exit(1 if heal_missing_entities() else 0)