#!/usr/bin/env python3
"""
Wiki <-> Raw three-way sync tool.

Features:
  - Detect file changes under raw/ (added / modified / deleted)
  - Hand ingestion off to a Claude Code agent (litellm is not used)
  - Maintain the manifest.json state mapping
  - Detect orphan entity/concept pages (report only, never delete)

Usage:
  python tools/sync.py --check      preview changes (nothing is executed)
  python tools/sync.py --sync       run the sync
  python tools/sync.py --rebuild    rebuild wiki/index from the manifest (fallback)

manifest.json format:
  {
    "version": 1,
    "updated_at": "ISO timestamp",
    "files": {
      "relative/path/to/file.md": {
        "hash": "sha256",
        "modified": "ISO timestamp",
        "slug": "wiki-source-slug",
        "source_path": "wiki/sources/slug.md",
        "ingested": true
      }
    }
  }
"""

import os
import sys
import json
import hashlib
import subprocess
from pathlib import Path
from datetime import datetime, timezone

REPO_ROOT = Path(__file__).parent.parent.resolve()
WIKI_DIR = REPO_ROOT / "wiki"
# manifest.json lives in tools/ rather than wiki/ to avoid symlink path issues
MANIFEST_FILE = Path(__file__).parent / "manifest.json"
SCHEMA_FILE = REPO_ROOT / "CLAUDE.md"
# Name of the tmux session that hosts the interactive Claude Code agent
TMUX_SESSION = "wiki-ingest"


# ─── Utility functions ──────────────────────────────────────

def green(text):
    """Wrap *text* in the ANSI bright-green escape sequence."""
    return f"\033[92m{text}\033[0m"


def yellow(text):
    """Wrap *text* in the ANSI bright-yellow escape sequence."""
    return f"\033[93m{text}\033[0m"


def red(text):
    """Wrap *text* in the ANSI bright-red escape sequence."""
    return f"\033[91m{text}\033[0m"


def dim(text):
    """Wrap *text* in the ANSI dim escape sequence."""
    return f"\033[2m{text}\033[0m"


def bold(text):
    """Wrap *text* in the ANSI bold escape sequence."""
    return f"\033[1m{text}\033[0m"


def log(msg, style="normal"):
    """Print *msg* to stdout behind the prefix registered for *style*.

    Unknown styles fall back to a single-space prefix.
    """
    prefixes = {
        "normal": " ",
        "info": " ℹ ",
        "success": " ✓ ",
        "warn": " ⚠ ",
        "error": " ✗ ",
        "section": "\n── ",
    }
    print(f"{prefixes.get(style, ' ')}{msg}")


def sha256_file(path: Path) -> str:
    """Return the first 16 hex characters of the SHA-256 digest of *path*.

    The file is hashed in fixed-size chunks so large files are never held in
    memory in full; the resulting digest is identical to hashing all bytes at
    once.
    """
    h = hashlib.sha256()
    with path.open("rb") as fh:
        while chunk := fh.read(1 << 16):
            h.update(chunk)
    return h.hexdigest()[:16]


def iso_now():
    """Return the current UTC time as an ISO-8601 string."""
    return datetime.now(timezone.utc).isoformat()


def load_manifest() -> dict:
    """Load the manifest from MANIFEST_FILE, or return a fresh empty one.

    Returns:
        A dict that always carries a dict under the "files" key, so callers
        may index ``manifest["files"]`` safely.

    A missing manifest yields a fresh one silently. An unreadable, undecodable
    or structurally invalid manifest also yields a fresh one, but a warning is
    written to stderr first so that the problem is not hidden (stdout is left
    alone because it may carry JSON-lines output).
    """
    fresh = {"version": 1, "updated_at": iso_now(), "files": {}}
    if not MANIFEST_FILE.exists():
        return fresh
    try:
        manifest = json.loads(MANIFEST_FILE.read_text(encoding="utf-8"))
    except (ValueError, OSError) as exc:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        print(
            f" ⚠ could not read {MANIFEST_FILE} ({exc}); using an empty manifest",
            file=sys.stderr,
        )
        return fresh
    if not isinstance(manifest, dict):
        print(
            f" ⚠ {MANIFEST_FILE} does not contain a JSON object; using an empty manifest",
            file=sys.stderr,
        )
        return fresh
    if not isinstance(manifest.get("files"), dict):
        manifest["files"] = {}
    return manifest


def save_manifest(manifest: dict):
    """Stamp *manifest* with the current time and write it to MANIFEST_FILE.

    ``manifest["updated_at"]`` is updated in place. The JSON is written to a
    sibling temporary file and then moved over the target with os.replace, so
    an interrupted write cannot leave a truncated manifest behind.
    """
    manifest["updated_at"] = iso_now()
    payload = json.dumps(manifest, ensure_ascii=False, indent=2)
    tmp_file = MANIFEST_FILE.with_name(MANIFEST_FILE.name + ".tmp")
    tmp_file.write_text(payload, encoding="utf-8")
    os.replace(tmp_file, MANIFEST_FILE)


def scan_raw() -> dict[str, dict]:
    """Scan raw/ recursively for Markdown files.

    Returns:
        ``{relative_path: {hash, modified, size, abs_path}}`` where the key is
        the path relative to REPO_ROOT. Files whose name starts with "." are
        skipped. An absent raw/ directory yields an empty dict.
    """
    raw_dir = REPO_ROOT / "raw"
    result = {}
    if not raw_dir.exists():
        return result
    # Sorted so the resulting dict (and everything reported from it) has a
    # stable order regardless of filesystem enumeration order.
    for p in sorted(raw_dir.rglob("*.md")):
        if p.is_file() and not p.name.startswith("."):
            rel = str(p.relative_to(REPO_ROOT))
            stat = p.stat()
            result[rel] = {
                "hash": sha256_file(p),
                "modified": datetime.fromtimestamp(stat.st_mtime, tz=timezone.utc).isoformat(),
                "size": stat.st_size,
                "abs_path": str(p),
            }
    return result


def build_slug_from_path(rel_path: str) -> str:
    """Derive a slug from the file stem of *rel_path*.

    Alphanumeric characters (including CJK), "-", "_" and "·" are kept; every
    other character becomes "-". Leading/trailing dashes are stripped and an
    empty result becomes "untitled".
    """
    name = Path(rel_path).stem
    name = name.replace(" ", "-").replace("/", "-").replace("\\", "-")
    name = "".join(c if c.isalnum() or c in ("-", "_", "·") else "-" for c in name)
    name = name.strip("-")
    return name or "untitled"


def call_ingest(abs_path: str, slug: str, json_mode: bool = False) -> dict:
    """Describe the manual /wiki-ingest step for one file.

    No agent is invoked here: a special "needs_manual_tmux" status is returned
    so that Hermes can drive the TMUX session manually. *json_mode* is accepted
    but not used.
    """
    return {
        "status": "needs_manual_tmux",
        "abs_path": abs_path,
        "slug": slug,
        "message": "需要通过 TMUX 手动执行 /wiki-ingest,请使用 llm-wiki-sync 技能流程",
    }


def start_tmux_session() -> bool:
    """Ensure the detached tmux session used for wiki ingestion exists.

    Returns:
        True once the session exists (pre-existing or newly created).

    Raises:
        subprocess.CalledProcessError: if tmux fails to create the session.
    """
    import shlex

    # Check whether the tmux session already exists
    check = subprocess.run(
        ["tmux", "has-session", "-t", TMUX_SESSION],
        capture_output=True,
    )
    if check.returncode == 0:
        print(f" TMUX session '{TMUX_SESSION}' already exists")
        return True

    # Create a new session. The command string is run through a shell by tmux,
    # so the repository path is quoted to survive spaces and metacharacters.
    subprocess.run(
        ["tmux", "new-session", "-d", "-s", TMUX_SESSION,
         f"cd {shlex.quote(str(REPO_ROOT))} && claude --permission-mode bypassPermissions"],
        check=True,
    )
    print(f" Created TMUX session '{TMUX_SESSION}'")
    return True


def send_to_tmux(command: str) -> bool:
    """Send *command* followed by a newline to the tmux session.

    Raises:
        subprocess.CalledProcessError: if tmux reports failure.
    """
    # NOTE(review): the newline is sent as part of the key string rather than
    # as a separate "Enter" key; confirm the target program treats it as
    # "submit".
    subprocess.run(
        ["tmux", "send-keys", "-t", TMUX_SESSION, f"{command}\n"],
        check=True,
    )
    return True


def wait_for_completion(seconds: int = 120) -> bool:
    """Sleep for *seconds* to give Claude Code time to finish; always True."""
    import time

    print(f" Waiting {seconds}s for Claude Code to complete...")
    time.sleep(seconds)
    return True
def cleanup_tmux() -> bool:
    """Kill the tmux session used for wiki ingestion.

    tmux's exit status is ignored, so this returns True whether or not the
    session existed.
    """
    TMUX_SESSION = "wiki-ingest"
    subprocess.run(
        ["tmux", "kill-session", "-t", TMUX_SESSION],
        capture_output=True,
    )
    print(f" Killed TMUX session '{TMUX_SESSION}'")
    return True


def _wikilink_target(link: str) -> str:
    """Return the page path of a wikilink body, without alias, anchor or ".md"."""
    target = link.split("|", 1)[0].split("#", 1)[0].strip()
    return target.removesuffix(".md")


def find_orphan_entity_concept(manifest: dict, wiki_dir=None) -> tuple[list, list]:
    """Find entity and concept pages that no source page links to.

    Args:
        manifest: Accepted for interface compatibility; not consulted.
        wiki_dir: Wiki root containing sources/, entities/ and concepts/.
            Defaults to the module-level WIKI_DIR.

    Returns:
        ``(orphan_entities, orphan_concepts)``: sorted lists of file names.

    Links are collected from ``[[...]]`` wikilinks in sources/*.md. A link
    under ``entities/`` or ``concepts/`` counts for that kind only; a bare link
    without "/" may refer to either, so it counts for both. ``|alias`` and
    ``#anchor`` parts are ignored, and only a trailing ".md" is removed from
    the page name, so dotted names such as "Node.js" match their file.
    """
    import re

    root = WIKI_DIR if wiki_dir is None else Path(wiki_dir)
    wikilink_pattern = re.compile(r"\[\[([^\]]+)\]\]")

    referenced_entities = set()
    referenced_concepts = set()
    sources_dir = root / "sources"
    if sources_dir.exists():
        for src in sources_dir.glob("*.md"):
            content = src.read_text(encoding="utf-8")
            for link in wikilink_pattern.findall(content):
                target = _wikilink_target(link)
                if not target:
                    # e.g. [[#heading]]: a same-page anchor names no page
                    continue
                page = target.rsplit("/", 1)[-1]
                if target.startswith("entities/"):
                    referenced_entities.add(page)
                elif target.startswith("concepts/"):
                    referenced_concepts.add(page)
                elif "/" not in target:
                    # Bare wikilink: could be an entity or a concept
                    referenced_entities.add(target)
                    referenced_concepts.add(target)

    def unreferenced(directory: Path, referenced: set) -> list:
        if not directory.exists():
            return []
        return sorted(f.name for f in directory.glob("*.md") if f.stem not in referenced)

    orphan_entities = unreferenced(root / "entities", referenced_entities)
    orphan_concepts = unreferenced(root / "concepts", referenced_concepts)
    return orphan_entities, orphan_concepts


# ─── Core sync logic ────────────────────────────────────────

def check_changes(manifest: dict, raw_files: dict) -> dict:
    """Compare the manifest with the scanned raw files.

    Args:
        manifest: Manifest dict; its "files" mapping is read if present.
        raw_files: ``{rel_path: info}`` where each info dict has a "hash" key.

    Returns:
        A dict with four lists: "new" and "updated" (info dicts extended with
        "rel_path", and "old_hash" for updates), "deleted" (dicts with
        "rel_path", "slug", "source_path") and "unchanged" (relative paths).
    """
    changes = {"new": [], "updated": [], "deleted": [], "unchanged": []}
    manifest_files = manifest.get("files", {})

    # Walk the current raw files
    for rel_path, info in raw_files.items():
        if rel_path not in manifest_files:
            changes["new"].append({"rel_path": rel_path, **info})
            continue
        # A manifest entry without a hash is treated as changed, not an error.
        old_hash = (manifest_files[rel_path] or {}).get("hash")
        if info["hash"] != old_hash:
            changes["updated"].append({
                "rel_path": rel_path,
                "old_hash": old_hash,
                **info,
            })
        else:
            changes["unchanged"].append(rel_path)

    # Walk the manifest to find deleted files. Anything the scan just saw is
    # present by definition; this avoids a stat per entry and keeps a file that
    # vanished after the scan from being classified twice.
    for rel_path, entry in manifest_files.items():
        if rel_path in raw_files or (REPO_ROOT / rel_path).exists():
            continue
        entry = entry or {}
        changes["deleted"].append({
            "rel_path": rel_path,
            "slug": entry.get("slug", build_slug_from_path(rel_path)),
            "source_path": entry.get("source_path"),
        })
    return changes


def _report_changes(changes: dict, manifest_files: dict, json_mode: bool):
    """Print one line per new/updated/deleted file (JSON events or colored text)."""
    if changes["new"]:
        if not json_mode:
            print(f"\n {bold('New Files:')}")
        for f in changes["new"]:
            if json_mode:
                print(json.dumps({
                    "event": "new_detected",
                    "rel_path": f["rel_path"],
                    "slug": build_slug_from_path(f["rel_path"]),
                }))
            else:
                log(f"{green('[+]')} {f['rel_path']}", "normal")

    if changes["updated"]:
        if not json_mode:
            print(f"\n {bold('Updated Files:')}")
        for f in changes["updated"]:
            if json_mode:
                print(json.dumps({
                    "event": "updated_detected",
                    "rel_path": f["rel_path"],
                    "slug": manifest_files.get(f["rel_path"], {}).get("slug")
                    or build_slug_from_path(f["rel_path"]),
                }))
            else:
                log(f"{yellow('[~]')} {f['rel_path']} (hash changed)", "normal")

    if changes["deleted"]:
        if not json_mode:
            print(f"\n {bold('Deleted Files:')}")
        for f in changes["deleted"]:
            if json_mode:
                print(json.dumps({
                    "event": "deleted_detected",
                    "rel_path": f["rel_path"],
                }))
            else:
                log(f"{red('[-]')} {f['rel_path']}", "normal")


def run_sync(dry_run: bool = False, verbose: bool = False, json_mode: bool = False):
    """Detect raw/ changes, drop deleted entries from the manifest, list pending work.

    Args:
        dry_run: Report only; return before anything is written.
        verbose: With *dry_run*, also list the individual changed files.
        json_mode: Emit per-file events and the final summary as JSON lines.

    New and updated files are NOT ingested and NOT written to the manifest
    here; they are reported as pending for the manual TMUX workflow. Only
    deletions are applied to the manifest (wiki pages are kept).
    """
    print(f"\n{bold('=== Wiki Sync')}\n", end="")
    print(f" Date: {datetime.now().strftime('%Y-%m-%d %H:%M')}\n")
    print(f" Raw: {REPO_ROOT / 'raw'}\n")
    print(f" Wiki: {WIKI_DIR}\n")
    print(f" Mode: {'DRY-RUN (preview only)' if dry_run else 'LIVE SYNC'}")

    # Step 1: load manifest
    manifest = load_manifest()
    manifest_files = manifest.get("files", {})
    log("manifest.json loaded", "info")

    # Step 2: scan raw/
    raw_files = scan_raw()
    log(f"raw/ scan: {len(raw_files)} .md files found", "info")

    # Step 3: check changes
    changes = check_changes(manifest, raw_files)
    total_changes = len(changes["new"]) + len(changes["updated"]) + len(changes["deleted"])
    if total_changes == 0:
        log("No changes detected — wiki is up to date.", "success")
        return

    # ─── Report ───
    if not json_mode:
        print(f"\n{bold('--- Changes ---')}")
        print(f" {green('+')} New: {len(changes['new'])}")
        print(f" {yellow('~')} Updated: {len(changes['updated'])}")
        print(f" {red('-')} Deleted: {len(changes['deleted'])}")

    if verbose or not dry_run:
        _report_changes(changes, manifest_files, json_mode)

    if dry_run:
        log("\nDry-run complete. Run with --sync to apply.", "warn")
        return

    # ─── Apply Sync ───
    # Ingestion is a manual TMUX step driven by Hermes; this function only
    # records state and never invokes Claude Code itself.
    if not json_mode:
        print(f"\n{bold('--- Applying Sync ---')}")
        print(" Note: Ingest execution requires manual TMUX workflow")
        print(" Use llm-wiki-sync skill for actual ingestion")
        print()

    updated_manifest = manifest.copy()
    updated_manifest["files"] = manifest_files.copy()

    # New and updated files are marked pending (to be run manually by Hermes)
    pending_files = []

    # (1) New -> pending. The manifest is left untouched until the real
    # ingestion has completed.
    for f in changes["new"]:
        rel_path = f["rel_path"]
        slug = build_slug_from_path(rel_path)
        if json_mode:
            print(json.dumps({"event": "pending", "rel_path": rel_path, "slug": slug, "action": "new"}))
        pending_files.append({"rel_path": rel_path, "abs_path": f["abs_path"], "slug": slug, "action": "new"})

    # (2) Updated -> pending, reusing the slug already recorded in the manifest
    for f in changes["updated"]:
        rel_path = f["rel_path"]
        old_slug = manifest_files.get(rel_path, {}).get("slug") or build_slug_from_path(rel_path)
        if json_mode:
            print(json.dumps({"event": "pending", "rel_path": rel_path, "slug": old_slug, "action": "updated"}))
        pending_files.append({"rel_path": rel_path, "abs_path": f["abs_path"], "slug": old_slug, "action": "updated"})

    # (3) Deleted -> keep the wiki content, only drop the manifest entry
    deleted_files = []
    for f in changes["deleted"]:
        rel_path = f["rel_path"]
        source_path = f.get("source_path")
        log(f"Deleted: {rel_path}", "warn")
        if source_path:
            # source_path is recorded relative to the repository root
            # ("wiki/sources/slug.md"), so it must not be joined onto WIKI_DIR.
            sp = REPO_ROOT / source_path
            log(f" Wiki source kept: {sp}", "warn")
        if rel_path in updated_manifest["files"]:
            del updated_manifest["files"][rel_path]
        deleted_files.append(rel_path)

    # Step 4: save the manifest. Pending files are deliberately not recorded
    # so that they stay detectable until Hermes updates the manifest after the
    # real ingestion; only deletions are persisted here.
    save_manifest(updated_manifest)
    log(f"\nmanifest.json updated ({len(updated_manifest['files'])} entries)", "success")

    # Emit the pending list for Hermes to process manually
    if json_mode:
        print(json.dumps({
            "event": "sync_complete",
            "summary": {
                "pending": len(pending_files),
                "deleted": len(deleted_files),
                "manifest_entries": len(updated_manifest["files"]),
            },
            "pending_files": pending_files,
            "deleted_files": deleted_files,
        }))

    # Step 5: orphan detection
    orphan_entities, orphan_concepts = find_orphan_entity_concept(updated_manifest)
    if not json_mode:
        if orphan_entities or orphan_concepts:
            print(f"\n{bold('--- Orphan Report (kept as requested) ---')}")
            if orphan_entities:
                print(f" {bold('Orphan Entities')} ({len(orphan_entities)}):")
                for e in sorted(orphan_entities):
                    print(f" {dim('?')} {e}")
            if orphan_concepts:
                print(f" {bold('Orphan Concepts')} ({len(orphan_concepts)}):")
                for c in sorted(orphan_concepts):
                    print(f" {dim('?')} {c}")
            log("\nOrphan pages are kept (not deleted per user request).", "info")
        else:
            log("No orphan entity/concept detected.", "success")

    # Step 6: closing summary (the JSON summary was already emitted above)
    print(f"\n{bold('Done.')}")
    print(f"\n Pending files for manual TMUX ingestion: {len(pending_files)}")
    print(" Use llm-wiki-sync skill to process these files.")


def run_check():
    """Preview the changes between the manifest and raw/ without writing anything."""
    manifest = load_manifest()
    raw_files = scan_raw()
    changes = check_changes(manifest, raw_files)
    total = len(changes["new"]) + len(changes["updated"]) + len(changes["deleted"])

    print(f"\n{bold('=== Wiki Sync Check')} (preview mode)\n")
    print(f" Raw files: {len(raw_files)}")
    print(f" Manifest entries: {len(manifest.get('files', {}))}")
    print(f" {green('+')} New: {len(changes['new'])}")
    print(f" {yellow('~')} Updated: {len(changes['updated'])}")
    print(f" {red('-')} Deleted: {len(changes['deleted'])}")

    if total > 0:
        if changes["new"]:
            print(f"\n {bold('New Files:')}")
            for f in changes["new"]:
                print(f" {green('[+]')} {f['rel_path']}")
        if changes["updated"]:
            print(f"\n {bold('Updated Files:')}")
            for f in changes["updated"]:
                print(f" {yellow('[~]')} {f['rel_path']} (was {f['old_hash']}, now {f['hash']})")
        if changes["deleted"]:
            print(f"\n {bold('Deleted Files:')}")
            for f in changes["deleted"]:
                print(f" {red('[-]')} {f['rel_path']}")
    else:
        print(f"\n {green('No changes — wiki is in sync.')}")
    print()


def run_rebuild():
    """Rebuild wiki/index.md from the manifest (fallback path).

    Sources are listed newest-first by their recorded "modified" value. The
    link title is the first line of the source page with leading "#"/spaces
    removed, falling back to the slug when that line is empty; sources whose
    page is missing are listed with a "(source missing)" marker.
    """
    manifest = load_manifest()
    files = manifest.get("files", {})

    print(f"\n{bold('=== Wiki Rebuild from Manifest')}\n")
    print(f" Manifest entries: {len(files)}")
    print(" Rebuilding index.md ...\n")

    index_lines = [
        "# Wiki Index\n",
        "\n## Overview\n",
        "- [Overview](overview.md) — living synthesis\n",
        "\n## Sources\n",
    ]

    # Newest first, by the "modified" timestamp
    sorted_files = sorted(files.items(), key=lambda x: x[1].get("modified", ""), reverse=True)
    for rel_path, info in sorted_files:
        # "or" (not a .get default) so an empty/None slug also falls back
        slug = info.get("slug") or build_slug_from_path(rel_path)
        source_md_path = WIKI_DIR / "sources" / f"{slug}.md"
        if source_md_path.exists():
            first_line = source_md_path.read_text(encoding="utf-8").split("\n")[0]
            # An empty title would render as an unclickable "[]" link
            title = first_line.lstrip("# ").strip() or slug
            index_lines.append(f"- [{title}](sources/{slug}.md)\n")
        else:
            index_lines.append(f"- [{slug}](sources/{slug}.md) — (source missing)\n")

    index_lines.append("\n## Entities\n\n## Concepts\n\n## Syntheses\n")

    index_file = WIKI_DIR / "index.md"
    index_file.write_text("".join(index_lines), encoding="utf-8")
    print(f" {green('✓')} index.md rebuilt with {len(sorted_files)} sources")

    # Orphan report
    orphan_entities, orphan_concepts = find_orphan_entity_concept(manifest)
    if orphan_entities:
        print(f" {dim('?')} Orphan entities: {len(orphan_entities)}")
    if orphan_concepts:
        print(f" {dim('?')} Orphan concepts: {len(orphan_concepts)}")
    print("\nDone.")


# ─── CLI entry point ────────────────────────────────────────

def main(argv=None):
    """Parse command-line arguments and dispatch to the selected action.

    Args:
        argv: Argument list; defaults to ``sys.argv[1:]`` when None.

    When several action flags are given, the first match wins in this order:
    --rebuild, --pending, --reset-failed, --check, --sync. With no action flag
    the help text and a few examples are printed.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="Wiki ↔ Raw 三向同步工具",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--check",
        action="store_true",
        help="预览变化,不执行同步",
    )
    parser.add_argument(
        "--sync",
        action="store_true",
        help="执行完整同步(新增/修改/删除 + orphan 检测)",
    )
    parser.add_argument(
        "--rebuild",
        action="store_true",
        help="从 manifest 重建 wiki/index.md(兜底方案)",
    )
    parser.add_argument(
        "--reset-failed",
        action="store_true",
        help="重置所有 failed 的 ingest 状态(让它们重新待处理)",
    )
    parser.add_argument(
        "--pending",
        action="store_true",
        help="列出所有待摄取的 pending 文件",
    )
    parser.add_argument(
        "--verbose", "-v",
        action="store_true",
        help="详细输出",
    )
    parser.add_argument(
        "--json",
        action="store_true",
        help="JSON 行输出模式(供调用方解析)",
    )
    args = parser.parse_args(argv)

    if args.rebuild:
        run_rebuild()
    elif args.pending:
        # List the files that still await ingestion
        manifest = load_manifest()
        pending = [k for k, v in manifest.get("files", {}).items() if not v.get("ingested")]
        print(f"=== Pending Ingest Files ({len(pending)}) ===\n")
        for i, path in enumerate(pending, 1):
            print(f"{i:3}. {path}")
    elif args.reset_failed:
        # Reset failed ingest entries so they become pending again
        manifest = load_manifest()
        reset_count = 0
        for v in manifest.get("files", {}).values():
            if v.get("error"):
                v["ingested"] = False
                v.pop("error", None)
                v.pop("ingested_at", None)
                reset_count += 1
        if reset_count > 0:
            save_manifest(manifest)
            print(f"Reset {reset_count} failed entries to pending.")
        else:
            print("No failed entries found.")
    elif args.check:
        run_check()
    elif args.sync:
        run_sync(dry_run=False, verbose=args.verbose, json_mode=args.json)
    else:
        parser.print_help()
        print("\n示例:")
        print(" python tools/sync.py --check # 预览变化")
        print(" python tools/sync.py --sync # 执行同步")
        print(" python tools/sync.py --sync -v # 详细模式")
        print(" python tools/sync.py --rebuild # 重建 index")


if __name__ == "__main__":
    main()