修改版本
This commit is contained in:
1454
tools/manifest.json
Normal file
1454
tools/manifest.json
Normal file
File diff suppressed because it is too large
Load Diff
385
tools/sync.py
385
tools/sync.py
@@ -4,7 +4,7 @@ Wiki ↔ Raw 三向同步工具
|
||||
|
||||
功能:
|
||||
- 检测 raw/ 下文件变化(新增/修改/删除)
|
||||
- 自动调用 ingest.py 进行同步
|
||||
- 调用 Claude Code agent 进行同步(不使用 litellm)
|
||||
- 维护 manifest.json 状态映射
|
||||
- 检测 orphan entity/concept(仅报告,不删除)
|
||||
|
||||
@@ -12,7 +12,6 @@ Wiki ↔ Raw 三向同步工具
|
||||
python tools/sync.py --check 预览变化(不执行)
|
||||
python tools/sync.py --sync 执行同步
|
||||
python tools/sync.py --rebuild 从 manifest 重建 wiki/index(兜底)
|
||||
python tools/sync.py --bootstrap 从现有 wiki sources 反向生成 manifest(首次用,跳过已 ingest 的文件)
|
||||
|
||||
manifest.json 格式:
|
||||
{
|
||||
@@ -39,9 +38,10 @@ from pathlib import Path
|
||||
from datetime import datetime, timezone
|
||||
|
||||
|
||||
REPO_ROOT = Path(__file__).parent.parent
|
||||
REPO_ROOT = Path(__file__).parent.parent.resolve()
|
||||
WIKI_DIR = REPO_ROOT / "wiki"
|
||||
MANIFEST_FILE = WIKI_DIR / "manifest.json"
|
||||
# manifest.json 放在 tools/ 而非 wiki/,避免 symlink 路径问题
|
||||
MANIFEST_FILE = Path(__file__).parent / "manifest.json"
|
||||
SCHEMA_FILE = REPO_ROOT / "CLAUDE.md"
|
||||
|
||||
|
||||
@@ -127,26 +127,71 @@ def build_slug_from_path(rel_path: str) -> str:
|
||||
return name or "untitled"
|
||||
|
||||
|
||||
def call_ingest(source_path: str, slug: str = None) -> dict:
    """Run tools/ingest.py on one source file and report the outcome.

    Args:
        source_path: Path of the file handed to ingest.py as its only argument.
        slug: Accepted for interface compatibility; not used by this function.

    Returns:
        A dict with the keys ``success`` (True when the child process exited
        with code 0), ``stdout`` and ``stderr``.  When the child process times
        out or cannot be run, ``success`` is False, ``stdout`` is empty and
        ``stderr`` carries a short description of the failure.
    """
    ingest_script = REPO_ROOT / "tools" / "ingest.py"
    argv = [sys.executable, str(ingest_script), source_path]

    # Start from the failure shape; only a finished process overwrites it.
    outcome = {"success": False, "stdout": "", "stderr": ""}
    try:
        proc = subprocess.run(
            argv,
            cwd=str(REPO_ROOT),
            timeout=300,
            text=True,
            capture_output=True,
        )
    except subprocess.TimeoutExpired:
        outcome["stderr"] = "Timeout (>5min)"
    except Exception as exc:
        # Best-effort boundary: any launch failure is reported, not raised.
        outcome["stderr"] = str(exc)
    else:
        outcome["success"] = proc.returncode == 0
        outcome["stdout"] = proc.stdout
        outcome["stderr"] = proc.stderr
    return outcome
|
||||
def call_ingest(abs_path: str, slug: str, json_mode: bool = False) -> dict:
    """Return a marker telling the caller that ingestion must be done by hand.

    This function does not launch anything itself.  It only describes the
    file that still has to be ingested through the interactive TMUX /
    Claude Code workflow (the ``/wiki-ingest`` command).

    Args:
        abs_path: Path of the raw file that needs ingesting.
        slug: Slug of the wiki source page associated with the file.
        json_mode: Accepted for interface compatibility; currently unused.

    Returns:
        A dict with ``status`` set to ``"needs_manual_tmux"`` plus the
        ``abs_path``, ``slug`` and a human-readable ``message``.  The keys
        ``success`` (always False), ``stdout`` and ``stderr`` are included as
        well so that callers reading ``result["success"]`` or
        ``result["stderr"]`` do not fail with ``KeyError``.
    """
    message = "需要通过 TMUX 手动执行 /wiki-ingest,请使用 llm-wiki-sync 技能流程"
    return {
        "status": "needs_manual_tmux",
        "abs_path": abs_path,
        "slug": slug,
        "message": message,
        # Nothing was ingested, so report it as "not successful" in the
        # success/stdout/stderr shape that call sites in this file read.
        "success": False,
        "stdout": "",
        "stderr": message,
    }
|
||||
|
||||
|
||||
def start_tmux_session() -> bool:
    """Make sure the detached TMUX session used for wiki ingestion exists.

    If a session named ``wiki-ingest`` is already present it is reused.
    Otherwise a new detached session is created whose command changes into
    REPO_ROOT and starts ``claude --permission-mode bypassPermissions``.

    Returns:
        True once the session exists (either found or newly created).

    Raises:
        subprocess.CalledProcessError: if ``tmux new-session`` exits non-zero.
    """
    import shlex

    TMUX_SESSION = "wiki-ingest"

    # has-session exits 0 only when the named session already exists.
    check = subprocess.run(
        ["tmux", "has-session", "-t", TMUX_SESSION],
        capture_output=True,
    )
    if check.returncode == 0:
        print(f" TMUX session '{TMUX_SESSION}' already exists")
        return True

    # The last argument is interpreted by a shell inside tmux, so the
    # repository path must be quoted: an unquoted path containing spaces or
    # shell metacharacters would break (or alter) the command.
    launch_cmd = (
        f"cd {shlex.quote(str(REPO_ROOT))} "
        "&& claude --permission-mode bypassPermissions"
    )
    subprocess.run(
        ["tmux", "new-session", "-d", "-s", TMUX_SESSION, launch_cmd],
        check=True,
    )
    print(f" Created TMUX session '{TMUX_SESSION}'")
    return True
|
||||
|
||||
|
||||
def send_to_tmux(command: str) -> bool:
    """Type *command* into the ``wiki-ingest`` TMUX session and submit it.

    The text is sent literally (``send-keys -l``) so that a command which
    happens to look like a tmux key name is not reinterpreted as a key press.
    It is then submitted with a separate ``Enter`` key press rather than an
    embedded line-feed character.

    Args:
        command: Text to type into the session.

    Returns:
        True when both tmux invocations succeeded.

    Raises:
        subprocess.CalledProcessError: if either ``tmux send-keys`` call
            exits non-zero.
    """
    TMUX_SESSION = "wiki-ingest"
    target = ["tmux", "send-keys", "-t", TMUX_SESSION]

    # "--" stops option parsing so a command starting with "-" stays text.
    subprocess.run([*target, "-l", "--", command], check=True)
    # A real Enter key press submits the typed line.
    subprocess.run([*target, "Enter"], check=True)
    return True
|
||||
|
||||
|
||||
def wait_for_completion(seconds: int = 120) -> bool:
    """Pause for a fixed time to give Claude Code a chance to finish.

    This is a plain timed sleep; it does not inspect the TMUX session to
    find out whether processing has really completed.

    Args:
        seconds: How long to sleep.  Negative values are treated as 0 so the
            call never fails with ``ValueError`` from ``time.sleep``.

    Returns:
        Always True, after the sleep has elapsed.
    """
    import time

    delay = max(0, seconds)
    print(f" Waiting {delay}s for Claude Code to complete...")
    time.sleep(delay)
    return True
|
||||
|
||||
|
||||
def cleanup_tmux() -> bool:
    """Kill the ``wiki-ingest`` TMUX session if possible.

    The outcome of ``tmux kill-session`` is not treated as an error, but the
    printed message reflects whether tmux actually reported success, instead
    of claiming the session was killed unconditionally.

    Returns:
        Always True (cleanup is best-effort).
    """
    TMUX_SESSION = "wiki-ingest"
    result = subprocess.run(
        ["tmux", "kill-session", "-t", TMUX_SESSION],
        capture_output=True,
    )
    if result.returncode == 0:
        print(f" Killed TMUX session '{TMUX_SESSION}'")
    else:
        # Non-zero exit: tmux did not kill anything (e.g. no such session).
        print(
            f" TMUX session '{TMUX_SESSION}' not killed "
            f"(tmux exit code {result.returncode})"
        )
    return True
|
||||
|
||||
|
||||
def find_orphan_entity_concept(manifest: dict) -> tuple[list, list]:
|
||||
@@ -225,13 +270,12 @@ def check_changes(manifest: dict, raw_files: dict) -> dict:
|
||||
return changes
|
||||
|
||||
|
||||
def run_sync(dry_run: bool = False, verbose: bool = False):
|
||||
print(f"\n{bold('=== Wiki Sync')}\n")
|
||||
print(f" Date: {datetime.now().strftime('%Y-%m-%d %H:%M')}")
|
||||
print(f" Raw: {REPO_ROOT / 'raw'}")
|
||||
print(f" Wiki: {WIKI_DIR}")
|
||||
def run_sync(dry_run: bool = False, verbose: bool = False, json_mode: bool = False):
|
||||
print(f"\n{bold('=== Wiki Sync')}\n", end="")
|
||||
print(f" Date: {datetime.now().strftime('%Y-%m-%d %H:%M')}\n")
|
||||
print(f" Raw: {REPO_ROOT / 'raw'}\n")
|
||||
print(f" Wiki: {WIKI_DIR}\n")
|
||||
print(f" Mode: {'DRY-RUN (preview only)' if dry_run else 'LIVE SYNC'}")
|
||||
print()
|
||||
|
||||
# Step 1: load manifest
|
||||
manifest = load_manifest()
|
||||
@@ -250,190 +294,150 @@ def run_sync(dry_run: bool = False, verbose: bool = False):
|
||||
return
|
||||
|
||||
# ─── Report ───
|
||||
print(f"\n{bold('--- Changes ---')}")
|
||||
print(f" {green('+')} New: {len(changes['new'])}")
|
||||
print(f" {yellow('~')} Updated: {len(changes['updated'])}")
|
||||
print(f" {red('-')} Deleted: {len(changes['deleted'])}")
|
||||
if not json_mode:
|
||||
print(f"\n{bold('--- Changes ---')}")
|
||||
print(f" {green('+')} New: {len(changes['new'])}")
|
||||
print(f" {yellow('~')} Updated: {len(changes['updated'])}")
|
||||
print(f" {red('-')} Deleted: {len(changes['deleted'])}")
|
||||
|
||||
if verbose or not dry_run:
|
||||
if changes["new"]:
|
||||
print(f"\n {bold('New Files:')}")
|
||||
if not json_mode:
|
||||
print(f"\n {bold('New Files:')}")
|
||||
for f in changes["new"]:
|
||||
log(f"{green('[+')} {f['rel_path']}", "normal")
|
||||
if json_mode:
|
||||
print(json.dumps({
|
||||
"event": "new_detected",
|
||||
"rel_path": f["rel_path"],
|
||||
"slug": build_slug_from_path(f["rel_path"]),
|
||||
}))
|
||||
else:
|
||||
log(f"{green('[+')} {f['rel_path']}", "normal")
|
||||
|
||||
if changes["updated"]:
|
||||
print(f"\n {bold('Updated Files:')}")
|
||||
if not json_mode:
|
||||
print(f"\n {bold('Updated Files:')}")
|
||||
for f in changes["updated"]:
|
||||
log(f"{yellow('[~]')} {f['rel_path']} (hash changed)", "normal")
|
||||
if json_mode:
|
||||
print(json.dumps({
|
||||
"event": "updated_detected",
|
||||
"rel_path": f["rel_path"],
|
||||
"slug": manifest["files"].get(f["rel_path"], {}).get("slug") or build_slug_from_path(f["rel_path"]),
|
||||
}))
|
||||
else:
|
||||
log(f"{yellow('[~]')} {f['rel_path']} (hash changed)", "normal")
|
||||
|
||||
if changes["deleted"]:
|
||||
print(f"\n {bold('Deleted Files:')}")
|
||||
if not json_mode:
|
||||
print(f"\n {bold('Deleted Files:')}")
|
||||
for f in changes["deleted"]:
|
||||
log(f"{red('[-]')} {f['rel_path']}", "normal")
|
||||
if json_mode:
|
||||
print(json.dumps({
|
||||
"event": "deleted_detected",
|
||||
"rel_path": f["rel_path"],
|
||||
}))
|
||||
else:
|
||||
log(f"{red('[-]')} {f['rel_path']}", "normal")
|
||||
|
||||
if dry_run:
|
||||
log("\nDry-run complete. Run with --sync to apply.", "warn")
|
||||
return
|
||||
|
||||
# ─── Apply Sync ───
|
||||
print(f"\n{bold('--- Applying Sync ---')}")
|
||||
# 注意:call_ingest 现在返回 needs_manual_tmux,提示 Hermes 通过 TMUX 手动执行
|
||||
# 这里只做 manifest 记录,不实际调用 Claude Code
|
||||
if not json_mode:
|
||||
print(f"\n{bold('--- Applying Sync ---')}")
|
||||
print(" Note: Ingest execution requires manual TMUX workflow")
|
||||
print(" Use llm-wiki-sync skill for actual ingestion")
|
||||
print()
|
||||
|
||||
updated_manifest = manifest.copy()
|
||||
updated_manifest["files"] = manifest.get("files", {}).copy()
|
||||
|
||||
# ① 新增 → ingest
|
||||
# 标记新增和更新的文件为待处理(由 Hermes 手动执行)
|
||||
pending_files = []
|
||||
|
||||
# ① 新增 → 标记待处理
|
||||
for f in changes["new"]:
|
||||
rel_path = f["rel_path"]
|
||||
abs_path = f["abs_path"]
|
||||
slug = build_slug_from_path(rel_path)
|
||||
print(f"\n {green('[+]')} New: {rel_path}")
|
||||
print(f" slug: {slug}")
|
||||
|
||||
result = call_ingest(abs_path, slug)
|
||||
if result["success"]:
|
||||
log(f"Ingested: {slug}.md", "success")
|
||||
updated_manifest["files"][rel_path] = {
|
||||
"hash": f["hash"],
|
||||
"modified": f["modified"],
|
||||
"slug": slug,
|
||||
"source_path": f"wiki/sources/{slug}.md",
|
||||
"ingested": True,
|
||||
"ingested_at": iso_now(),
|
||||
}
|
||||
else:
|
||||
log(f"Failed: {result['stderr'][:200]}", "error")
|
||||
# 仍然记录(避免重复 ingest)
|
||||
updated_manifest["files"][rel_path] = {
|
||||
"hash": f["hash"],
|
||||
"modified": f["modified"],
|
||||
"slug": slug,
|
||||
"source_path": f"wiki/sources/{slug}.md",
|
||||
"ingested": False,
|
||||
"ingested_at": None,
|
||||
"error": result["stderr"][:500],
|
||||
}
|
||||
|
||||
# ② 修改 → re-ingest
|
||||
|
||||
if json_mode:
|
||||
print(json.dumps({"event": "pending", "rel_path": rel_path, "slug": slug, "action": "new"}))
|
||||
|
||||
pending_files.append({"rel_path": rel_path, "abs_path": abs_path, "slug": slug, "action": "new"})
|
||||
|
||||
# 先不更新 manifest,等实际执行完成后再更新
|
||||
# updated_manifest["files"][rel_path] = {...}
|
||||
|
||||
# ② 修改 → 标记待处理
|
||||
for f in changes["updated"]:
|
||||
rel_path = f["rel_path"]
|
||||
abs_path = f["abs_path"]
|
||||
old_slug = manifest["files"].get(rel_path, {}).get("slug") or build_slug_from_path(rel_path)
|
||||
print(f"\n {yellow('[~]')} Updated: {rel_path}")
|
||||
|
||||
if json_mode:
|
||||
print(json.dumps({"event": "pending", "rel_path": rel_path, "slug": old_slug, "action": "updated"}))
|
||||
|
||||
pending_files.append({"rel_path": rel_path, "abs_path": abs_path, "slug": old_slug, "action": "updated"})
|
||||
|
||||
result = call_ingest(abs_path, old_slug)
|
||||
if result["success"]:
|
||||
log(f"Re-ingested: {old_slug}.md", "success")
|
||||
updated_manifest["files"][rel_path] = {
|
||||
**updated_manifest["files"].get(rel_path, {}),
|
||||
"hash": f["hash"],
|
||||
"modified": f["modified"],
|
||||
"slug": old_slug,
|
||||
"source_path": f"wiki/sources/{old_slug}.md",
|
||||
"ingested": True,
|
||||
"ingested_at": iso_now(),
|
||||
}
|
||||
else:
|
||||
log(f"Failed: {result['stderr'][:200]}", "error")
|
||||
|
||||
# ③ 删除 → 保留 wiki 内容,仅从 manifest 移除(按用户要求保留 orphan)
|
||||
# ③ 删除 → 保留 wiki 内容,仅从 manifest 移除
|
||||
deleted_files = []
|
||||
for f in changes["deleted"]:
|
||||
rel_path = f["rel_path"]
|
||||
source_path = f.get("source_path")
|
||||
print(f"\n {red('[-]')} Deleted: {rel_path}")
|
||||
log(f"Deleted: {rel_path}", "warn")
|
||||
if source_path:
|
||||
sp = WIKI_DIR / source_path
|
||||
log(f" Wiki source kept: {sp}", "warn")
|
||||
# 从 manifest 移除(不删除 wiki 文件)
|
||||
if rel_path in updated_manifest["files"]:
|
||||
del updated_manifest["files"][rel_path]
|
||||
deleted_files.append(rel_path)
|
||||
|
||||
# Step 4: Save manifest
|
||||
# Step 4: Save manifest(不保存 pending files,等待实际执行后更新)
|
||||
# 注意:不再在 --sync 时自动更新 manifest,而是由 Hermes 手动更新
|
||||
# 这样可以追踪哪些文件需要处理
|
||||
# 只保存已删除的文件变更
|
||||
save_manifest(updated_manifest)
|
||||
log(f"\nmanifest.json updated ({len(updated_manifest['files'])} entries)", "success")
|
||||
|
||||
# 输出 pending files 列表,供 Hermes 手动执行
|
||||
if json_mode:
|
||||
print(json.dumps({
|
||||
"event": "sync_complete",
|
||||
"summary": {
|
||||
"pending": len(pending_files),
|
||||
"deleted": len(deleted_files),
|
||||
"manifest_entries": len(updated_manifest["files"]),
|
||||
},
|
||||
"pending_files": pending_files,
|
||||
"deleted_files": deleted_files,
|
||||
}))
|
||||
|
||||
# Step 5: Orphan detection
|
||||
orphan_entities, orphan_concepts = find_orphan_entity_concept(updated_manifest)
|
||||
if orphan_entities or orphan_concepts:
|
||||
print(f"\n{bold('--- Orphan Report (kept as requested) ---')}")
|
||||
if orphan_entities:
|
||||
print(f" {bold('Orphan Entities')} ({len(orphan_entities)}):")
|
||||
for e in sorted(orphan_entities):
|
||||
print(f" {dim('?')} {e}")
|
||||
if orphan_concepts:
|
||||
print(f" {bold('Orphan Concepts')} ({len(orphan_concepts)}):")
|
||||
for c in sorted(orphan_concepts):
|
||||
print(f" {dim('?')} {c}")
|
||||
log("\nOrphan pages are kept (not deleted per user request).", "info")
|
||||
else:
|
||||
log("No orphan entity/concept detected.", "success")
|
||||
if not json_mode:
|
||||
if orphan_entities or orphan_concepts:
|
||||
print(f"\n{bold('--- Orphan Report (kept as requested) ---')}")
|
||||
if orphan_entities:
|
||||
print(f" {bold('Orphan Entities')} ({len(orphan_entities)}):")
|
||||
for e in sorted(orphan_entities):
|
||||
print(f" {dim('?')} {e}")
|
||||
if orphan_concepts:
|
||||
print(f" {bold('Orphan Concepts')} ({len(orphan_concepts)}):")
|
||||
for c in sorted(orphan_concepts):
|
||||
print(f" {dim('?')} {c}")
|
||||
log("\nOrphan pages are kept (not deleted per user request).", "info")
|
||||
else:
|
||||
log("No orphan entity/concept detected.", "success")
|
||||
|
||||
# Step 6: JSON summary(已在上面输出 pending_files)
|
||||
# 旧的 summary 输出已删除,因为不再自动执行 ingest
|
||||
print(f"\n{bold('Done.')}")
|
||||
|
||||
|
||||
def run_bootstrap():
    """Build manifest.json from the pages already present in wiki/sources/.

    Every ``*.md`` page under ``WIKI_DIR / "sources"`` is scanned for a
    ``raw/<path>.md`` reference.  When the referenced raw file still exists,
    a manifest entry keyed ``raw/<path>.md`` is created and marked as already
    ingested; its ``modified`` and ``ingested_at`` values are both the raw
    file's mtime in UTC.  Pages without such a reference, or whose raw file
    is gone, are counted and skipped.  The manifest is then written with
    ``save_manifest`` and a summary is printed.

    Pages are visited in sorted order so the resulting manifest does not
    depend on directory listing order.
    """
    import re

    print(f"\n{bold('=== Wiki Bootstrap')}\n")
    print(" Scanning existing wiki sources to build manifest ...\n")

    sources_dir = WIKI_DIR / "sources"
    if not sources_dir.exists():
        print(f" {red('✗')} No wiki/sources/ directory found. Nothing to bootstrap.")
        return

    # Matches "raw/<path>.md", optionally wrapped in [ ] or [[ ]].
    wikilink_pattern = re.compile(r"\[\[?raw/([^\]\s]+\.md)\]?]?", re.IGNORECASE)

    manifest = {"version": 1, "updated_at": iso_now(), "files": {}}
    # Resolve raw/ so a symlinked directory is followed to its real location.
    raw_dir = (REPO_ROOT / "raw").resolve()
    bootstrapped = 0
    skipped_not_found = 0
    skipped_no_source_field = 0

    for src in sorted(sources_dir.glob("*.md")):
        content = src.read_text(encoding="utf-8")

        # Take the first raw/ reference in the page as its original file.
        match = wikilink_pattern.search(content)
        if not match:
            skipped_no_source_field += 1
            continue

        # raw_rel is the path below raw/, e.g. "Agent/usecases/xxx.md".
        raw_rel = match.group(1).lstrip("/")
        raw_path = raw_dir / raw_rel

        if not raw_path.exists():
            # Raw file was deleted: keep the wiki page, add no manifest entry.
            skipped_not_found += 1
            continue

        slug = src.stem
        # Same timestamp is recorded as both "modified" and "ingested_at".
        mtime_iso = datetime.fromtimestamp(
            raw_path.stat().st_mtime, tz=timezone.utc
        ).isoformat()

        # Manifest keys are written relative to the repo root: "raw/<path>".
        manifest["files"][f"raw/{raw_rel}"] = {
            "hash": sha256_file(raw_path),
            "modified": mtime_iso,
            "slug": slug,
            "source_path": f"wiki/sources/{slug}.md",
            "ingested": True,
            "ingested_at": mtime_iso,
        }
        bootstrapped += 1

    save_manifest(manifest)

    print(f" {bold('Result:')}")
    print(f" {green('✓')} Manifest entries created: {bootstrapped}")
    print(f" {yellow('~')} Skipped (source file deleted): {skipped_not_found}")
    print(f" {dim('-')} Skipped (no source_file field): {skipped_no_source_field}")
    print(f"\n {green('✓')} manifest.json created at: {MANIFEST_FILE}")
    print(f"\n Run now: {bold('python tools/sync.py --check')} to preview new/updated files.\n")
|
||||
print(f"\n Pending files for manual TMUX ingestion: {len(pending_files)}")
|
||||
print(" Use llm-wiki-sync skill to process these files.")
|
||||
|
||||
|
||||
def run_check():
|
||||
@@ -537,26 +541,56 @@ if __name__ == "__main__":
|
||||
help="从 manifest 重建 wiki/index.md(兜底方案)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--bootstrap",
|
||||
"--reset-failed",
|
||||
action="store_true",
|
||||
help="从现有 wiki sources 反向生成 manifest(首次使用,跳过已 ingest 的文件)",
|
||||
help="重置所有 failed 的 ingest 状态(让它们重新待处理)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--pending",
|
||||
action="store_true",
|
||||
help="列出所有待摄取的 pending 文件",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--verbose", "-v",
|
||||
action="store_true",
|
||||
help="详细输出",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--json",
|
||||
action="store_true",
|
||||
help="JSON 行输出模式(供调用方解析)",
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.bootstrap:
|
||||
run_bootstrap()
|
||||
elif args.rebuild:
|
||||
if args.rebuild:
|
||||
run_rebuild()
|
||||
elif args.pending:
|
||||
# 列出待摄取的文件
|
||||
manifest = load_manifest()
|
||||
pending = [k for k, v in manifest["files"].items() if not v.get("ingested")]
|
||||
print(f"=== Pending Ingest Files ({len(pending)}) ===\n")
|
||||
for i, path in enumerate(pending, 1):
|
||||
print(f"{i:3}. {path}")
|
||||
elif args.reset_failed:
|
||||
# 重置失败的 ingest 状态
|
||||
manifest = load_manifest()
|
||||
reset_count = 0
|
||||
for k, v in manifest["files"].items():
|
||||
if v.get("error"):
|
||||
v["ingested"] = False
|
||||
v.pop("error", None)
|
||||
v.pop("ingested_at", None)
|
||||
reset_count += 1
|
||||
if reset_count > 0:
|
||||
save_manifest(manifest)
|
||||
print(f"Reset {reset_count} failed entries to pending.")
|
||||
else:
|
||||
print("No failed entries found.")
|
||||
elif args.check:
|
||||
run_check()
|
||||
elif args.sync:
|
||||
run_sync(dry_run=False, verbose=args.verbose)
|
||||
run_sync(dry_run=False, verbose=args.verbose, json_mode=args.json)
|
||||
else:
|
||||
parser.print_help()
|
||||
print("\n示例:")
|
||||
@@ -564,4 +598,3 @@ if __name__ == "__main__":
|
||||
print(" python tools/sync.py --sync # 执行同步")
|
||||
print(" python tools/sync.py --sync -v # 详细模式")
|
||||
print(" python tools/sync.py --rebuild # 重建 index")
|
||||
print(" python tools/sync.py --bootstrap # 首次:从 wiki sources 生成 manifest")
|
||||
|
||||
Reference in New Issue
Block a user