feat(wiki): ingest Docker image transfer guide
- Create source page with complete documentation - Add Docker-Image, Docker-Save, Docker-Load concept pages - Update Docker entity with new source reference - Update log.md entry
This commit is contained in:
3028
tools/manifest.json
3028
tools/manifest.json
File diff suppressed because it is too large
Load Diff
310
tools/sync.py
310
tools/sync.py
@@ -4,13 +4,14 @@ Wiki ↔ Raw 三向同步工具
|
|||||||
|
|
||||||
功能:
|
功能:
|
||||||
- 检测 raw/ 下文件变化(新增/修改/删除)
|
- 检测 raw/ 下文件变化(新增/修改/删除)
|
||||||
- 调用 Claude Code agent 进行同步(不使用 litellm)
|
|
||||||
- 维护 manifest.json 状态映射
|
- 维护 manifest.json 状态映射
|
||||||
- 检测 orphan entity/concept(仅报告,不删除)
|
- 检测 orphan entity/concept(仅报告,不删除)
|
||||||
|
|
||||||
用法:
|
用法:
|
||||||
python tools/sync.py --check 预览变化(不执行)
|
python tools/sync.py --check 预览变化(不执行)
|
||||||
python tools/sync.py --sync 执行同步
|
python tools/sync.py --sync 执行同步(更新 manifest)
|
||||||
|
python tools/sync.py --pending 显示待处理文件列表
|
||||||
|
python tools/sync.py --json JSON 行输出(供程序消费)
|
||||||
python tools/sync.py --rebuild 从 manifest 重建 wiki/index(兜底)
|
python tools/sync.py --rebuild 从 manifest 重建 wiki/index(兜底)
|
||||||
|
|
||||||
manifest.json 格式:
|
manifest.json 格式:
|
||||||
@@ -29,20 +30,16 @@ manifest.json 格式:
|
|||||||
}
|
}
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
import json
|
import json
|
||||||
import hashlib
|
import hashlib
|
||||||
import subprocess
|
import argparse
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
|
|
||||||
REPO_ROOT = Path(__file__).parent.parent.resolve()
|
REPO_ROOT = Path(__file__).parent.parent.resolve()
|
||||||
WIKI_DIR = REPO_ROOT / "wiki"
|
WIKI_DIR = REPO_ROOT / "wiki"
|
||||||
# manifest.json 放在 tools/ 而非 wiki/,避免 symlink 路径问题
|
|
||||||
MANIFEST_FILE = Path(__file__).parent / "manifest.json"
|
MANIFEST_FILE = Path(__file__).parent / "manifest.json"
|
||||||
SCHEMA_FILE = REPO_ROOT / "CLAUDE.md"
|
|
||||||
|
|
||||||
|
|
||||||
# ─── 工具函数 ───────────────────────────────────────────────
|
# ─── 工具函数 ───────────────────────────────────────────────
|
||||||
@@ -127,107 +124,8 @@ def build_slug_from_path(rel_path: str) -> str:
|
|||||||
return name or "untitled"
|
return name or "untitled"
|
||||||
|
|
||||||
|
|
||||||
def call_ingest(abs_path: str, slug: str, json_mode: bool = False) -> dict:
    """Build the hand-off record for a file that must be ingested manually.

    Nothing is executed here: the function only returns a dict whose
    ``status`` is ``"needs_manual_tmux"`` so the caller knows /wiki-ingest
    still has to be run by hand in a TMUX session.

    Args:
        abs_path: Path of the raw file, echoed back unchanged.
        slug: Slug for the file, echoed back unchanged.
        json_mode: Accepted for signature compatibility; not used.

    Returns:
        A dict with the keys ``status``, ``abs_path``, ``slug`` and
        ``message`` (in that order).
    """
    handoff = {"status": "needs_manual_tmux"}
    handoff["abs_path"] = abs_path
    handoff["slug"] = slug
    handoff["message"] = "需要通过 TMUX 手动执行 /wiki-ingest,请使用 llm-wiki-sync 技能流程"
    return handoff
|
|
||||||
|
|
||||||
|
|
||||||
def parse_slug_from_output(output: str) -> str | None:
|
|
||||||
"""从 TMUX 输出中解析 SLUG: xxx 行"""
|
|
||||||
import re
|
|
||||||
match = re.search(r"SLUG:\s*([a-zA-Z0-9_-]+)", output)
|
|
||||||
return match.group(1) if match else None
|
|
||||||
|
|
||||||
|
|
||||||
def update_manifest_with_slug(rel_path: str, actual_slug: str) -> bool:
    """Mark ``rel_path`` as ingested in ``manifest.json`` under its real slug.

    The entry's ``slug``, ``source_path`` (``wiki/sources/<slug>.md``),
    ``ingested`` and ``ingested_at`` fields are updated, together with the
    manifest-level ``updated_at``. One timestamp is used for both time fields.

    Args:
        rel_path: Key of the file inside ``manifest["files"]``.
        actual_slug: Slug that was actually used for the wiki source page.

    Returns:
        True when the entry was updated and the manifest written; False when
        the manifest is missing or unreadable, has no entry for ``rel_path``,
        or could not be written.
    """
    manifest_file = Path(__file__).parent / "manifest.json"

    try:
        manifest = json.loads(manifest_file.read_text(encoding="utf-8"))
    except FileNotFoundError:
        # No manifest yet: nothing to update.
        return False
    except (OSError, ValueError) as e:
        # ValueError covers both invalid JSON and invalid UTF-8.
        print(f"Error updating manifest: {e}")
        return False

    files = manifest.get("files") if isinstance(manifest, dict) else None
    entry = files.get(rel_path) if isinstance(files, dict) else None
    if not isinstance(entry, dict):
        # Unknown file: leave the manifest untouched rather than rewriting it.
        return False

    now = datetime.now(timezone.utc).isoformat()
    entry["slug"] = actual_slug
    entry["source_path"] = f"wiki/sources/{actual_slug}.md"
    entry["ingested"] = True
    entry["ingested_at"] = now
    manifest["updated_at"] = now

    # Write to a sibling temp file and rename over the target, so an
    # interrupted run cannot leave a truncated manifest behind.
    tmp_file = manifest_file.with_name(manifest_file.name + ".tmp")
    try:
        tmp_file.write_text(
            json.dumps(manifest, ensure_ascii=False, indent=2),
            encoding="utf-8",
        )
        tmp_file.replace(manifest_file)
    except OSError as e:
        print(f"Error updating manifest: {e}")
        return False
    return True
|
|
||||||
|
|
||||||
|
|
||||||
def start_tmux_session() -> bool:
    """Make sure the detached ``wiki-ingest`` TMUX session exists.

    If ``tmux has-session`` reports the session, nothing is started.
    Otherwise a detached session is created whose command changes into
    ``REPO_ROOT`` and launches ``claude --permission-mode bypassPermissions``.

    Returns:
        True once the session exists (already present or newly created).

    Raises:
        FileNotFoundError: if the ``tmux`` executable cannot be found.
        subprocess.CalledProcessError: if ``tmux new-session`` fails.
    """
    import shlex

    session = "wiki-ingest"

    # has-session exits 0 only when the session is already there.
    probe = subprocess.run(
        ["tmux", "has-session", "-t", session],
        capture_output=True,
    )
    if probe.returncode == 0:
        print(f" TMUX session '{session}' already exists")
        return True

    # The command string is interpreted by a shell, so the repository path is
    # quoted: a path with spaces or shell metacharacters would otherwise
    # break the `cd` (or be interpreted as extra shell syntax).
    launch = (
        f"cd {shlex.quote(str(REPO_ROOT))} "
        "&& claude --permission-mode bypassPermissions"
    )
    subprocess.run(
        ["tmux", "new-session", "-d", "-s", session, launch],
        check=True,
    )
    print(f" Created TMUX session '{session}'")
    return True
|
|
||||||
|
|
||||||
|
|
||||||
def send_to_tmux(command: str) -> bool:
    """Type ``command`` into the ``wiki-ingest`` TMUX session and press Enter.

    The text is sent literally (``send-keys -l``) and the Enter key is sent
    as a separate key press. Appending ``"\\n"`` to the text instead sends a
    line-feed character, which full-screen programs may treat as "insert a
    newline" rather than "submit" (NOTE(review): confirm against the Claude
    Code prompt). ``--`` keeps a command that starts with ``-`` from being
    parsed as a tmux option.

    Args:
        command: Text to type into the session. An empty string only
            presses Enter.

    Returns:
        True when tmux accepted the keys.

    Raises:
        FileNotFoundError: if the ``tmux`` executable cannot be found.
        subprocess.CalledProcessError: if ``tmux send-keys`` fails, e.g.
            because the session does not exist.
    """
    session = "wiki-ingest"

    if command:
        subprocess.run(
            ["tmux", "send-keys", "-t", session, "-l", "--", command],
            check=True,
        )
    subprocess.run(
        ["tmux", "send-keys", "-t", session, "Enter"],
        check=True,
    )
    return True
|
|
||||||
|
|
||||||
|
|
||||||
def wait_for_completion(seconds: int = 120) -> bool:
    """Block for a fixed time to give Claude Code a chance to finish.

    This is a plain sleep; it does not inspect the TMUX session.

    Args:
        seconds: How long to wait. Negative values are treated as 0, since
            ``time.sleep`` raises ``ValueError`` for them.

    Returns:
        Always True.
    """
    import time

    delay = max(0, seconds)
    # Flush so the message is visible before the long sleep even when stdout
    # is a pipe (block-buffered) rather than a terminal.
    print(f" Waiting {delay}s for Claude Code to complete...", flush=True)
    time.sleep(delay)
    return True
|
|
||||||
|
|
||||||
|
|
||||||
def cleanup_tmux() -> bool:
    """Kill the ``wiki-ingest`` TMUX session, if there is one.

    Cleanup is best-effort: a failing ``tmux kill-session`` (typically
    because the session does not exist) is reported but not treated as an
    error.

    Returns:
        Always True.

    Raises:
        FileNotFoundError: if the ``tmux`` executable cannot be found.
    """
    session = "wiki-ingest"

    result = subprocess.run(
        ["tmux", "kill-session", "-t", session],
        capture_output=True,
    )
    # Only claim the session was killed when tmux actually reported success.
    if result.returncode == 0:
        print(f" Killed TMUX session '{session}'")
    else:
        print(f" TMUX session '{session}' was not killed (tmux exit code {result.returncode})")
    return True
|
|
||||||
|
|
||||||
|
|
||||||
def find_orphan_entity_concept(manifest: dict) -> tuple[list, list]:
|
def find_orphan_entity_concept(manifest: dict) -> tuple[list, list]:
|
||||||
"""检测未被任何 source page 引用的 entity 和 concept"""
|
"""检测未被任何 source page 引用的 entity 和 concept"""
|
||||||
# 从所有 source 内容中提取 [[wikilinks]]
|
|
||||||
import re
|
import re
|
||||||
wikilink_pattern = re.compile(r"\[\[([^\]]+)\]\]")
|
wikilink_pattern = re.compile(r"\[\[([^\]]+)\]\]")
|
||||||
|
|
||||||
@@ -245,11 +143,9 @@ def find_orphan_entity_concept(manifest: dict) -> tuple[list, list]:
|
|||||||
elif name.startswith("concepts/"):
|
elif name.startswith("concepts/"):
|
||||||
referenced_concepts.add(Path(name).stem)
|
referenced_concepts.add(Path(name).stem)
|
||||||
elif "/" not in name:
|
elif "/" not in name:
|
||||||
# 裸 wikilink,可能是 entity 或 concept
|
|
||||||
referenced_entities.add(name)
|
referenced_entities.add(name)
|
||||||
referenced_concepts.add(name)
|
referenced_concepts.add(name)
|
||||||
|
|
||||||
# 检查 entity 目录
|
|
||||||
orphan_entities = []
|
orphan_entities = []
|
||||||
entities_dir = WIKI_DIR / "entities"
|
entities_dir = WIKI_DIR / "entities"
|
||||||
if entities_dir.exists():
|
if entities_dir.exists():
|
||||||
@@ -257,7 +153,6 @@ def find_orphan_entity_concept(manifest: dict) -> tuple[list, list]:
|
|||||||
if f.stem not in referenced_entities:
|
if f.stem not in referenced_entities:
|
||||||
orphan_entities.append(f.name)
|
orphan_entities.append(f.name)
|
||||||
|
|
||||||
# 检查 concept 目录
|
|
||||||
orphan_concepts = []
|
orphan_concepts = []
|
||||||
concepts_dir = WIKI_DIR / "concepts"
|
concepts_dir = WIKI_DIR / "concepts"
|
||||||
if concepts_dir.exists():
|
if concepts_dir.exists():
|
||||||
@@ -275,7 +170,6 @@ def check_changes(manifest: dict, raw_files: dict) -> dict:
|
|||||||
changes = {"new": [], "updated": [], "deleted": [], "unchanged": []}
|
changes = {"new": [], "updated": [], "deleted": [], "unchanged": []}
|
||||||
manifest_files = manifest.get("files", {})
|
manifest_files = manifest.get("files", {})
|
||||||
|
|
||||||
# 遍历当前 raw 文件
|
|
||||||
for rel_path, info in raw_files.items():
|
for rel_path, info in raw_files.items():
|
||||||
if rel_path not in manifest_files:
|
if rel_path not in manifest_files:
|
||||||
changes["new"].append({"rel_path": rel_path, **info})
|
changes["new"].append({"rel_path": rel_path, **info})
|
||||||
@@ -288,7 +182,6 @@ def check_changes(manifest: dict, raw_files: dict) -> dict:
|
|||||||
else:
|
else:
|
||||||
changes["unchanged"].append(rel_path)
|
changes["unchanged"].append(rel_path)
|
||||||
|
|
||||||
# 遍历 manifest,找已删除的
|
|
||||||
for rel_path in manifest_files:
|
for rel_path in manifest_files:
|
||||||
abs_path = REPO_ROOT / rel_path
|
abs_path = REPO_ROOT / rel_path
|
||||||
if not abs_path.exists():
|
if not abs_path.exists():
|
||||||
@@ -308,15 +201,12 @@ def run_sync(dry_run: bool = False, verbose: bool = False, json_mode: bool = Fal
|
|||||||
print(f" Wiki: {WIKI_DIR}\n")
|
print(f" Wiki: {WIKI_DIR}\n")
|
||||||
print(f" Mode: {'DRY-RUN (preview only)' if dry_run else 'LIVE SYNC'}")
|
print(f" Mode: {'DRY-RUN (preview only)' if dry_run else 'LIVE SYNC'}")
|
||||||
|
|
||||||
# Step 1: load manifest
|
|
||||||
manifest = load_manifest()
|
manifest = load_manifest()
|
||||||
log("manifest.json loaded", "info")
|
log("manifest.json loaded", "info")
|
||||||
|
|
||||||
# Step 2: scan raw/
|
|
||||||
raw_files = scan_raw()
|
raw_files = scan_raw()
|
||||||
log(f"raw/ scan: {len(raw_files)} .md files found", "info")
|
log(f"raw/ scan: {len(raw_files)} .md files found", "info")
|
||||||
|
|
||||||
# Step 3: check changes
|
|
||||||
changes = check_changes(manifest, raw_files)
|
changes = check_changes(manifest, raw_files)
|
||||||
total_changes = len(changes["new"]) + len(changes["updated"]) + len(changes["deleted"])
|
total_changes = len(changes["new"]) + len(changes["updated"]) + len(changes["deleted"])
|
||||||
|
|
||||||
@@ -324,7 +214,6 @@ def run_sync(dry_run: bool = False, verbose: bool = False, json_mode: bool = Fal
|
|||||||
log("No changes detected — wiki is up to date.", "success")
|
log("No changes detected — wiki is up to date.", "success")
|
||||||
return
|
return
|
||||||
|
|
||||||
# ─── Report ───
|
|
||||||
if not json_mode:
|
if not json_mode:
|
||||||
print(f"\n{bold('--- Changes ---')}")
|
print(f"\n{bold('--- Changes ---')}")
|
||||||
print(f" {green('+')} New: {len(changes['new'])}")
|
print(f" {green('+')} New: {len(changes['new'])}")
|
||||||
@@ -336,12 +225,9 @@ def run_sync(dry_run: bool = False, verbose: bool = False, json_mode: bool = Fal
|
|||||||
if not json_mode:
|
if not json_mode:
|
||||||
print(f"\n {bold('New Files:')}")
|
print(f"\n {bold('New Files:')}")
|
||||||
for f in changes["new"]:
|
for f in changes["new"]:
|
||||||
|
slug = build_slug_from_path(f["rel_path"])
|
||||||
if json_mode:
|
if json_mode:
|
||||||
print(json.dumps({
|
print(json.dumps({"event": "new_detected", "rel_path": f["rel_path"], "slug": slug}))
|
||||||
"event": "new_detected",
|
|
||||||
"rel_path": f["rel_path"],
|
|
||||||
"slug": build_slug_from_path(f["rel_path"]),
|
|
||||||
}))
|
|
||||||
else:
|
else:
|
||||||
log(f"{green('[+')} {f['rel_path']}", "normal")
|
log(f"{green('[+')} {f['rel_path']}", "normal")
|
||||||
|
|
||||||
@@ -349,12 +235,9 @@ def run_sync(dry_run: bool = False, verbose: bool = False, json_mode: bool = Fal
|
|||||||
if not json_mode:
|
if not json_mode:
|
||||||
print(f"\n {bold('Updated Files:')}")
|
print(f"\n {bold('Updated Files:')}")
|
||||||
for f in changes["updated"]:
|
for f in changes["updated"]:
|
||||||
|
slug = manifest["files"].get(f["rel_path"], {}).get("slug") or build_slug_from_path(f["rel_path"])
|
||||||
if json_mode:
|
if json_mode:
|
||||||
print(json.dumps({
|
print(json.dumps({"event": "updated_detected", "rel_path": f["rel_path"], "slug": slug}))
|
||||||
"event": "updated_detected",
|
|
||||||
"rel_path": f["rel_path"],
|
|
||||||
"slug": manifest["files"].get(f["rel_path"], {}).get("slug") or build_slug_from_path(f["rel_path"]),
|
|
||||||
}))
|
|
||||||
else:
|
else:
|
||||||
log(f"{yellow('[~]')} {f['rel_path']} (hash changed)", "normal")
|
log(f"{yellow('[~]')} {f['rel_path']} (hash changed)", "normal")
|
||||||
|
|
||||||
@@ -363,10 +246,7 @@ def run_sync(dry_run: bool = False, verbose: bool = False, json_mode: bool = Fal
|
|||||||
print(f"\n {bold('Deleted Files:')}")
|
print(f"\n {bold('Deleted Files:')}")
|
||||||
for f in changes["deleted"]:
|
for f in changes["deleted"]:
|
||||||
if json_mode:
|
if json_mode:
|
||||||
print(json.dumps({
|
print(json.dumps({"event": "deleted_detected", "rel_path": f["rel_path"]}))
|
||||||
"event": "deleted_detected",
|
|
||||||
"rel_path": f["rel_path"],
|
|
||||||
}))
|
|
||||||
else:
|
else:
|
||||||
log(f"{red('[-]')} {f['rel_path']}", "normal")
|
log(f"{red('[-]')} {f['rel_path']}", "normal")
|
||||||
|
|
||||||
@@ -375,32 +255,24 @@ def run_sync(dry_run: bool = False, verbose: bool = False, json_mode: bool = Fal
|
|||||||
return
|
return
|
||||||
|
|
||||||
# ─── Apply Sync ───
|
# ─── Apply Sync ───
|
||||||
# 注意:call_ingest 现在返回 needs_manual_tmux,提示 Hermes 通过 TMUX 手动执行
|
|
||||||
# 这里只做 manifest 记录,不实际调用 Claude Code
|
|
||||||
if not json_mode:
|
if not json_mode:
|
||||||
print(f"\n{bold('--- Applying Sync ---')}")
|
print(f"\n{bold('--- Applying Sync ---')}")
|
||||||
print(" Note: Ingest execution requires manual TMUX workflow")
|
|
||||||
print(" Use llm-wiki-sync skill for actual ingestion")
|
|
||||||
print()
|
|
||||||
|
|
||||||
updated_manifest = manifest.copy()
|
updated_manifest = manifest.copy()
|
||||||
updated_manifest["files"] = manifest.get("files", {}).copy()
|
updated_manifest["files"] = manifest.get("files", {}).copy()
|
||||||
|
|
||||||
# 标记新增和更新的文件为待处理(由 Hermes 手动执行)
|
|
||||||
pending_files = []
|
pending_files = []
|
||||||
|
|
||||||
# ① 新增 → 标记待处理
|
# ① 新增 → 加入 manifest
|
||||||
for f in changes["new"]:
|
for f in changes["new"]:
|
||||||
rel_path = f["rel_path"]
|
rel_path = f["rel_path"]
|
||||||
abs_path = f["abs_path"]
|
|
||||||
slug = build_slug_from_path(rel_path)
|
slug = build_slug_from_path(rel_path)
|
||||||
|
|
||||||
if json_mode:
|
if json_mode:
|
||||||
print(json.dumps({"event": "pending", "rel_path": rel_path, "slug": slug, "action": "new"}))
|
print(json.dumps({"event": "pending", "rel_path": rel_path, "slug": slug, "action": "new"}))
|
||||||
|
|
||||||
pending_files.append({"rel_path": rel_path, "abs_path": abs_path, "slug": slug, "action": "new"})
|
pending_files.append({"rel_path": rel_path, "abs_path": f["abs_path"], "slug": slug, "action": "new"})
|
||||||
|
|
||||||
# 新文件加入 manifest,标记为待摄入
|
|
||||||
updated_manifest["files"][rel_path] = {
|
updated_manifest["files"][rel_path] = {
|
||||||
"hash": f["hash"],
|
"hash": f["hash"],
|
||||||
"modified": f.get("modified"),
|
"modified": f.get("modified"),
|
||||||
@@ -410,38 +282,43 @@ def run_sync(dry_run: bool = False, verbose: bool = False, json_mode: bool = Fal
|
|||||||
"ingested_at": None,
|
"ingested_at": None,
|
||||||
}
|
}
|
||||||
|
|
||||||
# ② 修改 → 标记待处理
|
# ② 修改 → 更新 manifest
|
||||||
for f in changes["updated"]:
|
for f in changes["updated"]:
|
||||||
rel_path = f["rel_path"]
|
rel_path = f["rel_path"]
|
||||||
abs_path = f["abs_path"]
|
old_entry = manifest["files"].get(rel_path, {})
|
||||||
old_slug = manifest["files"].get(rel_path, {}).get("slug") or build_slug_from_path(rel_path)
|
slug = old_entry.get("slug") or build_slug_from_path(rel_path)
|
||||||
|
|
||||||
if json_mode:
|
if json_mode:
|
||||||
print(json.dumps({"event": "pending", "rel_path": rel_path, "slug": old_slug, "action": "updated"}))
|
print(json.dumps({"event": "pending", "rel_path": rel_path, "slug": slug, "action": "updated"}))
|
||||||
|
|
||||||
pending_files.append({"rel_path": rel_path, "abs_path": abs_path, "slug": old_slug, "action": "updated"})
|
pending_files.append({"rel_path": rel_path, "abs_path": f["abs_path"], "slug": slug, "action": "updated"})
|
||||||
|
|
||||||
# ③ 删除 → 保留 wiki 内容,仅从 manifest 移除
|
updated_manifest["files"][rel_path] = {
|
||||||
|
**old_entry,
|
||||||
|
"hash": f["hash"],
|
||||||
|
"modified": f.get("modified"),
|
||||||
|
"ingested": False,
|
||||||
|
"ingested_at": None,
|
||||||
|
}
|
||||||
|
|
||||||
|
# ③ 删除 → 保留 wiki 内容,从 manifest 移除
|
||||||
deleted_files = []
|
deleted_files = []
|
||||||
for f in changes["deleted"]:
|
for f in changes["deleted"]:
|
||||||
rel_path = f["rel_path"]
|
rel_path = f["rel_path"]
|
||||||
source_path = f.get("source_path")
|
source_path = f.get("source_path")
|
||||||
log(f"Deleted: {rel_path}", "warn")
|
if not json_mode:
|
||||||
if source_path:
|
log(f"Deleted: {rel_path}", "warn")
|
||||||
sp = WIKI_DIR / source_path
|
if source_path:
|
||||||
log(f" Wiki source kept: {sp}", "warn")
|
sp = WIKI_DIR / source_path
|
||||||
|
log(f" Wiki source kept: {sp}", "warn")
|
||||||
if rel_path in updated_manifest["files"]:
|
if rel_path in updated_manifest["files"]:
|
||||||
del updated_manifest["files"][rel_path]
|
del updated_manifest["files"][rel_path]
|
||||||
deleted_files.append(rel_path)
|
deleted_files.append(rel_path)
|
||||||
|
|
||||||
# Step 4: Save manifest(不保存 pending files,等待实际执行后更新)
|
# 保存 manifest
|
||||||
# 注意:不再在 --sync 时自动更新 manifest,而是由 Hermes 手动更新
|
|
||||||
# 这样可以追踪哪些文件需要处理
|
|
||||||
# 只保存已删除的文件变更
|
|
||||||
save_manifest(updated_manifest)
|
save_manifest(updated_manifest)
|
||||||
log(f"\nmanifest.json updated ({len(updated_manifest['files'])} entries)", "success")
|
log(f"\nmanifest.json updated ({len(updated_manifest['files'])} entries)", "success")
|
||||||
|
|
||||||
# 输出 pending files 列表,供 Hermes 手动执行
|
|
||||||
if json_mode:
|
if json_mode:
|
||||||
print(json.dumps({
|
print(json.dumps({
|
||||||
"event": "sync_complete",
|
"event": "sync_complete",
|
||||||
@@ -454,7 +331,7 @@ def run_sync(dry_run: bool = False, verbose: bool = False, json_mode: bool = Fal
|
|||||||
"deleted_files": deleted_files,
|
"deleted_files": deleted_files,
|
||||||
}))
|
}))
|
||||||
|
|
||||||
# Step 5: Orphan detection
|
# Orphan detection
|
||||||
orphan_entities, orphan_concepts = find_orphan_entity_concept(updated_manifest)
|
orphan_entities, orphan_concepts = find_orphan_entity_concept(updated_manifest)
|
||||||
if not json_mode:
|
if not json_mode:
|
||||||
if orphan_entities or orphan_concepts:
|
if orphan_entities or orphan_concepts:
|
||||||
@@ -471,44 +348,43 @@ def run_sync(dry_run: bool = False, verbose: bool = False, json_mode: bool = Fal
|
|||||||
else:
|
else:
|
||||||
log("No orphan entity/concept detected.", "success")
|
log("No orphan entity/concept detected.", "success")
|
||||||
|
|
||||||
# Step 6: JSON summary(已在上面输出 pending_files)
|
|
||||||
# 旧的 summary 输出已删除,因为不再自动执行 ingest
|
|
||||||
print(f"\n{bold('Done.')}")
|
print(f"\n{bold('Done.')}")
|
||||||
print(f"\n Pending files for manual TMUX ingestion: {len(pending_files)}")
|
print(f"\n Pending files for ingestion: {len(pending_files)}")
|
||||||
print(" Use llm-wiki-sync skill to process these files.")
|
|
||||||
|
|
||||||
|
|
||||||
def run_check():
|
def run_check():
|
||||||
"""只预览变化,不执行"""
|
"""只预览变化,不执行(输出为标准 Markdown)"""
|
||||||
manifest = load_manifest()
|
manifest = load_manifest()
|
||||||
raw_files = scan_raw()
|
raw_files = scan_raw()
|
||||||
changes = check_changes(manifest, raw_files)
|
changes = check_changes(manifest, raw_files)
|
||||||
total = len(changes["new"]) + len(changes["updated"]) + len(changes["deleted"])
|
total = len(changes["new"]) + len(changes["updated"]) + len(changes["deleted"])
|
||||||
|
|
||||||
print(f"\n{bold('=== Wiki Sync Check')} (preview mode)\n")
|
# Markdown header and summary
|
||||||
print(f" Raw files: {len(raw_files)}")
|
print("# Wiki Sync Check\n")
|
||||||
print(f" Manifest entries: {len(manifest.get('files', {}))}")
|
print(f"- Raw files: {len(raw_files)}")
|
||||||
print(f" {green('+')} New: {len(changes['new'])}")
|
print(f"- Manifest entries: {len(manifest.get('files', {}))}")
|
||||||
print(f" {yellow('~')} Updated: {len(changes['updated'])}")
|
print(f"- New: {len(changes['new'])}")
|
||||||
print(f" {red('-')} Deleted: {len(changes['deleted'])}")
|
print(f"- Updated: {len(changes['updated'])}")
|
||||||
|
print(f"- Deleted: {len(changes['deleted'])}\n")
|
||||||
|
|
||||||
if total > 0:
|
if total > 0:
|
||||||
if changes["new"]:
|
if changes["new"]:
|
||||||
print(f"\n {bold('New Files:')}")
|
print("## New Files")
|
||||||
for f in changes["new"]:
|
for f in changes["new"]:
|
||||||
print(f" {green('[+]')} {f['rel_path']}")
|
print(f"- {f['rel_path']}")
|
||||||
|
print()
|
||||||
if changes["updated"]:
|
if changes["updated"]:
|
||||||
print(f"\n {bold('Updated Files:')}")
|
print("## Updated Files")
|
||||||
for f in changes["updated"]:
|
for f in changes["updated"]:
|
||||||
print(f" {yellow('[~]')} {f['rel_path']} (was {f['old_hash']}, now {f['hash']})")
|
print(f"- {f['rel_path']} (was {f['old_hash']}, now {f['hash']})")
|
||||||
|
print()
|
||||||
if changes["deleted"]:
|
if changes["deleted"]:
|
||||||
print(f"\n {bold('Deleted Files:')}")
|
print("## Deleted Files")
|
||||||
for f in changes["deleted"]:
|
for f in changes["deleted"]:
|
||||||
print(f" {red('[-]')} {f['rel_path']}")
|
print(f"- {f['rel_path']}")
|
||||||
|
print()
|
||||||
else:
|
else:
|
||||||
print(f"\n {green('No changes — wiki is in sync.')}")
|
print("No changes — wiki is in sync.\n")
|
||||||
|
|
||||||
print()
|
|
||||||
|
|
||||||
|
|
||||||
def run_rebuild():
|
def run_rebuild():
|
||||||
@@ -526,7 +402,6 @@ def run_rebuild():
|
|||||||
]
|
]
|
||||||
|
|
||||||
files = manifest.get("files", {})
|
files = manifest.get("files", {})
|
||||||
# 按 modified 时间倒序
|
|
||||||
sorted_files = sorted(files.items(), key=lambda x: x[1].get("modified", ""), reverse=True)
|
sorted_files = sorted(files.items(), key=lambda x: x[1].get("modified", ""), reverse=True)
|
||||||
|
|
||||||
for rel_path, info in sorted_files:
|
for rel_path, info in sorted_files:
|
||||||
@@ -544,7 +419,6 @@ def run_rebuild():
|
|||||||
index_file.write_text("".join(index_lines), encoding="utf-8")
|
index_file.write_text("".join(index_lines), encoding="utf-8")
|
||||||
print(f" {green('✓')} index.md rebuilt with {len(sorted_files)} sources")
|
print(f" {green('✓')} index.md rebuilt with {len(sorted_files)} sources")
|
||||||
|
|
||||||
# Orphan report
|
|
||||||
orphan_entities, orphan_concepts = find_orphan_entity_concept(manifest)
|
orphan_entities, orphan_concepts = find_orphan_entity_concept(manifest)
|
||||||
if orphan_entities:
|
if orphan_entities:
|
||||||
print(f" {dim('?')} Orphan entities: {len(orphan_entities)}")
|
print(f" {dim('?')} Orphan entities: {len(orphan_entities)}")
|
||||||
@@ -557,8 +431,6 @@ def run_rebuild():
|
|||||||
# ─── CLI 入口 ───────────────────────────────────────────────
|
# ─── CLI 入口 ───────────────────────────────────────────────
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
import argparse
|
|
||||||
|
|
||||||
parser = argparse.ArgumentParser(
|
parser = argparse.ArgumentParser(
|
||||||
description="Wiki ↔ Raw 三向同步工具",
|
description="Wiki ↔ Raw 三向同步工具",
|
||||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||||
@@ -598,20 +470,86 @@ if __name__ == "__main__":
|
|||||||
action="store_true",
|
action="store_true",
|
||||||
help="JSON 行输出模式(供调用方解析)",
|
help="JSON 行输出模式(供调用方解析)",
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--limit",
|
||||||
|
type=int,
|
||||||
|
default=None,
|
||||||
|
help="与 --pending --json 配合使用:限制返回的条目数(1 返回单条,>1 返回多条)。默认不限制(返回全部)。",
|
||||||
|
)
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
if args.rebuild:
|
if args.rebuild:
|
||||||
run_rebuild()
|
run_rebuild()
|
||||||
elif args.pending:
|
elif args.pending:
|
||||||
# 列出待摄取的文件
|
|
||||||
manifest = load_manifest()
|
manifest = load_manifest()
|
||||||
pending = [k for k, v in manifest["files"].items() if not v.get("ingested")]
|
pending = [(k, v) for k, v in manifest["files"].items() if not v.get("ingested")]
|
||||||
print(f"=== Pending Ingest Files ({len(pending)}) ===\n")
|
if args.json:
|
||||||
for i, path in enumerate(pending, 1):
|
total = len(pending)
|
||||||
print(f"{i:3}. {path}")
|
# 未指定 limit -> 返回全部(files 列表)
|
||||||
|
if args.limit is None:
|
||||||
|
payload = {
|
||||||
|
"event": "pending_list",
|
||||||
|
"count": total,
|
||||||
|
"files": [
|
||||||
|
{
|
||||||
|
"rel_path": k,
|
||||||
|
"slug": v.get("slug", build_slug_from_path(k)),
|
||||||
|
"source_path": v.get("source_path"),
|
||||||
|
"modified": v.get("modified"),
|
||||||
|
"hash": v.get("hash"),
|
||||||
|
}
|
||||||
|
for k, v in pending
|
||||||
|
],
|
||||||
|
}
|
||||||
|
elif args.limit <= 0:
|
||||||
|
payload = {"event": "pending_list", "count": total, "files": []}
|
||||||
|
elif args.limit == 1:
|
||||||
|
first = pending[0] if pending else (None, None)
|
||||||
|
if first[0] is None:
|
||||||
|
payload = {"event": "pending_list", "count": 0, "file": None}
|
||||||
|
else:
|
||||||
|
k, v = first
|
||||||
|
payload = {
|
||||||
|
"event": "pending_list",
|
||||||
|
"count": total,
|
||||||
|
"file": {
|
||||||
|
"rel_path": k,
|
||||||
|
"slug": v.get("slug", build_slug_from_path(k)),
|
||||||
|
"source_path": v.get("source_path"),
|
||||||
|
"modified": v.get("modified"),
|
||||||
|
"hash": v.get("hash"),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
# 返回前 N 条 as files array
|
||||||
|
n = min(args.limit, total)
|
||||||
|
payload = {
|
||||||
|
"event": "pending_list",
|
||||||
|
"count": total,
|
||||||
|
"files": [
|
||||||
|
{
|
||||||
|
"rel_path": k,
|
||||||
|
"slug": v.get("slug", build_slug_from_path(k)),
|
||||||
|
"source_path": v.get("source_path"),
|
||||||
|
"modified": v.get("modified"),
|
||||||
|
"hash": v.get("hash"),
|
||||||
|
}
|
||||||
|
for k, v in pending[:n]
|
||||||
|
],
|
||||||
|
}
|
||||||
|
print(json.dumps(payload))
|
||||||
|
else:
|
||||||
|
# 控制台输出也支持 --limit
|
||||||
|
total = len(pending)
|
||||||
|
n = total if args.limit is None else max(0, args.limit)
|
||||||
|
print(f"=== Pending Ingest Files ({total}) ===\n")
|
||||||
|
if n == 0:
|
||||||
|
print(" (no items to show)")
|
||||||
|
else:
|
||||||
|
for i, (path, info) in enumerate(pending[:n], 1):
|
||||||
|
print(f"{i:3}. {path}")
|
||||||
elif args.reset_failed:
|
elif args.reset_failed:
|
||||||
# 重置失败的 ingest 状态
|
|
||||||
manifest = load_manifest()
|
manifest = load_manifest()
|
||||||
reset_count = 0
|
reset_count = 0
|
||||||
for k, v in manifest["files"].items():
|
for k, v in manifest["files"].items():
|
||||||
@@ -634,5 +572,5 @@ if __name__ == "__main__":
|
|||||||
print("\n示例:")
|
print("\n示例:")
|
||||||
print(" python tools/sync.py --check # 预览变化")
|
print(" python tools/sync.py --check # 预览变化")
|
||||||
print(" python tools/sync.py --sync # 执行同步")
|
print(" python tools/sync.py --sync # 执行同步")
|
||||||
print(" python tools/sync.py --sync -v # 详细模式")
|
print(" python tools/sync.py --sync -v # 详细模式")
|
||||||
print(" python tools/sync.py --rebuild # 重建 index")
|
print(" python tools/sync.py --rebuild # 重建 index")
|
||||||
|
|||||||
Reference in New Issue
Block a user