feat(wiki): ingest Docker image transfer guide

- Create source page with complete documentation
- Add Docker-Image, Docker-Save, Docker-Load concept pages
- Update Docker entity with new source reference
- Update log.md entry
This commit is contained in:
2026-04-21 14:19:46 +08:00
parent 8ec59e18a8
commit a31d28a386
2 changed files with 2994 additions and 360 deletions

View File

@@ -4,13 +4,14 @@ Wiki ↔ Raw 三向同步工具
功能:
- 检测 raw/ 下文件变化(新增/修改/删除)
- 调用 Claude Code agent 进行同步(不使用 litellm)
- 维护 manifest.json 状态映射
- 检测 orphan entity/concept(仅报告，不删除)
用法:
python tools/sync.py --check 预览变化(不执行)
python tools/sync.py --sync 执行同步
python tools/sync.py --sync 执行同步(更新 manifest)
python tools/sync.py --pending 显示待处理文件列表
python tools/sync.py --json JSON 行输出(供程序消费)
python tools/sync.py --rebuild 从 manifest 重建 wiki/index(兜底)
manifest.json 格式:
@@ -29,20 +30,16 @@ manifest.json 格式:
}
"""
import os
import sys
import json
import hashlib
import subprocess
import argparse
from pathlib import Path
from datetime import datetime, timezone
# Repository root: the parent of the directory that contains this file,
# resolved to an absolute path.
REPO_ROOT = Path(__file__).parent.parent.resolve()
# Wiki content directory directly under the repository root.
WIKI_DIR = REPO_ROOT / "wiki"
# manifest.json is kept next to this script (tools/) rather than under wiki/,
# to avoid symlink path problems.
MANIFEST_FILE = Path(__file__).parent / "manifest.json"
# Location of CLAUDE.md at the repository root.
SCHEMA_FILE = REPO_ROOT / "CLAUDE.md"
# ─── 工具函数 ───────────────────────────────────────────────
@@ -127,107 +124,8 @@ def build_slug_from_path(rel_path: str) -> str:
return name or "untitled"
def call_ingest(abs_path: str, slug: str, json_mode: bool = False) -> dict:
    """Build the "manual TMUX ingest required" marker for one raw file.

    No agent is started here. The function only assembles a result dict
    that tells the caller ``/wiki-ingest`` has to be run by hand inside a
    TMUX session.

    Args:
        abs_path: Path of the raw file; echoed back unchanged.
        slug: Slug for the file; echoed back unchanged.
        json_mode: Accepted for interface compatibility; not used.

    Returns:
        A dict with the keys ``status`` (always ``"needs_manual_tmux"``),
        ``abs_path``, ``slug`` and ``message``, in that order.
    """
    marker = dict(status="needs_manual_tmux")
    marker["abs_path"] = abs_path
    marker["slug"] = slug
    marker["message"] = "需要通过 TMUX 手动执行 /wiki-ingest请使用 llm-wiki-sync 技能流程"
    return marker
def parse_slug_from_output(output: str) -> str | None:
"""从 TMUX 输出中解析 SLUG: xxx 行"""
import re
match = re.search(r"SLUG:\s*([a-zA-Z0-9_-]+)", output)
return match.group(1) if match else None
def update_manifest_with_slug(rel_path: str, actual_slug: str) -> bool:
    """Mark ``rel_path`` as ingested in the ``manifest.json`` next to this file.

    The entry's ``slug`` is set to ``actual_slug``, ``source_path`` to
    ``wiki/sources/<actual_slug>.md``, ``ingested`` to ``True``, and both the
    entry's ``ingested_at`` and the manifest's ``updated_at`` to the same
    UTC ISO-8601 timestamp. Other fields of the entry are left untouched.

    Args:
        rel_path: Key of the entry under ``manifest["files"]``.
        actual_slug: Slug to record for that entry.

    Returns:
        ``True`` when the entry was found and the manifest was rewritten.
        ``False`` when the manifest file does not exist, ``rel_path`` is not
        tracked, or the file cannot be read, parsed or written (a message is
        printed in that last case).
    """
    manifest_file = Path(__file__).parent / "manifest.json"
    if not manifest_file.exists():
        return False

    try:
        manifest = json.loads(manifest_file.read_text(encoding="utf-8"))
        entry = manifest["files"].get(rel_path)
        if entry is None:
            # Nothing to update: do not rewrite the file or claim success.
            return False

        # One timestamp for both fields so they can never disagree.
        now = datetime.now(timezone.utc).isoformat()
        entry["slug"] = actual_slug
        entry["source_path"] = f"wiki/sources/{actual_slug}.md"
        entry["ingested"] = True
        entry["ingested_at"] = now
        manifest["updated_at"] = now

        manifest_file.write_text(
            json.dumps(manifest, ensure_ascii=False, indent=2),
            encoding="utf-8",
        )
    except (OSError, ValueError, KeyError, TypeError, AttributeError) as e:
        # OSError: read/write failure. ValueError: invalid JSON or encoding.
        # KeyError/TypeError/AttributeError: manifest has an unexpected shape.
        print(f"Error updating manifest: {e}")
        return False
    return True
def start_tmux_session() -> bool:
    """Make sure the detached ``wiki-ingest`` TMUX session exists.

    If ``tmux has-session`` exits with status 0 the session is reused.
    Otherwise a new detached session is created whose start directory is
    ``REPO_ROOT`` and whose command is
    ``claude --permission-mode bypassPermissions``.

    Returns:
        ``True`` once the session exists.

    Raises:
        subprocess.CalledProcessError: If ``tmux new-session`` exits
            non-zero.
    """
    TMUX_SESSION = "wiki-ingest"

    # Probe for an existing session; output is captured only to keep it quiet.
    probe = subprocess.run(
        ["tmux", "has-session", "-t", TMUX_SESSION],
        capture_output=True,
    )
    if probe.returncode == 0:
        print(f" TMUX session '{TMUX_SESSION}' already exists")
        return True

    # Pass the working directory through tmux's -c option instead of
    # splicing it into a "cd ... &&" shell string, so a repository path
    # containing spaces or shell metacharacters cannot break the command.
    subprocess.run(
        [
            "tmux", "new-session", "-d", "-s", TMUX_SESSION,
            "-c", str(REPO_ROOT),
            "claude --permission-mode bypassPermissions",
        ],
        check=True,
    )
    print(f" Created TMUX session '{TMUX_SESSION}'")
    return True
def send_to_tmux(command: str) -> bool:
    """Type ``command`` followed by a newline into the ``wiki-ingest`` session.

    Args:
        command: Text handed to ``tmux send-keys``.

    Returns:
        ``True`` when ``tmux send-keys`` exits successfully.

    Raises:
        subprocess.CalledProcessError: If ``tmux send-keys`` exits non-zero.
    """
    TMUX_SESSION = "wiki-ingest"
    # NOTE(review): the trailing "\n" is sent as part of the text rather than
    # as a separate Enter key; confirm the program in the pane treats it as
    # "submit".
    keys = f"{command}\n"
    argv = ["tmux", "send-keys", "-t", TMUX_SESSION, keys]
    subprocess.run(argv, check=True)
    return True
def wait_for_completion(seconds: int = 120) -> bool:
    """Block for a fixed delay to give Claude Code time to finish.

    This is a plain sleep; nothing is polled or inspected.

    Args:
        seconds: How long to sleep.

    Returns:
        Always ``True``.
    """
    from time import sleep

    print(f" Waiting {seconds}s for Claude Code to complete...")
    sleep(seconds)
    return True
def cleanup_tmux() -> bool:
    """Kill the ``wiki-ingest`` TMUX session.

    The exit status of ``tmux kill-session`` is not checked, and its output
    is captured and discarded.

    Returns:
        Always ``True``.
    """
    TMUX_SESSION = "wiki-ingest"
    kill_argv = ["tmux", "kill-session", "-t", TMUX_SESSION]
    subprocess.run(kill_argv, capture_output=True)
    print(f" Killed TMUX session '{TMUX_SESSION}'")
    return True
def find_orphan_entity_concept(manifest: dict) -> tuple[list, list]:
"""检测未被任何 source page 引用的 entity 和 concept"""
# 从所有 source 内容中提取 [[wikilinks]]
import re
wikilink_pattern = re.compile(r"\[\[([^\]]+)\]\]")
@@ -245,11 +143,9 @@ def find_orphan_entity_concept(manifest: dict) -> tuple[list, list]:
elif name.startswith("concepts/"):
referenced_concepts.add(Path(name).stem)
elif "/" not in name:
# 裸 wikilink可能是 entity 或 concept
referenced_entities.add(name)
referenced_concepts.add(name)
# 检查 entity 目录
orphan_entities = []
entities_dir = WIKI_DIR / "entities"
if entities_dir.exists():
@@ -257,7 +153,6 @@ def find_orphan_entity_concept(manifest: dict) -> tuple[list, list]:
if f.stem not in referenced_entities:
orphan_entities.append(f.name)
# 检查 concept 目录
orphan_concepts = []
concepts_dir = WIKI_DIR / "concepts"
if concepts_dir.exists():
@@ -275,7 +170,6 @@ def check_changes(manifest: dict, raw_files: dict) -> dict:
changes = {"new": [], "updated": [], "deleted": [], "unchanged": []}
manifest_files = manifest.get("files", {})
# 遍历当前 raw 文件
for rel_path, info in raw_files.items():
if rel_path not in manifest_files:
changes["new"].append({"rel_path": rel_path, **info})
@@ -288,7 +182,6 @@ def check_changes(manifest: dict, raw_files: dict) -> dict:
else:
changes["unchanged"].append(rel_path)
# 遍历 manifest找已删除的
for rel_path in manifest_files:
abs_path = REPO_ROOT / rel_path
if not abs_path.exists():
@@ -308,15 +201,12 @@ def run_sync(dry_run: bool = False, verbose: bool = False, json_mode: bool = Fal
print(f" Wiki: {WIKI_DIR}\n")
print(f" Mode: {'DRY-RUN (preview only)' if dry_run else 'LIVE SYNC'}")
# Step 1: load manifest
manifest = load_manifest()
log("manifest.json loaded", "info")
# Step 2: scan raw/
raw_files = scan_raw()
log(f"raw/ scan: {len(raw_files)} .md files found", "info")
# Step 3: check changes
changes = check_changes(manifest, raw_files)
total_changes = len(changes["new"]) + len(changes["updated"]) + len(changes["deleted"])
@@ -324,7 +214,6 @@ def run_sync(dry_run: bool = False, verbose: bool = False, json_mode: bool = Fal
log("No changes detected — wiki is up to date.", "success")
return
# ─── Report ───
if not json_mode:
print(f"\n{bold('--- Changes ---')}")
print(f" {green('+')} New: {len(changes['new'])}")
@@ -336,12 +225,9 @@ def run_sync(dry_run: bool = False, verbose: bool = False, json_mode: bool = Fal
if not json_mode:
print(f"\n {bold('New Files:')}")
for f in changes["new"]:
slug = build_slug_from_path(f["rel_path"])
if json_mode:
print(json.dumps({
"event": "new_detected",
"rel_path": f["rel_path"],
"slug": build_slug_from_path(f["rel_path"]),
}))
print(json.dumps({"event": "new_detected", "rel_path": f["rel_path"], "slug": slug}))
else:
log(f"{green('[+')} {f['rel_path']}", "normal")
@@ -349,12 +235,9 @@ def run_sync(dry_run: bool = False, verbose: bool = False, json_mode: bool = Fal
if not json_mode:
print(f"\n {bold('Updated Files:')}")
for f in changes["updated"]:
slug = manifest["files"].get(f["rel_path"], {}).get("slug") or build_slug_from_path(f["rel_path"])
if json_mode:
print(json.dumps({
"event": "updated_detected",
"rel_path": f["rel_path"],
"slug": manifest["files"].get(f["rel_path"], {}).get("slug") or build_slug_from_path(f["rel_path"]),
}))
print(json.dumps({"event": "updated_detected", "rel_path": f["rel_path"], "slug": slug}))
else:
log(f"{yellow('[~]')} {f['rel_path']} (hash changed)", "normal")
@@ -363,10 +246,7 @@ def run_sync(dry_run: bool = False, verbose: bool = False, json_mode: bool = Fal
print(f"\n {bold('Deleted Files:')}")
for f in changes["deleted"]:
if json_mode:
print(json.dumps({
"event": "deleted_detected",
"rel_path": f["rel_path"],
}))
print(json.dumps({"event": "deleted_detected", "rel_path": f["rel_path"]}))
else:
log(f"{red('[-]')} {f['rel_path']}", "normal")
@@ -375,32 +255,24 @@ def run_sync(dry_run: bool = False, verbose: bool = False, json_mode: bool = Fal
return
# ─── Apply Sync ───
# 注意call_ingest 现在返回 needs_manual_tmux提示 Hermes 通过 TMUX 手动执行
# 这里只做 manifest 记录,不实际调用 Claude Code
if not json_mode:
print(f"\n{bold('--- Applying Sync ---')}")
print(" Note: Ingest execution requires manual TMUX workflow")
print(" Use llm-wiki-sync skill for actual ingestion")
print()
updated_manifest = manifest.copy()
updated_manifest["files"] = manifest.get("files", {}).copy()
# 标记新增和更新的文件为待处理(由 Hermes 手动执行)
pending_files = []
# ① 新增 → 标记待处理
# ① 新增 → 加入 manifest
for f in changes["new"]:
rel_path = f["rel_path"]
abs_path = f["abs_path"]
slug = build_slug_from_path(rel_path)
if json_mode:
print(json.dumps({"event": "pending", "rel_path": rel_path, "slug": slug, "action": "new"}))
pending_files.append({"rel_path": rel_path, "abs_path": abs_path, "slug": slug, "action": "new"})
# 新文件加入 manifest标记为待摄入
pending_files.append({"rel_path": rel_path, "abs_path": f["abs_path"], "slug": slug, "action": "new"})
updated_manifest["files"][rel_path] = {
"hash": f["hash"],
"modified": f.get("modified"),
@@ -409,39 +281,44 @@ def run_sync(dry_run: bool = False, verbose: bool = False, json_mode: bool = Fal
"ingested": False,
"ingested_at": None,
}
# ② 修改 → 标记待处理
# ② 修改 → 更新 manifest
for f in changes["updated"]:
rel_path = f["rel_path"]
abs_path = f["abs_path"]
old_slug = manifest["files"].get(rel_path, {}).get("slug") or build_slug_from_path(rel_path)
if json_mode:
print(json.dumps({"event": "pending", "rel_path": rel_path, "slug": old_slug, "action": "updated"}))
pending_files.append({"rel_path": rel_path, "abs_path": abs_path, "slug": old_slug, "action": "updated"})
old_entry = manifest["files"].get(rel_path, {})
slug = old_entry.get("slug") or build_slug_from_path(rel_path)
# ③ 删除 → 保留 wiki 内容,仅从 manifest 移除
if json_mode:
print(json.dumps({"event": "pending", "rel_path": rel_path, "slug": slug, "action": "updated"}))
pending_files.append({"rel_path": rel_path, "abs_path": f["abs_path"], "slug": slug, "action": "updated"})
updated_manifest["files"][rel_path] = {
**old_entry,
"hash": f["hash"],
"modified": f.get("modified"),
"ingested": False,
"ingested_at": None,
}
# ③ 删除 → 保留 wiki 内容,从 manifest 移除
deleted_files = []
for f in changes["deleted"]:
rel_path = f["rel_path"]
source_path = f.get("source_path")
log(f"Deleted: {rel_path}", "warn")
if source_path:
sp = WIKI_DIR / source_path
log(f" Wiki source kept: {sp}", "warn")
if not json_mode:
log(f"Deleted: {rel_path}", "warn")
if source_path:
sp = WIKI_DIR / source_path
log(f" Wiki source kept: {sp}", "warn")
if rel_path in updated_manifest["files"]:
del updated_manifest["files"][rel_path]
deleted_files.append(rel_path)
# Step 4: Save manifest不保存 pending files等待实际执行后更新
# 注意:不再在 --sync 时自动更新 manifest而是由 Hermes 手动更新
# 这样可以追踪哪些文件需要处理
# 只保存已删除的文件变更
# 保存 manifest
save_manifest(updated_manifest)
log(f"\nmanifest.json updated ({len(updated_manifest['files'])} entries)", "success")
# 输出 pending files 列表,供 Hermes 手动执行
if json_mode:
print(json.dumps({
"event": "sync_complete",
@@ -454,7 +331,7 @@ def run_sync(dry_run: bool = False, verbose: bool = False, json_mode: bool = Fal
"deleted_files": deleted_files,
}))
# Step 5: Orphan detection
# Orphan detection
orphan_entities, orphan_concepts = find_orphan_entity_concept(updated_manifest)
if not json_mode:
if orphan_entities or orphan_concepts:
@@ -471,44 +348,43 @@ def run_sync(dry_run: bool = False, verbose: bool = False, json_mode: bool = Fal
else:
log("No orphan entity/concept detected.", "success")
# Step 6: JSON summary已在上面输出 pending_files
# 旧的 summary 输出已删除,因为不再自动执行 ingest
print(f"\n{bold('Done.')}")
print(f"\n Pending files for manual TMUX ingestion: {len(pending_files)}")
print(" Use llm-wiki-sync skill to process these files.")
print(f"\n Pending files for ingestion: {len(pending_files)}")
def run_check():
    """Preview pending changes as a Markdown report on stdout.

    Calls ``load_manifest``, ``scan_raw`` and ``check_changes``, then prints
    a title, a bullet list of counts, and one ``##`` section for each
    non-empty change category (new, updated, deleted). When there are no
    changes a single "in sync" line is printed instead. This function only
    prints; it does not save the manifest.
    """
    manifest = load_manifest()
    raw_files = scan_raw()
    changes = check_changes(manifest, raw_files)
    total = len(changes["new"]) + len(changes["updated"]) + len(changes["deleted"])

    # Markdown header and summary
    print("# Wiki Sync Check\n")
    print(f"- Raw files: {len(raw_files)}")
    print(f"- Manifest entries: {len(manifest.get('files', {}))}")
    print(f"- New: {len(changes['new'])}")
    print(f"- Updated: {len(changes['updated'])}")
    print(f"- Deleted: {len(changes['deleted'])}\n")

    if total > 0:
        if changes["new"]:
            print("## New Files")
            for f in changes["new"]:
                print(f"- {f['rel_path']}")
            print()

        if changes["updated"]:
            print("## Updated Files")
            for f in changes["updated"]:
                print(f"- {f['rel_path']} (was {f['old_hash']}, now {f['hash']})")
            print()

        if changes["deleted"]:
            print("## Deleted Files")
            for f in changes["deleted"]:
                print(f"- {f['rel_path']}")
            print()
    else:
        print("No changes — wiki is in sync.\n")
def run_rebuild():
@@ -526,7 +402,6 @@ def run_rebuild():
]
files = manifest.get("files", {})
# 按 modified 时间倒序
sorted_files = sorted(files.items(), key=lambda x: x[1].get("modified", ""), reverse=True)
for rel_path, info in sorted_files:
@@ -544,7 +419,6 @@ def run_rebuild():
index_file.write_text("".join(index_lines), encoding="utf-8")
print(f" {green('')} index.md rebuilt with {len(sorted_files)} sources")
# Orphan report
orphan_entities, orphan_concepts = find_orphan_entity_concept(manifest)
if orphan_entities:
print(f" {dim('?')} Orphan entities: {len(orphan_entities)}")
@@ -557,8 +431,6 @@ def run_rebuild():
# ─── CLI 入口 ───────────────────────────────────────────────
if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser(
description="Wiki ↔ Raw 三向同步工具",
formatter_class=argparse.RawDescriptionHelpFormatter,
@@ -598,20 +470,86 @@ if __name__ == "__main__":
action="store_true",
help="JSON 行输出模式(供调用方解析)",
)
parser.add_argument(
"--limit",
type=int,
default=None,
help="与 --pending --json 配合使用限制返回的条目数1 返回单条,>1 返回多条)。默认不限制(返回全部)。",
)
args = parser.parse_args()
if args.rebuild:
run_rebuild()
elif args.pending:
# 列出待摄取的文件
manifest = load_manifest()
pending = [k for k, v in manifest["files"].items() if not v.get("ingested")]
print(f"=== Pending Ingest Files ({len(pending)}) ===\n")
for i, path in enumerate(pending, 1):
print(f"{i:3}. {path}")
pending = [(k, v) for k, v in manifest["files"].items() if not v.get("ingested")]
if args.json:
total = len(pending)
# 未指定 limit -> 返回全部files 列表)
if args.limit is None:
payload = {
"event": "pending_list",
"count": total,
"files": [
{
"rel_path": k,
"slug": v.get("slug", build_slug_from_path(k)),
"source_path": v.get("source_path"),
"modified": v.get("modified"),
"hash": v.get("hash"),
}
for k, v in pending
],
}
elif args.limit <= 0:
payload = {"event": "pending_list", "count": total, "files": []}
elif args.limit == 1:
first = pending[0] if pending else (None, None)
if first[0] is None:
payload = {"event": "pending_list", "count": 0, "file": None}
else:
k, v = first
payload = {
"event": "pending_list",
"count": total,
"file": {
"rel_path": k,
"slug": v.get("slug", build_slug_from_path(k)),
"source_path": v.get("source_path"),
"modified": v.get("modified"),
"hash": v.get("hash"),
},
}
else:
# 返回前 N 条 as files array
n = min(args.limit, total)
payload = {
"event": "pending_list",
"count": total,
"files": [
{
"rel_path": k,
"slug": v.get("slug", build_slug_from_path(k)),
"source_path": v.get("source_path"),
"modified": v.get("modified"),
"hash": v.get("hash"),
}
for k, v in pending[:n]
],
}
print(json.dumps(payload))
else:
# 控制台输出也支持 --limit
total = len(pending)
n = total if args.limit is None else max(0, args.limit)
print(f"=== Pending Ingest Files ({total}) ===\n")
if n == 0:
print(" (no items to show)")
else:
for i, (path, info) in enumerate(pending[:n], 1):
print(f"{i:3}. {path}")
elif args.reset_failed:
# 重置失败的 ingest 状态
manifest = load_manifest()
reset_count = 0
for k, v in manifest["files"].items():
@@ -634,5 +572,5 @@ if __name__ == "__main__":
print("\n示例:")
print(" python tools/sync.py --check # 预览变化")
print(" python tools/sync.py --sync # 执行同步")
print(" python tools/sync.py --sync -v # 详细模式")
print(" python tools/sync.py --sync -v # 详细模式")
print(" python tools/sync.py --rebuild # 重建 index")