Merge pull request #15 from watsonctl/feat/batch-ingest
feat(ingest): support batch processing of multiple files or directories
This commit is contained in:
@@ -190,6 +190,40 @@ Return ONLY a valid JSON object with these fields (no markdown fences, no prose
|
||||
|
||||
def _collect_markdown_paths(args):
    """Expand command-line arguments into a de-duplicated list of markdown files.

    Each argument is interpreted as the first of the following that applies:

    * an existing file with the ``.md`` suffix -- kept as-is;
    * an existing directory -- searched recursively for ``*.md`` files;
    * anything else -- treated as a glob pattern (``**`` recurses), keeping
      only matches that are files with the ``.md`` suffix.

    Arguments that match nothing (missing paths, non-markdown files) are
    silently skipped; the caller decides what an empty result means.

    Args:
        args: Iterable of path or glob-pattern strings.

    Returns:
        A list of ``Path`` objects in first-seen order, with no two entries
        resolving to the same absolute path.
    """
    # Local import: the file's top-level import block is not touched here.
    import glob

    candidates = []
    for arg in args:
        p = Path(arg)
        if p.is_file() and p.suffix == ".md":
            candidates.append(p)
        elif p.is_dir():
            candidates.extend(f for f in p.rglob("*.md") if f.is_file())
        else:
            # Not an existing markdown file or directory: try it as a glob.
            for match in glob.glob(arg, recursive=True):
                g_p = Path(match)
                if g_p.is_file() and g_p.suffix == ".md":
                    candidates.append(g_p)

    # Deduplicate while preserving order. Comparison uses the resolved
    # absolute path, but the path as originally spelled is what is returned.
    unique_paths = []
    seen = set()
    for p in candidates:
        abs_p = p.resolve()
        if abs_p not in seen:
            seen.add(abs_p)
            unique_paths.append(p)
    return unique_paths


if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("Usage: python tools/ingest.py <path-to-source> [path2 ...] [dir1 ...]")
        sys.exit(1)

    unique_paths = _collect_markdown_paths(sys.argv[1:])

    if not unique_paths:
        print("Error: no markdown files found to ingest.")
        sys.exit(1)

    if len(unique_paths) > 1:
        print(f"Batch mode: found {len(unique_paths)} files to ingest.")

    # Files are ingested one at a time, in the order they were discovered.
    for p in unique_paths:
        ingest(str(p))
|
||||
|
||||
Reference in New Issue
Block a user