feat: add CronJob and CronJobRun models with bulk upsert API and admin

- CronJob: maps jobs.json (schedule, payload, delivery, state fields)
- CronJobRun: stores runs/*.jsonl per-job execution history with usage/tokens
- cron_bulk_upsert service: atomic upsert using update_or_create for idempotency
- POST /api/cron/bulk_upsert/ endpoint
- Django Admin: CronJobAdmin with CronJobRunInline, CronJobRunAdmin
- sync_sessions.py --cron mode: SSH read jobs.json + runs/*.jsonl, incremental sync
- 0003_cronjob_cronjobrun migration
This commit is contained in:
ishenwei
2026-04-13 13:34:24 +08:00
parent 1a1985a270
commit 74458b4fab
8 changed files with 511 additions and 18 deletions

View File

@@ -6,17 +6,26 @@ Scans local agent sessions directories, parses JSONL files,
and pushes structured JSON to the Django API.
Usage:
# Session sync (existing)
python sync_sessions.py --remote-url http://macmini:8000/api/sessions/bulk_upsert/
# Cron job sync (new)
python sync_sessions.py --cron \
--remote-url http://macmini:8000/api/cron/bulk_upsert/ \
--cron-ssh macmini \
--cron-jobs-path /Users/weishen/openclaw/cron/jobs.json \
--cron-runs-path /Users/weishen/openclaw/cron/runs/
Cron:
0 2 * * * cd /path/to/scripts && python sync_sessions.py --remote-url <url>
0 3 * * * cd /path/to/scripts && python sync_sessions.py --cron --remote-url <cron-url> ...
"""
import argparse
import json
import os
import shlex
import subprocess
import sys
import time
import urllib.error
import urllib.request
from pathlib import Path
@@ -28,6 +37,37 @@ from pathlib import Path
SESSIONS_DIR_NAME = "sessions"
STATE_FILE = ".sync_state"
DELETED_SUFFIX = ".deleted."
CRON_STATE_FILE = ".sync_cron_state"
# ─────────────────────────────────────────────────────────────────
# SSH Helper
# ─────────────────────────────────────────────────────────────────
def ssh_read_file(host, remote_path):
    """Read a remote file via SSH and return its content as a string.

    Args:
        host: SSH host (or alias) handed to the ``ssh`` client.
        remote_path: Path of the file on the remote host. It is passed to the
            remote shell as a single literal word, so spaces and shell
            metacharacters are safe; a leading ``~`` is NOT expanded.

    Returns:
        The remote file's content as text.

    Raises:
        RuntimeError: If the remote ``cat`` exits with a non-zero status.
        subprocess.TimeoutExpired: If the command takes longer than 60 seconds.
    """
    # ssh joins its arguments into one string that the remote shell re-parses,
    # so the path has to be quoted for that shell, not for the local one.
    remote_cmd = f"cat {shlex.quote(str(remote_path))}"
    result = subprocess.run(
        ["ssh", host, remote_cmd],
        capture_output=True,
        text=True,
        timeout=60,
    )
    if result.returncode != 0:
        raise RuntimeError(f"SSH read failed for {host}:{remote_path}: {result.stderr}")
    return result.stdout
def ssh_list_files(host, remote_dir, pattern="*.jsonl"):
    """List remote files matching a glob pattern via SSH.

    Args:
        host: SSH host (or alias) handed to the ``ssh`` client.
        remote_dir: Remote directory to look in. Quoted for the remote shell,
            so it may contain spaces; a leading ``~`` is NOT expanded.
        pattern: Shell glob appended to ``remote_dir``. It is intentionally
            left unquoted so the remote shell expands it.

    Returns:
        The non-empty lines printed by the remote ``ls``. Because ``ls`` is
        given ``<remote_dir>/<pattern>``, entries normally include the
        directory prefix. An empty list is returned when ``ls`` exits with a
        non-zero status (for example when nothing matches).

    Raises:
        subprocess.TimeoutExpired: If the command takes longer than 30 seconds.
    """
    # Quote only the directory part; quoting the pattern would disable globbing.
    remote_cmd = f"ls {shlex.quote(str(remote_dir))}/{pattern}"
    result = subprocess.run(
        ["ssh", host, remote_cmd],
        capture_output=True,
        text=True,
        timeout=30,
    )
    if result.returncode != 0:
        return []
    return [f.strip() for f in result.stdout.strip().split("\n") if f.strip()]
# ─────────────────────────────────────────────────────────────────
@@ -109,7 +149,7 @@ def get_new_files(root_path):
# ─────────────────────────────────────────────────────────────────
# JSONL Parser
# JSONL Parser (Session mode)
# ─────────────────────────────────────────────────────────────────
def parse_jsonl(file_path):
@@ -118,12 +158,9 @@ def parse_jsonl(file_path):
messages = []
tool_calls = []
# State tracking for model/thinking changes
current_model_provider = ""
current_model_id = ""
current_thinking_level = ""
# Tool results lookup by tool_call_id
tool_results = {}
events = []
@@ -141,7 +178,6 @@ def parse_jsonl(file_path):
if not events:
return sessions, messages, tool_calls
# First pass: extract session metadata
session_event = None
for event in events:
event_type = event.get("type", "")
@@ -157,14 +193,12 @@ def parse_jsonl(file_path):
session_cwd = session_event.get("cwd", "")
session_version = events[-1].get("version", 0) if events else 0
# Determine start and end time from all events
timestamps = []
for event in events:
ts = event.get("timestamp", "")
if ts:
timestamps.append(ts)
# Second pass: process events
message_seq = 0
total_tokens = 0
total_cost = 0.0
@@ -183,14 +217,12 @@ def parse_jsonl(file_path):
current_thinking_level = event.get("thinkingLevel", "")
elif event_type == "message":
# Nested structure: message data is inside "message" object
message_obj = event.get("message", {})
role = message_obj.get("role", "")
msg_id = event.get("id", "")
parent_id = event.get("parentId", "")
msg_timestamp = event.get("timestamp", "")
# Extract text content (skip thinking) from nested content
content_items = message_obj.get("content", [])
text_parts = []
tc_list = []
@@ -200,7 +232,6 @@ def parse_jsonl(file_path):
text_parts.append(item.get("text", ""))
elif item.get("type") == "toolCall":
tc_list.append(item)
# Skip thinking types
content_text = "\n".join(text_parts)
@@ -258,7 +289,6 @@ def parse_jsonl(file_path):
messages.append(msg_data)
message_seq += 1
# Extract tool calls from assistant messages
tc_seq = 0
for tc in tc_list:
tool_call_data = {
@@ -269,7 +299,6 @@ def parse_jsonl(file_path):
"arguments": tc.get("arguments", {}),
"seq": tc_seq,
}
# Enrich with tool result if available
tr = tool_results.get(tool_call_data["tool_call_id"], {})
tool_call_data["result_text"] = tr.get("result_text", "")
tool_call_data["is_error"] = tr.get("is_error", False)
@@ -279,7 +308,6 @@ def parse_jsonl(file_path):
tool_call_count += 1
tc_seq += 1
# Build session record
start_time = timestamps[0] if timestamps else session_timestamp
end_time = timestamps[-1] if timestamps else session_timestamp
@@ -306,6 +334,127 @@ def parse_jsonl(file_path):
return sessions, messages, tool_calls
# ─────────────────────────────────────────────────────────────────
# Cron Sync Mode
# ─────────────────────────────────────────────────────────────────
def get_cron_state(state_file_path):
    """Load the cron sync state mapping ``{run_file: mtime}``.

    Args:
        state_file_path: Path of the JSON state file.

    Returns:
        The stored mapping, or an empty dict when the file is missing,
        unreadable, not valid JSON, or does not contain a JSON object.
    """
    try:
        with open(Path(state_file_path), encoding="utf-8") as f:
            state = json.load(f)
    except (OSError, ValueError):
        # OSError covers a missing/unreadable file; ValueError covers both
        # json.JSONDecodeError and undecodable bytes.
        return {}
    # Callers use dict methods on the result, so reject lists, strings, etc.
    return state if isinstance(state, dict) else {}
def save_cron_state(state_file_path, state):
    """Serialize *state* as JSON into the file at *state_file_path*.

    Any existing file content is overwritten.
    """
    target = Path(state_file_path)
    with target.open("w") as handle:
        json.dump(state, handle)
def sync_cron_jobs(args):
    """Sync cron job definitions and run history from a remote host to the API.

    Reads ``jobs.json`` and the ``*.jsonl`` run files over SSH, keeps only run
    files whose stem is a known job id and whose mtime is newer than the one
    recorded in the local state file, then pushes the jobs together with every
    parsed run record in a single ``push_to_api`` call.

    The state file is only advanced for files that were read successfully and
    only after the push succeeded, so failed data is retried on the next run.
    SSH, parse and push failures are reported on stdout.

    Args:
        args: Parsed CLI namespace; reads ``cron_ssh``, ``cron_jobs_path``,
            ``cron_runs_path`` and ``remote_url``.
    """
    ssh_host = args.cron_ssh
    jobs_path = args.cron_jobs_path
    runs_path = args.cron_runs_path.rstrip("/")

    print(f"Fetching jobs.json from {ssh_host}:{jobs_path}...")
    try:
        jobs_raw = ssh_read_file(ssh_host, jobs_path)
        jobs_data = json.loads(jobs_raw)
    except Exception as e:
        print(f"ERROR reading jobs.json: {e}")
        return

    jobs = jobs_data.get("jobs", [])
    job_ids = {j["id"] for j in jobs}
    print(f" Found {len(jobs)} jobs")

    # Find runs files, filter to only those matching known job IDs
    print(f"Scanning runs directory {ssh_host}:{runs_path}/...")
    all_run_files = ssh_list_files(ssh_host, runs_path, "*.jsonl")
    run_files = [f for f in all_run_files if Path(f).stem in job_ids]
    print(f" Found {len(run_files)} run files matching known job IDs")

    # Load sync state
    state_file = Path.home() / CRON_STATE_FILE
    prev_state = get_cron_state(str(state_file))

    new_runs = []
    new_state = {}
    for run_file in run_files:
        # `ls <dir>/<glob>` prints entries that already include the directory,
        # so rebuild the path from the base name; this works for bare names
        # and for full paths alike.
        remote_full = f"{runs_path}/{Path(run_file).name}"
        quoted = shlex.quote(remote_full)
        # GNU stat uses `-c %Y`, BSD/macOS stat uses `-f %m`; try both.
        stat_cmd = f"stat -c %Y {quoted} 2>/dev/null || stat -f %m {quoted}"
        try:
            result = subprocess.run(
                ["ssh", ssh_host, stat_cmd],
                capture_output=True, text=True, timeout=10,
            )
        except subprocess.TimeoutExpired:
            # Not recorded in new_state, so the file is retried next time.
            continue
        if result.returncode != 0:
            continue
        try:
            mtime = int(result.stdout.strip())
        except ValueError:
            continue

        old_mtime = prev_state.get(remote_full, 0)
        if mtime > old_mtime:
            new_runs.append(remote_full)
        new_state[remote_full] = mtime

    if not new_runs:
        # NOTE(review): job definitions are not pushed on this path, so a
        # change in jobs.json alone is not synced until a run file changes.
        print("No new or modified run files found.")
        save_cron_state(str(state_file), new_state)
        return

    print(f"Parsing {len(new_runs)} new/modified run file(s)...")
    all_runs = []
    for run_file in new_runs:
        print(f" Parsing: {run_file}")
        try:
            raw = ssh_read_file(ssh_host, run_file)
        except Exception as e:
            print(f" ERROR reading {run_file}: {e}")
            # Forget the new mtime so this file is picked up again next time.
            new_state.pop(run_file, None)
            continue
        for line in raw.strip().split("\n"):
            line = line.strip()
            if not line:
                continue
            try:
                all_runs.append(json.loads(line))
            except json.JSONDecodeError:
                # Skip malformed lines; the rest of the file is still synced.
                continue

    if not all_runs:
        print("No run records parsed.")
        save_cron_state(str(state_file), new_state)
        return

    payload = {
        "source_node": os.environ.get("SOURCE_NODE", ssh_host),
        "jobs": jobs,
        "runs": all_runs,
    }

    print(f"Pushing {len(jobs)} jobs and {len(all_runs)} runs to {args.remote_url}...")
    try:
        result = push_to_api(args.remote_url, payload)
    except Exception as e:
        # State is left untouched so the same runs are pushed again next time.
        print(f" FAILED to push cron data: {e}")
        return

    # Only record the new mtimes once the server has accepted the data.
    save_cron_state(str(state_file), new_state)
    print(f" OK: jobs_upserted={result.get('jobs_upserted', 0)}, "
          f"runs_upserted={result.get('runs_upserted', 0)}")
# ─────────────────────────────────────────────────────────────────
# HTTP Client
# ─────────────────────────────────────────────────────────────────
@@ -333,13 +482,12 @@ def push_to_api(remote_url, payload):
raise
# ─────────────────────────────────────────────────────────────────
# Main
# ─────────────────────────────────────────────────────────────────
def main():
parser = argparse.ArgumentParser(description="Sync OpenClaw sessions to Django API")
parser = argparse.ArgumentParser(description="Sync OpenClaw sessions or cron data to Django API")
parser.add_argument(
"--remote-url",
required=True,
@@ -350,8 +498,34 @@ def main():
default=".",
help="Root path containing agents/ directory (default: current dir)",
)
parser.add_argument(
"--cron",
action="store_true",
help="Sync cron jobs and runs instead of session files",
)
parser.add_argument(
"--cron-ssh",
default="macmini",
help="SSH host for cron data (default: macmini)",
)
parser.add_argument(
"--cron-jobs-path",
default="/Users/weishen/openclaw/cron/jobs.json",
help="Remote path to jobs.json",
)
parser.add_argument(
"--cron-runs-path",
default="/Users/weishen/openclaw/cron/runs/",
help="Remote directory containing run JSONL files",
)
args = parser.parse_args()
if args.cron:
sync_cron_jobs(args)
return
# Original session sync mode
new_files = get_new_files(args.root_path)
if not new_files:
print("No new or modified session files found.")
@@ -363,7 +537,6 @@ def main():
total_messages = 0
total_tool_calls = 0
# Group by agent_name (batch per agent)
agent_batches = {}
for agent_name, jsonl_path in new_files:
agent_batches.setdefault(agent_name, []).append(jsonl_path)

View File

@@ -2,7 +2,7 @@ from django.contrib import admin
from django.http import HttpResponse
from openclaw.export import export_daily_markdown
from openclaw.models import Session, Message, ToolCall
from openclaw.models import Session, Message, ToolCall, CronJob, CronJobRun
class MessageInline(admin.TabularInline):
@@ -73,3 +73,43 @@ class ToolCallAdmin(admin.ModelAdmin):
list_display = ("tool_name", "tool_call_id", "session", "is_error", "duration_ms")
list_filter = ("tool_name", "is_error", "exit_code")
ordering = ("-created_at",)
class CronJobRunInline(admin.TabularInline):
    """Read-only tabular inline listing a cron job's runs, newest first."""

    model = CronJobRun
    ordering = ("-run_at",)
    extra = 0
    max_num = 10
    fields = (
        "run_at",
        "status",
        "duration_ms",
        "delivery_status",
        "model",
        "tokens_total",
    )
    # Every displayed column is read-only.
    readonly_fields = fields

    def has_delete_permission(self, request, obj=None):
        """Runs cannot be deleted from the inline."""
        return False

    def has_add_permission(self, request, obj=None):
        """Runs cannot be added from the inline."""
        return False
@admin.register(CronJob)
class CronJobAdmin(admin.ModelAdmin):
    """Admin for cron job definitions, with the job's runs shown inline."""

    inlines = [CronJobRunInline]
    ordering = ("-updated_at",)
    search_fields = ("job_id", "name")
    readonly_fields = ("job_id", "created_at", "updated_at")
    list_filter = (
        "enabled",
        "agent_id",
        "state_last_status",
        "state_last_run_status",
    )
    list_display = (
        "job_id",
        "name",
        "agent_id",
        "enabled",
        "schedule_expr",
        "state_last_run_status",
        "state_last_run_at",
    )
@admin.register(CronJobRun)
class CronJobRunAdmin(admin.ModelAdmin):
    """Admin for individual cron job run records, newest first."""

    ordering = ("-run_at",)
    search_fields = ("session_id", "session_key", "summary")
    readonly_fields = ("job", "run_at", "session_id", "created_at")
    list_filter = ("status", "delivery_status", "run_at", "model")
    list_display = (
        "job",
        "run_at",
        "status",
        "duration_ms",
        "delivery_status",
        "model",
        "tokens_total",
    )

View File

@@ -0,0 +1,105 @@
from datetime import datetime, timezone
from django.db import transaction
from openclaw.models import CronJob, CronJobRun
def _ms_to_dt(ms):
if not ms:
return None
return datetime.fromtimestamp(ms / 1000, tz=timezone.utc)
def _str_or_empty(data, key):
    """Return ``data[key]``, mapping a missing key or JSON ``null`` to ``""``."""
    return data.get(key) or ""


class CronBulkUpsertService:
    """Persist cron jobs and their run history from a sync payload."""

    @staticmethod
    @transaction.atomic
    def upsert(payload):
        """Create or update the jobs and runs contained in *payload*.

        The whole operation runs in one transaction. Jobs are keyed by their
        ``id``; runs are keyed by ``(job, run_at, session_id)`` and are only
        stored when their ``jobId`` refers to a job in the same payload.
        Entries that are not objects, and jobs without an ``id``, are skipped.
        JSON ``null`` values for text and counter fields are stored as the
        field's empty default instead of failing the transaction.

        Args:
            payload: Dict with optional ``jobs`` and ``runs`` lists.

        Returns:
            Dict with ``jobs_upserted`` and ``runs_upserted``: the number of
            job and run rows created or updated.
        """
        jobs_data = payload.get("jobs") or []
        runs_data = payload.get("runs") or []

        jobs_upserted = 0
        runs_upserted = 0
        # job id -> CronJob instance, used to attach runs without re-querying.
        job_lookup = {}

        # Upsert jobs
        for job_data in jobs_data:
            if not isinstance(job_data, dict):
                continue
            job_id = job_data.get("id")
            if not job_id:
                continue

            sched = job_data.get("schedule") or {}
            pay = job_data.get("payload") or {}
            deliv = job_data.get("delivery") or {}
            state = job_data.get("state") or {}

            defaults = {
                "agent_id": _str_or_empty(job_data, "agentId"),
                "name": _str_or_empty(job_data, "name"),
                "session_key": _str_or_empty(job_data, "sessionKey"),
                "enabled": job_data.get("enabled", True),
                "schedule_kind": _str_or_empty(sched, "kind"),
                "schedule_expr": _str_or_empty(sched, "expr"),
                "schedule_tz": _str_or_empty(sched, "tz"),
                "payload_kind": _str_or_empty(pay, "kind"),
                "payload_message": _str_or_empty(pay, "message"),
                "delivery_mode": _str_or_empty(deliv, "mode"),
                "delivery_channel": _str_or_empty(deliv, "channel"),
                "delivery_to": _str_or_empty(deliv, "to"),
                "state_next_run_at": _ms_to_dt(state.get("nextRunAtMs")),
                "state_last_run_at": _ms_to_dt(state.get("lastRunAtMs")),
                "state_last_run_status": _str_or_empty(state, "lastRunStatus"),
                "state_last_duration_ms": state.get("lastDurationMs"),
                "state_last_status": _str_or_empty(state, "lastStatus"),
                "state_consecutive_errors": state.get("consecutiveErrors") or 0,
                "created_at_ms": job_data.get("createdAtMs"),
                "updated_at_ms": job_data.get("updatedAtMs"),
                "raw_json": job_data,
            }

            job, _ = CronJob.objects.update_or_create(
                job_id=job_id,
                defaults=defaults,
            )
            job_lookup[job_id] = job
            # Count updates as well as inserts: both are upserts.
            jobs_upserted += 1

        # Upsert runs
        for run_data in runs_data:
            if not isinstance(run_data, dict):
                continue
            job = job_lookup.get(run_data.get("jobId"))
            if job is None:
                continue

            usage = run_data.get("usage") or {}
            run_at = _ms_to_dt(run_data.get("runAtMs"))
            session_id = _str_or_empty(run_data, "sessionId")

            defaults = {
                # `ts` is a required column; fall back to the run time (or 0)
                # rather than aborting the whole batch on a missing value.
                "ts": run_data.get("ts") or run_data.get("runAtMs") or 0,
                "action": _str_or_empty(run_data, "action"),
                "status": _str_or_empty(run_data, "status"),
                "error": _str_or_empty(run_data, "error"),
                "summary": _str_or_empty(run_data, "summary"),
                "delivered": run_data.get("delivered", False),
                "delivery_status": _str_or_empty(run_data, "deliveryStatus"),
                "session_key": _str_or_empty(run_data, "sessionKey"),
                "duration_ms": run_data.get("durationMs"),
                "next_run_at": _ms_to_dt(run_data.get("nextRunAtMs")),
                "model": _str_or_empty(run_data, "model"),
                "provider": _str_or_empty(run_data, "provider"),
                "tokens_input": usage.get("input_tokens") or 0,
                "tokens_output": usage.get("output_tokens") or 0,
                "tokens_total": usage.get("total_tokens") or 0,
                "raw_json": run_data,
            }

            # The lookup kwargs are also used when creating the row, so
            # run_at/session_id do not need to be repeated in `defaults`.
            CronJobRun.objects.update_or_create(
                job=job,
                run_at=run_at,
                session_id=session_id,
                defaults=defaults,
            )
            runs_upserted += 1

        return {
            "jobs_upserted": jobs_upserted,
            "runs_upserted": runs_upserted,
        }

View File

@@ -0,0 +1,19 @@
import json
from django.http import JsonResponse
from django.views.decorators.csrf import csrf_exempt
from django.views.decorators.http import require_http_methods
from openclaw.cron_service import CronBulkUpsertService
@csrf_exempt
@require_http_methods(["POST"])
def cron_bulk_upsert(request):
    """Accept a JSON sync payload and upsert its cron jobs and runs.

    Responds with ``{"status": "ok", ...counts}`` on success and with HTTP 400
    when the body is not valid JSON or is not a JSON object.

    NOTE(review): this view is CSRF-exempt and performs no authentication
    itself; confirm that access is restricted elsewhere.
    """
    try:
        payload = json.loads(request.body)
    except ValueError:
        # ValueError covers json.JSONDecodeError and undecodable request bytes.
        return JsonResponse({"error": "Invalid JSON"}, status=400)

    # The service calls dict methods on the payload; reject lists/scalars here
    # instead of letting them surface as a server error.
    if not isinstance(payload, dict):
        return JsonResponse({"error": "Payload must be a JSON object"}, status=400)

    result = CronBulkUpsertService.upsert(payload)
    return JsonResponse({"status": "ok", **result})

View File

@@ -0,0 +1,79 @@
import django.db.models.deletion
from django.db import migrations, models
class Migration(migrations.Migration):
    # Creates the cron_jobs and cron_job_runs tables and the uniqueness
    # constraint on cron runs.

    dependencies = [
        ('openclaw', '0002_add_hypertables'),
    ]

    operations = [
        # Cron job definitions; job_id is the primary key.
        migrations.CreateModel(
            name='CronJob',
            fields=[
                ('job_id', models.CharField(max_length=64, primary_key=True, serialize=False)),
                ('agent_id', models.CharField(blank=True, default='', max_length=128)),
                ('name', models.CharField(blank=True, default='', max_length=256)),
                ('session_key', models.CharField(blank=True, default='', max_length=256)),
                ('enabled', models.BooleanField(default=True)),
                ('schedule_kind', models.CharField(blank=True, default='', max_length=32)),
                ('schedule_expr', models.CharField(blank=True, default='', max_length=64)),
                ('schedule_tz', models.CharField(blank=True, default='', max_length=64)),
                ('payload_kind', models.CharField(blank=True, default='', max_length=32)),
                ('payload_message', models.TextField(blank=True, default='')),
                ('delivery_mode', models.CharField(blank=True, default='', max_length=32)),
                ('delivery_channel', models.CharField(blank=True, default='', max_length=32)),
                ('delivery_to', models.CharField(blank=True, default='', max_length=128)),
                ('state_next_run_at', models.DateTimeField(blank=True, null=True)),
                ('state_last_run_at', models.DateTimeField(blank=True, null=True)),
                ('state_last_run_status', models.CharField(blank=True, default='', max_length=16)),
                ('state_last_duration_ms', models.IntegerField(blank=True, null=True)),
                ('state_last_status', models.CharField(blank=True, default='', max_length=16)),
                ('state_consecutive_errors', models.IntegerField(default=0)),
                ('created_at_ms', models.BigIntegerField(blank=True, null=True)),
                ('updated_at_ms', models.BigIntegerField(blank=True, null=True)),
                ('raw_json', models.JSONField(blank=True, default=dict)),
                ('created_at', models.DateTimeField(auto_now_add=True)),
                ('updated_at', models.DateTimeField(auto_now=True)),
            ],
            options={
                'db_table': 'cron_jobs',
                'ordering': ['-updated_at'],
            },
        ),
        # Per-run records; each row references its CronJob and is deleted
        # together with it (CASCADE).
        migrations.CreateModel(
            name='CronJobRun',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('ts', models.BigIntegerField(help_text='event timestamp ms')),
                ('run_at', models.DateTimeField(blank=True, null=True)),
                ('action', models.CharField(blank=True, default='', max_length=32)),
                ('status', models.CharField(blank=True, default='', max_length=16)),
                ('error', models.TextField(blank=True, default='')),
                ('summary', models.TextField(blank=True, default='')),
                ('delivered', models.BooleanField(default=False)),
                ('delivery_status', models.CharField(blank=True, default='', max_length=32)),
                ('session_id', models.CharField(blank=True, default='', max_length=64)),
                ('session_key', models.CharField(blank=True, default='', max_length=256)),
                ('duration_ms', models.IntegerField(blank=True, null=True)),
                ('next_run_at', models.DateTimeField(blank=True, null=True)),
                ('model', models.CharField(blank=True, default='', max_length=128)),
                ('provider', models.CharField(blank=True, default='', max_length=64)),
                ('tokens_input', models.IntegerField(default=0)),
                ('tokens_output', models.IntegerField(default=0)),
                ('tokens_total', models.IntegerField(default=0)),
                ('raw_json', models.JSONField(blank=True, default=dict)),
                ('created_at', models.DateTimeField(auto_now_add=True)),
                ('job', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='runs', to='openclaw.cronjob')),
            ],
            options={
                'db_table': 'cron_job_runs',
                'ordering': ['-run_at'],
            },
        ),
        # A run is identified by its job, run time and session id.
        migrations.AddConstraint(
            model_name='cronjobrun',
            constraint=models.UniqueConstraint(fields=('job', 'run_at', 'session_id'), name='cron_run_unique'),
        ),
    ]

View File

@@ -97,3 +97,77 @@ class ToolCall(models.Model):
def __str__(self):
return f"ToolCall({self.tool_name} {self.tool_call_id})"
class CronJob(models.Model):
    """A cron job definition, stored in the ``cron_jobs`` table.

    The schedule, payload, delivery and state sections of a job are flattened
    into prefixed columns; the complete source object is kept in ``raw_json``.
    """

    # Identity
    job_id = models.CharField(max_length=64, primary_key=True)
    agent_id = models.CharField(max_length=128, blank=True, default="")
    name = models.CharField(max_length=256, blank=True, default="")
    session_key = models.CharField(max_length=256, blank=True, default="")
    enabled = models.BooleanField(default=True)

    # Schedule
    schedule_kind = models.CharField(max_length=32, blank=True, default="")
    schedule_expr = models.CharField(max_length=64, blank=True, default="")
    schedule_tz = models.CharField(max_length=64, blank=True, default="")

    # Payload
    payload_kind = models.CharField(max_length=32, blank=True, default="")
    payload_message = models.TextField(blank=True, default="")

    # Delivery
    delivery_mode = models.CharField(max_length=32, blank=True, default="")
    delivery_channel = models.CharField(max_length=32, blank=True, default="")
    delivery_to = models.CharField(max_length=128, blank=True, default="")

    # Scheduler state
    state_next_run_at = models.DateTimeField(null=True, blank=True)
    state_last_run_at = models.DateTimeField(null=True, blank=True)
    state_last_run_status = models.CharField(max_length=16, blank=True, default="")
    state_last_duration_ms = models.IntegerField(null=True, blank=True)
    state_last_status = models.CharField(max_length=16, blank=True, default="")
    state_consecutive_errors = models.IntegerField(default=0)

    # Source-side timestamps in epoch milliseconds, stored as received.
    created_at_ms = models.BigIntegerField(null=True, blank=True)
    updated_at_ms = models.BigIntegerField(null=True, blank=True)

    # Full job object as received.
    raw_json = models.JSONField(default=dict, blank=True)

    # Row bookkeeping maintained by Django (auto_now_add / auto_now).
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        db_table = "cron_jobs"
        ordering = ["-updated_at"]

    def __str__(self):
        return f"CronJob({self.job_id} {self.name})"
class CronJobRun(models.Model):
    """One recorded execution of a cron job, stored in ``cron_job_runs``.

    A run is unique per ``(job, run_at, session_id)``. The complete source
    record is kept in ``raw_json``.
    """

    # Owning job; runs are deleted together with their job.
    job = models.ForeignKey(
        CronJob, on_delete=models.CASCADE, related_name="runs"
    )
    ts = models.BigIntegerField(help_text="event timestamp ms")
    run_at = models.DateTimeField(null=True, blank=True)
    action = models.CharField(max_length=32, blank=True, default="")
    status = models.CharField(max_length=16, blank=True, default="")
    error = models.TextField(blank=True, default="")
    summary = models.TextField(blank=True, default="")

    # Delivery outcome
    delivered = models.BooleanField(default=False)
    delivery_status = models.CharField(max_length=32, blank=True, default="")

    # Session the run executed in
    session_id = models.CharField(max_length=64, blank=True, default="")
    session_key = models.CharField(max_length=256, blank=True, default="")

    duration_ms = models.IntegerField(null=True, blank=True)
    next_run_at = models.DateTimeField(null=True, blank=True)

    # Model and token usage
    model = models.CharField(max_length=128, blank=True, default="")
    provider = models.CharField(max_length=64, blank=True, default="")
    tokens_input = models.IntegerField(default=0)
    tokens_output = models.IntegerField(default=0)
    tokens_total = models.IntegerField(default=0)

    # Full run record as received.
    raw_json = models.JSONField(default=dict, blank=True)
    created_at = models.DateTimeField(auto_now_add=True)

    class Meta:
        db_table = "cron_job_runs"
        ordering = ["-run_at"]
        constraints = [
            models.UniqueConstraint(
                fields=["job", "run_at", "session_id"],
                name="cron_run_unique",
            )
        ]

    def __str__(self):
        # `job_id` is the raw foreign-key value, which equals CronJob.job_id
        # (the target's primary key); using it avoids loading the related job
        # just to render a label.
        return f"CronJobRun({self.job_id} {self.run_at})"

View File

@@ -1,4 +1,5 @@
{% extends "admin/base.html" %}
{% load humanize %}
{% block title %}{{ title }} | OpenClaw Archive{% endblock %}

View File

@@ -1,6 +1,8 @@
from django.urls import path
from openclaw.views import sessions_bulk_upsert
from openclaw.cron_views import cron_bulk_upsert
# Bulk-upsert endpoints for the sync script. Paths are relative to wherever
# this URLconf is included (presumably under "api/" — confirm in the project
# URLconf).
urlpatterns = [
    path("sessions/bulk_upsert/", sessions_bulk_upsert, name="sessions_bulk_upsert"),
    path("cron/bulk_upsert/", cron_bulk_upsert, name="cron_bulk_upsert"),
]