diff --git a/README.md b/README.md index cf027b8..2137c61 100644 --- a/README.md +++ b/README.md @@ -19,13 +19,14 @@ Browse and export Claude Code chat history — Web GUI and CLI. - **Smooth transitions** — staggered card/message animations, crossfade content swaps - **Scroll-to-top button** in bottom-right corner - **Per-model badges** in session header -- **Bulk export** — download all sessions as a zip +- **Bulk export** — download all sessions, incremental updates, or latest-day slice as a zip; if there is nothing to export, the API returns **422** with JSON body `{"error": "Nothing to export", "since": "<mode>"}` (the `since` field echoes your request: `"all"`, `"last"`, or `"incremental"`) instead of an empty zip ### CLI Export - Standalone script to export all sessions to Markdown with YAML frontmatter - Rich Markdown: token usage, tool calls, thinking blocks, model info, timestamps -- `--since last` flag for incremental export (only new/updated sessions) -- `--project` flag to export a specific project +- `--since last` — export every session that overlaps the **latest UTC calendar day** present in your history (default zip name: `claude-code-export-last-MM-DD-YYYY-MM-DD.zip` — the first `MM-DD` is that latest UTC day, and `YYYY-MM-DD` is the export date) +- `--since incremental` — export only sessions **new or changed since the last export** (file mtime + saved state) +- `--project` flag to export a subset of projects ## Quick Start @@ -60,7 +61,7 @@ python app.py --base-dir /path/to/claude/projects ```bash # Activate venv first (see above), then: -# List all projects (shows directory names you can use with --project) +# List all projects (first column is a friendly name; --project accepts that or the dir slug) python scripts/export.py list # Export all sessions as zip @@ -69,14 +70,17 @@ python scripts/export.py # Export to specific directory, no zip python scripts/export.py --out ./exports --no-zip -# Incremental export (only new sessions since last run) +# 
Latest calendar day (UTC): all sessions active on that day; zip pattern claude-code-export-last-MM-DD-YYYY-MM-DD.zip (e.g. claude-code-export-last-04-06-2026-05-08.zip — 04-06 = latest UTC day, 2026-05-08 = export date) python scripts/export.py --since last -# Export specific project only (substring match on directory name) +# Incremental (only new/updated sessions since last run, using export state) +python scripts/export.py --since incremental + +# Export specific project only (substring on friendly name from list and/or dir name under ~/.claude/projects/) python scripts/export.py --project boost-capy ``` -The `--project` flag matches against the directory names under `~/.claude/projects/`. These are path-based names like `F--boost-capy` or `d--harbor-forge`. You can use any substring — for example `boost-capy` will match `F--boost-capy`. Run `python scripts/export.py list` to see all available project names. +The `--project` flag matches a **case-insensitive substring** of either the **Project** column from `list` (derived from the session working directory) or the internal directory name under `~/.claude/projects/` (for example `F--boost-capy` or `d--harbor-forge`). A substring like `boost-capy` matches `F--boost-capy`; you can also paste the friendly name shown in `list`. 
## Data Source diff --git a/api/export_api.py b/api/export_api.py index 475ba96..a03653e 100644 --- a/api/export_api.py +++ b/api/export_api.py @@ -8,120 +8,245 @@ from flask import Blueprint, current_app, jsonify, request, send_file -from utils.session_path import get_claude_projects_dir, list_projects, list_sessions +from utils.export_state_store import ( + EXPORT_STATE_FILE, + atomic_write_export_state, + export_state_lock, + load_export_state_from_disk, +) +from utils.session_path import ( + get_claude_projects_dir, + list_projects, + list_sessions, +) from utils.jsonl_parser import parse_session from utils.session_stats import compute_stats from utils.md_exporter import session_to_markdown from utils.json_exporter import session_to_json from utils.exclusion_rules import is_session_excluded from utils.slugify import slugify +from utils.export_day_filter import collect_sessions_for_latest_activity_day export_bp = Blueprint("export", __name__) -_STATE_FILE = os.path.join(os.path.expanduser("~"), ".claude-code-chat-browser", "export_state.json") +# Tests monkeypatch this path; keep in sync with utils.export_state_store. 
+_STATE_FILE = EXPORT_STATE_FILE + + +def _state_lock(): + return export_state_lock(_STATE_FILE) + + +def _load_state_from_disk() -> dict: + return load_export_state_from_disk(_STATE_FILE) + + +def _atomic_write_state(state: dict) -> None: + atomic_write_export_state(state, _STATE_FILE) def _read_state() -> dict: - if os.path.exists(_STATE_FILE): - try: - with open(_STATE_FILE) as f: - return json.load(f) - except Exception: - pass - return {} + with _state_lock(): + return _load_state_from_disk() -def _write_state(sessions_map: dict, count: int): - os.makedirs(os.path.dirname(_STATE_FILE), exist_ok=True) - state = _read_state() - state["lastExportTime"] = datetime.now().isoformat() - state["exportedCount"] = count - state.setdefault("sessions", {}).update(sessions_map) - with open(_STATE_FILE, "w") as f: - json.dump(state, f, indent=2) +def _write_state(sessions_map: dict, count: int) -> None: + """Persist merge of *sessions_map* and update last-export metadata (*count* = this run only).""" + with _state_lock(): + state = _load_state_from_disk() + state["lastExportTime"] = datetime.now().isoformat() + state["exportedCount"] = count + state.setdefault("sessions", {}).update(sessions_map) + _atomic_write_state(state) @export_bp.route("/api/export/state") def get_export_state(): state = _read_state() - return jsonify({ - "last_export_time": state.get("lastExportTime"), - "export_count": state.get("exportedCount", 0), - }) + n = state.get("exportedCount", 0) + return jsonify( + { + "last_export_time": state.get("lastExportTime"), + # Sessions exported in the last completed bulk export (not a lifetime total). 
+ "last_export_session_count": n, + "export_count": n, + } + ) @export_bp.route("/api/export", methods=["POST"]) def bulk_export(): - body = request.get_json(silent=True) or {} - since = "last" if body.get("since") == "last" else "all" + body = request.get_json(silent=True) + if body is None: + body = {} + if not isinstance(body, dict): + return jsonify({"error": "Invalid request body"}), 400 - base = current_app.config.get("CLAUDE_PROJECTS_DIR") or get_claude_projects_dir() + since = body.get("since", "all") + if since not in ("all", "last", "incremental"): + return jsonify({"error": "Invalid since mode", "since": since}), 400 + + base = ( + current_app.config.get("CLAUDE_PROJECTS_DIR") + or get_claude_projects_dir() + ) projects = list_projects(base) rules = current_app.config.get("EXCLUSION_RULES") or [] state = _read_state() - last_export_sessions: dict = state.get("sessions", {}) if since == "last" else {} + last_export_sessions: dict = ( + state.get("sessions", {}) if since == "incremental" else {} + ) buf = io.BytesIO() count = 0 manifest = [] new_sessions_map: dict = {} + latest_day = None + with zipfile.ZipFile(buf, "w", zipfile.ZIP_DEFLATED) as zf: - for project in projects: - sessions = list_sessions(project["path"]) - for sess_info in sessions: + if since == "last": + d, rows, _n = collect_sessions_for_latest_activity_day( + projects, + list_sessions=list_sessions, + parse_session=parse_session, + is_session_excluded=is_session_excluded, + rules=rules, + ) + latest_day = d + for project, sess_info, session, _st, _en in rows: sid = sess_info["id"] try: - if since == "last": - prev_mtime = last_export_sessions.get(sid, 0) - curr_mtime = sess_info.get("modified", 0) - if curr_mtime and curr_mtime <= prev_mtime: - continue - - session = parse_session(sess_info["path"]) - if session["title"] == "Untitled Session": - continue - - if is_session_excluded( - rules, - session, - project.get("display_name") or project["name"], - ): - continue - stats = 
compute_stats(session) md = session_to_markdown(session, stats) title_slug = slugify(session["title"], default="session") short_id = sid[:8] proj_slug = slugify(project["name"], default="project") ts = session["metadata"].get("first_timestamp", "") - ts_file = ts[:19].replace(":", "-") if ts else "0000-00-00T00-00-00" - rel_path = f"{proj_slug}/{ts_file}__{title_slug}__{short_id}.md" + ts_file = ( + ts[:19].replace(":", "-") + if ts + else "0000-00-00T00-00-00" + ) + rel_path = ( + f"{proj_slug}/{ts_file}__{title_slug}__{short_id}.md" + ) zf.writestr(rel_path, md) - manifest.append({ - "session_id": sid, - "title": session["title"], - "project": project["name"], - "tokens": session["metadata"]["total_input_tokens"] - + session["metadata"]["total_output_tokens"], - "tool_calls": session["metadata"]["total_tool_calls"], - "cost_estimate_usd": stats.get("cost_estimate_usd"), - }) + manifest.append( + { + "session_id": sid, + "title": session["title"], + "project": project["name"], + "tokens": session["metadata"]["total_input_tokens"] + + session["metadata"]["total_output_tokens"], + "tool_calls": session["metadata"][ + "total_tool_calls" + ], + "cost_estimate_usd": stats.get( + "cost_estimate_usd" + ), + } + ) new_sessions_map[sid] = sess_info.get("modified", 0) count += 1 except Exception as e: - current_app.logger.warning("Failed to export %s: %s", sid[:10], e) + current_app.logger.warning( + "Failed to export %s: %s", sid[:10], e + ) continue + else: + for project in projects: + sessions = list_sessions(project["path"]) + for sess_info in sessions: + sid = sess_info["id"] + try: + if since == "incremental": + prev_mtime = last_export_sessions.get(sid, 0) + curr_mtime = sess_info.get("modified", 0) + if curr_mtime and curr_mtime <= prev_mtime: + continue + + session = parse_session(sess_info["path"]) + if session["title"] == "Untitled Session": + continue + + if is_session_excluded( + rules, + session, + project.get("display_name") or project["name"], + ): + 
continue + + stats = compute_stats(session) + md = session_to_markdown(session, stats) + title_slug = slugify( + session["title"], default="session" + ) + short_id = sid[:8] + proj_slug = slugify(project["name"], default="project") + ts = session["metadata"].get("first_timestamp", "") + ts_file = ( + ts[:19].replace(":", "-") + if ts + else "0000-00-00T00-00-00" + ) + rel_path = f"{proj_slug}/{ts_file}__{title_slug}__{short_id}.md" + zf.writestr(rel_path, md) + manifest.append( + { + "session_id": sid, + "title": session["title"], + "project": project["name"], + "tokens": session["metadata"][ + "total_input_tokens" + ] + + session["metadata"]["total_output_tokens"], + "tool_calls": session["metadata"][ + "total_tool_calls" + ], + "cost_estimate_usd": stats.get( + "cost_estimate_usd" + ), + } + ) + new_sessions_map[sid] = sess_info.get("modified", 0) + count += 1 + except Exception as e: + current_app.logger.warning( + "Failed to export %s: %s", sid[:10], e + ) + continue if manifest: - manifest_str = "\n".join(json.dumps(e, default=str) for e in manifest) + manifest_str = "\n".join( + json.dumps(e, default=str) for e in manifest + ) zf.writestr("manifest.jsonl", manifest_str) if count > 0: _write_state(new_sessions_map, count) + if count == 0: + return ( + jsonify( + { + "error": "Nothing to export", + "since": since, + } + ), + 422, + ) + buf.seek(0) date_tag = datetime.now().strftime("%Y-%m-%d") - suffix = "-since-last" if since == "last" else "" + if since == "last": + if latest_day is not None: + suffix = f"-last-{latest_day.strftime('%m-%d')}" + else: + suffix = "-last" + elif since == "incremental": + suffix = "-incremental" + else: + suffix = "" return send_file( buf, mimetype="application/zip", @@ -134,7 +259,11 @@ def bulk_export(): def export_session(project_name, session_id): import os from utils.session_path import safe_join - base = current_app.config.get("CLAUDE_PROJECTS_DIR") or get_claude_projects_dir() + + base = ( + 
current_app.config.get("CLAUDE_PROJECTS_DIR") + or get_claude_projects_dir() + ) try: filepath = safe_join(base, project_name, f"{session_id}.jsonl") except ValueError: @@ -171,5 +300,3 @@ def export_session(project_name, session_id): as_attachment=True, download_name=f"{title_slug}.md", ) - - diff --git a/scripts/export.py b/scripts/export.py index 60969f8..37372d9 100644 --- a/scripts/export.py +++ b/scripts/export.py @@ -8,7 +8,8 @@ export.py stats # token/cost totals export.py stats --session UUID # single session breakdown export.py --format json --no-zip # JSON files instead of zip - export.py --since last # only sessions changed since last run + export.py --since incremental # only sessions new/changed since last run (mtime) + export.py --since last # all sessions active on latest UTC calendar day """ import argparse @@ -34,16 +35,122 @@ is_session_excluded, ) from utils.slugify import slugify - +from utils.export_day_filter import collect_sessions_for_latest_activity_day +from utils.export_state_store import ( + atomic_write_export_state, + export_state_lock, + load_export_state_from_disk, +) STATE_DIR = os.path.join(os.path.expanduser("~"), ".claude-code-chat-browser") STATE_FILE = os.path.join(STATE_DIR, "export_state.json") +def _project_matches(project: dict, needle: str) -> bool: + """True if needle matches internal dir name or display_name (substring, case-insensitive).""" + if not needle: + return True + n = needle.lower() + if n in project["name"].lower(): + return True + disp = project.get("display_name") or "" + return n in disp.lower() + + +def _zip_export_basename( + project_filter: str | None, + projects: list[dict], + date_tag: str, + *, + since: str = "all", + latest_day=None, +) -> str: + """Zip filename (no directory): project slug and/or latest-day slug when set.""" + from datetime import date + + parts: list[str] = [] + if project_filter: + if len(projects) == 1: + p0 = projects[0] + parts.append( + slugify(p0.get("display_name") or 
p0["name"], default="project") + ) + else: + parts.append( + f"{slugify(project_filter, default='project')}-n{len(projects)}" + ) + if since == "last" and latest_day is not None and isinstance( + latest_day, date + ): + parts.append(f"last-{latest_day.strftime('%m-%d')}") + if parts: + return f"claude-code-export-{'-'.join(parts)}-{date_tag}.zip" + return f"claude-code-export-{date_tag}.zip" + + +def _prefixed_export_option_overrides(argv: list[str]) -> dict[str, object]: + """Recover export flags written *before* the ``export`` subcommand. + + When the same flag is registered on both the root parser and the ``export`` + subparser, argparse can drop values from the segment before ``export`` and + apply the subparser defaults instead (e.g. ``--since incremental export`` becomes + ``since=all``). Parse that prefix here so incremental export still works. + """ + if "export" not in argv: + return {} + pre = argv[: argv.index("export")] + opts: dict[str, object] = {} + i = 0 + while i < len(pre): + a = pre[i] + if a == "--since" and i + 1 < len(pre) and pre[i + 1] in ( + "all", + "last", + "incremental", + ): + opts["since"] = pre[i + 1] + i += 2 + continue + if a == "--out" and i + 1 < len(pre): + opts["out"] = pre[i + 1] + i += 2 + continue + if a == "--no-zip": + opts["no_zip"] = True + i += 1 + continue + if a in ("-e", "--exclude-rules") and i + 1 < len(pre): + opts["exclude_rules"] = pre[i + 1] + i += 2 + continue + if a == "--base-dir" and i + 1 < len(pre): + opts["base_dir"] = pre[i + 1] + i += 2 + continue + if a == "--project" and i + 1 < len(pre): + opts["project"] = pre[i + 1] + i += 2 + continue + if a == "--format" and i + 1 < len(pre) and pre[i + 1] in ("md", "json", "both"): + opts["format"] = pre[i + 1] + i += 2 + continue + if a == "--session" and i + 1 < len(pre): + opts["session"] = pre[i + 1] + i += 2 + continue + i += 1 + return opts + + def main(): parser = build_parser() args = parser.parse_args() + if getattr(args, "command", None) == "export": 
+ for key, val in _prefixed_export_option_overrides(sys.argv[1:]).items(): + setattr(args, key, val) + command = getattr(args, "command", None) or "export" if command == "list": @@ -63,7 +170,7 @@ def cmd_list(args): projects = list_projects(base_dir) if project_filter: - projects = [p for p in projects if project_filter in p["name"]] + projects = [p for p in projects if _project_matches(p, project_filter)] if not projects: print("No projects found.") @@ -196,7 +303,7 @@ def _aggregate_stats(base_dir: str, project_filter: str, fmt: str): by project.""" projects = list_projects(base_dir) if project_filter: - projects = [p for p in projects if project_filter in p["name"]] + projects = [p for p in projects if _project_matches(p, project_filter)] totals = { "projects": len(projects), @@ -282,6 +389,66 @@ def _aggregate_stats(base_dir: str, project_filter: str, fmt: str): print(f" Est. cost: ~${totals['total_cost']:.2f} USD") +def _append_export_for_session( + project: dict, + sess_info: dict, + session: dict, + fmt: str, + all_exports: list, + manifest: list, + last_export: dict, +) -> None: + """Append markdown/json entries and manifest row; update *last_export* mtime.""" + sid = sess_info["id"] + stats = compute_stats(session) + meta = session["metadata"] + ts = meta.get("first_timestamp", "") + if not ts: + from datetime import datetime as _dt + + ts = _dt.fromtimestamp(sess_info["modified"]).strftime( + "%Y-%m-%dT%H:%M:%S" + ) + meta["first_timestamp"] = ts + date_str = ts[:10] + ts_file = ts[:19].replace(":", "-") + title_slug = slugify(session["title"], default="session") + short_id = sid[:8] + project_slug = slugify(project["name"], default="project") + + if fmt in ("md", "both"): + md = session_to_markdown(session, stats) + rel_path = os.path.join( + date_str, project_slug, f"{ts_file}__{title_slug}__{short_id}.md" + ) + all_exports.append((rel_path, md)) + + if fmt in ("json", "both"): + js = session_to_json(session, stats) + rel_path = os.path.join( + 
date_str, project_slug, f"{ts_file}__{title_slug}__{short_id}.json" + ) + all_exports.append((rel_path, js)) + + manifest.append({ + "session_id": sid, + "title": session["title"], + "project": project["name"], + "updated_at": meta.get("last_timestamp", ""), + "models": meta.get("models_used", []), + "tokens": meta["total_input_tokens"] + meta["total_output_tokens"], + "tool_calls": meta["total_tool_calls"], + "files_touched": stats.get("files_touched", {}).get( + "total_unique", 0 + ), + "commands_run": len(stats.get("commands_run", [])), + "cost_estimate_usd": stats.get("cost_estimate_usd"), + "wall_clock_seconds": meta.get("session_wall_time_seconds"), + }) + + last_export[sid] = sess_info["modified"] + + def cmd_export(args): """The main export command. Writes md/json files, optionally zipped.""" base_dir = getattr(args, "base_dir", None) or get_claude_projects_dir() @@ -298,8 +465,8 @@ def cmd_export(args): rules = load_rules(resolve_exclusion_rules_path(exclusion_rules_path)) - state = _load_state() if since == "last" else {} - last_export = state.get("sessions", {}) + state = _load_state() if since == "incremental" else {} + last_export = dict(state.get("sessions", {})) # Single session export if session_filter: @@ -313,7 +480,7 @@ def cmd_export(args): projects = list_projects(base_dir) if project_filter: - projects = [p for p in projects if project_filter in p["name"]] + projects = [p for p in projects if _project_matches(p, project_filter)] if not projects: print("No projects found.") @@ -325,83 +492,82 @@ def cmd_export(args): manifest = [] total_sessions = 0 skipped = 0 + skipped_mtime_unchanged = 0 + latest_day = None + + if since == "last": + d, rows, total_sessions = collect_sessions_for_latest_activity_day( + projects, + list_sessions=list_sessions, + parse_session=parse_session, + is_session_excluded=is_session_excluded, + rules=rules, + ) + if d is None: + print("Nothing to export (no qualifying sessions in scope).") + return + latest_day = d + 
print( + f"Latest activity end-date (UTC): {d.isoformat()} — " + f"exporting sessions that overlap that calendar day." + ) + if not rows: + print( + f"No sessions overlap {d.isoformat()} (UTC); nothing to export." + ) + return + skipped = total_sessions - len(rows) + for project, sess_info, session, _st, _en in rows: + _append_export_for_session( + project, + sess_info, + session, + fmt, + all_exports, + manifest, + last_export, + ) + else: + for project in projects: + sessions = list_sessions(project["path"]) + for sess_info in sessions: + total_sessions += 1 + sid = sess_info["id"] + + if since == "incremental": + prev_mtime = last_export.get(sid, 0) + if sess_info["modified"] <= prev_mtime: + skipped += 1 + skipped_mtime_unchanged += 1 + continue + + try: + session = parse_session(sess_info["path"]) + except Exception as e: + print(f" Warning: failed to parse {sid}: {e}") + continue - for project in projects: - sessions = list_sessions(project["path"]) - for sess_info in sessions: - total_sessions += 1 - sid = sess_info["id"] - - if since == "last": - prev_mtime = last_export.get(sid, 0) - if sess_info["modified"] <= prev_mtime: + if session["title"] == "Untitled Session": skipped += 1 continue - try: - session = parse_session(sess_info["path"]) - except Exception as e: - print(f" Warning: failed to parse {sid}: {e}") - continue - - if session["title"] == "Untitled Session": - skipped += 1 - continue - - if is_session_excluded( - rules, - session, - project.get("display_name") or project["name"], - ): - skipped += 1 - continue - - stats = compute_stats(session) - meta = session["metadata"] - ts = meta.get("first_timestamp", "") - if not ts: - from datetime import datetime as _dt - ts = _dt.fromtimestamp(sess_info["modified"]).strftime( - "%Y-%m-%dT%H:%M:%S" - ) - meta["first_timestamp"] = ts - date_str = ts[:10] - ts_file = ts[:19].replace(":", "-") # 2026-02-10T01-46-15 - title_slug = slugify(session["title"], default="session") - short_id = sid[:8] - 
project_slug = slugify(project["name"], default="project") - - if fmt in ("md", "both"): - md = session_to_markdown(session, stats) - rel_path = os.path.join( - date_str, project_slug, f"{ts_file}__{title_slug}__{short_id}.md" - ) - all_exports.append((rel_path, md)) + if is_session_excluded( + rules, + session, + project.get("display_name") or project["name"], + ): + skipped += 1 + continue - if fmt in ("json", "both"): - js = session_to_json(session, stats) - rel_path = os.path.join( - date_str, project_slug, f"{ts_file}__{title_slug}__{short_id}.json" + _append_export_for_session( + project, + sess_info, + session, + fmt, + all_exports, + manifest, + last_export, ) - all_exports.append((rel_path, js)) - - manifest.append({ - "session_id": sid, - "title": session["title"], - "project": project["name"], - "updated_at": meta.get("last_timestamp", ""), - "models": meta.get("models_used", []), - "tokens": meta["total_input_tokens"] + meta["total_output_tokens"], - "tool_calls": meta["total_tool_calls"], - "files_touched": stats.get("files_touched", {}).get( - "total_unique", 0 - ), - "commands_run": len(stats.get("commands_run", [])), - "cost_estimate_usd": stats.get("cost_estimate_usd"), - "wall_clock_seconds": meta.get("session_wall_time_seconds"), - }) - - last_export[sid] = sess_info["modified"] exported = len(all_exports) print( @@ -411,6 +577,18 @@ def cmd_export(args): if not all_exports: print("Nothing to export.") + if since == "incremental": + last_t = state.get("lastExportTime") + if last_t: + print(f"Last export: {last_t}") + last_dir = state.get("exportDir") + if last_dir: + print(f"Last export directory: {last_dir}") + if skipped_mtime_unchanged > 0: + print( + "All sessions on disk were already at or before the last " + "recorded export time (nothing new to write)." 
+ ) return os.makedirs(out_dir, exist_ok=True) @@ -428,7 +606,13 @@ def cmd_export(args): print(f"Exported {exported} file(s) to {out_dir}") else: date_tag = datetime.now().strftime("%Y-%m-%d") - zip_name = f"claude-code-export-{date_tag}.zip" + zip_name = _zip_export_basename( + project_filter, + projects, + date_tag, + since=since, + latest_day=latest_day, + ) zip_path = os.path.join(out_dir, zip_name) with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf: for rel_path, content in all_exports: @@ -482,9 +666,9 @@ def build_parser() -> argparse.ArgumentParser: parser.add_argument("--base-dir", default=None, help="Override Claude Code projects directory") parser.add_argument("--project", default=None, - help="Filter by project name (substring match)") - parser.add_argument("--since", choices=["all", "last"], default=None, - help="Export all or only new since last run") + help="Filter by project (substring on list display name or dir name)") + parser.add_argument("--since", choices=["all", "last", "incremental"], default=None, + help="'last' = latest UTC calendar day; 'incremental' = new since last export (mtime)") parser.add_argument("--out", default=None, help="Output directory (default: current dir)") parser.add_argument("--no-zip", action="store_true", default=False, @@ -507,7 +691,7 @@ def build_parser() -> argparse.ArgumentParser: # List subcommand list_p = subparsers.add_parser("list", help="List projects and sessions") list_p.add_argument("--project", default=None, - help="Filter/select project") + help="Filter/select project (display name or dir name substring)") list_p.add_argument("--base-dir", default=None, help="Override Claude Code projects directory") @@ -518,14 +702,14 @@ def build_parser() -> argparse.ArgumentParser: stats_p.add_argument("--format", choices=["text", "json"], default="text", help="Output format (default: text)") stats_p.add_argument("--project", default=None, - help="Filter by project name") + help="Filter by project 
(display name or dir name substring)") stats_p.add_argument("--base-dir", default=None, help="Override Claude Code projects directory") # Export subcommand (explicit) export_p = subparsers.add_parser("export", help="Export sessions") - export_p.add_argument("--since", choices=["all", "last"], default="all", - help="Export all or only new since last run") + export_p.add_argument("--since", choices=["all", "last", "incremental"], default="all", + help="'last' = latest UTC day; 'incremental' = new since last export") export_p.add_argument("--out", default=None, help="Output directory (default: current dir)") export_p.add_argument("--no-zip", action="store_true", @@ -535,7 +719,7 @@ def build_parser() -> argparse.ArgumentParser: export_p.add_argument("--session", default=None, help="Export single session by UUID prefix") export_p.add_argument("--project", default=None, - help="Filter by project name") + help="Filter by project (display name or dir name substring)") export_p.add_argument("--base-dir", default=None, help="Override Claude Code projects directory") export_p.add_argument( @@ -586,28 +770,28 @@ def _load_state() -> dict: {"": , ...} """ - if not os.path.isfile(STATE_FILE): - return {} - with open(STATE_FILE, "r") as f: - data = json.load(f) - # Migrate: if the file has neither "sessions" nor "lastExportTime" it is - # the old flat dict of session_id → mtime. 
- if "sessions" not in data and "lastExportTime" not in data: - return {"sessions": data} - return data + with export_state_lock(STATE_FILE): + return load_export_state_from_disk(STATE_FILE) def _save_state(sessions: dict, count: int, out_dir: str): - """Persist export state with standardised fields matching cursor-chat-browser.""" - os.makedirs(STATE_DIR, exist_ok=True) - state = { - "lastExportTime": datetime.now().isoformat(), - "exportedCount": count, - "exportDir": out_dir, - "sessions": sessions, - } - with open(STATE_FILE, "w") as f: - json.dump(state, f, indent=2) + """Persist export state with standardised fields matching cursor-chat-browser. + + Merges ``sessions`` into any concurrent updates on disk (same lock/atomic + path as the web API). + """ + with export_state_lock(STATE_FILE): + disk = load_export_state_from_disk(STATE_FILE) + disk["lastExportTime"] = datetime.now().isoformat() + disk["exportedCount"] = count + disk["exportDir"] = out_dir + base = disk.get("sessions") + if not isinstance(base, dict): + base = {} + merged = dict(base) + merged.update(sessions) + disk["sessions"] = merged + atomic_write_export_state(disk, STATE_FILE) def _die(msg: str): @@ -617,3 +801,4 @@ def _die(msg: str): if __name__ == "__main__": main() + \ No newline at end of file diff --git a/static/js/app.js b/static/js/app.js index c3edcd8..646f92e 100644 --- a/static/js/app.js +++ b/static/js/app.js @@ -226,14 +226,15 @@ async function showProjects() { const d = new Date(state.last_export_time); if (!isNaN(d.getTime())) { hasPreviousExport = true; - lastExportHtml = `

Last export: ${d.toLocaleString()} (${state.export_count || 0} sessions)

`; + const sessionCount = state.last_export_session_count ?? state.export_count ?? 0; + lastExportHtml = `

Last export: ${d.toLocaleString()} (${sessionCount} sessions in last export)

`; } } } catch(e) {} } const sinceBtnHtml = hasPreviousExport - ? `` @@ -769,13 +770,13 @@ async function doSearch() { // ==================== Export ==================== function bulkExport(since = 'all') { - const label = since === 'last' ? 'Export new sessions since last export?' : 'Export all sessions as a zip file?'; + const label = since === 'incremental' ? 'Export new sessions since last export?' : 'Export all sessions as a zip file?'; showConfirm(label, async () => { - const suffix = since === 'last' ? '-since-last' : ''; + const suffix = since === 'incremental' ? '-incremental' : ''; const fname = `claude-code-export${suffix}-${new Date().toISOString().slice(0, 10)}.zip`; const handle = await getFileHandle(fname, [{ description: 'ZIP archive', accept: { 'application/zip': ['.zip'] } }]); if (!handle) return; - const btnId = since === 'last' ? '#btn-export-since' : '#btn-export-all'; + const btnId = since === 'incremental' ? '#btn-export-since' : '#btn-export-all'; const btn = document.querySelector(btnId); const origText = btn ? 
btn.textContent.trim() : ''; if (btn) { btn.disabled = true; btn.textContent = 'Exporting...'; } @@ -785,7 +786,17 @@ function bulkExport(since = 'all') { headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ since }), }); - if (!res.ok) throw new Error(`Export failed: ${res.status}`); + const ct = res.headers.get('Content-Type') || ''; + if (!res.ok) { + let msg = `Export failed: ${res.status}`; + if (ct.includes('application/json')) { + try { + const errBody = await res.json(); + if (errBody.error) msg = errBody.error; + } catch (_) { /* ignore */ } + } + throw new Error(msg); + } const blob = await res.blob(); await writeToHandle(handle, blob, fname); showProjects(); // Refresh to show updated last-export timestamp diff --git a/tests/test_cli_args.py b/tests/test_cli_args.py index 1da5452..627e8c5 100644 --- a/tests/test_cli_args.py +++ b/tests/test_cli_args.py @@ -62,6 +62,41 @@ def test_since_subcommand_last(self): args = self._parse(["export", "--since", "last"]) assert args.since == "last" + def test_since_incremental(self): + args = self._parse(["--since", "incremental"]) + assert args.since == "incremental" + + def test_since_before_export_subcommand_recovered(self): + """Flags before ``export`` must not be lost to subparser defaults.""" + from scripts import export as export_mod + + argv = ["--since", "last", "export"] + args = self._parse(argv) + assert args.since == "all" # argparse quirk without recovery + for k, v in export_mod._prefixed_export_option_overrides(argv).items(): + setattr(args, k, v) + assert args.since == "last" + + def test_since_incremental_before_export_recovered(self): + from scripts import export as export_mod + + argv = ["--since", "incremental", "export"] + args = self._parse(argv) + assert args.since == "all" + for k, v in export_mod._prefixed_export_option_overrides(argv).items(): + setattr(args, k, v) + assert args.since == "incremental" + + def test_prefixed_out_before_export(self): + from scripts import 
export as export_mod + + argv = ["--out", "/tmp/z", "export"] + args = self._parse(argv) + assert args.out is None + for k, v in export_mod._prefixed_export_option_overrides(argv).items(): + setattr(args, k, v) + assert args.out == "/tmp/z" + # -- --out ------------------------------------------------------------------ def test_out_default_is_none(self): diff --git a/tests/test_export_api_bulk.py b/tests/test_export_api_bulk.py new file mode 100644 index 0000000..fc5718b --- /dev/null +++ b/tests/test_export_api_bulk.py @@ -0,0 +1,85 @@ +"""Tests for bulk export HTTP behavior (empty export / state JSON).""" + +from __future__ import annotations + +import json +import sys +from pathlib import Path + +import pytest + +REPO_ROOT = Path(__file__).resolve().parent.parent +sys.path.insert(0, str(REPO_ROOT)) + +from flask import Flask # noqa: E402 + +from api.export_api import export_bp # noqa: E402 + + +@pytest.fixture +def isolated_state(tmp_path, monkeypatch): + path = tmp_path / "export_state.json" + monkeypatch.setattr("api.export_api._STATE_FILE", str(path)) + return path + + +def test_bulk_export_invalid_since_returns_400(isolated_state, tmp_path): + app = Flask(__name__) + app.config["TESTING"] = True + app.config["CLAUDE_PROJECTS_DIR"] = str(tmp_path) + app.register_blueprint(export_bp) + client = app.test_client() + resp = client.post("/api/export", json={"since": "lst"}) + assert resp.status_code == 400 + body = resp.get_json() + assert body["error"] == "Invalid since mode" + assert body["since"] == "lst" + + +def test_bulk_export_non_object_json_returns_400(isolated_state, tmp_path): + app = Flask(__name__) + app.config["TESTING"] = True + app.config["CLAUDE_PROJECTS_DIR"] = str(tmp_path) + app.register_blueprint(export_bp) + client = app.test_client() + resp = client.post( + "/api/export", + data=json.dumps(["all"]), + content_type="application/json", + ) + assert resp.status_code == 400 + assert resp.get_json()["error"] == "Invalid request body" + + +def 
test_bulk_export_empty_returns_422_json(isolated_state, tmp_path): + app = Flask(__name__) + app.config["TESTING"] = True + app.config["CLAUDE_PROJECTS_DIR"] = str(tmp_path) + app.register_blueprint(export_bp) + + client = app.test_client() + resp = client.post("/api/export", json={"since": "all"}) + assert resp.status_code == 422 + body = resp.get_json() + assert body["error"] == "Nothing to export" + assert body["since"] == "all" + + +def test_export_state_json_fields(isolated_state): + isolated_state.write_text( + json.dumps({ + "lastExportTime": "2026-01-01T12:00:00", + "exportedCount": 5, + "sessions": {}, + }), + encoding="utf-8", + ) + app = Flask(__name__) + app.config["TESTING"] = True + app.register_blueprint(export_bp) + client = app.test_client() + resp = client.get("/api/export/state") + assert resp.status_code == 200 + body = resp.get_json() + assert body["last_export_session_count"] == 5 + assert body["export_count"] == 5 diff --git a/tests/test_export_day_filter.py b/tests/test_export_day_filter.py new file mode 100644 index 0000000..4d15a7e --- /dev/null +++ b/tests/test_export_day_filter.py @@ -0,0 +1,121 @@ +"""Unit tests for utils/export_day_filter.py.""" + +import logging +from datetime import date + +import pytest + +from utils.export_day_filter import ( + collect_sessions_for_latest_activity_day, + day_overlaps_session, + iso_timestamp_to_date, + session_calendar_bounds, +) + + +def test_iso_timestamp_to_date(): + assert iso_timestamp_to_date("2026-04-06T12:00:00Z") == date(2026, 4, 6) + assert iso_timestamp_to_date(None) is None + + +def test_session_calendar_bounds_uses_mtime_when_no_ts(): + st, en = session_calendar_bounds(None, None, 1_700_000_000.0) + assert st == en + + +def test_day_overlaps_session(): + assert day_overlaps_session(date(2026, 4, 1), date(2026, 4, 10), date(2026, 4, 6)) + assert not day_overlaps_session(date(2026, 4, 1), date(2026, 4, 5), date(2026, 4, 6)) + + +def test_collect_latest_day_filters_by_overlap(): + def 
list_sessions(path): + return [ + {"id": "a", "path": "p1", "modified": 0.0}, + {"id": "b", "path": "p2", "modified": 0.0}, + ] + + def parse_session(path): + if path == "p1": + return { + "title": "One", + "metadata": { + "first_timestamp": "2026-04-05T10:00:00Z", + "last_timestamp": "2026-04-06T11:00:00Z", + }, + } + return { + "title": "Two", + "metadata": { + "first_timestamp": "2026-04-01T10:00:00Z", + "last_timestamp": "2026-04-05T12:00:00Z", + }, + } + + projects = [{"name": "proj", "path": "/x", "display_name": "P"}] + d, rows, n = collect_sessions_for_latest_activity_day( + projects, + list_sessions=list_sessions, + parse_session=parse_session, + is_session_excluded=lambda *a, **k: False, + rules=[], + ) + assert d == date(2026, 4, 6) + assert n == 2 + assert len(rows) == 1 + assert rows[0][2]["title"] == "One" + + +def test_collect_latest_day_logs_parse_failure(caplog): + """Parse errors must be visible: they can change which day wins ``d = max(...)``.""" + + def list_sessions(path): + return [ + {"id": "a", "path": "broken.jsonl", "modified": 0.0}, + {"id": "b", "path": "good.jsonl", "modified": 0.0}, + ] + + def parse_session(path): + if path == "broken.jsonl": + raise ValueError("simulated corrupt jsonl") + return { + "title": "OK", + "metadata": { + "first_timestamp": "2026-04-05T10:00:00Z", + "last_timestamp": "2026-04-05T12:00:00Z", + }, + } + + projects = [{"name": "proj", "path": "/x", "display_name": "P"}] + with caplog.at_level(logging.ERROR, logger="utils.export_day_filter"): + d, rows, n = collect_sessions_for_latest_activity_day( + projects, + list_sessions=list_sessions, + parse_session=parse_session, + is_session_excluded=lambda *a, **k: False, + rules=[], + ) + assert "broken.jsonl" in caplog.text + assert "simulated corrupt jsonl" in caplog.text + assert d == date(2026, 4, 5) + assert n == 2 + assert len(rows) == 1 + + +def test_collect_latest_day_abort_on_parse_error(): + def list_sessions(path): + return [{"id": "a", "path": 
"bad.jsonl", "modified": 0.0}] + + def parse_session(path): + raise RuntimeError("fail fast") + + projects = [{"name": "proj", "path": "/x", "display_name": "P"}] + with pytest.raises(RuntimeError, match="fail fast"): + collect_sessions_for_latest_activity_day( + projects, + list_sessions=list_sessions, + parse_session=parse_session, + is_session_excluded=lambda *a, **k: False, + rules=[], + abort_on_parse_error=True, + ) diff --git a/tests/test_export_project_filter.py b/tests/test_export_project_filter.py new file mode 100644 index 0000000..0bc8e9d --- /dev/null +++ b/tests/test_export_project_filter.py @@ -0,0 +1,114 @@ +"""Tests for _project_matches (CLI --project vs list display names).""" + +import sys +import types +from pathlib import Path + +REPO_ROOT = Path(__file__).resolve().parents[1] +sys.path.insert(0, str(REPO_ROOT)) + +import scripts.export as export + + +class TestProjectMatches: + def test_matches_internal_name_substring(self): + p = {"name": "F--boost-capy", "display_name": "Boost"} + assert export._project_matches(p, "boost-capy") + + def test_matches_display_name_only(self): + p = {"name": "abc-uuid-hashed-dir", "display_name": "MyRepo"} + assert export._project_matches(p, "repo") + assert export._project_matches(p, "MyRepo") + + def test_case_insensitive(self): + p = {"name": "X--FooBar", "display_name": "Bar"} + assert export._project_matches(p, "FOO") + assert export._project_matches(p, "bar") + + def test_no_match(self): + p = {"name": "internal-only", "display_name": "Visible"} + assert not export._project_matches(p, "nomatch-xyz") + + def test_empty_needle_matches_all(self): + p = {"name": "a", "display_name": "b"} + assert export._project_matches(p, "") + + +class TestZipExportBasename: + def test_no_project_filter(self): + assert ( + export._zip_export_basename(None, [], "2026-05-08") + == "claude-code-export-2026-05-08.zip" + ) + + def test_single_project_uses_display_name(self): + projects = [{"name": "internal-hash", "display_name": 
def test_since_last_empty_export_prints_last_metadata(monkeypatch, tmp_path, capsys):
    """An empty ``--since incremental`` export reports the previous export's metadata.

    Despite the ``since_last`` in its name, this test drives ``cmd_export``
    with ``since="incremental"``: a state file is saved first, then an export
    that finds no sessions must print "Nothing to export." together with the
    saved last-export time and directory.
    """
    state_path = tmp_path / "export_state.json"
    # Patch through monkeypatch so the module globals are restored afterwards;
    # plain assignment would leave later tests pointing at this tmp_path.
    monkeypatch.setattr(export, "STATE_FILE", str(state_path), raising=False)
    monkeypatch.setattr(export, "STATE_DIR", str(tmp_path), raising=False)

    export._save_state({"sess-1": 1.0}, count=1, out_dir="/tmp/prev-exports")

    proj_dir = tmp_path / "proj"
    proj_dir.mkdir()
    fake_project = {
        "name": "internal-name",
        "path": str(proj_dir),
        "display_name": "Display",
    }

    # One project with no sessions, so there is nothing to export.
    monkeypatch.setattr(export, "list_projects", lambda base: [fake_project])
    monkeypatch.setattr(export, "list_sessions", lambda path: [])

    args = types.SimpleNamespace(
        base_dir=str(tmp_path),
        out=str(tmp_path),
        since="incremental",
        no_zip=False,
        project=None,
        format="md",
        session=None,
        exclude_rules=None,
    )
    export.cmd_export(args)
    out = capsys.readouterr().out
    assert "Nothing to export." in out
    assert "Last export:" in out
    assert "Last export directory:" in out
    assert "/tmp/prev-exports" in out
logger = logging.getLogger(__name__)


def _utc_date_from_full_timestamp(s: str) -> date | None:
    """Parse *s* as a full ISO datetime and return its UTC date, else ``None``."""
    # ``datetime.fromisoformat`` only understands a trailing "Z" on newer
    # Python versions, so spell the UTC designator as an explicit offset.
    normalized = s[:-1] + "+00:00" if s[-1] in "Zz" else s
    try:
        parsed = datetime.fromisoformat(normalized)
        if parsed.tzinfo is not None:
            # An explicit offset can move the instant onto another UTC day.
            parsed = parsed.astimezone(timezone.utc)
    except (ValueError, OverflowError):
        return None
    return parsed.date()


def iso_timestamp_to_date(ts: str | None) -> date | None:
    """Return the UTC calendar date of an ISO timestamp, or ``None``.

    Timestamps carrying a UTC offset (``Z`` or ``+HH:MM``) are converted to
    UTC before the date is taken, so ``2026-04-06T23:30:00-05:00`` yields
    2026-04-07. Timestamps without an offset are taken at face value. When
    the full string cannot be parsed, the first 10 characters are tried as a
    plain ``YYYY-MM-DD`` date.

    Returns ``None`` for non-strings, empty values, strings shorter than 10
    characters after stripping, and strings with no parseable date.
    """
    if not ts or not isinstance(ts, str):
        return None
    s = ts.strip()
    if len(s) < 10:
        return None
    parsed = _utc_date_from_full_timestamp(s)
    if parsed is not None:
        return parsed
    try:
        return date.fromisoformat(s[:10])
    except ValueError:
        return None


def session_calendar_bounds(
    first_ts: str | None, last_ts: str | None, file_mtime: float
) -> tuple[date, date]:
    """Return the inclusive ``(start, end)`` calendar range of a session.

    *end* comes from *last_ts*, falling back to the UTC date of *file_mtime*.
    *start* comes from *first_ts*, falling back to *end*. If the two come out
    reversed they are swapped, so ``start <= end`` always holds.
    """
    end = iso_timestamp_to_date(last_ts)
    if end is None:
        # Only touch the mtime when it is actually needed as the fallback.
        end = datetime.fromtimestamp(file_mtime, tz=timezone.utc).date()
    start = iso_timestamp_to_date(first_ts)
    if start is None:
        start = end
    if start > end:
        start, end = end, start
    return start, end


def day_overlaps_session(start: date, end: date, day: date) -> bool:
    """True if calendar *day* falls within [start, end] inclusive."""
    return start <= day <= end


def collect_sessions_for_latest_activity_day(
    projects: list[dict],
    *,
    list_sessions,
    parse_session,
    is_session_excluded,
    rules,
    abort_on_parse_error: bool = False,
) -> tuple[date | None, list[tuple[dict, dict, dict, date, date]], int]:
    """Select the sessions that overlap the latest day of activity.

    Every session of every project is parsed. Sessions titled
    "Untitled Session" and sessions rejected by *is_session_excluded* are
    dropped. The latest **end** date among the remaining sessions becomes
    *D*, and every remaining session whose range contains *D* is returned.

    Args:
        projects: Project dicts; ``path`` and ``name`` are read, and
            ``display_name`` when present.
        list_sessions: Callable ``(project_path) -> iterable`` of session
            info dicts with ``path`` and ``modified`` keys.
        parse_session: Callable ``(session_path) -> dict`` with ``title``
            and ``metadata`` keys.
        is_session_excluded: Callable ``(rules, session, project_label)``;
            a truthy result drops the session.
        rules: Passed through unchanged to *is_session_excluded*.
        abort_on_parse_error: When true, the first parse failure is logged
            and re-raised instead of being skipped.

    Returns:
        ``(D, rows, n_scanned)``. Each row is ``(project, sess_info,
        session, start_date, end_date)``. *n_scanned* counts every session
        visited, including ones that failed to parse or were filtered out.
        *D* is ``None`` and *rows* is empty when nothing survives filtering.
    """
    parsed: list[tuple[dict, dict, dict, date, date]] = []
    total_scan = 0
    for project in projects:
        for sess_info in list_sessions(project["path"]):
            total_scan += 1
            try:
                session = parse_session(sess_info["path"])
            except Exception as e:
                # Log even when skipping: a lost session can change which
                # day wins the max() below.
                logger.error(
                    "Failed to parse session for latest-day selection %s: %s: %s",
                    sess_info["path"],
                    type(e).__name__,
                    e,
                )
                if abort_on_parse_error:
                    raise
                continue
            if session["title"] == "Untitled Session":
                continue
            if is_session_excluded(
                rules,
                session,
                project.get("display_name") or project["name"],
            ):
                continue
            metadata = session["metadata"]
            st, en = session_calendar_bounds(
                metadata.get("first_timestamp"),
                metadata.get("last_timestamp"),
                sess_info["modified"],
            )
            parsed.append((project, sess_info, session, st, en))
    if not parsed:
        return None, [], total_scan
    d = max(row[4] for row in parsed)
    overlapping = [row for row in parsed if day_overlaps_session(row[3], row[4], d)]
    return d, overlapping, total_scan
try:
    import fcntl
except ImportError:
    fcntl = None

try:
    import msvcrt
except ImportError:
    msvcrt = None

# Only when neither fcntl nor msvcrt exists (very rare): same-process only.
_fallback_locks: dict[str, threading.Lock] = {}
_fallback_locks_guard = threading.Lock()

EXPORT_STATE_FILE = os.path.join(
    os.path.expanduser("~"), ".claude-code-chat-browser", "export_state.json"
)


def _fallback_lock_for(path: str) -> threading.Lock:
    """Return the process-local lock for *path*, creating it on first use."""
    with _fallback_locks_guard:
        return _fallback_locks.setdefault(path, threading.Lock())


@contextmanager
def export_state_lock(state_path: str | None = None):
    """Serialize export_state.json reads/writes across processes.

    POSIX: ``flock`` on a sidecar ``*.lock`` file. Windows: ``msvcrt.locking``
    on the first byte of the same sidecar. If neither module is available,
    falls back to a per-path ``threading.Lock`` (same process only).

    The sidecar's parent directory is created when missing. The lock is
    released, and the sidecar handle closed, on normal exit and when the
    guarded body raises.

    Args:
        state_path: State file to guard; defaults to ``EXPORT_STATE_FILE``.
    """
    path = EXPORT_STATE_FILE if state_path is None else state_path
    if fcntl is None and msvcrt is None:
        with _fallback_lock_for(path):
            yield
        return

    lock_path = path + ".lock"
    dir_name = os.path.dirname(lock_path)
    if dir_name:
        os.makedirs(dir_name, exist_ok=True)

    if fcntl is not None:
        # ``with`` closes the handle even if unlocking raises; the unlock
        # only runs once the lock was actually acquired.
        with open(lock_path, "a+", encoding="utf-8") as lock_fp:
            fcntl.flock(lock_fp.fileno(), fcntl.LOCK_EX)
            try:
                yield
            finally:
                fcntl.flock(lock_fp.fileno(), fcntl.LOCK_UN)
        return

    # O_CREAT without O_TRUNC creates the sidecar atomically and never
    # truncates one that another process already has open and locked.
    fd = os.open(lock_path, os.O_RDWR | os.O_CREAT, 0o666)
    with os.fdopen(fd, "r+b") as lock_fp:
        if os.fstat(lock_fp.fileno()).st_size == 0:
            lock_fp.write(b"\x00")
            lock_fp.flush()
        # msvcrt.locking locks from the current file position.
        lock_fp.seek(0)
        # NOTE: per the msvcrt docs, LK_LOCK retries for about ten seconds
        # and then raises OSError rather than blocking forever.
        msvcrt.locking(lock_fp.fileno(), msvcrt.LK_LOCK, 1)
        try:
            yield
        finally:
            lock_fp.seek(0)
            msvcrt.locking(lock_fp.fileno(), msvcrt.LK_UNLCK, 1)


def load_export_state_from_disk(state_path: str | None = None) -> dict:
    """Load state from disk (call under :func:`export_state_lock` for consistency).

    Returns ``{}`` when the file is missing, unreadable, not valid JSON, or
    its top-level value is not an object. A legacy flat
    ``{session_id: mtime, ...}`` mapping (neither ``sessions`` nor
    ``lastExportTime`` present) is wrapped as ``{"sessions": ...}``. In every
    other case the loaded dict is returned with ``sessions`` guaranteed to be
    a dict: a missing or non-dict value is replaced by ``{}``.
    """
    path = EXPORT_STATE_FILE if state_path is None else state_path
    if not os.path.isfile(path):
        return {}
    try:
        with open(path, encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError, RecursionError):
        # Best effort: unreadable, undecodable or corrupt state is treated
        # the same as no state at all.
        return {}
    if not isinstance(data, dict):
        return {}
    if "sessions" not in data and "lastExportTime" not in data:
        return {"sessions": data}
    if not isinstance(data.get("sessions"), dict):
        data = dict(data)
        data["sessions"] = {}
    return data


def atomic_write_export_state(state: dict, state_path: str | None = None) -> None:
    """Write *state* atomically (serialize, temp file + fsync + replace).

    Call under :func:`export_state_lock` matching *state_path*. The target
    directory is created when missing. *state* is serialized before any file
    is touched, so a serialization failure leaves the disk unchanged.

    Raises:
        ValueError: If *state* is not JSON-serializable.
        OSError: If writing or replacing the file fails; the temp file is
            removed on a best-effort basis before re-raising.
    """
    path = EXPORT_STATE_FILE if state_path is None else state_path
    dir_name = os.path.dirname(path) or "."
    os.makedirs(dir_name, exist_ok=True)
    try:
        payload = json.dumps(state, indent=2)
    except (TypeError, ValueError) as e:
        raise ValueError(f"export state is not JSON-serializable: {e}") from e
    # The temp file lives in the target directory so os.replace stays on one
    # filesystem.
    fd, tmp_path = tempfile.mkstemp(dir=dir_name, suffix=".tmp")
    try:
        with os.fdopen(fd, "w", encoding="utf-8") as f:
            f.write(payload)
            f.flush()
            os.fsync(f.fileno())
        os.replace(tmp_path, path)
    except BaseException:
        # Also covers KeyboardInterrupt: never leave a stray temp file.
        try:
            os.unlink(tmp_path)
        except OSError:
            pass
        raise