easyvibecoding
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
Lines changed: 34 additions & 0 deletions
diff --git a/skills/hermes-sci/package/pyproject.toml b/skills/hermes-sci/package/pyproject.toml
Lines changed: 8 additions & 0 deletions
diff --git a/skills/hermes-sci/package/tests/__init__.py b/skills/hermes-sci/package/tests/__init__.py
diff --git a/skills/hermes-sci/package/tests/test_cli.py b/skills/hermes-sci/package/tests/test_cli.py
Lines changed: 86 additions & 0 deletions
diff --git a/skills/hermes-sci/package/tests/test_dedup_tables.py b/skills/hermes-sci/package/tests/test_dedup_tables.py
Lines changed: 95 additions & 0 deletions
diff --git a/skills/hermes-sci/package/tests/test_progress.py b/skills/hermes-sci/package/tests/test_progress.py
Lines changed: 79 additions & 0 deletions
@@ -0,0 +1,34 @@
+# Continuous integration: install the hermes-sci package and run its pytest suite.
+name: ci
+
+# Run on pushes to main and on pull requests that target main.
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+jobs:
+  test:
+    name: pytest (py${{ matrix.python-version }})
+    runs-on: ubuntu-latest
+    strategy:
+      # Let every Python version finish even when one of them fails.
+      fail-fast: false
+      matrix:
+        # Quoted so YAML keeps "3.10" as a string rather than the number 3.1.
+        python-version: ["3.10", "3.11", "3.12"]
+    defaults:
+      run:
+        # Every `run:` step below executes from the package directory.
+        working-directory: skills/hermes-sci/package
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+          cache: pip
+
+      # Editable install including the `dev` extra.
+      - name: Install package + dev deps
+        run: |
+          python -m pip install --upgrade pip
+          pip install -e ".[dev]"
+
+      - name: Run pytest
+        run: pytest -v
@@ -29,6 +29,9 @@ full = [
     "anthropic>=0.30",
     "pymupdf4llm>=0.0.10",
 ]
+dev = [
+    "pytest>=7.4",
+]
 
 [project.scripts]
 hermes-sci = "hermes_sci.cli:main"
@@ -37,6 +40,11 @@ hermes-sci = "hermes_sci.cli:main"
 where = ["."]
 include = ["hermes_sci*"]
 
+[tool.pytest.ini_options]
+testpaths = ["tests"]
+python_files = ["test_*.py"]
+addopts = "-ra -q"
+
 [tool.setuptools.package-data]
 hermes_sci = [
     "latex/*.tex.j2", "latex/*.sty", "latex/*.bib",
 
@@ -0,0 +1,86 @@
+"""CLI argparse + validate-results end-to-end (no network)."""
+from __future__ import annotations
+
+import json
+import pathlib
+import subprocess
+import sys
+
+import pytest
+
+from hermes_sci.cli import build_parser
+
+PKG_ROOT = pathlib.Path(__file__).resolve().parent.parent
+
+
+def test_parser_lists_all_subcommands():
+    p = build_parser()
+    ns = p.parse_args(["ideate", "--topic", "x", "-o", "i.json"])
+    assert ns.cmd == "ideate"
+    ns = p.parse_args(["writeup", "--ideas-json", "i.json", "-o", "out"])
+    assert ns.cmd == "writeup"
+    ns = p.parse_args(["review", "--paper", "p.pdf"])
+    assert ns.cmd == "review"
+    ns = p.parse_args(["pipeline", "--topic", "x", "-o", "out"])
+    assert ns.cmd == "pipeline"
+    ns = p.parse_args(["validate-results", "r.json"])
+    assert ns.cmd == "validate-results"
+
+
+@pytest.mark.parametrize("sink", ["human", "jsonl", "off"])
+def test_progress_flag_accepted_everywhere(sink):
+    p = build_parser()
+    ns = p.parse_args(["ideate", "--topic", "x", "-o", "i.json",
+                       "--progress", sink])
+    assert ns.progress == sink
+
+
+def test_progress_rejects_unknown_value():
+    p = build_parser()
+    with pytest.raises(SystemExit):
+        p.parse_args(["ideate", "--topic", "x", "-o", "i.json",
+                      "--progress", "spinner"])
+
+
+def test_coherence_default_false():
+    """Coherence is opt-in on every subcommand; pipeline previously had a
+    bug where it used args.no_coherence (never defined)."""
+    p = build_parser()
+    ns = p.parse_args(["pipeline", "--topic", "x", "-o", "out"])
+    assert ns.coherence is False
+    ns = p.parse_args(["pipeline", "--topic", "x", "-o", "out", "--coherence"])
+    assert ns.coherence is True
+
+
+def test_validate_results_good_exits_0(tmp_path):
+    doc = {"metrics": [{"name": "BLEU", "value": 28.3}]}
+    p = tmp_path / "r.json"
+    p.write_text(json.dumps(doc))
+    r = subprocess.run(
+        [sys.executable, "-m", "hermes_sci.cli", "validate-results", str(p)],
+        capture_output=True, text=True, cwd=str(PKG_ROOT),
+    )
+    assert r.returncode == 0, r.stderr
+    assert "matches results.json schema" in r.stdout
+
+
+def test_validate_results_bad_exits_1(tmp_path):
+    doc = {"metrics": [], "tables": [
+        {"id": "bad id", "headers": ["x"], "rows": []}]}
+    p = tmp_path / "bad.json"
+    p.write_text(json.dumps(doc))
+    r = subprocess.run(
+        [sys.executable, "-m", "hermes_sci.cli", "validate-results", str(p)],
+        capture_output=True, text=True, cwd=str(PKG_ROOT),
+    )
+    assert r.returncode == 1
+    assert "schema violation" in r.stderr
+
+
+def test_validate_results_missing_file_exits_2(tmp_path):
+    r = subprocess.run(
+        [sys.executable, "-m", "hermes_sci.cli",
+         "validate-results", str(tmp_path / "nope.json")],
+        capture_output=True, text=True, cwd=str(PKG_ROOT),
+    )
+    assert r.returncode == 2
@@ -0,0 +1,95 @@
+"""Cross-section table dedup via ownership + fingerprint."""
+from __future__ import annotations
+
+from hermes_sci.sanitize.tables import dedup_tables
+
+# One LaTeX table fixture labelled tab:complexity. The tests below place it
+# verbatim in two sections to simulate both sections emitting the same table.
+_TBL = r"""
+\begin{table}[h]
+\centering
+\caption{Results by input complexity}
+\label{tab:complexity}
+\begin{tabular}{|l|c|c|}
+\hline
+Complexity & Latency & BLEU \\
+\hline
+Simple & 19.4 & 28.1 \\
+Medium & 28.7 & 27.6 \\
+Hard   & 42.1 & 26.8 \\
+\hline
+\end{tabular}
+\end{table}
+""".strip()
+
+
+def _has_begin_table(s: str) -> bool:
+    return r"\begin{table}" in s
+
+
+def test_owning_section_wins():
+    sections = {
+        "experiments": "Prose.\n" + _TBL + "\nMore.",
+        "results": "Other prose.\n" + _TBL + "\nDone.",
+    }
+    out, events = dedup_tables(
+        sections, table_ownership={"tab:complexity": "experiments"}
+    )
+    assert _has_begin_table(out["experiments"])
+    assert not _has_begin_table(out["results"])
+    assert any(e["reason"] == "owning_section" for e in events)
+
+
+def test_duplicate_label_first_wins_when_no_ownership():
+    sections = {"experiments": _TBL, "results": _TBL}
+    out, events = dedup_tables(sections)  # no ownership map
+    assert _has_begin_table(out["experiments"])
+    assert not _has_begin_table(out["results"])
+    assert events[0]["reason"] == "duplicate_label"
+
+
+def test_fingerprint_catches_unlabeled_duplicate():
+    unlab = _TBL.replace(r"\label{tab:complexity}", "")
+    sections = {"experiments": unlab, "results": unlab}
+    out, events = dedup_tables(sections)
+    assert _has_begin_table(out["experiments"])
+    assert not _has_begin_table(out["results"])
+    assert events[0]["reason"] == "fingerprint"
+
+
+def test_different_tables_both_survive():
+    other = (_TBL
+             .replace("Results by input complexity", "Ablation over dropout rate")
+             .replace("tab:complexity", "tab:ablation"))
+    sections = {"experiments": _TBL, "results": other}
+    out, events = dedup_tables(sections)
+    assert _has_begin_table(out["experiments"])
+    assert _has_begin_table(out["results"])
+    assert events == []
+
+
+def test_no_tables_is_noop():
+    sections = {"method": "Prose only.", "experiments": "More prose."}
+    out, events = dedup_tables(sections)
+    assert out == sections
+    assert events == []
+
+
+def test_demotion_leaves_ref_resolvable_comment():
+    sections = {"experiments": _TBL, "results": _TBL}
+    out, _ = dedup_tables(
+        sections, table_ownership={"tab:complexity": "experiments"}
+    )
+    # Dropped block is replaced by a LaTeX comment citing the label so a
+    # nearby \ref{tab:complexity} still makes sense in the prose.
+    assert r"\ref{tab:complexity}" in out["results"]
+    assert out["results"].lstrip().startswith("%") or \
+        "% (duplicate" in out["results"]
+
+
+def test_owning_section_does_not_demote_first_hit():
+    sections = {"experiments": _TBL}
+    out, events = dedup_tables(
+        sections, table_ownership={"tab:complexity": "experiments"}
+    )
+    assert _has_begin_table(out["experiments"])
+    assert events == []
@@ -0,0 +1,79 @@
+"""Progress callback: sinks, safe dispatch, CLI flag resolution."""
+from __future__ import annotations
+
+import io
+import json
+
+import pytest
+
+from hermes_sci.progress import (
+    Progress,
+    _resolve_builtin,
+    emit,
+    human,
+    jsonl,
+    noop,
+)
+
+
+def test_progress_defaults():
+    p = Progress(kind="stage_start", stage="ideate")
+    assert p.message == ""
+    assert p.current == 0 and p.total == 0
+    assert p.meta == {}
+    assert p.ts > 0
+
+
+def test_noop_sink_is_silent(capsys):
+    noop(Progress(kind="stage_start", stage="ideate"))
+    assert capsys.readouterr().err == ""
+
+
+def test_human_sink_writes_to_provided_fd():
+    buf = io.StringIO()
+    human(Progress(kind="stage_start", stage="ideate", message="topic"), fd=buf)
+    human(Progress(kind="item", stage="section", current=2, total=5,
+                   message="method"), fd=buf)
+    human(Progress(kind="stage_end", stage="verify", message="6/8",
+                   meta={"duration_s": 12.4}), fd=buf)
+    out = buf.getvalue()
+    assert "→ ideate: topic" in out
+    assert "[2/5]" in out and "method" in out
+    assert "✓ verify" in out and "(12.4s)" in out
+
+
+def test_human_sink_handles_unknown_kind():
+    buf = io.StringIO()
+    # The type says Literal, but runtime unknowns shouldn't crash.
+    human(Progress(kind="wat", stage="ideate", message="x"), fd=buf)  # type: ignore[arg-type]
+    assert "wat" in buf.getvalue()
+
+
+def test_jsonl_is_parseable():
+    buf = io.StringIO()
+    jsonl(Progress(kind="item", stage="section", current=1, total=3,
+                   message="intro", meta={"model": "m1"}), fd=buf)
+    line = buf.getvalue().strip()
+    obj = json.loads(line)
+    assert obj["kind"] == "item"
+    assert obj["stage"] == "section"
+    assert obj["current"] == 1 and obj["total"] == 3
+    assert obj["meta"] == {"model": "m1"}
+
+
+def test_emit_swallows_callback_errors():
+    def bad(p):
+        raise RuntimeError("sink broke")
+    # Must not raise — a broken sink cannot crash the pipeline.
+    emit(bad, Progress(kind="stage_start", stage="ideate"))
+
+
+@pytest.mark.parametrize("name,expected", [
+    ("human", human),
+    ("jsonl", jsonl),
+    ("off", noop),
+    ("none", noop),
+    ("garbage", human),   # unknowns fall through to human
+])
+def test_resolve_builtin(name, expected):
+    assert _resolve_builtin(name) is expected