Add GET /run/trace/{run_id} endpoint (#272)

saathviksheerla · PGijsbers · web-flow · commit 5c30ef7bbbda · 2026-03-24T15:42:05.000+01:00
# Description Implements `GET /run/trace/{run_id}` as part of the run endpoints. Fixes: #40 Related: #36 Matches PHP API behavior for error codes (571/572) and response shape, but responds with HTTP 404 where the PHP API responds with 412. Returns 404 with code 571 if the run does not exist, 404 with code 572 if the run exists but has no trace, and the trace rows on success. # Checklist _Please check all that apply. You can mark items as N/A if they don't apply to your change._ Always: - [x] I have performed a self-review of my own pull request to ensure it contains all relevant information, and the proposed changes are minimal but sufficient to accomplish their task. Required for code changes: - [x] Tests pass locally - [x] I have commented my code in hard-to-understand areas, and provided or updated docstrings as needed - [x] I have added tests that cover the changes (only required if not already under coverage) If applicable: - [N/A] I have made corresponding changes to the documentation pages (`/docs`) Extra context: - [ ] This PR and the commits have been created autonomously by a bot/agent. --------- Co-authored-by: PGijsbers <p.gijsbers@tue.nl>
diff --git a/src/core/errors.py b/src/core/errors.py
@@ -399,3 +399,26 @@ class InternalError(ProblemDetailError):
     uri = "https://openml.org/problems/internal-error"
     title = "Internal Server Error"
     _default_status_code = HTTPStatus.INTERNAL_SERVER_ERROR
+
+
+# =============================================================================
+# Run Errors
+# =============================================================================
+
+
+class RunNotFoundError(ProblemDetailError):
+    """Raised when a run cannot be found."""
+
+    # Problem-type URI; the endpoint tests expect it in the `type` field of the error body.
+    uri = "https://openml.org/problems/run-not-found"
+    # Short summary; the endpoint tests expect it in the `title` field of the error body.
+    title = "Run Not Found"
+    # HTTP status used unless overridden: 404 Not Found.
+    _default_status_code = HTTPStatus.NOT_FOUND
+    # Error code 571; the migration tests check it against the PHP API's code for a missing run.
+    _default_code = 571
+
+
+class RunTraceNotFoundError(ProblemDetailError):
+    """Raised when trace data for a run cannot be found."""
+
+    # Problem-type URI; the endpoint tests expect it in the `type` field of the error body.
+    uri = "https://openml.org/problems/run-trace-not-found"
+    # Short summary; the endpoint tests expect it in the `title` field of the error body.
+    title = "Run Trace Not Found"
+    # HTTP status used unless overridden: 404 Not Found.
+    _default_status_code = HTTPStatus.NOT_FOUND
+    # Error code 572; the migration tests check it against the PHP API's code for a run
+    # that has no trace.
+    _default_code = 572
diff --git a/src/database/runs.py b/src/database/runs.py
@@ -0,0 +1,40 @@
+"""Database queries for run-related data."""
+
+from collections.abc import Sequence
+from typing import cast
+
+from sqlalchemy import Row, text
+from sqlalchemy.ext.asyncio import AsyncConnection
+
+
+async def exist(id_: int, expdb: AsyncConnection) -> bool:
+    """Check if a run exists by ID."""
+    row = await expdb.execute(
+        text(
+            """
+            SELECT 1
+            FROM `run`
+            WHERE `rid` = :run_id
+            """,
+        ),
+        parameters={"run_id": id_},
+    )
+    return bool(row.one_or_none())
+
+
+async def get_trace(run_id: int, expdb: AsyncConnection) -> Sequence[Row]:
+    """Get trace rows for a run from the trace table."""
+    rows = await expdb.execute(
+        text(
+            """
+            SELECT `repeat`, `fold`, `iteration`, `setup_string`, `evaluation`, `selected`
+            FROM `trace`
+            WHERE `run_id` = :run_id
+            """,
+        ),
+        parameters={"run_id": run_id},
+    )
+    return cast(
+        "Sequence[Row]",
+        rows.all(),
+    )
diff --git a/src/main.py b/src/main.py
@@ -15,6 +15,7 @@
 from routers.openml.evaluations import router as evaluationmeasures_router
 from routers.openml.flows import router as flows_router
 from routers.openml.qualities import router as qualities_router
+from routers.openml.runs import router as run_router
 from routers.openml.setups import router as setup_router
 from routers.openml.study import router as study_router
 from routers.openml.tasks import router as task_router
@@ -70,6 +71,7 @@ def create_api() -> FastAPI:
     app.include_router(flows_router)
     app.include_router(study_router)
     app.include_router(setup_router)
+    app.include_router(run_router)
     return app
 
 
diff --git a/src/routers/openml/runs.py b/src/routers/openml/runs.py
@@ -0,0 +1,44 @@
+"""Endpoints for run-related data."""
+
+from typing import Annotated
+
+from fastapi import APIRouter, Depends
+from sqlalchemy.ext.asyncio import AsyncConnection
+
+import database.runs
+from core.errors import RunNotFoundError, RunTraceNotFoundError
+from routers.dependencies import expdb_connection
+from schemas.runs import RunTrace, TraceIteration
+
+router = APIRouter(prefix="/run", tags=["run"])
+
+
+@router.get("/trace/{run_id}")
+async def get_run_trace(
+    run_id: int,
+    expdb: Annotated[AsyncConnection, Depends(expdb_connection)],
+) -> RunTrace:
+    """Get trace data for a run by run ID."""
+    if not await database.runs.exist(run_id, expdb):
+        msg = f"Run {run_id} not found."
+        raise RunNotFoundError(msg)
+
+    trace_rows = await database.runs.get_trace(run_id, expdb)
+    if not trace_rows:
+        msg = f"No trace found for run {run_id}."
+        raise RunTraceNotFoundError(msg)
+
+    return RunTrace(
+        run_id=run_id,
+        trace=[
+            TraceIteration(
+                repeat=row.repeat,
+                fold=row.fold,
+                iteration=row.iteration,
+                setup_string=row.setup_string,
+                evaluation=row.evaluation,
+                selected=row.selected,
+            )
+            for row in trace_rows
+        ],
+    )
diff --git a/src/schemas/runs.py b/src/schemas/runs.py
@@ -0,0 +1,21 @@
+"""Pydantic schemas for run-related endpoints."""
+
+from pydantic import BaseModel
+
+
+class TraceIteration(BaseModel):
+    """A single trace iteration for a run."""
+
+    # Each field is filled from the same-named column of a `trace` table row
+    # by the `GET /run/trace/{run_id}` endpoint.
+    repeat: int
+    fold: int
+    iteration: int
+    # May be absent (None).
+    setup_string: str | None
+    # May be absent (None).
+    evaluation: float | None
+    # Kept as a string; the endpoint tests expect "true" or "false".
+    selected: str
+
+
+class RunTrace(BaseModel):
+    """Trace data for a run."""
+
+    # ID of the run the trace belongs to (the endpoint echoes the requested run ID).
+    run_id: int
+    # One entry per trace row found for the run.
+    trace: list[TraceIteration]
diff --git a/tests/routers/openml/migration/runs_migration_test.py b/tests/routers/openml/migration/runs_migration_test.py
@@ -0,0 +1,80 @@
+"""Migration tests comparing PHP and Python API responses for run trace endpoints."""
+
+import asyncio
+from http import HTTPStatus
+from typing import Any
+
+import deepdiff
+import httpx
+import pytest
+
+from core.conversions import nested_num_to_str
+
+# Run IDs requested from both the Python and the PHP API. 24 and 34 are also used by the
+# endpoint tests as a run without and with trace data, respectively; 999_999_999 is
+# presumably an ID that does not exist on the test server — TODO confirm.
+_SERVER_RUNS = [*range(24, 40), *range(134, 140), 999_999_999]
+
+
+@pytest.mark.parametrize("run_id", _SERVER_RUNS)
+async def test_get_run_trace_equal(
+    run_id: int,
+    py_api: httpx.AsyncClient,
+    php_api: httpx.AsyncClient,
+) -> None:
+    """Test that Python and PHP run trace responses are equivalent after normalization."""
+    py_response, php_response = await asyncio.gather(
+        py_api.get(f"/run/trace/{run_id}"),
+        php_api.get(f"/run/trace/{run_id}"),
+    )
+    if php_response.status_code == HTTPStatus.OK:
+        _assert_trace_response_success(py_response, php_response)
+        return
+
+    assert php_response.status_code == HTTPStatus.PRECONDITION_FAILED
+    assert py_response.status_code == HTTPStatus.NOT_FOUND
+
+    php_error = php_response.json()["error"]
+    py_error = py_response.json()
+    assert php_error["code"] == py_error["code"]
+    if php_error["code"] == "571":
+        assert php_error["message"] == "Run not found."
+        assert py_error["detail"] == f"Run {run_id} not found."
+    elif php_error["code"] == "572":
+        assert php_error["message"] == "No successful trace associated with this run."
+        assert py_error["detail"] == f"No trace found for run {run_id}."
+    else:
+        msg = f"Unknown error code {php_error['code']} for run {run_id}."
+        raise AssertionError(msg)
+
+
+def _assert_trace_response_success(
+    py_response: httpx.Response, php_response: httpx.Response
+) -> None:
+    assert py_response.status_code == HTTPStatus.OK
+    assert php_response.status_code == HTTPStatus.OK
+
+    new_json = py_response.json()
+
+    # PHP nests response under "trace" key — match that structure
+    new_json = {"trace": new_json}
+
+    # PHP uses "trace_iteration" key, Python uses "trace"
+    new_json["trace"]["trace_iteration"] = new_json["trace"].pop("trace")
+
+    # PHP returns all numeric values as strings — normalize Python response
+    new_json = nested_num_to_str(new_json)
+
+    def _sort_trace(payload: dict[str, Any]) -> dict[str, Any]:
+        """Sort trace iterations by (repeat, fold, iteration) for order-sensitive comparison."""
+        copied = payload.copy()
+        copied["trace"] = copied["trace"].copy()
+        copied["trace"]["trace_iteration"] = sorted(
+            copied["trace"]["trace_iteration"],
+            key=lambda row: (int(row["repeat"]), int(row["fold"]), int(row["iteration"])),
+        )
+        return copied
+
+    differences = deepdiff.diff.DeepDiff(
+        _sort_trace(new_json),
+        _sort_trace(php_response.json()),
+        ignore_order=False,
+    )
+    assert not differences
diff --git a/tests/routers/openml/runs_test.py b/tests/routers/openml/runs_test.py
@@ -0,0 +1,49 @@
+"""Tests for the GET /run/trace/{run_id} endpoint."""
+
+from http import HTTPStatus
+
+import httpx
+import pytest
+
+from core.errors import RunNotFoundError, RunTraceNotFoundError
+
+
+@pytest.mark.parametrize("run_id", [34])
+async def test_get_run_trace_success(run_id: int, py_api: httpx.AsyncClient) -> None:
+    """Test that trace data is returned for a run that has trace entries."""
+    response = await py_api.get(f"/run/trace/{run_id}")
+    assert response.status_code == HTTPStatus.OK
+    body = response.json()
+    assert body["run_id"] == run_id
+    assert isinstance(body["trace"], list)
+    assert len(body["trace"]) > 0
+    first = body["trace"][0]
+    assert isinstance(first["repeat"], int)
+    assert isinstance(first["fold"], int)
+    assert isinstance(first["iteration"], int)
+    assert first["selected"] in ("true", "false")
+    assert first["evaluation"] is None or isinstance(first["evaluation"], float)
+
+
+@pytest.mark.parametrize("run_id", [24])
+async def test_get_run_trace_no_trace(run_id: int, py_api: httpx.AsyncClient) -> None:
+    """Test that 412 is returned for a run that exists but has no trace."""
+    response = await py_api.get(f"/run/trace/{run_id}")
+    assert response.status_code == HTTPStatus.NOT_FOUND
+    body = response.json()
+    assert body["code"] == "572"  # RunTraceNotFoundError code
+    assert body["type"] == RunTraceNotFoundError.uri
+    assert body["title"] == RunTraceNotFoundError.title
+    assert body["status"] == HTTPStatus.NOT_FOUND
+
+
+@pytest.mark.parametrize("run_id", [999999])
+async def test_get_run_trace_run_not_found(run_id: int, py_api: httpx.AsyncClient) -> None:
+    """Test that 412 is returned when the run does not exist."""
+    response = await py_api.get(f"/run/trace/{run_id}")
+    assert response.status_code == HTTPStatus.NOT_FOUND
+    body = response.json()
+    assert body["code"] == "571"  # RunNotFoundError code
+    assert body["type"] == RunNotFoundError.uri
+    assert body["title"] == RunNotFoundError.title
+    assert body["status"] == HTTPStatus.NOT_FOUND