Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions src/database/runs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from collections.abc import Sequence
from typing import cast

from sqlalchemy import Connection, Row, text


def get_run(run_id: int, expdb: Connection) -> Row | None:
    """Look up the bare existence of a run by its id.

    Returns the matching row (just the ``rid`` column) or ``None``.
    Callers use this to distinguish error 571 (run not found) from
    error 572 (run exists but has no trace).
    """
    query = text(
        """
        SELECT rid
        FROM run
        WHERE rid = :run_id
        """,
    )
    result = expdb.execute(query, parameters={"run_id": run_id})
    return result.one_or_none()


def get_trace(run_id: int, expdb: Connection) -> Sequence[Row]:
    """Fetch every trace iteration recorded for ``run_id``.

    Rows are ordered the same way the PHP implementation orders them:
    by repeat, then fold, then iteration (all ascending).
    """
    query = text(
        """
        SELECT `repeat`, `fold`, `iteration`, setup_string, evaluation, selected
        FROM trace
        WHERE run_id = :run_id
        ORDER BY `repeat` ASC, `fold` ASC, `iteration` ASC
        """,
    )
    rows = expdb.execute(query, parameters={"run_id": run_id}).all()
    # .all() is typed loosely by SQLAlchemy; narrow it for callers.
    return cast("Sequence[Row]", rows)
2 changes: 2 additions & 0 deletions src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from routers.openml.evaluations import router as evaluationmeasures_router
from routers.openml.flows import router as flows_router
from routers.openml.qualities import router as qualities_router
from routers.openml.runs import router as runs_router
from routers.openml.study import router as study_router
from routers.openml.tasks import router as task_router
from routers.openml.tasktype import router as ttype_router
Expand Down Expand Up @@ -55,6 +56,7 @@ def create_api() -> FastAPI:
app.include_router(task_router)
app.include_router(flows_router)
app.include_router(study_router)
app.include_router(runs_router)
return app


Expand Down
56 changes: 56 additions & 0 deletions src/routers/openml/runs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
from http import HTTPStatus
from typing import Annotated

from fastapi import APIRouter, Depends, HTTPException
from sqlalchemy import Connection

import database.runs
from routers.dependencies import expdb_connection
from schemas.runs import RunTrace, RunTraceResponse, TraceIteration

router = APIRouter(prefix="/runs", tags=["runs"])


@router.get("/trace/{run_id}")
def get_run_trace(
    run_id: int,
    expdb: Annotated[Connection, Depends(expdb_connection)],
) -> RunTraceResponse:
    """Return the optimization trace recorded for a run.

    The trace lists every hyperparameter configuration evaluated during
    tuning, its evaluation score, and whether it was the selected
    configuration. Response shape and error codes mirror the PHP API.

    Raises:
        HTTPException: 412 with code "571" when the run does not exist,
            or code "572" when the run exists but has no trace rows.
    """
    # Error 571: the run itself is unknown.
    run = database.runs.get_run(run_id, expdb)
    if not run:
        raise HTTPException(
            status_code=HTTPStatus.PRECONDITION_FAILED,
            detail={"code": "571", "message": "Run not found."},
        )

    # Error 572: the run is known but was never traced.
    rows = database.runs.get_trace(run_id, expdb)
    if not rows:
        raise HTTPException(
            status_code=HTTPStatus.PRECONDITION_FAILED,
            detail={"code": "572", "message": "No trace found for run."},
        )

    # PHP serializes run_id and the per-iteration counters as strings, so
    # cast those explicitly. setup_string/evaluation/selected already come
    # back from the database as strings (evaluation is a varchar column —
    # see PR review discussion), so they pass through untouched.
    iterations = [
        TraceIteration(
            repeat=str(row.repeat),
            fold=str(row.fold),
            iteration=str(row.iteration),
            setup_string=row.setup_string,
            evaluation=row.evaluation,
            selected=row.selected,
        )
        for row in rows
    ]
    run_trace = RunTrace(run_id=str(run_id), trace_iteration=iterations)
    return RunTraceResponse(trace=run_trace)
22 changes: 22 additions & 0 deletions src/schemas/runs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from typing import Literal

from pydantic import BaseModel


class TraceIteration(BaseModel):
    """A single hyperparameter configuration tried during tuning.

    The counter fields are typed as ``str`` deliberately: the PHP API
    serializes every trace field as a string, and this model mirrors
    that contract.
    """

    # (repeat, fold, iteration) locate this configuration within the
    # cross-validation / optimization loop.
    repeat: str
    fold: str
    iteration: str
    # Serialized hyperparameter settings for this iteration.
    setup_string: str
    evaluation: str
    # Stored as a string enum ('true'/'false') in the database — NOT a boolean;
    # the literal type enforces pass-through without truthiness coercion.
    selected: Literal["true", "false"]


class RunTrace(BaseModel):
    """The full optimization trace of a single run."""

    # String, not int, to match the PHP API's serialization of identifiers.
    run_id: str
    trace_iteration: list[TraceIteration]


# Wraps RunTrace in {"trace": {...}} to match PHP API response structure.
class RunTraceResponse(BaseModel):
    """Top-level envelope returned by ``GET /runs/trace/{run_id}``."""

    trace: RunTrace
71 changes: 71 additions & 0 deletions tests/routers/openml/runs_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
from http import HTTPStatus

import pytest
from starlette.testclient import TestClient


@pytest.mark.parametrize("run_id", [34])
def test_get_run_trace(py_api: TestClient, run_id: int) -> None:
    """Happy path: the trace endpoint returns string-typed fields, PHP-style."""
    response = py_api.get(f"/runs/trace/{run_id}")
    assert response.status_code == HTTPStatus.OK

    body = response.json()
    assert "trace" in body
    trace = body["trace"]
    assert trace["run_id"] == str(run_id)
    assert "trace_iteration" in trace
    assert len(trace["trace_iteration"]) > 0

    # PHP returns every field as a string, so each entry must carry
    # string values for all of these keys.
    string_fields = ("repeat", "fold", "iteration", "setup_string", "evaluation")
    for entry in trace["trace_iteration"]:
        for field in string_fields:
            assert field in entry
            assert isinstance(entry[field], str)
        assert "selected" in entry
        assert entry["selected"] in ("true", "false")


def test_get_run_trace_ordering(py_api: TestClient) -> None:
    """Trace iterations must be ordered by repeat, fold, iteration ASC — matches PHP."""
    response = py_api.get("/runs/trace/34")
    assert response.status_code == HTTPStatus.OK

    # Walk the iterations pairwise and require non-decreasing sort keys.
    previous = None
    for item in response.json()["trace"]["trace_iteration"]:
        key = (int(item["repeat"]), int(item["fold"]), int(item["iteration"]))
        if previous is not None:
            assert previous <= key
        previous = key


def test_get_run_trace_run_not_found(py_api: TestClient) -> None:
    """A run id that exists nowhere in the database yields error 571."""
    response = py_api.get("/runs/trace/999999")
    assert response.status_code == HTTPStatus.PRECONDITION_FAILED
    detail = response.json()["detail"]
    assert detail["code"] == "571"


def test_get_run_trace_negative_id(py_api: TestClient) -> None:
    """A negative run_id can never match a row, so it also yields error 571."""
    response = py_api.get("/runs/trace/-1")
    assert response.status_code == HTTPStatus.PRECONDITION_FAILED
    detail = response.json()["detail"]
    assert detail["code"] == "571"


def test_get_run_trace_invalid_id(py_api: TestClient) -> None:
    """A non-integer run_id is rejected by FastAPI path validation (422)."""
    response = py_api.get("/runs/trace/abc")
    assert response.status_code == HTTPStatus.UNPROCESSABLE_ENTITY


def test_get_run_trace_no_trace(py_api: TestClient) -> None:
    """A run that exists but was never traced yields error 572.

    Run 24 is present in the test database without any trace rows.
    """
    response = py_api.get("/runs/trace/24")
    assert response.status_code == HTTPStatus.PRECONDITION_FAILED
    detail = response.json()["detail"]
    assert detail["code"] == "572"
Loading