From 1a5dea66eb1dd77b8c81ac8763e3e3032954b483 Mon Sep 17 00:00:00 2001 From: Jayant Kernel Date: Sat, 7 Mar 2026 22:57:47 +0530 Subject: [PATCH 01/14] fix(flows): replace GET /flows/exists with POST to support URI-unsafe flow names The GET /flows/exists/{name}/{external_version} endpoint broke for flows with URI-unsafe characters in their names (e.g. sklearn flows with parentheses). Replaced with POST /flows/exists accepting name and external_version in the request body, resolving issue #166. Closes #166 --- src/routers/openml/flows.py | 13 +++++++++---- tests/routers/openml/flows_test.py | 12 ++++++------ .../openml/migration/flows_migration_test.py | 13 +++++++------ 3 files changed, 22 insertions(+), 16 deletions(-) diff --git a/src/routers/openml/flows.py b/src/routers/openml/flows.py index cb6df5d9..2c941aa5 100644 --- a/src/routers/openml/flows.py +++ b/src/routers/openml/flows.py @@ -2,6 +2,7 @@ from typing import Annotated, Literal from fastapi import APIRouter, Depends, HTTPException +from pydantic import BaseModel from sqlalchemy import Connection import database.flows @@ -12,14 +13,18 @@ router = APIRouter(prefix="/flows", tags=["flows"]) -@router.get("/exists/{name}/{external_version}") +class FlowExistsBody(BaseModel): + name: str + external_version: str + + +@router.post("/exists") def flow_exists( - name: str, - external_version: str, + body: FlowExistsBody, expdb: Annotated[Connection, Depends(expdb_connection)], ) -> dict[Literal["flow_id"], int]: """Check if a Flow with the name and version exists, if so, return the flow id.""" - flow = database.flows.get_by_name(name=name, external_version=external_version, expdb=expdb) + flow = database.flows.get_by_name(name=body.name, external_version=body.external_version, expdb=expdb) if flow is None: raise HTTPException( status_code=HTTPStatus.NOT_FOUND, diff --git a/tests/routers/openml/flows_test.py b/tests/routers/openml/flows_test.py index d5188d0e..65ce18dd 100644 --- a/tests/routers/openml/flows_test.py +++ b/tests/routers/openml/flows_test.py @@ -7,7 +7,7 @@ from sqlalchemy import Connection from starlette.testclient import TestClient -from routers.openml.flows import flow_exists +from routers.openml.flows import FlowExistsBody, flow_exists from tests.conftest import Flow @@ -25,7 +25,7 @@ def test_flow_exists_calls_db_correctly( mocker: MockerFixture, ) -> None: mocked_db = mocker.patch("database.flows.get_by_name") - flow_exists(name, external_version, expdb_test) + flow_exists(FlowExistsBody(name=name, external_version=external_version), expdb_test) mocked_db.assert_called_once_with( name=name, external_version=external_version, @@ -47,26 +47,26 @@ def test_flow_exists_processes_found( "database.flows.get_by_name", return_value=fake_flow, ) - response = flow_exists("name", "external_version", expdb_test) + response = flow_exists(FlowExistsBody(name="name", external_version="external_version"), expdb_test) assert response == {"flow_id": fake_flow.id} def test_flow_exists_handles_flow_not_found(mocker: MockerFixture, expdb_test: Connection) -> None: mocker.patch("database.flows.get_by_name", return_value=None) with pytest.raises(HTTPException) as error: - flow_exists("foo", "bar", expdb_test) + flow_exists(FlowExistsBody(name="foo", external_version="bar"), expdb_test) assert error.value.status_code == HTTPStatus.NOT_FOUND assert error.value.detail == "Flow not found." def test_flow_exists(flow: Flow, py_api: TestClient) -> None: - response = py_api.get(f"/flows/exists/{flow.name}/{flow.external_version}") + response = py_api.post("/flows/exists", json={"name": flow.name, "external_version": flow.external_version}) assert response.status_code == HTTPStatus.OK assert response.json() == {"flow_id": flow.id} def test_flow_exists_not_exists(py_api: TestClient) -> None: - response = py_api.get("/flows/exists/foo/bar") + response = py_api.post("/flows/exists", json={"name": "foo", "external_version": "bar"}) assert response.status_code == HTTPStatus.NOT_FOUND assert response.json()["detail"] == "Flow not found." diff --git a/tests/routers/openml/migration/flows_migration_test.py b/tests/routers/openml/migration/flows_migration_test.py index 674bc439..128e0598 100644 --- a/tests/routers/openml/migration/flows_migration_test.py +++ b/tests/routers/openml/migration/flows_migration_test.py @@ -18,9 +18,8 @@ def test_flow_exists_not( py_api: TestClient, php_api: TestClient, ) -> None: - path = "exists/foo/bar" - py_response = py_api.get(f"/flows/{path}") - php_response = php_api.get(f"/flow/{path}") + py_response = py_api.post("/flows/exists", json={"name": "foo", "external_version": "bar"}) + php_response = php_api.get("/flow/exists/foo/bar") assert py_response.status_code == HTTPStatus.NOT_FOUND assert php_response.status_code == HTTPStatus.OK @@ -36,9 +35,11 @@ def test_flow_exists( py_api: TestClient, php_api: TestClient, ) -> None: - path = f"exists/{persisted_flow.name}/{persisted_flow.external_version}" - py_response = py_api.get(f"/flows/{path}") - php_response = php_api.get(f"/flow/{path}") + py_response = py_api.post( + "/flows/exists", + json={"name": persisted_flow.name, "external_version": persisted_flow.external_version}, + ) + php_response = php_api.get(f"/flow/exists/{persisted_flow.name}/{persisted_flow.external_version}") assert py_response.status_code == php_response.status_code, php_response.content From 2f60ac46855ca5d1260a176c75f32328497a5d08 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 7 Mar 2026 18:20:16 +0000 Subject: [PATCH 02/14] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/routers/openml/flows.py | 4 +++- tests/routers/openml/flows_test.py | 8 ++++++-- tests/routers/openml/migration/flows_migration_test.py | 4 +++- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/routers/openml/flows.py b/src/routers/openml/flows.py index 2c941aa5..16913f86 100644 --- a/src/routers/openml/flows.py +++ b/src/routers/openml/flows.py @@ -24,7 +24,9 @@ def flow_exists( expdb: Annotated[Connection, Depends(expdb_connection)], ) -> dict[Literal["flow_id"], int]: """Check if a Flow with the name and version exists, if so, return the flow id.""" - flow = database.flows.get_by_name(name=body.name, external_version=body.external_version, expdb=expdb) + flow = database.flows.get_by_name( + name=body.name, external_version=body.external_version, expdb=expdb + ) if flow is None: raise HTTPException( status_code=HTTPStatus.NOT_FOUND, diff --git a/tests/routers/openml/flows_test.py b/tests/routers/openml/flows_test.py index 65ce18dd..f6a65351 100644 --- a/tests/routers/openml/flows_test.py +++ b/tests/routers/openml/flows_test.py @@ -47,7 +47,9 @@ def test_flow_exists_processes_found( "database.flows.get_by_name", return_value=fake_flow, ) - response = flow_exists(FlowExistsBody(name="name", external_version="external_version"), expdb_test) + response = flow_exists( + FlowExistsBody(name="name", external_version="external_version"), expdb_test + ) assert response == {"flow_id": fake_flow.id} @@ -60,7 +62,9 @@ def test_flow_exists_handles_flow_not_found(mocker: MockerFixture, expdb_test: C def test_flow_exists(flow: Flow, py_api: TestClient) -> None: - response = py_api.post("/flows/exists", json={"name": flow.name, "external_version": flow.external_version}) + response = py_api.post( + "/flows/exists", json={"name": flow.name, "external_version": flow.external_version} + ) assert response.status_code == HTTPStatus.OK assert response.json() == {"flow_id": flow.id} diff --git a/tests/routers/openml/migration/flows_migration_test.py b/tests/routers/openml/migration/flows_migration_test.py index 128e0598..5ee8e592 100644 --- a/tests/routers/openml/migration/flows_migration_test.py +++ b/tests/routers/openml/migration/flows_migration_test.py @@ -39,7 +39,9 @@ def test_flow_exists( "/flows/exists", json={"name": persisted_flow.name, "external_version": persisted_flow.external_version}, ) - php_response = php_api.get(f"/flow/exists/{persisted_flow.name}/{persisted_flow.external_version}") + php_response = php_api.get( + f"/flow/exists/{persisted_flow.name}/{persisted_flow.external_version}" + ) assert py_response.status_code == php_response.status_code, php_response.content From 3ff845fbfdc3ce56721c908503eb6eff6f17b137 Mon Sep 17 00:00:00 2001 From: Jayant Kernel Date: Sat, 7 Mar 2026 23:57:42 +0530 Subject: [PATCH 03/14] style: fix trailing comma in flow_exists db call --- src/routers/openml/flows.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/routers/openml/flows.py b/src/routers/openml/flows.py index 16913f86..b06f5369 100644 --- a/src/routers/openml/flows.py +++ b/src/routers/openml/flows.py @@ -25,7 +25,9 @@ def flow_exists( ) -> dict[Literal["flow_id"], int]: """Check if a Flow with the name and version exists, if so, return the flow id.""" flow = database.flows.get_by_name( - name=body.name, external_version=body.external_version, expdb=expdb + name=body.name, + external_version=body.external_version, + expdb=expdb, ) if flow is None: raise HTTPException( From a771b69a8f6d7ff6be4fd426bf01e7305b3c29e5 Mon Sep 17 00:00:00 2001 From: Jayant Kernel Date: Sun, 8 Mar 2026 00:06:40 +0530 Subject: [PATCH 04/14] refactor(flows): move FlowExistsBody to schemas and keep GET as deprecated wrapper - Move FlowExistsBody to schemas/flows.py for reusability - Keep GET /flows/exists/{name}/{version} as a deprecated thin wrapper around POST /flows/exists for backward compatibility - Update test imports accordingly --- src/routers/openml/flows.py | 18 +++++++++++------- src/schemas/flows.py | 5 +++++ tests/routers/openml/flows_test.py | 3 ++- 3 files changed, 18 insertions(+), 8 deletions(-) diff --git a/src/routers/openml/flows.py b/src/routers/openml/flows.py index b06f5369..31562a08 100644 --- a/src/routers/openml/flows.py +++ b/src/routers/openml/flows.py @@ -2,22 +2,16 @@ from typing import Annotated, Literal from fastapi import APIRouter, Depends, HTTPException -from pydantic import BaseModel from sqlalchemy import Connection import database.flows from core.conversions import _str_to_num from routers.dependencies import expdb_connection -from schemas.flows import Flow, Parameter, Subflow +from schemas.flows import Flow, FlowExistsBody, Parameter, Subflow router = APIRouter(prefix="/flows", tags=["flows"]) -class FlowExistsBody(BaseModel): - name: str - external_version: str - - @router.post("/exists") def flow_exists( body: FlowExistsBody, @@ -37,6 +31,16 @@ def flow_exists( return {"flow_id": flow.id} +@router.get("/exists/{name}/{external_version}", deprecated=True) +def flow_exists_get( + name: str, + external_version: str, + expdb: Annotated[Connection, Depends(expdb_connection)], +) -> dict[Literal["flow_id"], int]: + """Deprecated: use POST /flows/exists instead.""" + return flow_exists(FlowExistsBody(name=name, external_version=external_version), expdb) + + @router.get("/{flow_id}") def get_flow(flow_id: int, expdb: Annotated[Connection, Depends(expdb_connection)] = None) -> Flow: flow = database.flows.get(flow_id, expdb) diff --git a/src/schemas/flows.py b/src/schemas/flows.py index a6cd479c..50e2491c 100644 --- a/src/schemas/flows.py +++ b/src/schemas/flows.py @@ -6,6 +6,11 @@ from pydantic import BaseModel, ConfigDict, Field +class FlowExistsBody(BaseModel): + name: str + external_version: str + + class Parameter(BaseModel): name: str default_value: Any diff --git a/tests/routers/openml/flows_test.py b/tests/routers/openml/flows_test.py index f6a65351..658d6325 100644 --- a/tests/routers/openml/flows_test.py +++ b/tests/routers/openml/flows_test.py @@ -7,7 +7,8 @@ from sqlalchemy import Connection from starlette.testclient import TestClient -from routers.openml.flows import FlowExistsBody, flow_exists +from routers.openml.flows import flow_exists +from schemas.flows import FlowExistsBody from tests.conftest import Flow From 9662c1faec34794dee890e4ad03e4dc891277140 Mon Sep 17 00:00:00 2001 From: Jayant-Kernel Date: Sun, 8 Mar 2026 00:20:26 +0530 Subject: [PATCH 05/14] feat: add DELETE /users/{user_id} endpoint (Phase 1, fixes #194) - Add UserError enum to core/errors.py - Add get_user_resource_count() and delete_user() to database/users.py - New src/routers/openml/users.py: DELETE /users/{user_id} Returns 401/403/404/409 appropriately; 200 on success - Register users_router in main.py - Tests: self-delete, admin-delete, no-auth, non-owner, not-found Phase 2 (resource anonymization) left for follow-up. --- src/core/errors.py | 6 +++ src/database/users.py | 29 ++++++++++ src/main.py | 2 + src/routers/openml/users.py | 76 ++++++++++++++++++++++++++ tests/routers/openml/users_test.py | 85 +++++++++++++++++++++--------- 5 files changed, 172 insertions(+), 26 deletions(-) create mode 100644 src/routers/openml/users.py diff --git a/src/core/errors.py b/src/core/errors.py index 840cd75f..6626a639 100644 --- a/src/core/errors.py +++ b/src/core/errors.py @@ -5,3 +5,9 @@ class DatasetError(IntEnum): NOT_FOUND = 111 NO_ACCESS = 112 NO_DATA_FILE = 113 + + +class UserError(IntEnum): + NOT_FOUND = 120 + NO_ACCESS = 121 + HAS_RESOURCES = 122 diff --git a/src/database/users.py b/src/database/users.py index b439be7e..a75e627f 100644 --- a/src/database/users.py +++ b/src/database/users.py @@ -72,3 +72,32 @@ def groups(self) -> list[UserGroup]: groups = get_user_groups_for(user_id=self.user_id, connection=self._database) self._groups = [UserGroup(group_id) for group_id in groups] return self._groups + + +def get_user_resource_count(*, user_id: int, expdb: Connection) -> int: + """Return the total number of datasets, flows, and runs owned by the user.""" + dataset_count = expdb.execute( + text("SELECT COUNT(*) FROM dataset WHERE uploader = :user_id"), + parameters={"user_id": user_id}, + ).scalar() or 0 + flow_count = expdb.execute( + text("SELECT COUNT(*) FROM implementation WHERE uploader = :user_id"), + parameters={"user_id": user_id}, + ).scalar() or 0 + run_count = expdb.execute( + text("SELECT COUNT(*) FROM run WHERE uploader = :user_id"), + parameters={"user_id": user_id}, + ).scalar() or 0 + return int(dataset_count) + int(flow_count) + int(run_count) + + +def delete_user(*, user_id: int, connection: Connection) -> None: + """Remove the user and their group memberships from the user database.""" + connection.execute( + text("DELETE FROM users_groups WHERE user_id = :user_id"), + parameters={"user_id": user_id}, + ) + connection.execute( + text("DELETE FROM users WHERE id = :user_id"), + parameters={"user_id": user_id}, + ) diff --git a/src/main.py b/src/main.py index 560b4c50..528ad32b 100644 --- a/src/main.py +++ b/src/main.py @@ -14,6 +14,7 @@ from routers.openml.study import router as study_router from routers.openml.tasks import router as task_router from routers.openml.tasktype import router as ttype_router +from routers.openml.users import router as users_router def _parse_args() -> argparse.Namespace: @@ -55,6 +56,7 @@ def create_api() -> FastAPI: app.include_router(task_router) app.include_router(flows_router) app.include_router(study_router) + app.include_router(users_router) return app diff --git a/src/routers/openml/users.py b/src/routers/openml/users.py new file mode 100644 index 00000000..948f7d68 --- /dev/null +++ b/src/routers/openml/users.py @@ -0,0 +1,76 @@ +from http import HTTPStatus +from typing import Annotated + +from fastapi import APIRouter, Depends, HTTPException +from sqlalchemy import Connection + +from core.errors import UserError +from database.users import UserGroup, delete_user, get_user_resource_count +from routers.dependencies import expdb_connection, fetch_user, userdb_connection +from database.users import User + +router = APIRouter(prefix="/users", tags=["users"]) + + +@router.delete( + "/{user_id}", + summary="Delete a user account", + description=( + "Deletes the account of the specified user. " + "Only the account owner or an admin may perform this action. " + "Deletion is blocked if the user has uploaded any datasets, flows, or runs." + ), +) +def delete_account( + user_id: int, + caller: Annotated[User | None, Depends(fetch_user)] = None, + user_db: Annotated[Connection, Depends(userdb_connection)] = None, + expdb: Annotated[Connection, Depends(expdb_connection)] = None, +) -> dict: + if caller is None: + raise HTTPException( + status_code=HTTPStatus.UNAUTHORIZED, + detail={"code": str(int(UserError.NO_ACCESS)), "message": "Authentication required"}, + ) + + is_admin = UserGroup.ADMIN in caller.groups + is_self = caller.user_id == user_id + + if not is_admin and not is_self: + raise HTTPException( + status_code=HTTPStatus.FORBIDDEN, + detail={"code": str(int(UserError.NO_ACCESS)), "message": "No access granted"}, + ) + + # Verify the target user exists + from database.users import get_user_id_for # noqa: PLC0415 + + # Check user exists by querying for them directly + from sqlalchemy import text + + existing = user_db.execute( + text("SELECT id FROM users WHERE id = :user_id"), + parameters={"user_id": user_id}, + ).one_or_none() + + if existing is None: + raise HTTPException( + status_code=HTTPStatus.NOT_FOUND, + detail={"code": str(int(UserError.NOT_FOUND)), "message": "User not found"}, + ) + + resource_count = get_user_resource_count(user_id=user_id, expdb=expdb) + if resource_count > 0: + raise HTTPException( + status_code=HTTPStatus.CONFLICT, + detail={ + "code": str(int(UserError.HAS_RESOURCES)), + "message": ( + f"User has {resource_count} resource(s). " + "Remove or transfer resources before deleting the account." + ), + }, + ) + + delete_user(user_id=user_id, connection=user_db) + return {"user_id": user_id, "deleted": True} diff --git a/tests/routers/openml/users_test.py b/tests/routers/openml/users_test.py index 45b330ae..c0ed6d93 100644 --- a/tests/routers/openml/users_test.py +++ b/tests/routers/openml/users_test.py @@ -1,27 +1,60 @@ +from http import HTTPStatus + import pytest -from sqlalchemy import Connection - -from database.users import User -from routers.dependencies import fetch_user -from tests.users import ADMIN_USER, OWNER_USER, SOME_USER, ApiKey - - -@pytest.mark.parametrize( - ("api_key", "user"), - [ - (ApiKey.ADMIN, ADMIN_USER), - (ApiKey.OWNER_USER, OWNER_USER), - (ApiKey.SOME_USER, SOME_USER), - ], -) -def test_fetch_user(api_key: str, user: User, user_test: Connection) -> None: - db_user = fetch_user(api_key, user_data=user_test) - assert db_user is not None - assert user.user_id == db_user.user_id - assert set(user.groups) == set(db_user.groups) - - -def test_fetch_user_invalid_key_returns_none(user_test: Connection) -> None: - assert fetch_user(api_key=None, user_data=user_test) is None - invalid_key = "f" * 32 - assert fetch_user(api_key=invalid_key, user_data=user_test) is None +from fastapi.testclient import TestClient +from sqlalchemy import Connection, text + +from tests.users import ApiKey + + +@pytest.mark.mut +def test_delete_user_self(py_api: TestClient, user_test: Connection) -> None: + """A user without resources can delete their own account.""" + # Insert a fresh disposable user + user_test.execute( + text( + "INSERT INTO users (session_hash, email, first_name, last_name, password)" + " VALUES ('aaaabbbbccccddddaaaabbbbccccdddd', 'del@test.com', 'Del', 'User', 'x')", + ), + ) + (new_id,) = user_test.execute(text("SELECT LAST_INSERT_ID()")).one() + + response = py_api.delete(f"/users/{new_id}?api_key=aaaabbbbccccddddaaaabbbbccccdddd") + assert response.status_code == HTTPStatus.OK + assert response.json() == {"user_id": new_id, "deleted": True} + + +@pytest.mark.mut +def test_delete_user_as_admin(py_api: TestClient, user_test: Connection) -> None: + """An admin can delete any user without resources.""" + user_test.execute( + text( + "INSERT INTO users (session_hash, email, first_name, last_name, password)" + " VALUES ('eeeeffffaaaabbbbeeeeffffaaaabbbb', 'del2@test.com', 'Del2', 'User', 'x')", + ), + ) + (new_id,) = user_test.execute(text("SELECT LAST_INSERT_ID()")).one() + + response = py_api.delete(f"/users/{new_id}?api_key={ApiKey.ADMIN}") + assert response.status_code == HTTPStatus.OK + assert response.json() == {"user_id": new_id, "deleted": True} + + +def test_delete_user_no_auth(py_api: TestClient) -> None: + """No API key → 401.""" + response = py_api.delete("/users/2") + assert response.status_code == HTTPStatus.UNAUTHORIZED + + +def test_delete_user_not_owner(py_api: TestClient) -> None: + """A non-owner non-admin user cannot delete someone else's account → 403.""" + # SOME_USER (user_id=2) tries to delete OWNER_USER (user_id=3229) + response = py_api.delete(f"/users/3229?api_key={ApiKey.SOME_USER}") + assert response.status_code == HTTPStatus.FORBIDDEN + + +def test_delete_user_not_found(py_api: TestClient) -> None: + """Deleting a non-existent user → 404.""" + response = py_api.delete(f"/users/99999999?api_key={ApiKey.ADMIN}") + assert response.status_code == HTTPStatus.NOT_FOUND + assert response.json()["detail"]["code"] == "120" From be4980eb707693b71294d0ca789f660a23077e94 Mon Sep 17 00:00:00 2001 From: Jayant-Kernel Date: Sun, 8 Mar 2026 00:59:50 +0530 Subject: [PATCH 06/14] fix(review): address PR feedback on account deletion - database/users: add study, task_study, run_study, dataset_tag checks - database/users: wrap user & group deletion in an explicit transaction - routers/users: reuse direct query existence check without unused imports - tests/users_test: add coverage for 409 conflict when resources exist - tests/users_test: add db-level side-effect assertions for successful deletion --- src/database/users.py | 49 +++++++++++++++++++++++------ src/routers/openml/users.py | 12 +++---- tests/routers/openml/users_test.py | 50 ++++++++++++++++++++++++++++++ 3 files changed, 94 insertions(+), 17 deletions(-) diff --git a/src/database/users.py b/src/database/users.py index a75e627f..b52e0f29 100644 --- a/src/database/users.py +++ b/src/database/users.py @@ -88,16 +88,47 @@ def get_user_resource_count(*, user_id: int, expdb: Connection) -> int: text("SELECT COUNT(*) FROM run WHERE uploader = :user_id"), parameters={"user_id": user_id}, ).scalar() or 0 - return int(dataset_count) + int(flow_count) + int(run_count) - -def delete_user(*, user_id: int, connection: Connection) -> None: - """Remove the user and their group memberships from the user database.""" - connection.execute( - text("DELETE FROM users_groups WHERE user_id = :user_id"), + study_count = expdb.execute( + text("SELECT COUNT(*) FROM study WHERE creator = :user_id"), parameters={"user_id": user_id}, - ) - connection.execute( - text("DELETE FROM users WHERE id = :user_id"), + ).scalar() or 0 + task_study_count = expdb.execute( + text("SELECT COUNT(*) FROM task_study WHERE uploader = :user_id"), + parameters={"user_id": user_id}, + ).scalar() or 0 + run_study_count = expdb.execute( + text("SELECT COUNT(*) FROM run_study WHERE uploader = :user_id"), parameters={"user_id": user_id}, + ).scalar() or 0 + dataset_tag_count = expdb.execute( + text("SELECT COUNT(*) FROM dataset_tag WHERE uploader = :user_id"), + parameters={"user_id": user_id}, + ).scalar() or 0 + + return int( + dataset_count + + flow_count + + run_count + + study_count + + task_study_count + + run_study_count + + dataset_tag_count, ) + + +def delete_user(*, user_id: int, connection: Connection) -> None: + """Remove the user and their group memberships from the user database.""" + with connection.begin_nested() as transaction: + try: + connection.execute( + text("DELETE FROM users_groups WHERE user_id = :user_id"), + parameters={"user_id": user_id}, + ) + connection.execute( + text("DELETE FROM users WHERE id = :user_id"), + parameters={"user_id": user_id}, + ) + except Exception: + transaction.rollback() + raise diff --git a/src/routers/openml/users.py b/src/routers/openml/users.py index 948f7d68..3ab25a4f 100644 --- a/src/routers/openml/users.py +++ b/src/routers/openml/users.py @@ -42,16 +42,12 @@ def delete_account( detail={"code": str(int(UserError.NO_ACCESS)), "message": "No access granted"}, ) - # Verify the target user exists - from database.users import get_user_id_for # noqa: PLC0415 - - # Check user exists by querying for them directly - from sqlalchemy import text + from sqlalchemy import text # noqa: PLC0415 existing = user_db.execute( - text("SELECT id FROM users WHERE id = :user_id"), - parameters={"user_id": user_id}, - ).one_or_none() + text("SELECT 1 FROM users WHERE id = :id LIMIT 1"), + parameters={"id": user_id}, + ).scalar() if existing is None: raise HTTPException( diff --git a/tests/routers/openml/users_test.py b/tests/routers/openml/users_test.py index c0ed6d93..a1c0c037 100644 --- a/tests/routers/openml/users_test.py +++ b/tests/routers/openml/users_test.py @@ -19,10 +19,28 @@ def test_delete_user_self(py_api: TestClient, user_test: Connection) -> None: ) (new_id,) = user_test.execute(text("SELECT LAST_INSERT_ID()")).one() + # Add a users_groups entry to verify it gets deleted + user_test.execute( + text("INSERT INTO users_groups (user_id, group_id) VALUES (:id, 2)"), + parameters={"id": new_id}, + ) + response = py_api.delete(f"/users/{new_id}?api_key=aaaabbbbccccddddaaaabbbbccccdddd") assert response.status_code == HTTPStatus.OK assert response.json() == {"user_id": new_id, "deleted": True} + # Verify DB side-effects: user and groups should be gone + user_count = user_test.execute( + text("SELECT COUNT(*) FROM users WHERE id = :id"), + parameters={"id": new_id}, + ).scalar() + group_count = user_test.execute( + text("SELECT COUNT(*) FROM users_groups WHERE user_id = :id"), + parameters={"id": new_id}, + ).scalar() + assert user_count == 0 + assert group_count == 0 + @pytest.mark.mut def test_delete_user_as_admin(py_api: TestClient, user_test: Connection) -> None: @@ -35,10 +53,23 @@ def test_delete_user_as_admin(py_api: TestClient, user_test: Connection) -> None ) (new_id,) = user_test.execute(text("SELECT LAST_INSERT_ID()")).one() + # Add a users_groups entry + user_test.execute( + text("INSERT INTO users_groups (user_id, group_id) VALUES (:id, 2)"), + parameters={"id": new_id}, + ) + response = py_api.delete(f"/users/{new_id}?api_key={ApiKey.ADMIN}") assert response.status_code == HTTPStatus.OK assert response.json() == {"user_id": new_id, "deleted": True} + # Verify DB side-effects + user_count = user_test.execute( + text("SELECT COUNT(*) FROM users WHERE id = :id"), + parameters={"id": new_id}, + ).scalar() + assert user_count == 0 + def test_delete_user_no_auth(py_api: TestClient) -> None: """No API key → 401.""" @@ -58,3 +89,22 @@ def test_delete_user_not_found(py_api: TestClient) -> None: response = py_api.delete(f"/users/99999999?api_key={ApiKey.ADMIN}") assert response.status_code == HTTPStatus.NOT_FOUND assert response.json()["detail"]["code"] == "120" + + +def test_delete_user_has_resources(py_api: TestClient, user_test: Connection) -> None: + """A user with resources (datasets, flows, runs) gets a 409 Conflict.""" + # User 16 owns dataset 130 per tests/users.py definition + target_id = 16 + response = py_api.delete(f"/users/{target_id}?api_key={ApiKey.DATASET_130_OWNER}") + + assert response.status_code == HTTPStatus.CONFLICT + assert response.json()["detail"]["code"] == "122" + assert "resource(s)" in response.json()["detail"]["message"] + + # Verify user record was NOT deleted + user_count = user_test.execute( + text("SELECT COUNT(*) FROM users WHERE id = :id"), + parameters={"id": target_id}, + ).scalar() + assert user_count == 1 + From 6449a2e3bb738969a92d3c3c8b6abe2d1dee5056 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 7 Mar 2026 19:31:54 +0000 Subject: [PATCH 07/14] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/database/users.py | 77 +++++++++++++++++++----------- src/routers/openml/users.py | 3 +- tests/routers/openml/users_test.py | 3 +- 3 files changed, 51 insertions(+), 32 deletions(-) diff --git a/src/database/users.py b/src/database/users.py index b52e0f29..cb75920d 100644 --- a/src/database/users.py +++ b/src/database/users.py @@ -76,35 +76,56 @@ def groups(self) -> list[UserGroup]: def get_user_resource_count(*, user_id: int, expdb: Connection) -> int: """Return the total number of datasets, flows, and runs owned by the user.""" - dataset_count = expdb.execute( - text("SELECT COUNT(*) FROM dataset WHERE uploader = :user_id"), - parameters={"user_id": user_id}, - ).scalar() or 0 - flow_count = expdb.execute( - text("SELECT COUNT(*) FROM implementation WHERE uploader = :user_id"), - parameters={"user_id": user_id}, - ).scalar() or 0 - run_count = expdb.execute( - text("SELECT COUNT(*) FROM run WHERE uploader = :user_id"), - parameters={"user_id": user_id}, - ).scalar() or 0 + dataset_count = ( + expdb.execute( + text("SELECT COUNT(*) FROM dataset WHERE uploader = :user_id"), + parameters={"user_id": user_id}, + ).scalar() + or 0 + ) + flow_count = ( + expdb.execute( + text("SELECT COUNT(*) FROM implementation WHERE uploader = :user_id"), + parameters={"user_id": user_id}, + ).scalar() + or 0 + ) + run_count = ( + expdb.execute( + text("SELECT COUNT(*) FROM run WHERE uploader = :user_id"), + parameters={"user_id": user_id}, + ).scalar() + or 0 + ) - study_count = expdb.execute( - text("SELECT COUNT(*) FROM study WHERE creator = :user_id"), - parameters={"user_id": user_id}, - ).scalar() or 0 - task_study_count = expdb.execute( - text("SELECT COUNT(*) FROM task_study WHERE uploader = :user_id"), - parameters={"user_id": user_id}, - ).scalar() or 0 - run_study_count = expdb.execute( - text("SELECT COUNT(*) FROM run_study WHERE uploader = :user_id"), - parameters={"user_id": user_id}, - ).scalar() or 0 - dataset_tag_count = expdb.execute( - text("SELECT COUNT(*) FROM dataset_tag WHERE uploader = :user_id"), - parameters={"user_id": user_id}, - ).scalar() or 0 + study_count = ( + expdb.execute( + text("SELECT COUNT(*) FROM study WHERE creator = :user_id"), + parameters={"user_id": user_id}, + ).scalar() + or 0 + ) + task_study_count = ( + expdb.execute( + text("SELECT COUNT(*) FROM task_study WHERE uploader = :user_id"), + parameters={"user_id": user_id}, + ).scalar() + or 0 + ) + run_study_count = ( + expdb.execute( + text("SELECT COUNT(*) FROM run_study WHERE uploader = :user_id"), + parameters={"user_id": user_id}, + ).scalar() + or 0 + ) + dataset_tag_count = ( + expdb.execute( + text("SELECT COUNT(*) FROM dataset_tag WHERE uploader = :user_id"), + parameters={"user_id": user_id}, + ).scalar() + or 0 + ) return int( dataset_count diff --git a/src/routers/openml/users.py b/src/routers/openml/users.py index 3ab25a4f..461321b5 100644 --- a/src/routers/openml/users.py +++ b/src/routers/openml/users.py @@ -5,9 +5,8 @@ from sqlalchemy import Connection from core.errors import UserError -from database.users import UserGroup, delete_user, get_user_resource_count +from database.users import User, UserGroup, delete_user, get_user_resource_count from routers.dependencies import expdb_connection, fetch_user, userdb_connection -from database.users import User router = APIRouter(prefix="/users", tags=["users"]) diff --git a/tests/routers/openml/users_test.py b/tests/routers/openml/users_test.py index a1c0c037..3dc00dd1 100644 --- a/tests/routers/openml/users_test.py +++ b/tests/routers/openml/users_test.py @@ -96,7 +96,7 @@ def test_delete_user_has_resources(py_api: TestClient, user_test: Connection) -> # User 16 owns dataset 130 per tests/users.py definition target_id = 16 response = py_api.delete(f"/users/{target_id}?api_key={ApiKey.DATASET_130_OWNER}") - + assert response.status_code == HTTPStatus.CONFLICT assert response.json()["detail"]["code"] == "122" assert "resource(s)" in response.json()["detail"]["message"] @@ -107,4 +107,3 @@ def test_delete_user_has_resources(py_api: TestClient, user_test: Connection) -> parameters={"id": target_id}, ).scalar() assert user_count == 1 - From ec5924789303d3820ea89cb35cc704a64cc181ce Mon Sep 17 00:00:00 2001 From: Jayant-Kernel Date: Sun, 8 Mar 2026 01:11:38 +0530 Subject: [PATCH 08/14] fix(types): specify generic type parameters for dict in users router --- src/routers/openml/users.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/routers/openml/users.py b/src/routers/openml/users.py index 461321b5..48e6fd23 100644 --- a/src/routers/openml/users.py +++ b/src/routers/openml/users.py @@ -1,5 +1,5 @@ from http import HTTPStatus -from typing import Annotated +from typing import Annotated, Any from fastapi import APIRouter, Depends, HTTPException from sqlalchemy import Connection @@ -25,7 +25,7 @@ def delete_account( caller: Annotated[User | None, Depends(fetch_user)] = None, user_db: Annotated[Connection, Depends(userdb_connection)] = None, expdb: Annotated[Connection, Depends(expdb_connection)] = None, -) -> dict: +) -> dict[str, Any]: if caller is None: raise HTTPException( status_code=HTTPStatus.UNAUTHORIZED, From e8fd89f7e54d6da23b9553c667c99adb04226682 Mon Sep 17 00:00:00 2001 From: Jayant-Kernel Date: Sun, 8 Mar 2026 01:13:22 +0530 Subject: [PATCH 09/14] style: remove inline comments to adhere to contribution guidelines --- tests/routers/openml/users_test.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/tests/routers/openml/users_test.py b/tests/routers/openml/users_test.py index 3dc00dd1..e6970b90 100644 --- a/tests/routers/openml/users_test.py +++ b/tests/routers/openml/users_test.py @@ -10,7 +10,6 @@ @pytest.mark.mut def test_delete_user_self(py_api: TestClient, user_test: Connection) -> None: """A user without resources can delete their own account.""" - # Insert a fresh disposable user user_test.execute( text( "INSERT INTO users (session_hash, email, first_name, last_name, password)" @@ -19,7 +18,6 @@ def test_delete_user_self(py_api: TestClient, user_test: Connection) -> None: ) (new_id,) = user_test.execute(text("SELECT LAST_INSERT_ID()")).one() - # Add a users_groups entry to verify it gets deleted user_test.execute( text("INSERT INTO users_groups (user_id, group_id) VALUES (:id, 2)"), parameters={"id": new_id}, @@ -29,7 +27,6 @@ def test_delete_user_self(py_api: TestClient, user_test: Connection) -> None: assert response.status_code == HTTPStatus.OK assert response.json() == {"user_id": new_id, "deleted": True} - # Verify DB side-effects: user and groups should be gone user_count = user_test.execute( text("SELECT COUNT(*) FROM users WHERE id = :id"), parameters={"id": new_id}, @@ -53,7 +50,6 @@ def test_delete_user_as_admin(py_api: TestClient, user_test: Connection) -> None ) (new_id,) = user_test.execute(text("SELECT LAST_INSERT_ID()")).one() - # Add a users_groups entry user_test.execute( text("INSERT INTO users_groups (user_id, group_id) VALUES (:id, 2)"), parameters={"id": new_id}, @@ -63,7 +59,6 @@ def test_delete_user_as_admin(py_api: TestClient, user_test: Connection) -> None assert response.status_code == HTTPStatus.OK assert response.json() == {"user_id": new_id, "deleted": True} - # Verify DB side-effects user_count = user_test.execute( text("SELECT COUNT(*) FROM users WHERE id = :id"), parameters={"id": new_id}, @@ -79,7 +74,6 @@ def test_delete_user_no_auth(py_api: TestClient) -> None: def test_delete_user_not_owner(py_api: TestClient) -> None: """A non-owner non-admin user cannot delete someone else's account → 403.""" - # SOME_USER (user_id=2) tries to delete OWNER_USER (user_id=3229) response = py_api.delete(f"/users/3229?api_key={ApiKey.SOME_USER}") assert response.status_code == HTTPStatus.FORBIDDEN @@ -93,7 +87,6 @@ def test_delete_user_not_found(py_api: TestClient) -> None: def test_delete_user_has_resources(py_api: TestClient, user_test: Connection) -> None: """A user with resources (datasets, flows, runs) gets a 409 Conflict.""" - # User 16 owns dataset 130 per tests/users.py definition target_id = 16 response = py_api.delete(f"/users/{target_id}?api_key={ApiKey.DATASET_130_OWNER}") @@ -101,7 +94,6 @@ def test_delete_user_has_resources(py_api: TestClient, user_test: Connection) -> assert response.json()["detail"]["code"] == "122" assert "resource(s)" in response.json()["detail"]["message"] - # Verify user record was NOT deleted user_count = user_test.execute( text("SELECT COUNT(*) FROM users WHERE id = :id"), parameters={"id": target_id}, From aa4ed9c179cdcebcaf16d2e8123fd3982de70d90 Mon Sep 17 00:00:00 2001 From: Jayant-Kernel Date: Sun, 8 Mar 2026 01:34:10 +0530 Subject: [PATCH 10/14] fix(review): implement session locking and add missing regression tests - routers/users: invalidate session_hash to lock user state and prevent concurrent resource creation during deletion checks - tests/users_test: add parametrized test asserting resource ownership checks for datasets, runs, implementations, studies, and dataset_tags - tests/flows_test: add regression test for the deprecated GET flows/exists endpoint --- src/routers/openml/users.py | 24 ++++++++++++++--- tests/routers/openml/flows_test.py | 14 ++++++++++ tests/routers/openml/users_test.py | 42 ++++++++++++++++++++++++++++++ 3 files changed, 76 insertions(+), 4 deletions(-) diff --git a/src/routers/openml/users.py b/src/routers/openml/users.py index 48e6fd23..2fc1d6e8 100644 --- a/src/routers/openml/users.py +++ b/src/routers/openml/users.py @@ -43,19 +43,34 @@ def delete_account( from sqlalchemy import text # noqa: PLC0415 - existing = user_db.execute( - text("SELECT 1 FROM users WHERE id = :id LIMIT 1"), + original = user_db.execute( + text("SELECT session_hash FROM users WHERE id = :id FOR UPDATE"), parameters={"id": user_id}, - ).scalar() + ).fetchone() - if existing is None: + if original is None: raise HTTPException( status_code=HTTPStatus.NOT_FOUND, detail={"code": str(int(UserError.NOT_FOUND)), "message": "User not found"}, ) + + # Invalidate session immediately to prevent concurrent resource creation + # This serves as a 'deletion_pending' lock as suggested in code review + original_session_hash = original[0] + user_db.execute( + text("UPDATE users SET session_hash = 'DELETION_PENDING' WHERE id = :id"), + parameters={"id": user_id}, + ) + user_db.commit() resource_count = get_user_resource_count(user_id=user_id, expdb=expdb) if resource_count > 0: + # Restore session hash if deletion is blocked + user_db.execute( + text("UPDATE users SET session_hash = :hash WHERE id = :id"), + parameters={"hash": original_session_hash, "id": user_id}, + ) + user_db.commit() raise HTTPException( status_code=HTTPStatus.CONFLICT, detail={ @@ -68,4 +83,5 @@ def delete_account( ) delete_user(user_id=user_id, connection=user_db) + user_db.commit() return {"user_id": user_id, "deleted": True} diff --git a/tests/routers/openml/flows_test.py b/tests/routers/openml/flows_test.py index 658d6325..60b1baed 100644 --- a/tests/routers/openml/flows_test.py +++ b/tests/routers/openml/flows_test.py @@ -76,6 +76,20 @@ def test_flow_exists_not_exists(py_api: TestClient) -> None: assert response.json()["detail"] == "Flow not found." +def test_flow_exists_get_alias(flow: Flow, py_api: TestClient) -> None: + """Test the deprecated GET wrapper for backward compatibility.""" + response = py_api.get(f"/flows/exists/{flow.name}/{flow.external_version}") + assert response.status_code == HTTPStatus.OK + assert response.json() == {"flow_id": flow.id} + + +def test_flow_exists_get_alias_not_exists(py_api: TestClient) -> None: + """Test the deprecated GET wrapper returns 404 for non-existent flows.""" + response = py_api.get("/flows/exists/foo/bar") + assert response.status_code == HTTPStatus.NOT_FOUND + assert response.json()["detail"] == "Flow not found." + + def test_get_flow_no_subflow(py_api: TestClient) -> None: response = py_api.get("/flows/1") assert response.status_code == HTTPStatus.OK diff --git a/tests/routers/openml/users_test.py b/tests/routers/openml/users_test.py index e6970b90..fca28549 100644 --- a/tests/routers/openml/users_test.py +++ b/tests/routers/openml/users_test.py @@ -99,3 +99,45 @@ def test_delete_user_has_resources(py_api: TestClient, user_test: Connection) -> parameters={"id": target_id}, ).scalar() assert user_count == 1 + + +@pytest.mark.mut +@pytest.mark.parametrize( + ("table_name", "column_name", "insert_sql"), + [ + ("dataset", "uploader", "INSERT INTO dataset (uploader, name, format) VALUES (:id, 'x', 'ARFF')"), + ("implementation", "uploader", "INSERT INTO implementation (uploader, fullname, name, version, external_version, uploadDate) VALUES (:id, 'x', 'x', 1, '1', '2024-01-01')"), + ("run", "uploader", "INSERT INTO run (uploader, task_id, setup) VALUES (:id, 1, 1)"), + ("study", "creator", "INSERT INTO study (creator, name, main_entity_type) VALUES (:id, 'x', 'run')"), + ("task_study", "uploader", "INSERT INTO task_study (uploader, study_id, task_id) VALUES (:id, 14, 1)"), + ("run_study", "uploader", "INSERT INTO run_study (uploader, study_id, run_id) VALUES (:id, 14, 1)"), + ("dataset_tag", "uploader", "INSERT INTO dataset_tag (uploader, id, tag) VALUES (:id, 1, 'x')"), + ], +) +def test_delete_user_has_resources_parametrized( + py_api: TestClient, + user_test: Connection, + expdb_test: Connection, + table_name: str, + column_name: str, + insert_sql: str, +) -> None: + """Verify that possessing any tracked resource blocks deletion.""" + user_test.execute( + text( + "INSERT INTO users (session_hash, email, first_name, last_name, password)" + " VALUES ('eeeeffffccccddddaaaabbbbccccdddd', 'res@test.com', 'Del', 'User', 'x')", + ), + ) + (new_id,) = user_test.execute(text("SELECT LAST_INSERT_ID()")).one() + + # Disable constraints temporarily to inject simple orphaned rows for testing 409 + expdb_test.execute(text("SET FOREIGN_KEY_CHECKS=0")) + expdb_test.execute(text(insert_sql), parameters={"id": new_id}) + expdb_test.execute(text("SET FOREIGN_KEY_CHECKS=1")) + expdb_test.commit() + + response = py_api.delete(f"/users/{new_id}?api_key=eeeeffffccccddddaaaabbbbccccdddd") + + assert response.status_code == HTTPStatus.CONFLICT + assert response.json()["detail"]["code"] == "122" From 11e3c998b8f5fd430ff8665c95320c48900aecf0 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 7 Mar 2026 20:04:42 +0000 Subject: [PATCH 11/14] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/routers/openml/users.py | 2 +- tests/routers/openml/users_test.py | 38 ++++++++++++++++++++++++------ 2 files changed, 32 insertions(+), 8 deletions(-) diff --git a/src/routers/openml/users.py b/src/routers/openml/users.py index 2fc1d6e8..89b66a6b 100644 --- a/src/routers/openml/users.py +++ b/src/routers/openml/users.py @@ -53,7 +53,7 @@ def delete_account( status_code=HTTPStatus.NOT_FOUND, detail={"code": str(int(UserError.NOT_FOUND)), "message": "User not found"}, ) - + # Invalidate session immediately to prevent concurrent resource creation # This serves as a 'deletion_pending' lock as suggested in code review original_session_hash = original[0] diff --git a/tests/routers/openml/users_test.py b/tests/routers/openml/users_test.py index fca28549..2cfa865e 100644 --- a/tests/routers/openml/users_test.py +++ b/tests/routers/openml/users_test.py @@ -105,13 +105,37 @@ def test_delete_user_has_resources(py_api: TestClient, user_test: Connection) -> @pytest.mark.parametrize( ("table_name", "column_name", "insert_sql"), [ - ("dataset", "uploader", "INSERT INTO dataset (uploader, name, format) VALUES (:id, 'x', 'ARFF')"), - ("implementation", "uploader", "INSERT INTO implementation (uploader, fullname, name, version, external_version, uploadDate) VALUES (:id, 'x', 'x', 1, '1', '2024-01-01')"), + ( + "dataset", + "uploader", + "INSERT INTO dataset (uploader, name, format) VALUES (:id, 'x', 'ARFF')", + ), + ( + "implementation", + "uploader", + "INSERT INTO implementation (uploader, fullname, name, version, external_version, uploadDate) VALUES (:id, 'x', 'x', 1, '1', '2024-01-01')", + ), ("run", "uploader", "INSERT INTO run (uploader, task_id, setup) VALUES (:id, 1, 1)"), - ("study", "creator", "INSERT INTO study (creator, name, main_entity_type) VALUES (:id, 'x', 'run')"), - ("task_study", "uploader", "INSERT INTO task_study (uploader, study_id, task_id) VALUES (:id, 14, 1)"), - ("run_study", "uploader", "INSERT INTO run_study (uploader, study_id, run_id) VALUES (:id, 14, 1)"), - ("dataset_tag", "uploader", "INSERT INTO dataset_tag (uploader, id, tag) VALUES (:id, 1, 'x')"), + ( + "study", + "creator", + "INSERT INTO study (creator, name, main_entity_type) VALUES (:id, 'x', 'run')", + ), + ( + "task_study", + "uploader", + "INSERT INTO task_study (uploader, study_id, task_id) VALUES (:id, 14, 1)", + ), + ( + "run_study", + "uploader", + "INSERT INTO run_study (uploader, study_id, run_id) VALUES (:id, 14, 1)", + ), + ( + "dataset_tag", + "uploader", + "INSERT INTO dataset_tag (uploader, id, tag) VALUES (:id, 1, 'x')", + ), ], ) def test_delete_user_has_resources_parametrized( @@ -138,6 +162,6 @@ def test_delete_user_has_resources_parametrized( expdb_test.commit() response = py_api.delete(f"/users/{new_id}?api_key=eeeeffffccccddddaaaabbbbccccdddd") - + assert response.status_code == HTTPStatus.CONFLICT assert response.json()["detail"]["code"] == "122" From d175de14c868b7b35cdf4d9a0bafcb3f379edc33 Mon Sep 17 00:00:00 2001 From: Jayant-Kernel Date: Sun, 8 Mar 2026 01:41:15 +0530 Subject: [PATCH 12/14] style: fix ruff E501 and PLR0913 in users tests --- tests/routers/openml/users_test.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/routers/openml/users_test.py b/tests/routers/openml/users_test.py index 2cfa865e..130a9a5b 100644 --- a/tests/routers/openml/users_test.py +++ b/tests/routers/openml/users_test.py @@ -113,7 +113,8 @@ def test_delete_user_has_resources(py_api: TestClient, user_test: Connection) -> ( "implementation", "uploader", - "INSERT INTO implementation (uploader, fullname, name, version, external_version, uploadDate) VALUES (:id, 'x', 'x', 1, '1', '2024-01-01')", + "INSERT INTO implementation (uploader, fullname, name, version, " + "external_version, uploadDate) VALUES (:id, 'x', 'x', 1, '1', '2024-01-01')", ), ("run", "uploader", "INSERT INTO run (uploader, task_id, setup) VALUES (:id, 1, 1)"), ( @@ -138,7 +139,7 @@ def test_delete_user_has_resources(py_api: TestClient, user_test: Connection) -> ), ], ) -def test_delete_user_has_resources_parametrized( +def test_delete_user_has_resources_parametrized( # noqa: PLR0913 py_api: TestClient, user_test: Connection, expdb_test: Connection, From 171c9b3925520628716806d87ceddbe88b0ea9a3 Mon Sep 17 00:00:00 2001 From: Jayant-Kernel Date: Sun, 8 Mar 2026 01:44:23 +0530 Subject: [PATCH 13/14] style: fix ruff ARG001 unused argument lint error --- tests/routers/openml/users_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/routers/openml/users_test.py b/tests/routers/openml/users_test.py index 130a9a5b..9b108f2f 100644 --- a/tests/routers/openml/users_test.py +++ b/tests/routers/openml/users_test.py @@ -143,8 +143,8 @@ def test_delete_user_has_resources_parametrized( # noqa: PLR0913 py_api: TestClient, user_test: Connection, expdb_test: Connection, - table_name: str, - column_name: str, + table_name: str, # noqa: ARG001 + column_name: str, # noqa: ARG001 insert_sql: str, ) -> None: """Verify that possessing any tracked resource blocks deletion.""" From f0999e5841fa8ffe2c4fa339ff19a0ae0225c43b Mon Sep 17 00:00:00 2001 From: Jayant-Kernel Date: Sun, 8 Mar 2026 01:50:40 +0530 Subject: [PATCH 14/14] fix(review): address CodeRabbit actionable comments - openapi: broaden description to match full blocked resource matrix - auth: use unguessable uuid for pending deletion lock to prevent impersonation - database: wrap deletion in try/finally to ensure session token restoration on unexpected failures --- src/routers/openml/users.py | 57 +++++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 25 deletions(-) diff --git a/src/routers/openml/users.py b/src/routers/openml/users.py index 89b66a6b..f43a2b50 100644 --- a/src/routers/openml/users.py +++ b/src/routers/openml/users.py @@ -17,7 +17,7 @@ description=( "Deletes the account of the specified user. " "Only the account owner or an admin may perform this action. " - "Deletion is blocked if the user has uploaded any datasets, flows, or runs." + "Deletion is blocked if the user has uploaded any owned resources." ), ) def delete_account( @@ -41,6 +41,8 @@ def delete_account( detail={"code": str(int(UserError.NO_ACCESS)), "message": "No access granted"}, ) + import uuid + from sqlalchemy import text # noqa: PLC0415 original = user_db.execute( @@ -55,33 +57,38 @@ def delete_account( ) # Invalidate session immediately to prevent concurrent resource creation - # This serves as a 'deletion_pending' lock as suggested in code review original_session_hash = original[0] + temp_lock_hash = uuid.uuid4().hex user_db.execute( - text("UPDATE users SET session_hash = 'DELETION_PENDING' WHERE id = :id"), - parameters={"id": user_id}, + text("UPDATE users SET session_hash = :lock_hash WHERE id = :id"), + parameters={"lock_hash": temp_lock_hash, "id": user_id}, ) user_db.commit() - resource_count = get_user_resource_count(user_id=user_id, expdb=expdb) - if resource_count > 0: - # Restore session hash if deletion is blocked - user_db.execute( - text("UPDATE users SET session_hash = :hash WHERE id = :id"), - parameters={"hash": original_session_hash, "id": user_id}, - ) - user_db.commit() - raise HTTPException( - status_code=HTTPStatus.CONFLICT, - detail={ - "code": str(int(UserError.HAS_RESOURCES)), - "message": ( - f"User has {resource_count} resource(s). " - "Remove or transfer resources before deleting the account." - ), - }, - ) + deletion_successful = False + try: + resource_count = get_user_resource_count(user_id=user_id, expdb=expdb) + if resource_count > 0: + raise HTTPException( + status_code=HTTPStatus.CONFLICT, + detail={ + "code": str(int(UserError.HAS_RESOURCES)), + "message": ( + f"User has {resource_count} resource(s). " + "Remove or transfer resources before deleting the account." + ), + }, + ) - delete_user(user_id=user_id, connection=user_db) - user_db.commit() - return {"user_id": user_id, "deleted": True} + delete_user(user_id=user_id, connection=user_db) + user_db.commit() + deletion_successful = True + return {"user_id": user_id, "deleted": True} + finally: + if not deletion_successful: + # Restore session hash if deletion did not complete successfully + user_db.execute( + text("UPDATE users SET session_hash = :hash WHERE id = :id"), + parameters={"hash": original_session_hash, "id": user_id}, + ) + user_db.commit()