From 1d796f3fc35e75fc6394fd26a3f64e9d1d58de3d Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Mon, 17 Nov 2025 13:17:51 -0800 Subject: [PATCH 01/24] feat: centralized model rebuilding for more maintainable Pydantic circular dependencies Implements a centralized model rebuilding strategy for Pydantic model rebuilding. Instead of maintaing these model rebuilds in each file, we now can import circular dependencies in an `if TYPE_CHECKING:` block. The model rebuild module will then automatically handle model rebuilds, walking the view_model module and dynamically rebuilding our models based on their sub-classes. This should substantially increase ease of maintainability when adding dependent Pydantic models. --- src/mavedb/server_main.py | 2 + src/mavedb/view_models/clinical_control.py | 13 +- src/mavedb/view_models/experiment.py | 17 ++- src/mavedb/view_models/experiment_set.py | 14 +-- src/mavedb/view_models/gnomad_variant.py | 13 +- src/mavedb/view_models/mapped_variant.py | 15 +-- src/mavedb/view_models/model_rebuild.py | 133 +++++++++++++++++++++ src/mavedb/view_models/score_set.py | 18 +-- src/mavedb/view_models/variant.py | 14 +-- 9 files changed, 170 insertions(+), 69 deletions(-) create mode 100644 src/mavedb/view_models/model_rebuild.py diff --git a/src/mavedb/server_main.py b/src/mavedb/server_main.py index 23717e43..9e615572 100644 --- a/src/mavedb/server_main.py +++ b/src/mavedb/server_main.py @@ -18,6 +18,8 @@ UserAgentPlugin, ) +# Import the model rebuild module to ensure all view model forward references are resolved +import mavedb.view_models.model_rebuild # noqa: F401 from mavedb import __version__ from mavedb.lib.exceptions import ( AmbiguousIdentifierError, diff --git a/src/mavedb/view_models/clinical_control.py b/src/mavedb/view_models/clinical_control.py index f098dd12..85cd7834 100644 --- a/src/mavedb/view_models/clinical_control.py +++ b/src/mavedb/view_models/clinical_control.py @@ -2,11 +2,14 @@ from __future__ import annotations from datetime import date -from typing import Optional, Sequence +from typing import TYPE_CHECKING, Optional, Sequence from mavedb.view_models import record_type_validator, set_record_type from mavedb.view_models.base.base import BaseModel +if TYPE_CHECKING: + from mavedb.view_models.mapped_variant import MappedVariantCreate, MappedVariantForClinicalControl + class ClinicalControlBase(BaseModel): db_identifier: str @@ -54,11 +57,3 @@ class ClinicalControlWithMappedVariants(SavedClinicalControlWithMappedVariants): class ClinicalControlOptions(BaseModel): db_name: str available_versions: list[str] - - -# ruff: noqa: E402 -from mavedb.view_models.mapped_variant import MappedVariantCreate, MappedVariantForClinicalControl - -# ClinicalControlUpdate.model_rebuild() -SavedClinicalControlWithMappedVariants.model_rebuild() -ClinicalControlWithMappedVariants.model_rebuild() diff --git a/src/mavedb/view_models/experiment.py b/src/mavedb/view_models/experiment.py index b05766ff..ffea888d 100644 --- a/src/mavedb/view_models/experiment.py +++ b/src/mavedb/view_models/experiment.py @@ -1,15 +1,15 @@ from datetime import date -from typing import Any, Collection, Optional, Sequence +from typing import TYPE_CHECKING, Any, Collection, Optional, Sequence -from pydantic import field_validator, model_validator, ValidationInfo +from pydantic import ValidationInfo, field_validator, model_validator +from mavedb.lib.validation import urn_re from mavedb.lib.validation.exceptions import ValidationError from mavedb.lib.validation.transform import ( 
transform_experiment_set_to_urn, - transform_score_set_list_to_urn_list, transform_record_publication_identifiers, + transform_score_set_list_to_urn_list, ) -from mavedb.lib.validation import urn_re from mavedb.lib.validation.utilities import is_null from mavedb.view_models import record_type_validator, set_record_type from mavedb.view_models.base.base import BaseModel @@ -36,6 +36,9 @@ ) from mavedb.view_models.user import SavedUser, User +if TYPE_CHECKING: + from mavedb.view_models.score_set import ScoreSetPublicDump + class OfficialCollection(BaseModel): badge_name: str @@ -198,9 +201,3 @@ class AdminExperiment(Experiment): # Properties to include in a dump of all published data. class ExperimentPublicDump(SavedExperiment): score_sets: "Sequence[ScoreSetPublicDump]" - - -# ruff: noqa: E402 -from mavedb.view_models.score_set import ScoreSetPublicDump - -ExperimentPublicDump.model_rebuild() diff --git a/src/mavedb/view_models/experiment_set.py b/src/mavedb/view_models/experiment_set.py index de414a4b..c65d1ba8 100644 --- a/src/mavedb/view_models/experiment_set.py +++ b/src/mavedb/view_models/experiment_set.py @@ -1,11 +1,14 @@ from datetime import date -from typing import Sequence, Optional +from typing import TYPE_CHECKING, Optional, Sequence from mavedb.view_models import record_type_validator, set_record_type from mavedb.view_models.base.base import BaseModel from mavedb.view_models.contributor import Contributor from mavedb.view_models.user import SavedUser, User +if TYPE_CHECKING: + from mavedb.view_models.experiment import Experiment, ExperimentPublicDump, SavedExperiment + class ExperimentSetBase(BaseModel): urn: str @@ -60,12 +63,3 @@ class ExperimentSetPublicDump(SavedExperimentSet): experiments: "Sequence[ExperimentPublicDump]" created_by: Optional[User] = None modified_by: Optional[User] = None - - -# ruff: noqa: E402 -from mavedb.view_models.experiment import Experiment, ExperimentPublicDump, SavedExperiment - -SavedExperimentSet.model_rebuild() -ExperimentSet.model_rebuild() -AdminExperimentSet.model_rebuild() -ExperimentSetPublicDump.model_rebuild() diff --git a/src/mavedb/view_models/gnomad_variant.py b/src/mavedb/view_models/gnomad_variant.py index 1171cb49..97dd675e 100644 --- a/src/mavedb/view_models/gnomad_variant.py +++ b/src/mavedb/view_models/gnomad_variant.py @@ -2,11 +2,14 @@ from __future__ import annotations from datetime import date -from typing import Optional, Sequence +from typing import TYPE_CHECKING, Optional, Sequence from mavedb.view_models import record_type_validator, set_record_type from mavedb.view_models.base.base import BaseModel +if TYPE_CHECKING: + from mavedb.view_models.mapped_variant import MappedVariant, MappedVariantCreate, SavedMappedVariant + class GnomADVariantBase(BaseModel): """Base class for GnomAD variant view models.""" @@ -67,11 +70,3 @@ class GnomADVariantWithMappedVariants(SavedGnomADVariantWithMappedVariants): """GnomAD variant view model with mapped variants for non-admin clients.""" mapped_variants: Sequence["MappedVariant"] - - -# ruff: noqa: E402 -from mavedb.view_models.mapped_variant import MappedVariant, SavedMappedVariant, MappedVariantCreate - -GnomADVariantUpdate.model_rebuild() -SavedGnomADVariantWithMappedVariants.model_rebuild() -GnomADVariantWithMappedVariants.model_rebuild() diff --git a/src/mavedb/view_models/mapped_variant.py b/src/mavedb/view_models/mapped_variant.py index 13aec65d..0131ebdf 100644 --- a/src/mavedb/view_models/mapped_variant.py +++ b/src/mavedb/view_models/mapped_variant.py @@ -2,7 +2,7 @@ from 
__future__ import annotations from datetime import date -from typing import Any, Optional, Sequence +from typing import TYPE_CHECKING, Any, Optional, Sequence from pydantic import model_validator @@ -10,6 +10,10 @@ from mavedb.view_models import record_type_validator, set_record_type from mavedb.view_models.base.base import BaseModel +if TYPE_CHECKING: + from mavedb.view_models.clinical_control import ClinicalControl, ClinicalControlBase, SavedClinicalControl + from mavedb.view_models.gnomad_variant import GnomADVariant, GnomADVariantBase, SavedGnomADVariant + class MappedVariantBase(BaseModel): pre_mapped: Optional[Any] = None @@ -94,12 +98,3 @@ def generate_score_set_urn_list(cls, data: Any): except AttributeError as exc: raise ValidationError(f"Unable to create {cls.__name__} without attribute: {exc}.") # type: ignore return data - - -# ruff: noqa: E402 -from mavedb.view_models.clinical_control import ClinicalControlBase, ClinicalControl, SavedClinicalControl -from mavedb.view_models.gnomad_variant import GnomADVariantBase, GnomADVariant, SavedGnomADVariant - -MappedVariantUpdate.model_rebuild() -SavedMappedVariantWithControls.model_rebuild() -MappedVariantWithControls.model_rebuild() diff --git a/src/mavedb/view_models/model_rebuild.py b/src/mavedb/view_models/model_rebuild.py new file mode 100644 index 00000000..a14ce591 --- /dev/null +++ b/src/mavedb/view_models/model_rebuild.py @@ -0,0 +1,133 @@ +""" +Centralized model rebuilding for view models with circular dependencies. + +This module handles the rebuilding of all Pydantic models that have forward references +to other models, resolving circular import issues by performing the rebuilds after +all modules have been imported. +""" + +from __future__ import annotations + +import importlib +import inspect +import logging +import pkgutil +from pathlib import Path + +from pydantic import BaseModel + +logger = logging.getLogger(__name__) + + +def _discover_and_sort_models(): + """Discover all Pydantic models and sort them by dependencies.""" + import mavedb.view_models + + view_models_path = Path(mavedb.view_models.__file__).parent + models_by_module = {} + + # Discover all models grouped by module + for module_info in pkgutil.walk_packages([str(view_models_path)], "mavedb.view_models."): + module_name = module_info.name + if module_name.endswith(".model_rebuild"): + continue + + try: + module = importlib.import_module(module_name) + module_models = [] + + for name, obj in inspect.getmembers(module, inspect.isclass): + if issubclass(obj, BaseModel) and obj.__module__ == module_name and hasattr(obj, "model_rebuild"): + module_models.append((f"{module_name}.{name}", obj)) + + if module_models: + models_by_module[module_name] = module_models + + except ImportError as e: + logger.warning("Could not import %s: %s", module_name, e) + + # Sort models within each module by dependency (base classes first) + sorted_models = [] + for module_name, module_models in models_by_module.items(): + + def dependency_count(item): + _, model_class = item + # Count base classes within the same module + count = 0 + for base in model_class.__bases__: + if issubclass(base, BaseModel) and base != BaseModel and any(base == mc for _, mc in module_models): + count += 1 + return count + + module_models.sort(key=dependency_count) + sorted_models.extend(module_models) + + return sorted_models + + +def rebuild_all_models(): + """ + Rebuild all Pydantic models in the view_models package. 
+ + Discovers models, sorts by dependencies, and rebuilds with multi-pass + approach to achieve 0 circular dependencies. + """ + # Discover and sort models by dependencies + models_to_rebuild = _discover_and_sort_models() + + # Create registry for forward reference resolution + model_registry = {name.split(".")[-1]: cls for name, cls in models_to_rebuild} + + logger.debug("Rebuilding %d Pydantic models...", len(models_to_rebuild)) + + successful_rebuilds = 0 + remaining_models = models_to_rebuild[:] + + # Multi-pass rebuild to handle complex dependencies + for pass_num in range(3): + if not remaining_models: + break + + models_for_next_pass = [] + + for model_name, model_class in remaining_models: + try: + # Temporarily inject all models into the module for forward references + module = importlib.import_module(model_class.__module__) + injected = {} + + for simple_name, ref_class in model_registry.items(): + if simple_name not in module.__dict__: + injected[simple_name] = module.__dict__.get(simple_name) + module.__dict__[simple_name] = ref_class + + try: + model_class.model_rebuild() + successful_rebuilds += 1 + logger.debug("Rebuilt %s", model_name) + finally: + # Restore original module state + for name, original in injected.items(): + if original is None: + module.__dict__.pop(name, None) + else: + module.__dict__[name] = original + + except Exception as e: + if "is not defined" in str(e) and pass_num < 2: + models_for_next_pass.append((model_name, model_class)) + logger.debug("Deferring %s to next pass", model_name) + else: + logger.error("Failed to rebuild %s: %s", model_name, e) + + remaining_models = models_for_next_pass + + logger.info( + "Rebuilt %d Pydantic models successfully, %d models with circular dependencies remain.", + successful_rebuilds, + len(remaining_models), + ) + + +# Automatically rebuild all models when this module is imported +rebuild_all_models() diff --git a/src/mavedb/view_models/score_set.py b/src/mavedb/view_models/score_set.py index 9f53cf64..de8984bd 100644 --- a/src/mavedb/view_models/score_set.py +++ b/src/mavedb/view_models/score_set.py @@ -3,7 +3,7 @@ import json from datetime import date -from typing import Any, Collection, Optional, Sequence, Union +from typing import TYPE_CHECKING, Any, Collection, Optional, Sequence, Union from pydantic import field_validator, model_validator from typing_extensions import Self @@ -46,6 +46,10 @@ from mavedb.view_models.user import SavedUser, User from mavedb.view_models.utils import all_fields_optional_model +if TYPE_CHECKING: + from mavedb.view_models.experiment import Experiment + from mavedb.view_models.variant import SavedVariantEffectMeasurement + UnboundedRange = tuple[Union[float, None], Union[float, None]] @@ -456,7 +460,7 @@ class ScoreSetWithVariants(ScoreSet): are requested. 
""" - variants: list[SavedVariantEffectMeasurement] + variants: list["SavedVariantEffectMeasurement"] class AdminScoreSet(ScoreSet): @@ -482,13 +486,3 @@ class ScoreSetPublicDump(SavedScoreSet): mapping_state: Optional[MappingState] = None mapping_errors: Optional[dict] = None score_calibrations: Optional[Sequence[ScoreCalibration]] = None # type: ignore[assignment] - - -# ruff: noqa: E402 -from mavedb.view_models.experiment import Experiment -from mavedb.view_models.variant import SavedVariantEffectMeasurement - -ScoreSetWithVariants.model_rebuild() -ShortScoreSet.model_rebuild() -ScoreSet.model_rebuild() -ScoreSetWithVariants.model_rebuild() diff --git a/src/mavedb/view_models/variant.py b/src/mavedb/view_models/variant.py index 2fc62d7f..1a9426de 100644 --- a/src/mavedb/view_models/variant.py +++ b/src/mavedb/view_models/variant.py @@ -1,12 +1,15 @@ from datetime import date -from typing import Any, Optional +from typing import TYPE_CHECKING, Any, Optional from pydantic import model_validator from mavedb.lib.validation.exceptions import ValidationError -from mavedb.view_models.mapped_variant import MappedVariant, SavedMappedVariant from mavedb.view_models import record_type_validator, set_record_type from mavedb.view_models.base.base import BaseModel +from mavedb.view_models.mapped_variant import MappedVariant, SavedMappedVariant + +if TYPE_CHECKING: + from mavedb.view_models.score_set import ScoreSet, ShortScoreSet class VariantEffectMeasurementBase(BaseModel): @@ -106,10 +109,3 @@ class ClingenAlleleIdVariantLookupResponse(BaseModel): exact_match: Optional[Variant] = None equivalent_nt: list[Variant] = [] equivalent_aa: list[Variant] = [] - - -# ruff: noqa: E402 -from mavedb.view_models.score_set import ScoreSet, ShortScoreSet - -VariantEffectMeasurementWithScoreSet.update_forward_refs() -VariantEffectMeasurementWithShortScoreSet.update_forward_refs() From 2e02b931c0aaf08e72ba5acbe4799a93b1aab33a Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Mon, 17 Nov 2025 14:21:47 -0800 Subject: [PATCH 02/24] feat: rebuilt Pydantic models up front for availability within entire module --- src/mavedb/__init__.py | 3 +++ src/mavedb/server_main.py | 2 -- src/mavedb/view_models/model_rebuild.py | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/mavedb/__init__.py b/src/mavedb/__init__.py index 60558b4a..9041300b 100644 --- a/src/mavedb/__init__.py +++ b/src/mavedb/__init__.py @@ -9,3 +9,6 @@ __version__ = "2025.5.0" logger.info(f"MaveDB {__version__}") + +# Import the model rebuild module to ensure all view model forward references are resolved +from mavedb.view_models import model_rebuild # noqa: F401, E402 diff --git a/src/mavedb/server_main.py b/src/mavedb/server_main.py index 9e615572..23717e43 100644 --- a/src/mavedb/server_main.py +++ b/src/mavedb/server_main.py @@ -18,8 +18,6 @@ UserAgentPlugin, ) -# Import the model rebuild module to ensure all view model forward references are resolved -import mavedb.view_models.model_rebuild # noqa: F401 from mavedb import __version__ from mavedb.lib.exceptions import ( AmbiguousIdentifierError, diff --git a/src/mavedb/view_models/model_rebuild.py b/src/mavedb/view_models/model_rebuild.py index a14ce591..ce738143 100644 --- a/src/mavedb/view_models/model_rebuild.py +++ b/src/mavedb/view_models/model_rebuild.py @@ -122,7 +122,7 @@ def rebuild_all_models(): remaining_models = models_for_next_pass - logger.info( + logger.debug( "Rebuilt %d Pydantic models successfully, %d models with circular dependencies remain.", 
successful_rebuilds, len(remaining_models), From d1641de7e4bee43e8a0c9f9283e022c5b56830ff Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Mon, 24 Nov 2025 10:33:20 -0800 Subject: [PATCH 03/24] feat: add flexible model loader for Pydantic models from JSON body or multipart form data --- src/mavedb/lib/flexible_model_loader.py | 210 +++++++++++++++ tests/lib/test_flexible_model_loader.py | 342 ++++++++++++++++++++++++ 2 files changed, 552 insertions(+) create mode 100644 src/mavedb/lib/flexible_model_loader.py create mode 100644 tests/lib/test_flexible_model_loader.py diff --git a/src/mavedb/lib/flexible_model_loader.py b/src/mavedb/lib/flexible_model_loader.py new file mode 100644 index 00000000..b3041e54 --- /dev/null +++ b/src/mavedb/lib/flexible_model_loader.py @@ -0,0 +1,210 @@ +"""Generic dependency for loading Pydantic models from either JSON body or multipart form data.""" + +from typing import Awaitable, Callable, Type, TypeVar + +from fastapi import Form, HTTPException, Request +from fastapi.exceptions import RequestValidationError +from pydantic import BaseModel, ValidationError + +T = TypeVar("T", bound=BaseModel) + + +def create_flexible_model_loader( + model_class: Type[T], form_field_name: str = "item", error_detail_prefix: str = "Invalid request" +) -> Callable[..., Awaitable[T]]: + """Create a flexible FastAPI dependency that can load a Pydantic model from either + JSON request body or multipart form data containing JSON. + + This factory function creates a dependency that enables FastAPI routes to accept + data in two formats: + 1. Standard JSON request body (Content-Type: application/json) + 2. Multipart form data with JSON string in a specified field + + This is particularly useful for endpoints that need to handle both pure JSON + requests and file uploads with accompanying metadata, allowing clients to + choose the most appropriate format for their use case. + + Args: + model_class (Type[T]): The Pydantic model class to instantiate from the JSON data. + Must be a subclass of BaseModel with proper field definitions and validation. + form_field_name (str, optional): Name of the form field containing JSON data + when using multipart/form-data requests. This parameter is primarily for + documentation purposes - the actual form field in OpenAPI docs will be + named 'item'. Defaults to "item". + error_detail_prefix (str, optional): Prefix text for error messages to provide + context about which operation failed. Defaults to "Invalid request". + + Returns: + Callable[..., Awaitable[T]]: An async dependency function that can be used + with FastAPI's Depends(). The returned function accepts a Request object + and optional form data, returning an instance of the specified model_class. + + Raises: + RequestValidationError: When the JSON data doesn't match the Pydantic model schema. + This preserves FastAPI's standard validation error format for consistent + client error handling. + HTTPException: For other parsing errors like invalid JSON syntax, missing data, + or unexpected exceptions during processing. + + Example: + Basic usage with a simple model: + + >>> from pydantic import BaseModel + >>> class UserModel(BaseModel): + ... name: str + ... email: str + + >>> user_loader = create_flexible_model_loader(UserModel) + + >>> @app.post("/users") + ... async def create_user(user: UserModel = Depends(user_loader)): + ... return {"user": user} + + Advanced usage with file uploads: + + >>> calibration_loader = create_flexible_model_loader( + ... ScoreCalibrationCreate, + ... 
form_field_name="calibration_metadata", + ... error_detail_prefix="Invalid calibration data" + ... ) + + >>> @app.post("/calibrations") + ... async def create_calibration( + ... calibration: ScoreCalibrationCreate = Depends(calibration_loader), + ... file: UploadFile = File(...) + ... ): + ... # Process both calibration metadata and uploaded file + ... return process_calibration(calibration, file) + + Client Usage Examples: + JSON request: + ```bash + curl -X POST "http://api/users" \\ + -H "Content-Type: application/json" \\ + -d '{"name": "John", "email": "john@example.com"}' + ``` + + Multipart form request: + ```bash + curl -X POST "http://api/calibrations" \\ + -F 'item={"name": "Test", "description": "Example"}' \\ + -F 'file=@data.csv' + ``` + + Note: + The dependency prioritizes form data over JSON body - if both are provided, + the form field data will be used. This ensures predictable behavior when + clients mix content types. + + OpenAPI Documentation Enhancement: + Without manual definition, OpenAPI docs will show the form field as 'item' for + multipart requests, regardless of the form_field_name parameter. To customize the + OpenAPI documentation and show both JSON and multipart form options clearly, use + the `openapi_extra` parameter on your route decorator: + + ```python + @router.post( + "/example-endpoint", + response_model=ExampleResponseModel, + summary="Example endpoint using flexible model loader", + description="Example endpoint description", + openapi_extra={ + "requestBody": { + "content": { + "application/json": { + "schema": {"$ref": "#/components/schemas/YourModelName"}, + "example": { + "example_field": "example_value", + "another_field": 123 + } + }, + "multipart/form-data": { + "schema": { + "type": "object", + "properties": { + "item": { + "type": "string", + "description": "JSON string containing the model data", + "example": '{"example_field":"example_value","another_field":123}' + }, + "file_upload": { + "type": "string", + "format": "binary", + "description": "Optional file upload" + } + } + } + } + }, + "description": "Data can be sent as JSON body or multipart form data" + } + } + ) + async def example_endpoint( + model_data: YourModel = Depends(your_loader), + file_upload: UploadFile = File(None) + ): + return process_data(model_data, file_upload) + ``` + + This configuration will display both content types clearly in the OpenAPI/Swagger UI, + allowing users to choose between JSON and multipart form submission methods. 
+ """ + + async def flexible_loader( + request: Request, + item: str = Form(None, description="JSON data for the request", alias=form_field_name), + ) -> T: + """Load Pydantic model from either JSON body or form field.""" + try: + # Prefer form field if provided + if item is not None: + model_instance = model_class.model_validate_json(item) + # Fall back to JSON body + else: + body = await request.body() + if not body: + raise HTTPException( + status_code=422, detail=f"{error_detail_prefix}: No data provided in form field or request body" + ) + model_instance = model_class.model_validate_json(body) + + return model_instance + + # Raise validation errors in FastAPI's expected format + except ValidationError as e: + raise RequestValidationError(e.errors()) + # Any other parsing errors + except Exception as e: + raise HTTPException(status_code=422, detail=f"{error_detail_prefix}: {str(e)}") + + return flexible_loader + + +# Convenience factory for common use cases +def json_or_form_loader(model_class: Type[T], field_name: str = "item") -> Callable[..., Awaitable[T]]: + """Simplified factory function for creating flexible model loaders with sensible defaults. + + This is a convenience wrapper around create_flexible_model_loader() that provides + a quick way to create loaders without specifying all parameters. It automatically + generates an appropriate error message prefix based on the model class name. + + Args: + model_class (Type[T]): The Pydantic model class to load from JSON data. + field_name (str, optional): Name of the form field for documentation purposes. + Defaults to "item". + + Returns: + Callable[..., Awaitable[T]]: A flexible dependency function ready to use with Depends(). + + Example: + Quick setup for simple cases: + + >>> user_loader = json_or_form_loader(UserModel) + >>> @app.post("/users") + ... async def create_user(user: UserModel = Depends(user_loader)): + ... 
return user + """ + return create_flexible_model_loader( + model_class=model_class, form_field_name=field_name, error_detail_prefix=f"Invalid {model_class.__name__} data" + ) diff --git a/tests/lib/test_flexible_model_loader.py b/tests/lib/test_flexible_model_loader.py new file mode 100644 index 00000000..e9f578d7 --- /dev/null +++ b/tests/lib/test_flexible_model_loader.py @@ -0,0 +1,342 @@ +# ruff: noqa: E402 + +import pytest + +pytest.importorskip("fastapi") + +import json +from typing import Optional +from unittest.mock import AsyncMock, Mock, patch + +from fastapi import HTTPException, Request +from fastapi.exceptions import RequestValidationError + +from mavedb.lib.flexible_model_loader import create_flexible_model_loader, json_or_form_loader +from mavedb.view_models.base.base import BaseModel + + +class SampleModel(BaseModel): + """Sample model for flexible model loader tests.""" + + name: str + age: int + email: Optional[str] = None + + +class ComplexSampleModel(BaseModel): + """More complex sample model with validation.""" + + id: int + title: str + tags: list[str] = [] + metadata: dict = {} + + +@pytest.fixture +def test_model_loader(): + """Create a flexible model loader for SampleModel.""" + return create_flexible_model_loader(SampleModel) + + +@pytest.fixture +def custom_loader(): + """Create a flexible model loader with custom parameters.""" + return create_flexible_model_loader(SampleModel, form_field_name="custom_field", error_detail_prefix="Custom error") + + +@pytest.fixture +def mock_request(): + """Create a mock FastAPI Request object.""" + request = Mock(spec=Request) + request.body = AsyncMock() + return request + + +class TestCreateFlexibleModelLoader: + """Test suite for create_flexible_model_loader function.""" + + @pytest.mark.asyncio + async def test_load_from_form_field_valid_data(self, test_model_loader, mock_request): + """Test loading valid data from form field.""" + test_data = {"name": "John", "age": 30, "email": "john@example.com"} + json_data = json.dumps(test_data) + + result = await test_model_loader(mock_request, item=json_data) + + assert isinstance(result, SampleModel) + assert result.name == "John" + assert result.age == 30 + assert result.email == "john@example.com" + + @pytest.mark.asyncio + async def test_load_from_form_field_minimal_data(self, test_model_loader, mock_request): + """Test loading minimal valid data from form field.""" + test_data = {"name": "Jane", "age": 25} + json_data = json.dumps(test_data) + + result = await test_model_loader(mock_request, item=json_data) + + assert isinstance(result, SampleModel) + assert result.name == "Jane" + assert result.age == 25 + assert result.email is None + + @pytest.mark.asyncio + async def test_load_from_json_body_valid_data(self, test_model_loader, mock_request): + """Test loading valid data from JSON body.""" + test_data = {"name": "Bob", "age": 35, "email": "bob@example.com"} + json_bytes = json.dumps(test_data).encode("utf-8") + mock_request.body.return_value = json_bytes + + result = await test_model_loader(mock_request, item=None) + + assert isinstance(result, SampleModel) + assert result.name == "Bob" + assert result.age == 35 + assert result.email == "bob@example.com" + + @pytest.mark.asyncio + async def test_form_field_takes_priority_over_json_body(self, test_model_loader, mock_request): + """Test that form field data takes priority over JSON body.""" + form_data = {"name": "FormUser", "age": 25} + body_data = {"name": "BodyUser", "age": 30} + + json_form = json.dumps(form_data) + 
json_body = json.dumps(body_data).encode("utf-8") + mock_request.body.return_value = json_body + + result = await test_model_loader(mock_request, item=json_form) + + assert result.name == "FormUser" + assert result.age == 25 + + @pytest.mark.asyncio + async def test_validation_error_from_form_field(self, test_model_loader, mock_request): + """Test ValidationError handling for invalid form field data.""" + invalid_data = {"name": "John"} # Missing required 'age' field + json_data = json.dumps(invalid_data) + + with pytest.raises(RequestValidationError) as exc_info: + await test_model_loader(mock_request, item=json_data) + + errors = exc_info.value.errors() + assert len(errors) > 0 + assert any(error["loc"] == ("age",) for error in errors) + + @pytest.mark.asyncio + async def test_validation_error_from_json_body(self, test_model_loader, mock_request): + """Test ValidationError handling for invalid JSON body data.""" + invalid_data = {"age": 25} # Missing required 'name' field + json_bytes = json.dumps(invalid_data).encode("utf-8") + mock_request.body.return_value = json_bytes + + with pytest.raises(RequestValidationError) as exc_info: + await test_model_loader(mock_request, item=None) + + errors = exc_info.value.errors() + assert len(errors) > 0 + assert any(error["loc"] == ("name",) for error in errors) + + @pytest.mark.asyncio + async def test_invalid_json_syntax_form_field(self, test_model_loader, mock_request): + """Test handling of invalid JSON syntax in form field.""" + invalid_json = '{"name": "John", "age":}' # Invalid JSON + + with pytest.raises(RequestValidationError) as exc_info: + await test_model_loader(mock_request, item=invalid_json) + + assert exc_info.value.errors() + assert "json_invalid" in exc_info.value.errors()[0]["type"] + + @pytest.mark.asyncio + async def test_invalid_json_syntax_body(self, test_model_loader, mock_request): + """Test handling of invalid JSON syntax in request body.""" + invalid_json = b'{"name": "John", "age":}' # Invalid JSON + mock_request.body.return_value = invalid_json + + with pytest.raises(RequestValidationError) as exc_info: + await test_model_loader(mock_request, item=None) + + assert exc_info.value.errors() + assert "json_invalid" in exc_info.value.errors()[0]["type"] + + @pytest.mark.asyncio + async def test_empty_request_body_and_no_form_field(self, test_model_loader, mock_request): + """Test handling when no data is provided in either form field or body.""" + mock_request.body.return_value = b"" + + with pytest.raises(HTTPException) as exc_info: + await test_model_loader(mock_request, item=None) + + assert exc_info.value.status_code == 422 + assert "No data provided in form field or request body" in exc_info.value.detail + + @pytest.mark.asyncio + async def test_custom_error_detail_prefix(self, custom_loader, mock_request): + """Test custom error detail prefix is used in error messages.""" + mock_request.body.return_value = b"" + + with pytest.raises(HTTPException) as exc_info: + await custom_loader(mock_request, item=None) + + assert exc_info.value.status_code == 422 + assert "Custom error" in exc_info.value.detail + + @pytest.mark.asyncio + async def test_complex_model_with_nested_data(self, mock_request): + """Test loading complex model with nested data structures.""" + complex_loader = create_flexible_model_loader(ComplexSampleModel) + test_data = { + "id": 1, + "title": "Test Item", + "tags": ["tag1", "tag2", "tag3"], + "metadata": {"key1": "value1", "key2": {"nested": "value"}}, + } + json_data = json.dumps(test_data) + + result = 
await complex_loader(mock_request, item=json_data) + + assert isinstance(result, ComplexSampleModel) + assert result.id == 1 + assert result.title == "Test Item" + assert result.tags == ["tag1", "tag2", "tag3"] + assert result.metadata == {"key1": "value1", "key2": {"nested": "value"}} + + @pytest.mark.asyncio + async def test_form_field_name_parameter_documentation_only(self, mock_request): + """Test that form_field_name parameter doesn't affect functionality.""" + # Create loaders with different form_field_name values + loader1 = create_flexible_model_loader(SampleModel, form_field_name="item") + loader2 = create_flexible_model_loader(SampleModel, form_field_name="custom_name") + + test_data = {"name": "Test", "age": 30} + json_data = json.dumps(test_data) + + # Both should work the same way since form_field_name is for docs only + result1 = await loader1(mock_request, item=json_data) + result2 = await loader2(mock_request, item=json_data) + + assert result1.name == result2.name == "Test" + assert result1.age == result2.age == 30 + + @pytest.mark.asyncio + async def test_exception_handling_for_unexpected_errors(self, test_model_loader, mock_request): + """Test handling of unexpected exceptions during processing.""" + # Mock an exception during model validation + with patch.object(SampleModel, "model_validate_json", side_effect=RuntimeError("Unexpected error")): + test_data = {"name": "John", "age": 30} + json_data = json.dumps(test_data) + + with pytest.raises(HTTPException) as exc_info: + await test_model_loader(mock_request, item=json_data) + + assert exc_info.value.status_code == 422 + assert "Unexpected error" in exc_info.value.detail + + @pytest.mark.asyncio + async def test_unicode_data_handling(self, test_model_loader, mock_request): + """Test handling of unicode characters in data.""" + test_data = {"name": "José María", "age": 25, "email": "josé@example.com"} + json_data = json.dumps(test_data, ensure_ascii=False) + + result = await test_model_loader(mock_request, item=json_data) + + assert result.name == "José María" + assert result.email == "josé@example.com" + + +class TestJsonOrFormLoader: + """Test suite for json_or_form_loader convenience function.""" + + @pytest.mark.asyncio + async def test_convenience_function_basic_usage(self, mock_request): + """Test the convenience function with basic usage.""" + loader = json_or_form_loader(SampleModel) + test_data = {"name": "Alice", "age": 28} + json_data = json.dumps(test_data) + + result = await loader(mock_request, item=json_data) + + assert isinstance(result, SampleModel) + assert result.name == "Alice" + assert result.age == 28 + + @pytest.mark.asyncio + async def test_convenience_function_custom_field_name(self, mock_request): + """Test the convenience function with custom field name.""" + loader = json_or_form_loader(SampleModel, field_name="custom_field") + test_data = {"name": "Charlie", "age": 35} + json_data = json.dumps(test_data) + + result = await loader(mock_request, item=json_data) + + assert isinstance(result, SampleModel) + assert result.name == "Charlie" + assert result.age == 35 + + @pytest.mark.asyncio + async def test_convenience_function_error_message_format(self, mock_request): + """Test that convenience function generates appropriate error messages.""" + loader = json_or_form_loader(SampleModel) + mock_request.body.return_value = b"" + + with pytest.raises(HTTPException) as exc_info: + await loader(mock_request, item=None) + + assert exc_info.value.status_code == 422 + assert "Invalid SampleModel data" in 
exc_info.value.detail + + @pytest.mark.asyncio + async def test_convenience_function_with_complex_model(self, mock_request): + """Test convenience function with more complex model.""" + loader = json_or_form_loader(ComplexSampleModel) + test_data = {"id": 42, "title": "Complex Test", "tags": ["test", "complex"], "metadata": {"source": "test"}} + json_data = json.dumps(test_data) + + result = await loader(mock_request, item=json_data) + + assert isinstance(result, ComplexSampleModel) + assert result.id == 42 + assert result.title == "Complex Test" + assert result.tags == ["test", "complex"] + assert result.metadata == {"source": "test"} + + +class TestEdgeCases: + """Test edge cases and boundary conditions.""" + + @pytest.mark.asyncio + async def test_empty_string_form_field(self, test_model_loader, mock_request): + """Test handling of empty string in form field.""" + with pytest.raises(RequestValidationError) as exc_info: + await test_model_loader(mock_request, item="") + + assert exc_info.value.errors() + assert "json_invalid" in exc_info.value.errors()[0]["type"] + + @pytest.mark.asyncio + async def test_whitespace_only_form_field(self, test_model_loader, mock_request): + """Test handling of whitespace-only form field.""" + with pytest.raises(RequestValidationError) as exc_info: + await test_model_loader(mock_request, item=" ") + + assert exc_info.value.errors() + assert "json_invalid" in exc_info.value.errors()[0]["type"] + + @pytest.mark.asyncio + async def test_null_json_value(self, test_model_loader, mock_request): + """Test handling of null JSON value.""" + with pytest.raises(RequestValidationError) as exc_info: + await test_model_loader(mock_request, item="null") + + assert exc_info.value.errors() + assert "model_type" in exc_info.value.errors()[0]["type"] + + @pytest.mark.asyncio + async def test_array_json_value(self, test_model_loader, mock_request): + """Test handling of array JSON value instead of object.""" + with pytest.raises(RequestValidationError) as exc_info: + await test_model_loader(mock_request, item='["not", "an", "object"]') + + assert exc_info.value.errors() + assert "model_type" in exc_info.value.errors()[0]["type"] From 8c8292c48ccdd31a854fbad07cc8044da99a5505 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Mon, 17 Nov 2025 22:18:17 -0800 Subject: [PATCH 04/24] feat: Introduce Score Calibration Functional Classification - Added new SQLAlchemy model `ScoreCalibrationFunctionalClassification` to represent functional classifications associated with score calibrations. - Established relationships between `ScoreCalibration` and `ScoreCalibrationFunctionalClassification`. - Created an association table for many-to-many relationships between functional classifications and variants. - Updated view models to accommodate new functional classification structures, including validation for inclusive bounds. - Enhanced tests to cover new functionality, including creation and validation of functional classifications. - Refactored existing code to ensure compatibility with new models and relationships. 
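As an illustration of the inclusive-bound handling mentioned above, a rough sketch of the
containment check follows (the field names `range`, `inclusive_lower_bound`, and
`inclusive_upper_bound` mirror the new ScoreCalibrationFunctionalClassification columns, and
the function name follows the `score_is_contained_in_range` call used by the migration script
below; the exact view-model implementation may differ):

    from typing import Optional, Sequence

    def score_is_contained_in_range(
        score: float,
        score_range: Sequence[Optional[float]],
        inclusive_lower_bound: bool,
        inclusive_upper_bound: bool,
    ) -> bool:
        # A bound of None means the range is unbounded on that side.
        lower, upper = score_range
        if lower is not None:
            if score < lower or (score == lower and not inclusive_lower_bound):
                return False
        if upper is not None:
            if score > upper or (score == upper and not inclusive_upper_bound):
                return False
        return True

    # e.g. an "abnormal" range of [None, 0.5) with an exclusive upper bound
    assert score_is_contained_in_range(0.25, (None, 0.5), True, False)
    assert not score_is_contained_in_range(0.5, (None, 0.5), True, False)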
--- .../migrate_jsonb_ranges_to_table_rows.py | 374 ++++++++++++++++++ ...add_acmg_classification_and_functional_.py | 142 +++++++ src/mavedb/lib/acmg.py | 115 +++--- src/mavedb/lib/annotation/classification.py | 5 +- src/mavedb/lib/score_calibrations.py | 85 +++- src/mavedb/models/__init__.py | 6 +- src/mavedb/models/acmg_classification.py | 26 ++ src/mavedb/models/enums/acmg_criterion.py | 44 +++ .../models/enums/functional_classification.py | 7 + .../models/enums/strength_of_evidence.py | 11 + src/mavedb/models/score_calibration.py | 8 +- ...e_calibration_functional_classification.py | 79 ++++ ...onal_classification_variant_association.py | 14 + src/mavedb/models/variant.py | 3 + src/mavedb/view_models/acmg_classification.py | 16 +- src/mavedb/view_models/score_calibration.py | 80 +++- tests/helpers/constants.py | 19 +- tests/lib/annotation/test_classification.py | 2 +- tests/lib/conftest.py | 38 +- tests/lib/test_acmg.py | 158 ++++++++ tests/lib/test_score_calibrations.py | 217 +++++++++- tests/view_models/test_acmg_classification.py | 25 +- tests/view_models/test_score_calibration.py | 46 ++- 23 files changed, 1408 insertions(+), 112 deletions(-) create mode 100644 alembic/manual_migrations/migrate_jsonb_ranges_to_table_rows.py create mode 100644 alembic/versions/16beeb593513_add_acmg_classification_and_functional_.py create mode 100644 src/mavedb/models/acmg_classification.py create mode 100644 src/mavedb/models/enums/acmg_criterion.py create mode 100644 src/mavedb/models/enums/functional_classification.py create mode 100644 src/mavedb/models/enums/strength_of_evidence.py create mode 100644 src/mavedb/models/score_calibration_functional_classification.py create mode 100644 src/mavedb/models/score_calibration_functional_classification_variant_association.py diff --git a/alembic/manual_migrations/migrate_jsonb_ranges_to_table_rows.py b/alembic/manual_migrations/migrate_jsonb_ranges_to_table_rows.py new file mode 100644 index 00000000..f5219369 --- /dev/null +++ b/alembic/manual_migrations/migrate_jsonb_ranges_to_table_rows.py @@ -0,0 +1,374 @@ +""" +Migration script to convert JSONB functional_ranges to the new row-based implementation. + +This script migrates data from ScoreCalibration.functional_ranges (JSONB column) +to the new ScoreCalibrationFunctionalClassification table with proper foreign key relationships. 
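+
+Usage (taken from show_usage() at the bottom of this script):
+
+    python migrate_jsonb_ranges_to_table_rows.py            # migrate (default)
+    python migrate_jsonb_ranges_to_table_rows.py verify     # verify migration without running it
+    python migrate_jsonb_ranges_to_table_rows.py rollback   # remove all migrated data (destructive!)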
+""" +from typing import Any, Dict + +import sqlalchemy as sa +from sqlalchemy.orm import Session, configure_mappers + +from mavedb.models import * +from mavedb.db.session import SessionLocal +from mavedb.models.acmg_classification import ACMGClassification +from mavedb.models.enums.acmg_criterion import ACMGCriterion +from mavedb.models.enums.functional_classification import FunctionalClassification +from mavedb.models.enums.strength_of_evidence import StrengthOfEvidenceProvided +from mavedb.models.score_calibration import ScoreCalibration +from mavedb.models.score_calibration_functional_classification import ScoreCalibrationFunctionalClassification +from mavedb.models.score_calibration_functional_classification_variant_association import ( + score_calibration_functional_classification_variants_association_table +) +from mavedb.models.variant import Variant +from mavedb.view_models.acmg_classification import ACMGClassificationCreate + +configure_mappers() + + +def populate_variant_associations( + db: Session, + functional_classification: ScoreCalibrationFunctionalClassification, + calibration: ScoreCalibration, +) -> int: + """Populate the association table with variants that fall within this functional range.""" + # Create a view model instance to use the existing range checking logic + if not functional_classification or not functional_classification.range: + print(f" Skipping variant association - no valid range or view model") + return 0 + + print(f" Finding variants within range {functional_classification.range} (lower_inclusive={functional_classification.inclusive_lower_bound}, upper_inclusive={functional_classification.inclusive_upper_bound})") + + # Get all variants for this score set and their scores + variants_query = db.execute(sa.select(Variant).where( + Variant.score_set_id == calibration.score_set_id, + )).scalars().all() + + variants_in_range = [] + total_variants = 0 + + for variant in variants_query: + total_variants += 1 + + # Extract score from JSONB data + try: + score_data = variant.data.get("score_data", {}).get("score") if variant.data else None + if score_data is not None: + variant_score = float(score_data) + + # Use the existing view model method for range checking + if functional_classification.score_is_contained_in_range(variant_score): + variants_in_range.append(variant) + + except (ValueError, TypeError) as e: + print(f" Warning: Could not parse score for variant {variant.id}: {e}") + continue + + print(f" Found {len(variants_in_range)} variants in range out of {total_variants} total variants") + + # Bulk insert associations + if variants_in_range: + associations = [ + { + "functional_classification_id": functional_classification.id, + "variant_id": variant.id + } + for variant in variants_in_range + ] + + db.execute( + score_calibration_functional_classification_variants_association_table.insert(), + associations + ) + + return len(variants_in_range) + + +def migrate_functional_range_to_row( + db: Session, + calibration: ScoreCalibration, + functional_range: Dict[str, Any], + acmg_classification_cache: Dict[str, ACMGClassification] +) -> ScoreCalibrationFunctionalClassification: + """Convert a single functional range from JSONB to table row.""" + + # Handle ACMG classification if present + acmg_classification_id = None + acmg_data = functional_range.get("acmg_classification") + if acmg_data: + # Create a cache key for the ACMG classification + criterion = acmg_data.get("criterion").upper() if acmg_data.get("criterion") else None + evidence_strength = 
acmg_data.get("evidence_strength").upper() if acmg_data.get("evidence_strength") else None + points = acmg_data.get("points") + + classification = ACMGClassificationCreate( + criterion=ACMGCriterion(criterion) if criterion else None, + evidence_strength=StrengthOfEvidenceProvided(evidence_strength) if evidence_strength else None, + points=points + ) + + cache_key = f"{classification.criterion}_{classification.evidence_strength}_{classification.points}" + + if cache_key not in acmg_classification_cache: + # Create new ACMG classification + acmg_classification = ACMGClassification( + criterion=classification.criterion, + evidence_strength=classification.evidence_strength, + points=classification.points + ) + db.add(acmg_classification) + db.flush() # Get the ID + acmg_classification_cache[cache_key] = acmg_classification + + acmg_classification_id = acmg_classification_cache[cache_key].id + + # Create the functional classification row + functional_classification = ScoreCalibrationFunctionalClassification( + calibration_id=calibration.id, + label=functional_range.get("label", ""), + description=functional_range.get("description"), + classification=FunctionalClassification(functional_range.get("classification", "not_specified")), + range=functional_range.get("range"), + inclusive_lower_bound=functional_range.get("inclusive_lower_bound"), + inclusive_upper_bound=functional_range.get("inclusive_upper_bound"), + oddspaths_ratio=functional_range.get("oddspaths_ratio"), + positive_likelihood_ratio=functional_range.get("positive_likelihood_ratio"), + acmg_classification_id=acmg_classification_id + ) + + return functional_classification + + +def do_migration(db: Session): + """Main migration function.""" + print("Starting migration of JSONB functional_ranges to table rows...") + + # Find all calibrations with functional_ranges + calibrations_with_ranges = db.scalars( + sa.select(ScoreCalibration).where(ScoreCalibration.functional_ranges_deprecated_json.isnot(None)) + ).all() + + print(f"Found {len(calibrations_with_ranges)} calibrations with functional ranges to migrate.") + + # Cache for ACMG classifications to avoid duplicates + acmg_classification_cache: Dict[str, ACMGClassification] = {} + + migrated_count = 0 + error_count = 0 + + for calibration in calibrations_with_ranges: + try: + print(f"Migrating calibration {calibration.id} (URN: {calibration.urn})...") + + functional_ranges_data = calibration.functional_ranges_deprecated_json + if not functional_ranges_data or not isinstance(functional_ranges_data, list): + print(f" Skipping calibration {calibration.id} - no valid functional ranges data") + continue + + # Create functional classification rows for each range + functional_classifications = [] + for i, functional_range in enumerate(functional_ranges_data): + try: + functional_classification = migrate_functional_range_to_row( + db, calibration, functional_range, acmg_classification_cache + ) + db.add(functional_classification) + functional_classifications.append(functional_classification) + print(f" Created functional classification row {i+1}/{len(functional_ranges_data)}") + + except Exception as e: + print(f" Error migrating functional range {i+1} for calibration {calibration.id}: {e}") + error_count += 1 + continue + + # Flush to get IDs for the functional classifications + db.flush() + + # Populate variant associations for each functional classification + total_associations = 0 + for functional_classification in functional_classifications: + try: + associations_count = 
populate_variant_associations( + db, functional_classification, calibration + ) + total_associations += associations_count + + except Exception as e: + print(f" Error populating variant associations for functional classification {functional_classification.id}: {e}") + error_count += 1 + continue + + print(f" Created {total_associations} variant associations") + + # Commit the changes for this calibration + db.commit() + migrated_count += 1 + print(f" Successfully migrated calibration {calibration.id}") + + except Exception as e: + print(f"Error migrating calibration {calibration.id}: {e}") + db.rollback() + error_count += 1 + continue + + # Final statistics + total_functional_classifications = db.scalar( + sa.select(sa.func.count(ScoreCalibrationFunctionalClassification.id)) + ) + + total_associations = db.scalar( + sa.select(sa.func.count()).select_from( + score_calibration_functional_classification_variants_association_table + ) + ) or 0 + + print(f"\nMigration completed:") + print(f" Successfully migrated: {migrated_count} calibrations") + print(f" Functional classification rows created: {total_functional_classifications}") + print(f" Variant associations created: {total_associations}") + print(f" ACMG classifications created: {len(acmg_classification_cache)}") + print(f" Errors encountered: {error_count}") + + +def verify_migration(db: Session): + """Verify that the migration was successful.""" + print("\nVerifying migration...") + + # Count original calibrations with functional ranges + original_count = db.scalar( + sa.select(sa.func.count(ScoreCalibration.id)).where( + ScoreCalibration.functional_ranges_deprecated_json.isnot(None) + ) + ) + + # Count migrated functional classifications + migrated_count = db.scalar( + sa.select(sa.func.count(ScoreCalibrationFunctionalClassification.id)) + ) + + # Count ACMG classifications + acmg_count = db.scalar( + sa.select(sa.func.count(ACMGClassification.id)) + ) + + # Count variant associations + association_count = db.scalar( + sa.select(sa.func.count()).select_from( + score_calibration_functional_classification_variants_association_table + ) + ) + + print(f"Original calibrations with functional ranges: {original_count}") + print(f"Migrated functional classification rows: {migrated_count}") + print(f"ACMG classification records: {acmg_count}") + print(f"Variant associations created: {association_count}") + + # Sample verification - check that relationships work + sample_classification = db.scalar( + sa.select(ScoreCalibrationFunctionalClassification).limit(1) + ) + + if sample_classification: + print(f"\nSample verification:") + print(f" Functional classification ID: {sample_classification.id}") + print(f" Label: {sample_classification.label}") + print(f" Classification: {sample_classification.classification}") + print(f" Range: {sample_classification.range}") + print(f" Calibration ID: {sample_classification.calibration_id}") + print(f" ACMG classification ID: {sample_classification.acmg_classification_id}") + + # Count variants associated with this classification + variant_count = db.scalar( + sa.select(sa.func.count()).select_from( + score_calibration_functional_classification_variants_association_table + ).where( + score_calibration_functional_classification_variants_association_table.c.functional_classification_id == sample_classification.id + ) + ) + print(f" Associated variants: {variant_count}") + + # Functional classifications by type + classification_stats = db.execute( + sa.select( + 
ScoreCalibrationFunctionalClassification.classification, + sa.func.count().label('count') + ).group_by(ScoreCalibrationFunctionalClassification.classification) + ).all() + + for classification, count in classification_stats: + print(f"{classification}: {count} ranges") + + + +def rollback_migration(db: Session): + """Rollback the migration by deleting all migrated data.""" + print("Rolling back migration...") + + # Count records before deletion + functional_count = db.scalar( + sa.select(sa.func.count(ScoreCalibrationFunctionalClassification.id)) + ) + + acmg_count = db.scalar( + sa.select(sa.func.count(ACMGClassification.id)) + ) + + association_count = db.scalar( + sa.select(sa.func.count()).select_from( + score_calibration_functional_classification_variants_association_table + ) + ) + + # Delete in correct order (associations first, then functional classifications, then ACMG) + db.execute(sa.delete(score_calibration_functional_classification_variants_association_table)) + db.execute(sa.delete(ScoreCalibrationFunctionalClassification)) + db.execute(sa.delete(ACMGClassification)) + db.commit() + + print(f"Deleted {association_count} variant associations") + print(f"Deleted {functional_count} functional classification rows") + print(f"Deleted {acmg_count} ACMG classification rows") + + +def show_usage(): + """Show usage information.""" + print(""" +Usage: python migrate_jsonb_ranges_to_table_rows.py [command] + +Commands: + migrate (default) - Migrate JSONB functional_ranges to table rows + verify - Verify migration without running it + rollback - Remove all migrated data (destructive!) + +Examples: + python migrate_jsonb_ranges_to_table_rows.py # Run migration + python migrate_jsonb_ranges_to_table_rows.py verify # Check status + python migrate_jsonb_ranges_to_table_rows.py rollback # Undo migration +""") + + +if __name__ == "__main__": + import sys + + command = sys.argv[1] if len(sys.argv) > 1 else "migrate" + + if command == "help" or command == "--help" or command == "-h": + show_usage() + elif command == "rollback": + print("WARNING: This will delete all migrated functional classification data!") + response = input("Are you sure you want to continue? (y/N): ") + if response.lower() == 'y': + with SessionLocal() as db: + rollback_migration(db) + else: + print("Rollback cancelled.") + elif command == "verify": + with SessionLocal() as db: + verify_migration(db) + elif command == "migrate": + with SessionLocal() as db: + do_migration(db) + verify_migration(db) + else: + print(f"Unknown command: {command}") + show_usage() diff --git a/alembic/versions/16beeb593513_add_acmg_classification_and_functional_.py b/alembic/versions/16beeb593513_add_acmg_classification_and_functional_.py new file mode 100644 index 00000000..53e812bb --- /dev/null +++ b/alembic/versions/16beeb593513_add_acmg_classification_and_functional_.py @@ -0,0 +1,142 @@ +"""add acmg classification and functional classification tables + +Revision ID: 16beeb593513 +Revises: b22b450d409c +Create Date: 2025-11-17 11:46:38.276980 + +""" + +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +from alembic import op + +# revision identifiers, used by Alembic. +revision = "16beeb593513" +down_revision = "b22b450d409c" +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + op.create_table( + "acmg_classifications", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column( + "criterion", + sa.Enum( + "PVS1", + "PS1", + "PS2", + "PS3", + "PS4", + "PM1", + "PM2", + "PM3", + "PM4", + "PM5", + "PM6", + "PP1", + "PP2", + "PP3", + "PP4", + "PP5", + "BA1", + "BS1", + "BS2", + "BS3", + "BS4", + "BP1", + "BP2", + "BP3", + "BP4", + "BP5", + "BP6", + "BP7", + name="acmgcriterion", + native_enum=False, + length=32, + ), + nullable=True, + ), + sa.Column( + "evidence_strength", + sa.Enum( + "VERY_STRONG", + "STRONG", + "MODERATE_PLUS", + "MODERATE", + "SUPPORTING", + name="strengthofevidenceprovided", + native_enum=False, + length=32, + ), + nullable=True, + ), + sa.Column("points", sa.Integer(), nullable=True), + sa.Column("creation_date", sa.Date(), nullable=False), + sa.Column("modification_date", sa.Date(), nullable=False), + sa.PrimaryKeyConstraint("id"), + ) + op.create_table( + "score_calibration_functional_classifications", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column("calibration_id", sa.Integer(), nullable=False), + sa.Column("label", sa.String(), nullable=False), + sa.Column("description", sa.String(), nullable=True), + sa.Column( + "classification", + sa.Enum( + "normal", "abnormal", "not_specified", name="functionalclassification", native_enum=False, length=32 + ), + nullable=False, + ), + sa.Column("range", postgresql.JSONB(none_as_null=True, astext_type=sa.Text()), nullable=True), + sa.Column("inclusive_lower_bound", sa.Boolean(), nullable=True), + sa.Column("inclusive_upper_bound", sa.Boolean(), nullable=True), + sa.Column("oddspaths_ratio", sa.Float(), nullable=True), + sa.Column("positive_likelihood_ratio", sa.Float(), nullable=True), + sa.Column("acmg_classification_id", sa.Integer(), nullable=True), + sa.ForeignKeyConstraint( + ["acmg_classification_id"], + ["acmg_classifications.id"], + ), + sa.ForeignKeyConstraint( + ["calibration_id"], + ["score_calibrations.id"], + ), + sa.PrimaryKeyConstraint("id"), + ) + op.create_table( + "score_calibration_functional_classification_variants", + sa.Column("functional_classification_id", sa.Integer(), nullable=False), + sa.Column("variant_id", sa.Integer(), nullable=False), + sa.ForeignKeyConstraint( + ["functional_classification_id"], + ["score_calibration_functional_classifications.id"], + ), + sa.ForeignKeyConstraint( + ["variant_id"], + ["variants.id"], + ), + sa.PrimaryKeyConstraint("functional_classification_id", "variant_id"), + ) + op.alter_column("score_calibrations", "functional_ranges", new_column_name="functional_ranges_deprecated_json") + op.create_index(op.f("ix_score_calibrations_created_by_id"), "score_calibrations", ["created_by_id"], unique=False) + op.create_index( + op.f("ix_score_calibrations_modified_by_id"), "score_calibrations", ["modified_by_id"], unique=False + ) + op.create_index(op.f("ix_score_calibrations_urn"), "score_calibrations", ["urn"], unique=True) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + op.drop_index(op.f("ix_score_calibrations_urn"), table_name="score_calibrations") + op.drop_index(op.f("ix_score_calibrations_modified_by_id"), table_name="score_calibrations") + op.drop_index(op.f("ix_score_calibrations_created_by_id"), table_name="score_calibrations") + op.drop_table("score_calibration_functional_classification_variants") + op.drop_table("score_calibration_functional_classifications") + op.drop_table("acmg_classifications") + # ### end Alembic commands ### diff --git a/src/mavedb/lib/acmg.py b/src/mavedb/lib/acmg.py index 971923c2..d7de860e 100644 --- a/src/mavedb/lib/acmg.py +++ b/src/mavedb/lib/acmg.py @@ -1,58 +1,11 @@ -from enum import Enum from typing import Optional +from sqlalchemy import select +from sqlalchemy.orm import Session -class ACMGCriterion(str, Enum): - """Enum for ACMG criteria codes.""" - - PVS1 = "PVS1" - PS1 = "PS1" - PS2 = "PS2" - PS3 = "PS3" - PS4 = "PS4" - PM1 = "PM1" - PM2 = "PM2" - PM3 = "PM3" - PM4 = "PM4" - PM5 = "PM5" - PM6 = "PM6" - PP1 = "PP1" - PP2 = "PP2" - PP3 = "PP3" - PP4 = "PP4" - PP5 = "PP5" - BA1 = "BA1" - BS1 = "BS1" - BS2 = "BS2" - BS3 = "BS3" - BS4 = "BS4" - BP1 = "BP1" - BP2 = "BP2" - BP3 = "BP3" - BP4 = "BP4" - BP5 = "BP5" - BP6 = "BP6" - BP7 = "BP7" - - @property - def is_pathogenic(self) -> bool: - """Return True if the criterion is pathogenic, False if benign.""" - return self.name.startswith("P") # PVS, PS, PM, PP are pathogenic criteria - - @property - def is_benign(self) -> bool: - """Return True if the criterion is benign, False if pathogenic.""" - return self.name.startswith("B") # BA, BS, BP are benign criteria - - -class StrengthOfEvidenceProvided(str, Enum): - """Enum for strength of evidence provided.""" - - VERY_STRONG = "very_strong" - STRONG = "strong" - MODERATE_PLUS = "moderate_plus" - MODERATE = "moderate" - SUPPORTING = "supporting" +from mavedb.models.acmg_classification import ACMGClassification +from mavedb.models.enums.acmg_criterion import ACMGCriterion +from mavedb.models.enums.strength_of_evidence import StrengthOfEvidenceProvided def points_evidence_strength_equivalent( @@ -121,3 +74,61 @@ def points_evidence_strength_equivalent( return (ACMGCriterion.BS3, StrengthOfEvidenceProvided.STRONG) else: # points <= -8 return (ACMGCriterion.BS3, StrengthOfEvidenceProvided.VERY_STRONG) + + +def find_or_create_acmg_classification( + db: Session, + criterion: Optional[ACMGCriterion], + evidence_strength: Optional[StrengthOfEvidenceProvided], + points: Optional[int], +): + """Create or find an ACMG classification based on criterion, evidence strength, and points. + + Parameters + ---------- + db : Session + The database session to use for querying and creating the ACMG classification. + criterion : Optional[ACMGCriterion] + The ACMG criterion for the classification. + evidence_strength : Optional[StrengthOfEvidenceProvided] + The strength of evidence provided for the classification. + points : Optional[int] + The point value associated with the classification. + + Returns + ------- + ACMGClassification + The existing or newly created ACMG classification instance. + + Raises + ------ + ValueError + If the combination of criterion, evidence strength, and points does not correspond to a valid ACMG classification. + + Notes + ----- + - This function does not commit the new entry to the database; the caller is responsible for committing the session. 
+ """ + if (criterion is None) != (evidence_strength is None): + raise ValueError("Both criterion and evidence_strength must be provided together or both be None, with points.") + elif criterion is None and evidence_strength is None and points is not None: + criterion, evidence_strength = points_evidence_strength_equivalent(points) + + # If we cannot infer a classification, return None + if criterion is None and evidence_strength is None: + return None + + acmg_classification = db.execute( + select(ACMGClassification) + .where(ACMGClassification.criterion == criterion) + .where(ACMGClassification.evidence_strength == evidence_strength) + .where(ACMGClassification.points == points) + ).scalar_one_or_none() + + if not acmg_classification: + acmg_classification = ACMGClassification( + criterion=criterion, evidence_strength=evidence_strength, points=points + ) + db.add(acmg_classification) + + return acmg_classification diff --git a/src/mavedb/lib/annotation/classification.py b/src/mavedb/lib/annotation/classification.py index 9bf7526b..e15c8327 100644 --- a/src/mavedb/lib/annotation/classification.py +++ b/src/mavedb/lib/annotation/classification.py @@ -5,6 +5,7 @@ from ga4gh.va_spec.acmg_2015 import VariantPathogenicityEvidenceLine from ga4gh.va_spec.base.enums import StrengthOfEvidenceProvided +from mavedb.models.enums.functional_classification import FunctionalClassification from mavedb.models.mapped_variant import MappedVariant from mavedb.view_models.score_calibration import FunctionalRange @@ -62,9 +63,9 @@ def functional_classification_of_variant( functional_range_view = FunctionalRange.model_validate(functional_range) if functional_range_view.is_contained_by_range(functional_score): - if functional_range_view.classification == "normal": + if functional_range_view.classification is FunctionalClassification.normal: return ExperimentalVariantFunctionalImpactClassification.NORMAL - elif functional_range_view.classification == "abnormal": + elif functional_range_view.classification is FunctionalClassification.abnormal: return ExperimentalVariantFunctionalImpactClassification.ABNORMAL else: return ExperimentalVariantFunctionalImpactClassification.INDETERMINATE diff --git a/src/mavedb/lib/score_calibrations.py b/src/mavedb/lib/score_calibrations.py index cc67673a..cdf09f75 100644 --- a/src/mavedb/lib/score_calibrations.py +++ b/src/mavedb/lib/score_calibrations.py @@ -1,16 +1,75 @@ """Utilities for building and mutating score calibration ORM objects.""" +from typing import Union + from sqlalchemy.orm import Session +from mavedb.lib.acmg import find_or_create_acmg_classification from mavedb.lib.identifiers import find_or_create_publication_identifier from mavedb.models.enums.score_calibration_relation import ScoreCalibrationRelation from mavedb.models.score_calibration import ScoreCalibration -from mavedb.models.score_set import ScoreSet +from mavedb.models.score_calibration_functional_classification import ScoreCalibrationFunctionalClassification from mavedb.models.score_calibration_publication_identifier import ScoreCalibrationPublicationIdentifierAssociation +from mavedb.models.score_set import ScoreSet from mavedb.models.user import User from mavedb.view_models import score_calibration +def create_functional_classification( + db: Session, + functional_range_create: Union[score_calibration.FunctionalRangeCreate, score_calibration.FunctionalRangeModify], + containing_calibration: ScoreCalibration, +) -> ScoreCalibrationFunctionalClassification: + """ + Create a functional classification 
entity for score calibration. + This function creates a new ScoreCalibrationFunctionalClassification object + based on the provided functional range data. It optionally creates or finds + an associated ACMG classification if one is specified in the input data. + + Args: + db (Session): Database session for performing database operations. + functional_range_create (score_calibration.FunctionalRangeCreate or score_calibration.FunctionalRangeModify): + Input data containing the functional range parameters including label, + description, range bounds, inclusivity flags, and optional ACMG + classification information. + containing_calibration (ScoreCalibration): The score calibration to which + the new functional classification will be attached. + + Returns: + ScoreCalibrationFunctionalClassification: The newly created functional + classification entity, associated with the containing calibration. + + Note: + The function does not commit the transaction. The caller is responsible + for adding the new classification to the session and committing the changes. + """ + acmg_classification = None + if functional_range_create.acmg_classification: + acmg_classification = find_or_create_acmg_classification( + db, + criterion=functional_range_create.acmg_classification.criterion, + evidence_strength=functional_range_create.acmg_classification.evidence_strength, + points=functional_range_create.acmg_classification.points, + ) + else: + acmg_classification = None + + functional_classification = ScoreCalibrationFunctionalClassification( + label=functional_range_create.label, + description=functional_range_create.description, + range=functional_range_create.range, + inclusive_lower_bound=functional_range_create.inclusive_lower_bound, + inclusive_upper_bound=functional_range_create.inclusive_upper_bound, + acmg_classification=acmg_classification, + classification=functional_range_create.classification, + oddspaths_ratio=functional_range_create.oddspaths_ratio, # type: ignore[arg-type] + positive_likelihood_ratio=functional_range_create.positive_likelihood_ratio, # type: ignore[arg-type] + acmg_classification_id=acmg_classification.id if acmg_classification else None, + calibration=containing_calibration, + ) + + return functional_classification + + async def _create_score_calibration( db: Session, calibration_create: score_calibration.ScoreCalibrationCreate, user: User ) -> ScoreCalibration: @@ -89,6 +148,7 @@ async def _create_score_calibration( **calibration_create.model_dump( by_alias=False, exclude={ + "functional_ranges", "threshold_sources", "classification_sources", "method_sources", @@ -96,10 +156,18 @@ }, ), publication_identifier_associations=calibration_pub_assocs, + functional_ranges=[], created_by=user, modified_by=user, ) # type: ignore[call-arg] + for functional_range_create in calibration_create.functional_ranges or []: + persisted_functional_range = create_functional_classification( + db, functional_range_create, containing_calibration=calibration + ) + db.add(persisted_functional_range) + calibration.functional_ranges.append(persisted_functional_range) + return calibration @@ -328,12 +396,18 @@ async def modify_score_calibration( db.add(pub) db.flush() - # Remove associations that are no longer present + # Remove associations and functional classifications that are no longer present for assoc in existing_assocs_map.values(): db.delete(assoc) + for functional_classification in calibration.functional_ranges: + db.delete(functional_classification) + calibration.functional_ranges.clear() + db.flush() + db.refresh(calibration) for attr, value in calibration_update.model_dump().items(): if attr not in { +
"functional_ranges", "threshold_sources", "classification_sources", "method_sources", @@ -349,6 +423,13 @@ async def modify_score_calibration( calibration.publication_identifier_associations = updated_assocs calibration.modified_by = user + for functional_range_update in calibration_update.functional_ranges or []: + persisted_functional_range = create_functional_classification( + db, functional_range_update, containing_calibration=calibration + ) + db.add(persisted_functional_range) + calibration.functional_ranges.append(persisted_functional_range) + db.add(calibration) return calibration diff --git a/src/mavedb/models/__init__.py b/src/mavedb/models/__init__.py index 684b3c98..1a20b792 100644 --- a/src/mavedb/models/__init__.py +++ b/src/mavedb/models/__init__.py @@ -1,5 +1,6 @@ __all__ = [ "access_key", + "acmg_classification", "collection", "clinical_control", "controlled_keyword", @@ -19,8 +20,11 @@ "refseq_identifier", "refseq_offset", "role", - "score_set", + "score_calibration_functional_classification_variant_association", + "score_calibration_functional_classification", + "score_calibration_publication_identifier", "score_calibration", + "score_set", "target_gene", "target_sequence", "taxonomy", diff --git a/src/mavedb/models/acmg_classification.py b/src/mavedb/models/acmg_classification.py new file mode 100644 index 00000000..027a2caa --- /dev/null +++ b/src/mavedb/models/acmg_classification.py @@ -0,0 +1,26 @@ +"""SQLAlchemy model for ACMG classification entities.""" + +from datetime import date + +from sqlalchemy import Column, Date, Enum, Integer + +from mavedb.db.base import Base +from mavedb.models.enums.acmg_criterion import ACMGCriterion +from mavedb.models.enums.strength_of_evidence import StrengthOfEvidenceProvided + + +class ACMGClassification(Base): + """ACMG classification model for storing ACMG criteria, evidence strength, and points.""" + + __tablename__ = "acmg_classifications" + + id = Column(Integer, primary_key=True) + + criterion = Column(Enum(ACMGCriterion, native_enum=False, validate_strings=True, length=32), nullable=True) + evidence_strength = Column( + Enum(StrengthOfEvidenceProvided, native_enum=False, validate_strings=True, length=32), nullable=True + ) + points = Column(Integer, nullable=True) + + creation_date = Column(Date, nullable=False, default=date.today) + modification_date = Column(Date, nullable=False, default=date.today, onupdate=date.today) diff --git a/src/mavedb/models/enums/acmg_criterion.py b/src/mavedb/models/enums/acmg_criterion.py new file mode 100644 index 00000000..1c5435eb --- /dev/null +++ b/src/mavedb/models/enums/acmg_criterion.py @@ -0,0 +1,44 @@ +import enum + + +class ACMGCriterion(enum.Enum): + """Enum for ACMG criteria codes.""" + + PVS1 = "PVS1" + PS1 = "PS1" + PS2 = "PS2" + PS3 = "PS3" + PS4 = "PS4" + PM1 = "PM1" + PM2 = "PM2" + PM3 = "PM3" + PM4 = "PM4" + PM5 = "PM5" + PM6 = "PM6" + PP1 = "PP1" + PP2 = "PP2" + PP3 = "PP3" + PP4 = "PP4" + PP5 = "PP5" + BA1 = "BA1" + BS1 = "BS1" + BS2 = "BS2" + BS3 = "BS3" + BS4 = "BS4" + BP1 = "BP1" + BP2 = "BP2" + BP3 = "BP3" + BP4 = "BP4" + BP5 = "BP5" + BP6 = "BP6" + BP7 = "BP7" + + @property + def is_pathogenic(self) -> bool: + """Return True if the criterion is pathogenic, False if benign.""" + return self.name.startswith("P") # PVS, PS, PM, PP are pathogenic criteria + + @property + def is_benign(self) -> bool: + """Return True if the criterion is benign, False if pathogenic.""" + return self.name.startswith("B") # BA, BS, BP are benign criteria diff --git 
a/src/mavedb/models/enums/functional_classification.py b/src/mavedb/models/enums/functional_classification.py new file mode 100644 index 00000000..2a472a65 --- /dev/null +++ b/src/mavedb/models/enums/functional_classification.py @@ -0,0 +1,7 @@ +import enum + + +class FunctionalClassification(enum.Enum): + normal = "normal" + abnormal = "abnormal" + not_specified = "not_specified" diff --git a/src/mavedb/models/enums/strength_of_evidence.py b/src/mavedb/models/enums/strength_of_evidence.py new file mode 100644 index 00000000..58c3c26d --- /dev/null +++ b/src/mavedb/models/enums/strength_of_evidence.py @@ -0,0 +1,11 @@ +import enum + + +class StrengthOfEvidenceProvided(enum.Enum): + """Enum for strength of evidence provided.""" + + VERY_STRONG = "VERY_STRONG" + STRONG = "STRONG" + MODERATE_PLUS = "MODERATE_PLUS" + MODERATE = "MODERATE" + SUPPORTING = "SUPPORTING" diff --git a/src/mavedb/models/score_calibration.py b/src/mavedb/models/score_calibration.py index ef32c107..25955eff 100644 --- a/src/mavedb/models/score_calibration.py +++ b/src/mavedb/models/score_calibration.py @@ -12,6 +12,7 @@ from mavedb.db.base import Base from mavedb.lib.urns import generate_calibration_urn +from mavedb.models.score_calibration_functional_classification import ScoreCalibrationFunctionalClassification from mavedb.models.score_calibration_publication_identifier import ScoreCalibrationPublicationIdentifierAssociation if TYPE_CHECKING: @@ -42,7 +43,12 @@ class ScoreCalibration(Base): # Ranges and sources are stored as JSONB (intersection structure) to avoid complex joins for now. # ranges: list[ { label, description?, classification, range:[lower,upper], inclusive_lower_bound, inclusive_upper_bound } ] - functional_ranges = Column(JSONB(none_as_null=True), nullable=True) + functional_ranges_deprecated_json = Column(JSONB(none_as_null=True), nullable=True) + functional_ranges: Mapped[list["ScoreCalibrationFunctionalClassification"]] = relationship( + "ScoreCalibrationFunctionalClassification", + back_populates="calibration", + cascade="all, delete-orphan", + ) publication_identifier_associations: Mapped[list[ScoreCalibrationPublicationIdentifierAssociation]] = relationship( "ScoreCalibrationPublicationIdentifierAssociation", diff --git a/src/mavedb/models/score_calibration_functional_classification.py b/src/mavedb/models/score_calibration_functional_classification.py new file mode 100644 index 00000000..2ebdf261 --- /dev/null +++ b/src/mavedb/models/score_calibration_functional_classification.py @@ -0,0 +1,79 @@ +"""SQLAlchemy model for variant score calibration functional classifications.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from sqlalchemy import Boolean, Column, Enum, Float, ForeignKey, Integer, String +from sqlalchemy.dialects.postgresql import JSONB +from sqlalchemy.orm import Mapped, relationship + +from mavedb.db.base import Base +from mavedb.lib.validation.utilities import inf_or_float +from mavedb.models.acmg_classification import ACMGClassification +from mavedb.models.enums.functional_classification import FunctionalClassification +from mavedb.models.score_calibration_functional_classification_variant_association import ( + score_calibration_functional_classification_variants_association_table, +) + +if TYPE_CHECKING: + from mavedb.models.score_calibration import ScoreCalibration + from mavedb.models.variant import Variant + + +class ScoreCalibrationFunctionalClassification(Base): + __tablename__ = "score_calibration_functional_classifications" + + id = 
Column(Integer, primary_key=True) + + calibration_id = Column(Integer, ForeignKey("score_calibrations.id"), nullable=False) + calibration: Mapped["ScoreCalibration"] = relationship("ScoreCalibration", foreign_keys=[calibration_id]) + + label = Column(String, nullable=False) + description = Column(String, nullable=True) + + classification = Column( + Enum(FunctionalClassification, native_enum=False, validate_strings=True, length=32), + nullable=False, + default=FunctionalClassification.not_specified, + ) + + range = Column(JSONB(none_as_null=True), nullable=True) # (lower_bound, upper_bound) + inclusive_lower_bound = Column(Boolean, nullable=True, default=True) + inclusive_upper_bound = Column(Boolean, nullable=True, default=False) + + oddspaths_ratio = Column(Float, nullable=True) + positive_likelihood_ratio = Column(Float, nullable=True) + + acmg_classification_id = Column(Integer, ForeignKey("acmg_classifications.id"), nullable=True) + acmg_classification: Mapped[ACMGClassification] = relationship( + "ACMGClassification", foreign_keys=[acmg_classification_id] + ) + + # Many-to-many relationship with variants + variants: Mapped[list["Variant"]] = relationship( + "Variant", + secondary=score_calibration_functional_classification_variants_association_table, + ) + + def score_is_contained_in_range(self, score: float) -> bool: + """Check if a given score falls within the defined range.""" + if self.range is None or not isinstance(self.range, list) or len(self.range) != 2: + return False + + lower_bound, upper_bound = inf_or_float(self.range[0], lower=True), inf_or_float(self.range[1], lower=False) + if self.inclusive_lower_bound: + if score < lower_bound: + return False + else: + if score <= lower_bound: + return False + + if self.inclusive_upper_bound: + if score > upper_bound: + return False + else: + if score >= upper_bound: + return False + + return True diff --git a/src/mavedb/models/score_calibration_functional_classification_variant_association.py b/src/mavedb/models/score_calibration_functional_classification_variant_association.py new file mode 100644 index 00000000..61f074bd --- /dev/null +++ b/src/mavedb/models/score_calibration_functional_classification_variant_association.py @@ -0,0 +1,14 @@ +"""SQLAlchemy association table for variants belonging to functional classifications.""" + +from sqlalchemy import Column, ForeignKey, Table + +from mavedb.db.base import Base + +score_calibration_functional_classification_variants_association_table = Table( + "score_calibration_functional_classification_variants", + Base.metadata, + Column( + "functional_classification_id", ForeignKey("score_calibration_functional_classifications.id"), primary_key=True + ), + Column("variant_id", ForeignKey("variants.id"), primary_key=True), +) diff --git a/src/mavedb/models/variant.py b/src/mavedb/models/variant.py index b038c1ea..59b6e729 100644 --- a/src/mavedb/models/variant.py +++ b/src/mavedb/models/variant.py @@ -34,3 +34,6 @@ class Variant(Base): mapped_variants: Mapped[List["MappedVariant"]] = relationship( back_populates="variant", cascade="all, delete-orphan" ) + + # Bidirectional relationship with ScoreCalibrationFunctionalClassification is left + # purposefully undefined for performance reasons. 
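Because the Variant model deliberately omits a back-reference to ScoreCalibrationFunctionalClassification, any reverse traversal (from a variant to the classifications that list it) has to join through the association table explicitly. The snippet below is not part of the patch; it is a minimal sketch of such a lookup, assuming an open SQLAlchemy Session and only the models and association table introduced above (the helper name classifications_for_variant is hypothetical).

    from sqlalchemy import select
    from sqlalchemy.orm import Session

    from mavedb.models.score_calibration_functional_classification import (
        ScoreCalibrationFunctionalClassification,
    )
    from mavedb.models.score_calibration_functional_classification_variant_association import (
        score_calibration_functional_classification_variants_association_table as classification_variants,
    )


    def classifications_for_variant(
        db: Session, variant_id: int
    ) -> list[ScoreCalibrationFunctionalClassification]:
        # Hypothetical reverse lookup: join through the association table because
        # Variant intentionally has no relationship back to classifications.
        return list(
            db.execute(
                select(ScoreCalibrationFunctionalClassification)
                .join(
                    classification_variants,
                    classification_variants.c.functional_classification_id
                    == ScoreCalibrationFunctionalClassification.id,
                )
                .where(classification_variants.c.variant_id == variant_id)
            ).scalars()
        )

Keeping the relationship one-directional avoids eagerly wiring large variant collections onto every Variant row; the cost is that the reverse direction must be queried explicitly, as sketched here.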
diff --git a/src/mavedb/view_models/acmg_classification.py b/src/mavedb/view_models/acmg_classification.py index 05757442..5bb8832f 100644 --- a/src/mavedb/view_models/acmg_classification.py +++ b/src/mavedb/view_models/acmg_classification.py @@ -4,16 +4,17 @@ classifications, and associated odds path ratios. """ +from datetime import date from typing import Optional + from pydantic import model_validator -from mavedb.lib.exceptions import ValidationError from mavedb.lib.acmg import ( - StrengthOfEvidenceProvided, ACMGCriterion, + StrengthOfEvidenceProvided, points_evidence_strength_equivalent, ) - +from mavedb.lib.exceptions import ValidationError from mavedb.view_models import record_type_validator, set_record_type from mavedb.view_models.base.base import BaseModel @@ -76,6 +77,15 @@ class SavedACMGClassification(ACMGClassificationBase): record_type: str = None # type: ignore _record_type_factory = record_type_validator()(set_record_type) + creation_date: date + modification_date: date + + class Config: + """Pydantic configuration (ORM mode).""" + + from_attributes = True + arbitrary_types_allowed = True + class ACMGClassification(SavedACMGClassification): """Complete ACMG classification model returned by the API.""" diff --git a/src/mavedb/view_models/score_calibration.py b/src/mavedb/view_models/score_calibration.py index 00d5d692..9164d5ef 100644 --- a/src/mavedb/view_models/score_calibration.py +++ b/src/mavedb/view_models/score_calibration.py @@ -5,7 +5,7 @@ """ from datetime import date -from typing import Any, Collection, Literal, Optional, Sequence, Union +from typing import TYPE_CHECKING, Any, Collection, Optional, Sequence, Union from pydantic import field_validator, model_validator @@ -16,6 +16,7 @@ transform_score_set_to_urn, ) from mavedb.lib.validation.utilities import inf_or_float +from mavedb.models.enums.functional_classification import FunctionalClassification from mavedb.view_models import record_type_validator, set_record_type from mavedb.view_models.acmg_classification import ( ACMGClassification, @@ -33,6 +34,12 @@ ) from mavedb.view_models.user import SavedUser, User +if TYPE_CHECKING: + from mavedb.view_models.variant import ( + SavedVariantEffectMeasurement, + VariantEffectMeasurement, + ) + ### Functional range models @@ -46,11 +53,11 @@ class FunctionalRangeBase(BaseModel): label: str description: Optional[str] = None - classification: Literal["normal", "abnormal", "not_specified"] = "not_specified" + classification: FunctionalClassification = FunctionalClassification.not_specified - range: tuple[Union[float, None], Union[float, None]] - inclusive_lower_bound: bool = True - inclusive_upper_bound: bool = False + range: Optional[tuple[Union[float, None], Union[float, None]]] = None # (lower_bound, upper_bound) + inclusive_lower_bound: Optional[bool] = None + inclusive_upper_bound: Optional[bool] = None acmg_classification: Optional[ACMGClassificationBase] = None @@ -59,9 +66,12 @@ class FunctionalRangeBase(BaseModel): @field_validator("range") def ranges_are_not_backwards( - cls, field_value: tuple[Union[float, None], Union[float, None]] - ) -> tuple[Union[float, None], Union[float, None]]: + cls, field_value: Optional[tuple[Union[float, None], Union[float, None]]] + ) -> Optional[tuple[Union[float, None], Union[float, None]]]: """Reject reversed or zero-width intervals.""" + if field_value is None: + return None + lower = inf_or_float(field_value[0], True) upper = inf_or_float(field_value[1], False) if lower > upper: @@ -78,12 +88,28 @@ def 
ratios_must_be_positive(cls, field_value: Optional[float]) -> Optional[float return field_value + @model_validator(mode="after") + def inclusive_bounds_require_range(self: "FunctionalRangeBase") -> "FunctionalRangeBase": + """Inclusive bounds may only be set if a range is provided. If they are unset, default them.""" + if self.range is None: + if self.inclusive_lower_bound: + raise ValidationError("An inclusive lower bound requires a defined range.") + if self.inclusive_upper_bound: + raise ValidationError("An inclusive upper bound requires a defined range.") + else: + if self.inclusive_lower_bound is None: + self.inclusive_lower_bound = True + if self.inclusive_upper_bound is None: + self.inclusive_upper_bound = False + + return self + @model_validator(mode="after") def inclusive_bounds_do_not_include_infinity(self: "FunctionalRangeBase") -> "FunctionalRangeBase": """Disallow inclusive bounds on unbounded (infinite) ends.""" - if self.inclusive_lower_bound and self.range[0] is None: + if self.inclusive_lower_bound and self.range is not None and self.range[0] is None: raise ValidationError("An inclusive lower bound may not include negative infinity.") - if self.inclusive_upper_bound and self.range[1] is None: + if self.inclusive_upper_bound and self.range is not None and self.range[1] is None: raise ValidationError("An inclusive upper bound may not include positive infinity.") return self @@ -95,9 +121,9 @@ def acmg_classification_evidence_agrees_with_classification(self: "FunctionalRan return self if ( - self.classification == "normal" + self.classification is FunctionalClassification.normal and self.acmg_classification.criterion.is_pathogenic - or self.classification == "abnormal" + or self.classification is FunctionalClassification.abnormal and self.acmg_classification.criterion.is_benign ): raise ValidationError( @@ -129,13 +155,16 @@ def oddspaths_ratio_agrees_with_acmg_classification(self: "FunctionalRangeBase") def is_contained_by_range(self, score: float) -> bool: """Determine if a given score falls within this functional range.""" + if not self.range: + return False + lower_bound, upper_bound = ( inf_or_float(self.range[0], lower=True), inf_or_float(self.range[1], lower=False), ) - lower_check = score > lower_bound or (self.inclusive_lower_bound and score == lower_bound) - upper_check = score < upper_bound or (self.inclusive_upper_bound and score == upper_bound) + lower_check = score > lower_bound or (self.inclusive_lower_bound is True and score == lower_bound) + upper_check = score < upper_bound or (self.inclusive_upper_bound is True and score == upper_bound) return lower_check and upper_check @@ -157,14 +186,22 @@ class SavedFunctionalRange(FunctionalRangeBase): record_type: str = None # type: ignore acmg_classification: Optional[SavedACMGClassification] = None + variants: Sequence["SavedVariantEffectMeasurement"] = [] _record_type_factory = record_type_validator()(set_record_type) + class Config: + """Pydantic configuration (ORM mode).""" + + from_attributes = True + arbitrary_types_allowed = True + class FunctionalRange(SavedFunctionalRange): """Complete functional range model returned by the API.""" acmg_classification: Optional[ACMGClassification] = None + variants: Sequence["VariantEffectMeasurement"] = [] ### Score calibration models @@ -197,7 +234,12 @@ def ranges_do_not_overlap( def test_overlap(range_test: FunctionalRangeBase, range_check: FunctionalRangeBase) -> bool: # Allow 'not_specified' classifications to overlap with anything. 
- if range_test.classification == "not_specified" or range_check.classification == "not_specified": + if ( + range_test.classification is FunctionalClassification.not_specified + or range_check.classification is FunctionalClassification.not_specified + or range_test.range is None + or range_check.range is None + ): return False if min(inf_or_float(range_test.range[0], True), inf_or_float(range_check.range[0], True)) == inf_or_float( @@ -207,14 +249,15 @@ def test_overlap(range_test: FunctionalRangeBase, range_check: FunctionalRangeBa else: first, second = range_check, range_test + # The range types below that mypy complains about are verified by the earlier checks for None. touching_and_inclusive = ( first.inclusive_upper_bound and second.inclusive_lower_bound - and inf_or_float(first.range[1], False) == inf_or_float(second.range[0], True) + and inf_or_float(first.range[1], False) == inf_or_float(second.range[0], True) # type: ignore ) if touching_and_inclusive: return True - if inf_or_float(first.range[1], False) > inf_or_float(second.range[0], True): + if inf_or_float(first.range[1], False) > inf_or_float(second.range[0], True): # type: ignore return True return False @@ -263,7 +306,10 @@ def validate_baseline_score(self: "ScoreCalibrationBase") -> "ScoreCalibrationBa return self for fr in self.functional_ranges: - if fr.is_contained_by_range(self.baseline_score) and fr.classification != "normal": + if ( + fr.is_contained_by_range(self.baseline_score) + and fr.classification is not FunctionalClassification.normal + ): raise ValidationError( f"The provided baseline score of {self.baseline_score} falls within a non-normal range ({fr.label}). Baseline scores may not fall within non-normal ranges.", custom_loc=["body", "baselineScore"], diff --git a/tests/helpers/constants.py b/tests/helpers/constants.py index 1a219f17..d38955b4 100644 --- a/tests/helpers/constants.py +++ b/tests/helpers/constants.py @@ -1355,44 +1355,52 @@ TEST_ACMG_BS3_STRONG_CLASSIFICATION = { "criterion": "BS3", - "evidence_strength": "strong", + "evidence_strength": "STRONG", } TEST_SAVED_ACMG_BS3_STRONG_CLASSIFICATION = { "recordType": "ACMGClassification", + "creationDate": date.today().isoformat(), + "modificationDate": date.today().isoformat(), **{camelize(k): v for k, v in TEST_ACMG_BS3_STRONG_CLASSIFICATION.items()}, } TEST_ACMG_PS3_STRONG_CLASSIFICATION = { "criterion": "PS3", - "evidence_strength": "strong", + "evidence_strength": "STRONG", } TEST_SAVED_ACMG_PS3_STRONG_CLASSIFICATION = { "recordType": "ACMGClassification", + "creationDate": date.today().isoformat(), + "modificationDate": date.today().isoformat(), **{camelize(k): v for k, v in TEST_ACMG_PS3_STRONG_CLASSIFICATION.items()}, } TEST_ACMG_BS3_STRONG_CLASSIFICATION_WITH_POINTS = { "criterion": "BS3", - "evidence_strength": "strong", + "evidence_strength": "STRONG", "points": -4, } TEST_SAVED_ACMG_BS3_STRONG_CLASSIFICATION_WITH_POINTS = { "recordType": "ACMGClassification", + "creationDate": date.today().isoformat(), + "modificationDate": date.today().isoformat(), **{camelize(k): v for k, v in TEST_ACMG_BS3_STRONG_CLASSIFICATION_WITH_POINTS.items()}, } TEST_ACMG_PS3_STRONG_CLASSIFICATION_WITH_POINTS = { "criterion": "PS3", - "evidence_strength": "strong", + "evidence_strength": "STRONG", "points": 4, } TEST_SAVED_ACMG_PS3_STRONG_CLASSIFICATION_WITH_POINTS = { "recordType": "ACMGClassification", + "creationDate": date.today().isoformat(), + "modificationDate": date.today().isoformat(), **{camelize(k): v for k, v in 
TEST_ACMG_PS3_STRONG_CLASSIFICATION_WITH_POINTS.items()}, } @@ -1416,6 +1424,7 @@ "recordType": "FunctionalRange", **{camelize(k): v for k, v in TEST_FUNCTIONAL_RANGE_NORMAL.items() if k not in ("acmg_classification",)}, "acmgClassification": TEST_SAVED_ACMG_BS3_STRONG_CLASSIFICATION, + "variants": [], } @@ -1435,6 +1444,7 @@ "recordType": "FunctionalRange", **{camelize(k): v for k, v in TEST_FUNCTIONAL_RANGE_ABNORMAL.items() if k not in ("acmg_classification",)}, "acmgClassification": TEST_SAVED_ACMG_PS3_STRONG_CLASSIFICATION, + "variants": [], } @@ -1450,6 +1460,7 @@ TEST_SAVED_FUNCTIONAL_RANGE_NOT_SPECIFIED = { "recordType": "FunctionalRange", **{camelize(k): v for k, v in TEST_FUNCTIONAL_RANGE_NOT_SPECIFIED.items()}, + "variants": [], } diff --git a/tests/lib/annotation/test_classification.py b/tests/lib/annotation/test_classification.py index 83f2388d..39865241 100644 --- a/tests/lib/annotation/test_classification.py +++ b/tests/lib/annotation/test_classification.py @@ -218,7 +218,7 @@ def test_pathogenicity_classification_of_variant_with_invalid_evidence_strength_ ) assert primary_cal is not None for r in primary_cal.functional_ranges: - r["acmgClassification"]["evidenceStrength"] = "moderate_plus" + r["acmgClassification"]["evidenceStrength"] = "MODERATE_PLUS" r["oddspathsRatio"] = None with pytest.raises(ValueError) as exc: diff --git a/tests/lib/conftest.py b/tests/lib/conftest.py index 5cffa374..efdb254b 100644 --- a/tests/lib/conftest.py +++ b/tests/lib/conftest.py @@ -1,45 +1,51 @@ -from humps import decamelize from copy import deepcopy from datetime import datetime from pathlib import Path -import pytest from shutil import copytree from unittest import mock +import pytest +from humps import decamelize + +from mavedb.models.acmg_classification import ACMGClassification from mavedb.models.enums.user_role import UserRole -from mavedb.models.score_calibration import ScoreCalibration -from mavedb.models.experiment_set import ExperimentSet from mavedb.models.experiment import Experiment +from mavedb.models.experiment_set import ExperimentSet from mavedb.models.license import License +from mavedb.models.mapped_variant import MappedVariant from mavedb.models.publication_identifier import PublicationIdentifier -from mavedb.models.score_set_publication_identifier import ScoreSetPublicationIdentifierAssociation from mavedb.models.role import Role -from mavedb.models.taxonomy import Taxonomy +from mavedb.models.score_calibration import ScoreCalibration from mavedb.models.score_set import ScoreSet +from mavedb.models.score_set_publication_identifier import ScoreSetPublicationIdentifierAssociation +from mavedb.models.taxonomy import Taxonomy from mavedb.models.user import User from mavedb.models.variant import Variant -from mavedb.models.mapped_variant import MappedVariant from tests.helpers.constants import ( ADMIN_USER, EXTRA_USER, + TEST_ACMG_BS3_STRONG_CLASSIFICATION, + TEST_ACMG_BS3_STRONG_CLASSIFICATION_WITH_POINTS, + TEST_ACMG_PS3_STRONG_CLASSIFICATION, + TEST_ACMG_PS3_STRONG_CLASSIFICATION_WITH_POINTS, TEST_EXPERIMENT, TEST_EXPERIMENT_SET, - TEST_LICENSE, TEST_INACTIVE_LICENSE, + TEST_LICENSE, TEST_MAVEDB_ATHENA_ROW, TEST_MINIMAL_MAPPED_VARIANT, TEST_MINIMAL_VARIANT, + TEST_PUBMED_IDENTIFIER, + TEST_SAVED_BRNICH_SCORE_CALIBRATION, + TEST_SAVED_PATHOGENICITY_SCORE_CALIBRATION, TEST_SAVED_TAXONOMY, TEST_SEQ_SCORESET, TEST_USER, - TEST_VALID_PRE_MAPPED_VRS_ALLELE_VRS2_X, TEST_VALID_POST_MAPPED_VRS_ALLELE_VRS2_X, - VALID_SCORE_SET_URN, - VALID_EXPERIMENT_URN, + 
TEST_VALID_PRE_MAPPED_VRS_ALLELE_VRS2_X, VALID_EXPERIMENT_SET_URN, - TEST_SAVED_BRNICH_SCORE_CALIBRATION, - TEST_SAVED_PATHOGENICITY_SCORE_CALIBRATION, - TEST_PUBMED_IDENTIFIER, + VALID_EXPERIMENT_URN, + VALID_SCORE_SET_URN, ) @@ -56,6 +62,10 @@ def setup_lib_db(session): db.add(Taxonomy(**TEST_SAVED_TAXONOMY)) db.add(License(**TEST_LICENSE)) db.add(License(**TEST_INACTIVE_LICENSE)) + db.add(ACMGClassification(**TEST_ACMG_PS3_STRONG_CLASSIFICATION)) + db.add(ACMGClassification(**TEST_ACMG_BS3_STRONG_CLASSIFICATION)) + db.add(ACMGClassification(**TEST_ACMG_BS3_STRONG_CLASSIFICATION_WITH_POINTS)) + db.add(ACMGClassification(**TEST_ACMG_PS3_STRONG_CLASSIFICATION_WITH_POINTS)) db.commit() diff --git a/tests/lib/test_acmg.py b/tests/lib/test_acmg.py index db458439..faef40f0 100644 --- a/tests/lib/test_acmg.py +++ b/tests/lib/test_acmg.py @@ -1,10 +1,17 @@ import pytest +from sqlalchemy import select from mavedb.lib.acmg import ( ACMGCriterion, StrengthOfEvidenceProvided, + find_or_create_acmg_classification, points_evidence_strength_equivalent, ) +from mavedb.models.acmg_classification import ACMGClassification + +############################################################################### +# Tests for points_evidence_strength_equivalent +############################################################################### @pytest.mark.parametrize( @@ -79,3 +86,154 @@ def test_all_strength_categories_covered(): assert StrengthOfEvidenceProvided.MODERATE_PLUS in seen assert StrengthOfEvidenceProvided.MODERATE in seen assert StrengthOfEvidenceProvided.SUPPORTING in seen + + +############################################################################### +# Tests for find_or_create_acmg_classification +############################################################################### + + +@pytest.mark.parametrize( + "criterion,evidence_strength,points", + [ + # Valid combinations + (ACMGCriterion.PS3, StrengthOfEvidenceProvided.STRONG, 4), + (ACMGCriterion.BS3, StrengthOfEvidenceProvided.MODERATE, -2), + (None, None, None), # Should return None + (None, None, 5), # Should derive from points + ], +) +def test_find_or_create_acmg_classification_validation_does_not_raise_on_valid_combinations( + session, criterion, evidence_strength, points +): + """Test input validation for find_or_create_acmg_classification valid values.""" + result = find_or_create_acmg_classification(session, criterion, evidence_strength, points) + + if criterion is None and evidence_strength is None and points is None: + assert result is None + else: + assert result is not None + + +@pytest.mark.parametrize( + "criterion,evidence_strength,points", + [ + # Invalid combinations - only one is None + (ACMGCriterion.PS3, None, 4), + (None, StrengthOfEvidenceProvided.STRONG, 4), + ], +) +def test_find_or_create_acmg_classification_validation_raises_on_invalid_combinations( + session, criterion, evidence_strength, points +): + """Test input validation for find_or_create_acmg_classification invalid values.""" + with pytest.raises( + ValueError, + match="Both criterion and evidence_strength must be provided together or both be None, with points.", + ): + find_or_create_acmg_classification(session, criterion, evidence_strength, points) + + +def test_find_or_create_acmg_classification_returns_none_for_all_none(session): + """Test that function returns None when all parameters are None.""" + + result = find_or_create_acmg_classification(session, None, None, None) + assert result is None + + +def 
test_find_or_create_acmg_classification_derives_from_points(session): + """Test that function derives criterion and evidence_strength from points when they are None.""" + + result = find_or_create_acmg_classification(session, None, None, 4) + + assert result is not None + assert result.criterion == ACMGCriterion.PS3 + assert result.evidence_strength == StrengthOfEvidenceProvided.STRONG + assert result.points == 4 + + +def test_find_or_create_acmg_classification_creates_new_entry(session): + """Test that function creates a new ACMGClassification when one doesn't exist.""" + + # Verify no existing entry + existing = session.execute( + select(ACMGClassification) + .where(ACMGClassification.criterion == ACMGCriterion.PS3) + .where(ACMGClassification.evidence_strength == StrengthOfEvidenceProvided.MODERATE) + .where(ACMGClassification.points == 2) + ).scalar_one_or_none() + assert existing is None + + result = find_or_create_acmg_classification(session, ACMGCriterion.PS3, StrengthOfEvidenceProvided.MODERATE, 2) + + assert result is not None + assert result.criterion == ACMGCriterion.PS3 + assert result.evidence_strength == StrengthOfEvidenceProvided.MODERATE + assert result.points == 2 + + # Verify it was added to the session + session_objects = [obj for obj in session.new if isinstance(obj, ACMGClassification)] + assert len(session_objects) == 1 + assert session_objects[0] == result + + +def test_find_or_create_acmg_classification_finds_existing_entry(session): + """Test that function finds and returns existing ACMGClassification.""" + + # Create an existing entry + existing_classification = ACMGClassification( + criterion=ACMGCriterion.BS3, evidence_strength=StrengthOfEvidenceProvided.STRONG, points=-5 + ) + session.add(existing_classification) + session.commit() + + result = find_or_create_acmg_classification(session, ACMGCriterion.BS3, StrengthOfEvidenceProvided.STRONG, -5) + + assert result is not None + assert result == existing_classification + assert result.criterion == ACMGCriterion.BS3 + assert result.evidence_strength == StrengthOfEvidenceProvided.STRONG + assert result.points == -5 + + # Verify no new objects were added to the session + assert len(session.new) == 0 + + +def test_find_or_create_acmg_classification_with_zero_points(session): + """Test function behavior with zero points.""" + + result = find_or_create_acmg_classification(session, None, None, 0) + assert result is None + + +@pytest.mark.parametrize("points", [-8, -4, -1, 1, 3, 8]) +def test_find_or_create_acmg_classification_points_integration(session, points): + """Test that function works correctly with various point values.""" + + result = find_or_create_acmg_classification(session, None, None, points) + + expected_criterion, expected_strength = points_evidence_strength_equivalent(points) + + assert result is not None + assert result.criterion == expected_criterion + assert result.evidence_strength == expected_strength + assert result.points == points + + +def test_find_or_create_acmg_classification_does_not_commit(session): + """Test that function does not commit the session.""" + + find_or_create_acmg_classification(session, ACMGCriterion.PS3, StrengthOfEvidenceProvided.SUPPORTING, 1) + + # Rollback the session + session.rollback() + + # Verify the object is no longer in the database + existing = session.execute( + select(ACMGClassification) + .where(ACMGClassification.criterion == ACMGCriterion.PS3) + .where(ACMGClassification.evidence_strength == StrengthOfEvidenceProvided.SUPPORTING) + 
.where(ACMGClassification.points == 1) + ).scalar_one_or_none() + + assert existing is None diff --git a/tests/lib/test_score_calibrations.py b/tests/lib/test_score_calibrations.py index 9ca1b010..286072a3 100644 --- a/tests/lib/test_score_calibrations.py +++ b/tests/lib/test_score_calibrations.py @@ -11,6 +11,7 @@ from sqlalchemy.exc import NoResultFound from mavedb.lib.score_calibrations import ( + create_functional_classification, create_score_calibration, create_score_calibration_in_score_set, delete_score_calibration, @@ -24,8 +25,8 @@ from mavedb.models.score_set import ScoreSet from mavedb.models.user import User from mavedb.view_models.score_calibration import ScoreCalibrationCreate, ScoreCalibrationModify - from tests.helpers.constants import ( + EXTRA_USER, TEST_BIORXIV_IDENTIFIER, TEST_BRNICH_SCORE_CALIBRATION, TEST_CROSSREF_IDENTIFIER, @@ -34,11 +35,172 @@ TEST_PUBMED_IDENTIFIER, TEST_SEQ_SCORESET, VALID_SCORE_SET_URN, - EXTRA_USER, ) from tests.helpers.util.contributor import add_contributor from tests.helpers.util.score_calibration import create_test_score_calibration_in_score_set +################################################################################ +# Tests for create_functional_classification +################################################################################ + + +def test_create_functional_classification_without_acmg_classification(setup_lib_db, session): + # Create a mock calibration + calibration = ScoreCalibration() + + # Create mock functional range without ACMG classification + MockFunctionalRangeCreate = create_model( + "MockFunctionalRangeCreate", + label=(str, "Test Label"), + description=(str, "Test Description"), + range=(list, [0.0, 1.0]), + inclusive_lower_bound=(bool, True), + inclusive_upper_bound=(bool, False), + classification=(str, "pathogenic"), + oddspaths_ratio=(float, 1.5), + positive_likelihood_ratio=(float, 2.0), + acmg_classification=(type(None), None), + ) + + result = create_functional_classification(session, MockFunctionalRangeCreate(), calibration) + + assert result.description == "Test Description" + assert result.range == [0.0, 1.0] + assert result.inclusive_lower_bound is True + assert result.inclusive_upper_bound is False + assert result.classification == "pathogenic" + assert result.oddspaths_ratio == 1.5 + assert result.positive_likelihood_ratio == 2.0 + assert result.acmg_classification is None + assert result.acmg_classification_id is None + assert result.calibration == calibration + + +def test_create_functional_classification_with_acmg_classification(setup_lib_db, session): + # Create a mock calibration + calibration = ScoreCalibration() + + # Create mock ACMG classification + mock_criterion = "PS1" + mock_evidence_strength = "STRONG" + mock_points = 4 + MockAcmgClassification = create_model( + "MockAcmgClassification", + criterion=(str, mock_criterion), + evidence_strength=(str, mock_evidence_strength), + points=(int, mock_points), + ) + + # Create mock functional range with ACMG classification + MockFunctionalRangeCreate = create_model( + "MockFunctionalRangeCreate", + label=(str, "Test Label"), + description=(str, "Test Description"), + range=(list, [0.0, 1.0]), + inclusive_lower_bound=(bool, True), + inclusive_upper_bound=(bool, False), + classification=(str, "pathogenic"), + oddspaths_ratio=(float, 1.5), + positive_likelihood_ratio=(float, 2.0), + acmg_classification=(MockAcmgClassification, MockAcmgClassification()), + ) + + functional_range_create = MockFunctionalRangeCreate() + + with 
mock.patch("mavedb.lib.score_calibrations.find_or_create_acmg_classification") as mock_find_or_create: + # Mock the ACMG classification with an ID + MockPersistedAcmgClassification = create_model( + "MockPersistedAcmgClassification", + id=(int, 123), + ) + + mocked_persisted_acmg_classification = MockPersistedAcmgClassification() + mock_find_or_create.return_value = mocked_persisted_acmg_classification + result = create_functional_classification(session, functional_range_create, calibration) + + # Verify find_or_create_acmg_classification was called with correct parameters + mock_find_or_create.assert_called_once_with( + session, + criterion=mock_criterion, + evidence_strength=mock_evidence_strength, + points=mock_points, + ) + + # Verify the result + assert result.label == "Test Label" + assert result.description == "Test Description" + assert result.range == [0.0, 1.0] + assert result.inclusive_lower_bound is True + assert result.inclusive_upper_bound is False + assert result.classification == "pathogenic" + assert result.oddspaths_ratio == 1.5 + assert result.positive_likelihood_ratio == 2.0 + assert result.acmg_classification == mocked_persisted_acmg_classification + assert result.acmg_classification_id == 123 + assert result.calibration == calibration + + +def test_create_functional_classification_propagates_acmg_errors(setup_lib_db, session): + # Create a mock calibration + calibration = ScoreCalibration() + + # Create mock ACMG classification + MockAcmgClassification = create_model( + "MockAcmgClassification", + criterion=(str, "PS1"), + evidence_strength=(str, "strong"), + points=(int, 4), + ) + + # Create mock functional range with ACMG classification + MockFunctionalRangeCreate = create_model( + "MockFunctionalRangeCreate", + label=(str, "Test Label"), + description=(str, "Test Description"), + range=(list, [0.0, 1.0]), + inclusive_lower_bound=(bool, True), + inclusive_upper_bound=(bool, False), + classification=(str, "pathogenic"), + oddspaths_ratio=(float, 1.5), + positive_likelihood_ratio=(float, 2.0), + acmg_classification=(MockAcmgClassification, MockAcmgClassification()), + ) + + functional_range_create = MockFunctionalRangeCreate() + + with ( + pytest.raises(ValueError, match="ACMG error"), + mock.patch( + "mavedb.lib.score_calibrations.find_or_create_acmg_classification", + side_effect=ValueError("ACMG error"), + ), + ): + create_functional_classification(session, functional_range_create, calibration) + + +def test_create_functional_classification_does_not_commit_transaction(setup_lib_db, session): + # Create a mock calibration + calibration = ScoreCalibration() + + # Create mock functional range without ACMG classification + MockFunctionalRangeCreate = create_model( + "MockFunctionalRangeCreate", + label=(str, "Test Label"), + description=(str, "Test Description"), + range=(list, [0.0, 1.0]), + inclusive_lower_bound=(bool, True), + inclusive_upper_bound=(bool, False), + classification=(str, "pathogenic"), + oddspaths_ratio=(float, 1.5), + positive_likelihood_ratio=(float, 2.0), + acmg_classification=(type(None), None), + ) + + with mock.patch.object(session, "commit") as mock_commit: + create_functional_classification(session, MockFunctionalRangeCreate(), calibration) + mock_commit.assert_not_called() + + ################################################################################ # Tests for create_score_calibration ################################################################################ @@ -83,6 +245,7 @@ async def 
test_create_score_calibration_in_score_set_creates_score_calibration_w threshold_sources=(list, []), classification_sources=(list, []), method_sources=(list, []), + functional_ranges=(list, []), ) calibration = await create_score_calibration_in_score_set(session, MockCalibrationCreate(), test_user) @@ -102,6 +265,7 @@ async def test_create_score_calibration_in_score_set_investigator_provided_set_w threshold_sources=(list, []), classification_sources=(list, []), method_sources=(list, []), + functional_ranges=(list, []), ) calibration = await create_score_calibration_in_score_set(session, MockCalibrationCreate(), test_user) @@ -133,6 +297,7 @@ async def test_create_score_calibration_in_score_set_investigator_provided_set_w threshold_sources=(list, []), classification_sources=(list, []), method_sources=(list, []), + functional_ranges=(list, []), ) calibration = await create_score_calibration_in_score_set(session, MockCalibrationCreate(), extra_user) @@ -153,6 +318,7 @@ async def test_create_score_calibration_in_score_set_investigator_provided_not_s threshold_sources=(list, []), classification_sources=(list, []), method_sources=(list, []), + functional_ranges=(list, []), ) # invoke from a different user context @@ -191,6 +357,7 @@ async def test_create_score_calibration_creates_score_calibration_when_score_set threshold_sources=(list, []), classification_sources=(list, []), method_sources=(list, []), + functional_ranges=(list, []), ) calibration = await create_score_calibration(session, MockCalibrationCreate(), test_user) @@ -225,6 +392,7 @@ async def test_create_score_calibration_propagates_errors_from_publication_find_ ), classification_sources=(list, []), method_sources=(list, []), + functional_ranges=(list, []), ) with ( pytest.raises( @@ -277,6 +445,7 @@ async def test_create_score_calibration_publication_identifier_associations_crea threshold_sources=(list, []), classification_sources=(list, []), method_sources=(list, []), + functional_ranges=(list, []), ) test_user = session.execute(select(User)).scalars().first() @@ -312,6 +481,7 @@ async def test_create_score_calibration_user_is_set_as_creator_and_modifier( threshold_sources=(list, []), classification_sources=(list, []), method_sources=(list, []), + functional_ranges=(list, []), ) test_user = session.execute(select(User)).scalars().first() @@ -411,6 +581,7 @@ async def test_modify_score_calibration_modifies_score_calibration_when_score_se threshold_sources=(list, []), classification_sources=(list, []), method_sources=(list, []), + functional_ranges=(list, []), ) modified_calibration = await modify_score_calibration( @@ -447,6 +618,7 @@ async def test_modify_score_calibration_clears_existing_publication_identifier_a threshold_sources=(list, []), classification_sources=(list, []), method_sources=(list, []), + functional_ranges=(list, []), ) mocked_calibration = MockCalibrationModify() @@ -493,6 +665,7 @@ async def test_modify_score_calibration_publication_identifier_associations_crea threshold_sources=(list, []), classification_sources=(list, []), method_sources=(list, []), + functional_ranges=(list, []), ) mocked_calibration = MockCalibrationModify() @@ -566,6 +739,7 @@ async def test_modify_score_calibration_retains_existing_publication_relationshi for pub_dict in TEST_BRNICH_SCORE_CALIBRATION["method_sources"] ], ), + functional_ranges=(list, []), ) modified_calibration = await modify_score_calibration( @@ -611,6 +785,7 @@ async def test_modify_score_calibration_adds_new_publication_association( ), classification_sources=(list, []), 
method_sources=(list, []), + functional_ranges=(list, []), ) modified_calibration = await modify_score_calibration( @@ -651,6 +826,7 @@ async def test_modify_score_calibration_user_is_set_as_modifier( threshold_sources=(list, []), classification_sources=(list, []), method_sources=(list, []), + functional_ranges=(list, []), ) modify_user = session.execute(select(User).where(User.id != test_user.id)).scalars().first() @@ -700,6 +876,7 @@ async def test_modify_score_calibration_new_score_set(setup_lib_db_with_score_se threshold_sources=(list, []), classification_sources=(list, []), method_sources=(list, []), + functional_ranges=(list, []), ) modified_calibration = await modify_score_calibration( @@ -709,6 +886,42 @@ async def test_modify_score_calibration_new_score_set(setup_lib_db_with_score_se assert modified_calibration.score_set == new_containing_score_set +@pytest.mark.asyncio +@pytest.mark.parametrize( + "mock_publication_fetch", + [ + [ + {"dbName": "PubMed", "identifier": TEST_PUBMED_IDENTIFIER}, + {"dbName": "bioRxiv", "identifier": TEST_BIORXIV_IDENTIFIER}, + ], + ], + indirect=["mock_publication_fetch"], +) +async def test_modify_score_calibration_clears_functional_ranges( + setup_lib_db_with_score_set, session, mock_publication_fetch +): + test_user = session.execute(select(User)).scalars().first() + + existing_calibration = await create_test_score_calibration_in_score_set( + session, setup_lib_db_with_score_set.urn, test_user + ) + + MockCalibrationModify = create_model( + "MockCalibrationModify", + score_set_urn=(str | None, setup_lib_db_with_score_set.urn), + threshold_sources=(list, []), + classification_sources=(list, []), + method_sources=(list, []), + functional_ranges=(list, []), + ) + + modified_calibration = await modify_score_calibration( + session, existing_calibration, MockCalibrationModify(), test_user + ) + assert modified_calibration is not None + assert len(modified_calibration.functional_ranges) == 0 + + @pytest.mark.asyncio @pytest.mark.parametrize( "mock_publication_fetch", diff --git a/tests/view_models/test_acmg_classification.py b/tests/view_models/test_acmg_classification.py index f7b68149..4640196c 100644 --- a/tests/view_models/test_acmg_classification.py +++ b/tests/view_models/test_acmg_classification.py @@ -1,21 +1,22 @@ -import pytest from copy import deepcopy -from mavedb.lib.exceptions import ValidationError -from mavedb.view_models.acmg_classification import ACMGClassificationCreate, ACMGClassification +import pytest +from mavedb.lib.exceptions import ValidationError +from mavedb.models.enums.acmg_criterion import ACMGCriterion +from mavedb.models.enums.strength_of_evidence import StrengthOfEvidenceProvided +from mavedb.view_models.acmg_classification import ACMGClassification, ACMGClassificationCreate from tests.helpers.constants import ( TEST_ACMG_BS3_STRONG_CLASSIFICATION, - TEST_ACMG_PS3_STRONG_CLASSIFICATION, TEST_ACMG_BS3_STRONG_CLASSIFICATION_WITH_POINTS, + TEST_ACMG_PS3_STRONG_CLASSIFICATION, TEST_ACMG_PS3_STRONG_CLASSIFICATION_WITH_POINTS, TEST_SAVED_ACMG_BS3_STRONG_CLASSIFICATION, - TEST_SAVED_ACMG_PS3_STRONG_CLASSIFICATION, TEST_SAVED_ACMG_BS3_STRONG_CLASSIFICATION_WITH_POINTS, + TEST_SAVED_ACMG_PS3_STRONG_CLASSIFICATION, TEST_SAVED_ACMG_PS3_STRONG_CLASSIFICATION_WITH_POINTS, ) - ### ACMG Classification Creation Tests ### @@ -33,8 +34,8 @@ def test_can_create_acmg_classification(valid_acmg_classification): acmg = ACMGClassificationCreate(**valid_acmg_classification) assert isinstance(acmg, ACMGClassificationCreate) - assert 
acmg.criterion == valid_acmg_classification.get("criterion") - assert acmg.evidence_strength == valid_acmg_classification.get("evidence_strength") + assert acmg.criterion.value == valid_acmg_classification.get("criterion") + assert acmg.evidence_strength.value == valid_acmg_classification.get("evidence_strength") assert acmg.points == valid_acmg_classification.get("points") @@ -78,8 +79,8 @@ def test_can_create_acmg_classification_from_points(): acmg = ACMGClassificationCreate(points=-4) # BS3 Strong assert isinstance(acmg, ACMGClassificationCreate) - assert acmg.criterion == "BS3" - assert acmg.evidence_strength == "strong" + assert acmg.criterion == ACMGCriterion.BS3 + assert acmg.evidence_strength == StrengthOfEvidenceProvided.STRONG assert acmg.points == -4 @@ -100,6 +101,6 @@ def test_can_create_acmg_classification_from_saved_data(valid_saved_classificati acmg = ACMGClassification(**valid_saved_classification) assert isinstance(acmg, ACMGClassification) - assert acmg.criterion == valid_saved_classification.get("criterion") - assert acmg.evidence_strength == valid_saved_classification.get("evidenceStrength") + assert acmg.criterion.value == valid_saved_classification.get("criterion") + assert acmg.evidence_strength.value == valid_saved_classification.get("evidenceStrength") assert acmg.points == valid_saved_classification.get("points") diff --git a/tests/view_models/test_score_calibration.py b/tests/view_models/test_score_calibration.py index bf89aec4..11985f26 100644 --- a/tests/view_models/test_score_calibration.py +++ b/tests/view_models/test_score_calibration.py @@ -47,7 +47,7 @@ def test_can_create_valid_functional_range(functional_range): assert fr.label == functional_range["label"] assert fr.description == functional_range.get("description") - assert fr.classification == functional_range["classification"] + assert fr.classification.value == functional_range["classification"] assert fr.range == tuple(functional_range["range"]) assert fr.inclusive_lower_bound == functional_range.get("inclusive_lower_bound", True) assert fr.inclusive_upper_bound == functional_range.get("inclusive_upper_bound", False) @@ -156,6 +156,50 @@ def test_is_contained_by_range(): assert not fr.is_contained_by_range(0.0), "0.0 (exclusive lower bound) should not be contained in the range" +def test_inclusive_bounds_get_default_when_unset_and_range_exists(): + fr = FunctionalRangeCreate.model_validate( + { + "label": "test range", + "classification": "abnormal", + "range": (0.0, 1.0), + } + ) + + assert fr.inclusive_lower_bound is True, "inclusive_lower_bound should default to True" + assert fr.inclusive_upper_bound is False, "inclusive_upper_bound should default to False" + + +def test_inclusive_bounds_remain_none_when_range_is_none(): + fr = FunctionalRangeCreate.model_validate( + { + "label": "test range", + "classification": "abnormal", + "range": None, + } + ) + + assert fr.inclusive_lower_bound is None, "inclusive_lower_bound should remain None" + assert fr.inclusive_upper_bound is None, "inclusive_upper_bound should remain None" + + +@pytest.mark.parametrize( + "bound_property, bound_value, match_text", + [ + ("inclusive_lower_bound", True, "An inclusive lower bound requires a defined range."), + ("inclusive_upper_bound", True, "An inclusive upper bound requires a defined range."), + ], +) +def test_cant_set_inclusive_bounds_when_range_is_none(bound_property, bound_value, match_text): + invalid_data = { + "label": "test range", + "classification": "abnormal", + "range": None, + bound_property: 
bound_value, + } + with pytest.raises(ValidationError, match=match_text): + FunctionalRangeCreate.model_validate(invalid_data) + + ############################################################################## # Tests for ScoreCalibration view models ############################################################################## From 4b99dc4dea1ca59b56001610fe7c9dc8cb61317f Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Mon, 17 Nov 2025 22:20:51 -0800 Subject: [PATCH 05/24] feat: remove deprecated functional_ranges_deprecated_json column from ScoreCalibration model --- ...770fa9e6e58_drop_functional_range_jsonb.py | 38 +++++++++++++++++++ src/mavedb/models/score_calibration.py | 3 -- 2 files changed, 38 insertions(+), 3 deletions(-) create mode 100644 alembic/versions/c770fa9e6e58_drop_functional_range_jsonb.py diff --git a/alembic/versions/c770fa9e6e58_drop_functional_range_jsonb.py b/alembic/versions/c770fa9e6e58_drop_functional_range_jsonb.py new file mode 100644 index 00000000..3b1e7998 --- /dev/null +++ b/alembic/versions/c770fa9e6e58_drop_functional_range_jsonb.py @@ -0,0 +1,38 @@ +"""drop functional range jsonb + +Revision ID: c770fa9e6e58 +Revises: 16beeb593513 +Create Date: 2025-11-17 22:19:22.440742 + +""" + +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +from alembic import op + +# revision identifiers, used by Alembic. +revision = "c770fa9e6e58" +down_revision = "16beeb593513" +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.drop_column("score_calibrations", "functional_ranges_deprecated_json") + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.add_column( + "score_calibrations", + sa.Column( + "functional_ranges_deprecated_json", + postgresql.JSONB(astext_type=sa.Text()), + autoincrement=False, + nullable=True, + ), + ) + # ### end Alembic commands ### diff --git a/src/mavedb/models/score_calibration.py b/src/mavedb/models/score_calibration.py index 25955eff..a0a8967c 100644 --- a/src/mavedb/models/score_calibration.py +++ b/src/mavedb/models/score_calibration.py @@ -41,9 +41,6 @@ class ScoreCalibration(Base): baseline_score = Column(Float, nullable=True) baseline_score_description = Column(String, nullable=True) - # Ranges and sources are stored as JSONB (intersection structure) to avoid complex joins for now. 
- # ranges: list[ { label, description?, classification, range:[lower,upper], inclusive_lower_bound, inclusive_upper_bound } ] - functional_ranges_deprecated_json = Column(JSONB(none_as_null=True), nullable=True) functional_ranges: Mapped[list["ScoreCalibrationFunctionalClassification"]] = relationship( "ScoreCalibrationFunctionalClassification", back_populates="calibration", From a47ec9b0010a3cb104b9dc718c8d2751507a00bd Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Tue, 18 Nov 2025 12:53:06 -0800 Subject: [PATCH 06/24] feat: add variants_for_functional_classification function to filter variants by score range --- src/mavedb/lib/score_calibrations.py | 106 +++++++++++++++++++++++++++ 1 file changed, 106 insertions(+) diff --git a/src/mavedb/lib/score_calibrations.py b/src/mavedb/lib/score_calibrations.py index cdf09f75..e834d251 100644 --- a/src/mavedb/lib/score_calibrations.py +++ b/src/mavedb/lib/score_calibrations.py @@ -1,17 +1,21 @@ """Utilities for building and mutating score calibration ORM objects.""" +import math from typing import Union +from sqlalchemy import Float, and_, select from sqlalchemy.orm import Session from mavedb.lib.acmg import find_or_create_acmg_classification from mavedb.lib.identifiers import find_or_create_publication_identifier +from mavedb.lib.validation.utilities import inf_or_float from mavedb.models.enums.score_calibration_relation import ScoreCalibrationRelation from mavedb.models.score_calibration import ScoreCalibration from mavedb.models.score_calibration_functional_classification import ScoreCalibrationFunctionalClassification from mavedb.models.score_calibration_publication_identifier import ScoreCalibrationPublicationIdentifierAssociation from mavedb.models.score_set import ScoreSet from mavedb.models.user import User +from mavedb.models.variant import Variant from mavedb.view_models import score_calibration @@ -67,6 +71,9 @@ def create_functional_classification( calibration=containing_calibration, ) + contained_variants = variants_for_functional_classification(db, functional_classification, use_sql=True) + functional_classification.variants = contained_variants + return functional_classification @@ -598,3 +605,102 @@ def delete_score_calibration(db: Session, calibration: ScoreCalibration) -> None db.delete(calibration) return None + + +def variants_for_functional_classification( + db: Session, + functional_classification: ScoreCalibrationFunctionalClassification, + use_sql: bool = False, +) -> list[Variant]: + """Return variants in the parent score set whose numeric score falls inside the + functional classification's range. + + The variant score is extracted from the JSONB ``Variant.data`` field using + ``score_json_path`` (default: ("score_data", "score") meaning + ``variant.data['score_data']['score']``). The classification's existing + ``score_is_contained_in_range`` method is used for interval logic, including + inclusive/exclusive behaviors. + + Parameters + ---------- + db : Session + Active SQLAlchemy session. + functional_classification : ScoreCalibrationFunctionalClassification + The ORM row defining the interval to test against. + use_sql : bool + When True, perform filtering in the database using JSONB extraction and + range predicates; falls back to Python filtering if an error occurs. + + Returns + ------- + list[Variant] + Variants whose score falls within the specified range. Empty list if + classification has no usable range. 
+ + Notes + ----- + * If use_sql=False (default) filtering occurs in Python after loading all + variants for the score set. For large sets set use_sql=True to push + comparison into Postgres. + * Variants lacking a score or with non-numeric scores are skipped. + * If ``functional_classification.range`` is ``None`` an empty list is + returned immediately. + """ + if not functional_classification.range: + return [] + + # Resolve score set id from attached calibration (relationship may be lazy) + score_set_id = functional_classification.calibration.score_set_id # type: ignore[attr-defined] + + if use_sql: + try: + # Build score extraction expression: data['score_data']['score']::text::float + score_expr = Variant.data["score_data"]["score"].astext.cast(Float) + + lower_raw, upper_raw = functional_classification.range + + # Convert 'inf' sentinels (or None) to float infinities for condition omission. + lower_bound = inf_or_float(lower_raw, lower=True) + upper_bound = inf_or_float(upper_raw, lower=False) + + conditions = [Variant.score_set_id == score_set_id] + if not math.isinf(lower_bound): + if functional_classification.inclusive_lower_bound: + conditions.append(score_expr >= lower_bound) + else: + conditions.append(score_expr > lower_bound) + if not math.isinf(upper_bound): + if functional_classification.inclusive_upper_bound: + conditions.append(score_expr <= upper_bound) + else: + conditions.append(score_expr < upper_bound) + + stmt = select(Variant).where(and_(*conditions)) + return list(db.execute(stmt).scalars()) + + except Exception: # noqa: BLE001 + # Fall back to Python filtering if casting/JSON path errors occur. + pass + + # Python filtering fallback / default path + variants = db.execute(select(Variant).where(Variant.score_set_id == score_set_id)).scalars().all() + matches: list[Variant] = [] + for v in variants: + try: + container = v.data.get("score_data") if isinstance(v.data, dict) else None + if not container or not isinstance(container, dict): + continue + + raw = container.get("score") + if raw is None: + continue + + score = float(raw) + + except Exception: # noqa: BLE001 + continue + + if functional_classification.score_is_contained_in_range(score): + matches.append(v) + + return matches From ef3eb8c4ac3022316ad852cb43f65fa4d8bca3a9 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Tue, 18 Nov 2025 14:31:47 -0800 Subject: [PATCH 07/24] feat: add support for class based score ranges - Add a property `class_` to score calibration functional classifications. One of `range` or `class_` must be defined - Add validation logic to class based score ranges - Refactor lib code to support both range types - Refactor tests to support both range types TODO: Support for creating variant associations in class based score ranges. 
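For illustration only, and not part of this patch: a minimal sketch of the two mutually exclusive shapes the reworked `FunctionalClassificationCreate` view model accepts under the validators added below. Exactly one of `range` or `class` may be provided, and inclusive bounds only apply to range based entries.

from mavedb.view_models.score_calibration import FunctionalClassificationCreate

# Range based: a numeric interval; unset inclusive bounds default to [lower, upper).
range_based = FunctionalClassificationCreate.model_validate(
    {
        "label": "abnormal scores",
        "functional_classification": "abnormal",
        "range": (-5.0, -1.0),
    }
)

# Class based: a categorical class name instead of a numeric interval.
# Inclusive bounds may not be set here and remain None.
class_based = FunctionalClassificationCreate.model_validate(
    {
        "label": "abnormal class",
        "functional_classification": "abnormal",
        "class": "abnormal_class",
    }
)

# Supplying both `range` and `class`, or neither, raises a ValidationError, and a
# single calibration may not mix range based and class based classifications.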
--- ...rename_functional_ranges_to_functional_.py | 45 ++ src/mavedb/lib/annotation/classification.py | 24 +- src/mavedb/lib/annotation/util.py | 21 +- src/mavedb/lib/score_calibrations.py | 26 +- src/mavedb/models/score_calibration.py | 2 +- ...e_calibration_functional_classification.py | 10 +- src/mavedb/scripts/load_calibration_csv.py | 6 +- .../scripts/load_pp_style_calibration.py | 8 +- src/mavedb/view_models/score_calibration.py | 194 ++++-- tests/conftest.py | 4 +- tests/helpers/constants.py | 107 +++- tests/helpers/util/score_calibration.py | 13 +- tests/lib/annotation/test_annotate.py | 12 +- tests/lib/annotation/test_classification.py | 8 +- tests/lib/annotation/test_util.py | 20 +- tests/lib/conftest.py | 4 +- tests/lib/test_acmg.py | 4 + tests/lib/test_score_calibrations.py | 579 +++++++++++++++--- tests/routers/test_mapped_variants.py | 22 +- tests/routers/test_score_calibrations.py | 196 +++--- tests/routers/test_score_set.py | 42 +- tests/view_models/test_score_calibration.py | 294 ++++++--- tests/view_models/test_score_set.py | 8 +- 23 files changed, 1225 insertions(+), 424 deletions(-) create mode 100644 alembic/versions/0520dfa9f2db_rename_functional_ranges_to_functional_.py diff --git a/alembic/versions/0520dfa9f2db_rename_functional_ranges_to_functional_.py b/alembic/versions/0520dfa9f2db_rename_functional_ranges_to_functional_.py new file mode 100644 index 00000000..7b66d976 --- /dev/null +++ b/alembic/versions/0520dfa9f2db_rename_functional_ranges_to_functional_.py @@ -0,0 +1,45 @@ +"""rename functional ranges to functional classifications, add class_ to model, rename classification to functional_classification + +Revision ID: 0520dfa9f2db +Revises: c770fa9e6e58 +Create Date: 2025-11-18 18:51:33.107952 + +""" + +import sqlalchemy as sa + +from alembic import op + +# revision identifiers, used by Alembic. +revision = "0520dfa9f2db" +down_revision = "c770fa9e6e58" +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.alter_column( + "score_calibration_functional_classifications", + "classification", + new_column_name="functional_classification", + type_=sa.Enum( + "normal", "abnormal", "not_specified", name="functionalclassification", native_enum=False, length=32 + ), + nullable=False, + ) + op.add_column("score_calibration_functional_classifications", sa.Column("class_", sa.String(), nullable=True)) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + op.alter_column( + "score_calibration_functional_classifications", + "functional_classification", + new_column_name="classification", + type_=sa.VARCHAR(length=32), + nullable=False, + ) + op.drop_column("score_calibration_functional_classifications", "class_") + # ### end Alembic commands ### diff --git a/src/mavedb/lib/annotation/classification.py b/src/mavedb/lib/annotation/classification.py index e15c8327..19dd13a5 100644 --- a/src/mavedb/lib/annotation/classification.py +++ b/src/mavedb/lib/annotation/classification.py @@ -5,9 +5,9 @@ from ga4gh.va_spec.acmg_2015 import VariantPathogenicityEvidenceLine from ga4gh.va_spec.base.enums import StrengthOfEvidenceProvided -from mavedb.models.enums.functional_classification import FunctionalClassification +from mavedb.models.enums.functional_classification import FunctionalClassification as FunctionalClassificationOptions from mavedb.models.mapped_variant import MappedVariant -from mavedb.view_models.score_calibration import FunctionalRange +from mavedb.view_models.score_calibration import FunctionalClassification logger = logging.getLogger(__name__) @@ -44,7 +44,7 @@ def functional_classification_of_variant( " Unable to classify functional impact." ) - if not primary_calibration.functional_ranges: + if not primary_calibration.functional_classifications: raise ValueError( f"Variant {mapped_variant.variant.urn} does not have ranges defined in its primary score calibration." " Unable to classify functional impact." @@ -58,14 +58,14 @@ def functional_classification_of_variant( " Unable to classify functional impact." ) - for functional_range in primary_calibration.functional_ranges: + for functional_range in primary_calibration.functional_classifications: # It's easier to reason with the view model objects for functional ranges than the JSONB fields in the raw database object. - functional_range_view = FunctionalRange.model_validate(functional_range) + functional_range_view = FunctionalClassification.model_validate(functional_range) if functional_range_view.is_contained_by_range(functional_score): - if functional_range_view.classification is FunctionalClassification.normal: + if functional_range_view.functional_classification is FunctionalClassificationOptions.normal: return ExperimentalVariantFunctionalImpactClassification.NORMAL - elif functional_range_view.classification is FunctionalClassification.abnormal: + elif functional_range_view.functional_classification is FunctionalClassificationOptions.abnormal: return ExperimentalVariantFunctionalImpactClassification.ABNORMAL else: return ExperimentalVariantFunctionalImpactClassification.INDETERMINATE @@ -97,7 +97,7 @@ def pathogenicity_classification_of_variant( " Unable to classify clinical impact." ) - if not primary_calibration.functional_ranges: + if not primary_calibration.functional_classifications: raise ValueError( f"Variant {mapped_variant.variant.urn} does not have ranges defined in its primary score calibration." " Unable to classify clinical impact." @@ -111,9 +111,9 @@ def pathogenicity_classification_of_variant( " Unable to classify clinical impact." ) - for pathogenicity_range in primary_calibration.functional_ranges: + for pathogenicity_range in primary_calibration.functional_classifications: # It's easier to reason with the view model objects for functional ranges than the JSONB fields in the raw database object. 
- pathogenicity_range_view = FunctionalRange.model_validate(pathogenicity_range) + pathogenicity_range_view = FunctionalClassification.model_validate(pathogenicity_range) if pathogenicity_range_view.is_contained_by_range(functional_score): if pathogenicity_range_view.acmg_classification is None: @@ -124,7 +124,7 @@ def pathogenicity_classification_of_variant( if ( pathogenicity_range_view.acmg_classification.evidence_strength is None or pathogenicity_range_view.acmg_classification.criterion is None - ): # pragma: no cover - enforced by model validators in FunctionalRange view model + ): # pragma: no cover - enforced by model validators in FunctionalClassification view model return (VariantPathogenicityEvidenceLine.Criterion.PS3, None) # TODO#540: Handle moderate+ @@ -140,7 +140,7 @@ def pathogenicity_classification_of_variant( if ( pathogenicity_range_view.acmg_classification.criterion.name not in VariantPathogenicityEvidenceLine.Criterion._member_names_ - ): # pragma: no cover - enforced by model validators in FunctionalRange view model + ): # pragma: no cover - enforced by model validators in FunctionalClassification view model raise ValueError( f"Variant {mapped_variant.variant.urn} is contained in a clinical calibration range with an invalid criterion." " Unable to classify clinical impact." diff --git a/src/mavedb/lib/annotation/util.py b/src/mavedb/lib/annotation/util.py index 0baab474..0b6274ad 100644 --- a/src/mavedb/lib/annotation/util.py +++ b/src/mavedb/lib/annotation/util.py @@ -1,16 +1,18 @@ from typing import Literal + from ga4gh.core.models import Extension from ga4gh.vrs.models import ( - MolecularVariation, Allele, CisPhasedBlock, - SequenceLocation, - SequenceReference, Expression, LiteralSequenceExpression, + MolecularVariation, + SequenceLocation, + SequenceReference, ) -from mavedb.models.mapped_variant import MappedVariant + from mavedb.lib.annotation.exceptions import MappingDataDoesntExistException +from mavedb.models.mapped_variant import MappedVariant from mavedb.view_models.score_calibration import SavedScoreCalibration @@ -190,13 +192,16 @@ def _variant_score_calibrations_have_required_calibrations_and_ranges_for_annota saved_calibration = SavedScoreCalibration.model_validate(primary_calibration) if annotation_type == "pathogenicity": return ( - saved_calibration.functional_ranges is not None - and len(saved_calibration.functional_ranges) > 0 - and any(fr.acmg_classification is not None for fr in saved_calibration.functional_ranges) + saved_calibration.functional_classifications is not None + and len(saved_calibration.functional_classifications) > 0 + and any(fr.acmg_classification is not None for fr in saved_calibration.functional_classifications) ) if annotation_type == "functional": - return saved_calibration.functional_ranges is not None and len(saved_calibration.functional_ranges) > 0 + return ( + saved_calibration.functional_classifications is not None + and len(saved_calibration.functional_classifications) > 0 + ) return True diff --git a/src/mavedb/lib/score_calibrations.py b/src/mavedb/lib/score_calibrations.py index e834d251..1fa00068 100644 --- a/src/mavedb/lib/score_calibrations.py +++ b/src/mavedb/lib/score_calibrations.py @@ -21,7 +21,9 @@ def create_functional_classification( db: Session, - functional_range_create: Union[score_calibration.FunctionalRangeCreate, score_calibration.FunctionalRangeModify], + functional_range_create: Union[ + score_calibration.FunctionalClassificationCreate, score_calibration.FunctionalClassificationModify + ], 
containing_calibration: ScoreCalibration, ) -> ScoreCalibrationFunctionalClassification: """ @@ -32,7 +34,7 @@ def create_functional_classification( Args: db (Session): Database session for performing database operations. - functional_range_create (score_calibration.FunctionalRangeCreate): + functional_range_create (score_calibration.FunctionalClassificationCreate): Input data containing the functional range parameters including label, description, range bounds, inclusivity flags, and optional ACMG classification information. @@ -64,7 +66,7 @@ def create_functional_classification( inclusive_lower_bound=functional_range_create.inclusive_lower_bound, inclusive_upper_bound=functional_range_create.inclusive_upper_bound, acmg_classification=acmg_classification, - classification=functional_range_create.classification, + functional_classification=functional_range_create.functional_classification, oddspaths_ratio=functional_range_create.oddspaths_ratio, # type: ignore[arg-type] positive_likelihood_ratio=functional_range_create.positive_likelihood_ratio, # type: ignore[arg-type] acmg_classification_id=acmg_classification.id if acmg_classification else None, @@ -155,7 +157,7 @@ async def _create_score_calibration( **calibration_create.model_dump( by_alias=False, exclude={ - "functional_ranges", + "functional_classifications", "threshold_sources", "classification_sources", "method_sources", @@ -163,17 +165,17 @@ async def _create_score_calibration( }, ), publication_identifier_associations=calibration_pub_assocs, - functional_ranges=[], + functional_classifications=[], created_by=user, modified_by=user, ) # type: ignore[call-arg] - for functional_range_create in calibration_create.functional_ranges or []: + for functional_range_create in calibration_create.functional_classifications or []: persisted_functional_range = create_functional_classification( db, functional_range_create, containing_calibration=calibration ) db.add(persisted_functional_range) - calibration.functional_ranges.append(persisted_functional_range) + calibration.functional_classifications.append(persisted_functional_range) return calibration @@ -406,15 +408,15 @@ async def modify_score_calibration( # Remove associations and calibrations that are no longer present for assoc in existing_assocs_map.values(): db.delete(assoc) - for functional_classification in calibration.functional_ranges: + for functional_classification in calibration.functional_classifications: db.delete(functional_classification) - calibration.functional_ranges.clear() + calibration.functional_classifications.clear() db.flush() db.refresh(calibration) for attr, value in calibration_update.model_dump().items(): if attr not in { - "functional_ranges", + "functional_classifications", "threshold_sources", "classification_sources", "method_sources", @@ -430,12 +432,12 @@ async def modify_score_calibration( calibration.publication_identifier_associations = updated_assocs calibration.modified_by = user - for functional_range_update in calibration_update.functional_ranges or []: + for functional_range_update in calibration_update.functional_classifications or []: persisted_functional_range = create_functional_classification( db, functional_range_update, containing_calibration=calibration ) db.add(persisted_functional_range) - calibration.functional_ranges.append(persisted_functional_range) + calibration.functional_classifications.append(persisted_functional_range) db.add(calibration) return calibration diff --git a/src/mavedb/models/score_calibration.py 
b/src/mavedb/models/score_calibration.py index a0a8967c..38ce1f28 100644 --- a/src/mavedb/models/score_calibration.py +++ b/src/mavedb/models/score_calibration.py @@ -41,7 +41,7 @@ class ScoreCalibration(Base): baseline_score = Column(Float, nullable=True) baseline_score_description = Column(String, nullable=True) - functional_ranges: Mapped[list["ScoreCalibrationFunctionalClassification"]] = relationship( + functional_classifications: Mapped[list["ScoreCalibrationFunctionalClassification"]] = relationship( "ScoreCalibrationFunctionalClassification", back_populates="calibration", cascade="all, delete-orphan", diff --git a/src/mavedb/models/score_calibration_functional_classification.py b/src/mavedb/models/score_calibration_functional_classification.py index 2ebdf261..5afd4f69 100644 --- a/src/mavedb/models/score_calibration_functional_classification.py +++ b/src/mavedb/models/score_calibration_functional_classification.py @@ -11,7 +11,7 @@ from mavedb.db.base import Base from mavedb.lib.validation.utilities import inf_or_float from mavedb.models.acmg_classification import ACMGClassification -from mavedb.models.enums.functional_classification import FunctionalClassification +from mavedb.models.enums.functional_classification import FunctionalClassification as FunctionalClassificationOptions from mavedb.models.score_calibration_functional_classification_variant_association import ( score_calibration_functional_classification_variants_association_table, ) @@ -32,13 +32,15 @@ class ScoreCalibrationFunctionalClassification(Base): label = Column(String, nullable=False) description = Column(String, nullable=True) - classification = Column( - Enum(FunctionalClassification, native_enum=False, validate_strings=True, length=32), + functional_classification = Column( + Enum(FunctionalClassificationOptions, native_enum=False, validate_strings=True, length=32), nullable=False, - default=FunctionalClassification.not_specified, + default=FunctionalClassificationOptions.not_specified, ) range = Column(JSONB(none_as_null=True), nullable=True) # (lower_bound, upper_bound) + class_ = Column(String, nullable=True) + inclusive_lower_bound = Column(Boolean, nullable=True, default=True) inclusive_upper_bound = Column(Boolean, nullable=True, default=False) diff --git a/src/mavedb/scripts/load_calibration_csv.py b/src/mavedb/scripts/load_calibration_csv.py index 5c3b2bba..95da46fe 100644 --- a/src/mavedb/scripts/load_calibration_csv.py +++ b/src/mavedb/scripts/load_calibration_csv.py @@ -106,7 +106,7 @@ from mavedb.scripts.environment import with_database_session from mavedb.view_models.acmg_classification import ACMGClassificationCreate from mavedb.view_models.publication_identifier import PublicationIdentifierCreate -from mavedb.view_models.score_calibration import FunctionalRangeCreate, ScoreCalibrationCreate +from mavedb.view_models.score_calibration import FunctionalClassificationCreate, ScoreCalibrationCreate BRNICH_PMID = "31892348" RANGE_PATTERN = re.compile(r"^\s*([\[(])\s*([^,]+)\s*,\s*([^\])]+)\s*([])])\s*$", re.IGNORECASE) @@ -274,7 +274,7 @@ def build_ranges(row: Dict[str, str], infer_strengths: bool = True) -> Tuple[Lis label = row.get(f"class_{i}_name", "").strip() ranges.append( - FunctionalRangeCreate( + FunctionalClassificationCreate( label=label, classification=classification, range=(lower, upper), @@ -366,7 +366,7 @@ def main(db: Session, csv_path: str, delimiter: str, overwrite: bool, purge_publ method_sources=method_publications, classification_sources=calculation_publications, 
research_use_only=False, - functional_ranges=ranges, + functional_classifications=ranges, notes=calibration_notes, ) except Exception as e: # broad to keep import running diff --git a/src/mavedb/scripts/load_pp_style_calibration.py b/src/mavedb/scripts/load_pp_style_calibration.py index 3d5015e4..99862d6d 100644 --- a/src/mavedb/scripts/load_pp_style_calibration.py +++ b/src/mavedb/scripts/load_pp_style_calibration.py @@ -183,7 +183,7 @@ def main(db: Session, archive_path: str, dataset_map: str, overwrite: bool) -> N click.echo(f" Overwriting existing '{calibration_name}' in Score Set {score_set.urn}") benign_has_lower_functional_scores = calibration_data.get("scoreset_flipped", False) - functional_ranges: List[score_calibration.FunctionalRangeCreate] = [] + functional_classifications: List[score_calibration.FunctionalClassificationCreate] = [] for points, range_data in calibration_data.get("point_ranges", {}).items(): if not range_data: continue @@ -212,7 +212,7 @@ def main(db: Session, archive_path: str, dataset_map: str, overwrite: bool) -> N inclusive_lower = False inclusive_upper = True if upper_bound is not None else False - functional_range = score_calibration.FunctionalRangeCreate( + functional_range = score_calibration.FunctionalClassificationCreate( label=f"{ps_or_bs} {strength_label} ({points})", classification="abnormal" if points > 0 else "normal", range=range_data, @@ -222,11 +222,11 @@ def main(db: Session, archive_path: str, dataset_map: str, overwrite: bool) -> N inclusive_lower_bound=inclusive_lower, inclusive_upper_bound=inclusive_upper, ) - functional_ranges.append(functional_range) + functional_classifications.append(functional_range) score_calibration_create = score_calibration.ScoreCalibrationCreate( title=calibration_name, - functional_ranges=functional_ranges, + functional_classifications=functional_classifications, research_use_only=True, score_set_urn=score_set.urn, calibration_metadata={"prior_probability_pathogenicity": calibration_data.get("prior", None)}, diff --git a/src/mavedb/view_models/score_calibration.py b/src/mavedb/view_models/score_calibration.py index 9164d5ef..e8696943 100644 --- a/src/mavedb/view_models/score_calibration.py +++ b/src/mavedb/view_models/score_calibration.py @@ -7,7 +7,7 @@ from datetime import date from typing import TYPE_CHECKING, Any, Collection, Optional, Sequence, Union -from pydantic import field_validator, model_validator +from pydantic import Field, field_validator, model_validator from mavedb.lib.oddspaths import oddspaths_evidence_strength_equivalent from mavedb.lib.validation.exceptions import ValidationError @@ -16,7 +16,7 @@ transform_score_set_to_urn, ) from mavedb.lib.validation.utilities import inf_or_float -from mavedb.models.enums.functional_classification import FunctionalClassification +from mavedb.models.enums.functional_classification import FunctionalClassification as FunctionalClassifcationOptions from mavedb.view_models import record_type_validator, set_record_type from mavedb.view_models.acmg_classification import ( ACMGClassification, @@ -43,7 +43,7 @@ ### Functional range models -class FunctionalRangeBase(BaseModel): +class FunctionalClassificationBase(BaseModel): """Base functional range model. Represents a labeled numeric score interval with optional evidence metadata. 
@@ -53,9 +53,11 @@ class FunctionalRangeBase(BaseModel): label: str description: Optional[str] = None - classification: FunctionalClassification = FunctionalClassification.not_specified + functional_classification: FunctionalClassifcationOptions = FunctionalClassifcationOptions.not_specified range: Optional[tuple[Union[float, None], Union[float, None]]] = None # (lower_bound, upper_bound) + class_: Optional[str] = Field(None, alias="class", serialization_alias="class") + inclusive_lower_bound: Optional[bool] = None inclusive_upper_bound: Optional[bool] = None @@ -64,6 +66,9 @@ class FunctionalRangeBase(BaseModel): oddspaths_ratio: Optional[float] = None positive_likelihood_ratio: Optional[float] = None + class Config: + populate_by_name = True + @field_validator("range") def ranges_are_not_backwards( cls, field_value: Optional[tuple[Union[float, None], Union[float, None]]] @@ -88,15 +93,52 @@ def ratios_must_be_positive(cls, field_value: Optional[float]) -> Optional[float return field_value + @field_validator("class_", "label", mode="before") + def labels_and_class_strip_whitespace_and_validate_not_empty(cls, field_value: Optional[str]) -> Optional[str]: + """Strip leading/trailing whitespace from class names.""" + if field_value is None: + return None + + field_value = field_value.strip() + if not field_value: + raise ValidationError("This field may not be empty or contain only whitespace.") + + return field_value + @model_validator(mode="after") - def inclusive_bounds_require_range(self: "FunctionalRangeBase") -> "FunctionalRangeBase": + def At_least_one_of_range_or_class_must_be_provided( + self: "FunctionalClassificationBase", + ) -> "FunctionalClassificationBase": + """Either a range or a class must be provided.""" + if self.range is None and self.class_ is None: + raise ValidationError("A functional range must specify either a numeric range or a class.") + + return self + + @model_validator(mode="after") + def class_and_range_mutually_exclusive( + self: "FunctionalClassificationBase", + ) -> "FunctionalClassificationBase": + """Either a range or a class may be provided, but not both.""" + if self.range is not None and self.class_ is not None: + raise ValidationError("A functional range may not specify both a numeric range and a class.") + + return self + + @model_validator(mode="after") + def inclusive_bounds_require_range(self: "FunctionalClassificationBase") -> "FunctionalClassificationBase": """Inclusive bounds may only be set if a range is provided. If they are unset, default them.""" - if self.range is None: + if self.class_ is not None: if self.inclusive_lower_bound: - raise ValidationError("An inclusive lower bound requires a defined range.") + raise ValidationError( + "An inclusive lower bound may not be set on a class based functional classification." + ) if self.inclusive_upper_bound: - raise ValidationError("An inclusive upper bound requires a defined range.") - else: + raise ValidationError( + "An inclusive upper bound may not be set on a class based functional classification." 
+ ) + + if self.range is not None: if self.inclusive_lower_bound is None: self.inclusive_lower_bound = True if self.inclusive_upper_bound is None: @@ -105,7 +147,9 @@ def inclusive_bounds_require_range(self: "FunctionalRangeBase") -> "FunctionalRa return self @model_validator(mode="after") - def inclusive_bounds_do_not_include_infinity(self: "FunctionalRangeBase") -> "FunctionalRangeBase": + def inclusive_bounds_do_not_include_infinity( + self: "FunctionalClassificationBase", + ) -> "FunctionalClassificationBase": """Disallow inclusive bounds on unbounded (infinite) ends.""" if self.inclusive_lower_bound and self.range is not None and self.range[0] is None: raise ValidationError("An inclusive lower bound may not include negative infinity.") @@ -115,25 +159,29 @@ def inclusive_bounds_do_not_include_infinity(self: "FunctionalRangeBase") -> "Fu return self @model_validator(mode="after") - def acmg_classification_evidence_agrees_with_classification(self: "FunctionalRangeBase") -> "FunctionalRangeBase": + def acmg_classification_evidence_agrees_with_classification( + self: "FunctionalClassificationBase", + ) -> "FunctionalClassificationBase": """If oddspaths is provided, ensure its evidence agrees with the classification.""" if self.acmg_classification is None or self.acmg_classification.criterion is None: return self if ( - self.classification is FunctionalClassification.normal + self.functional_classification is FunctionalClassifcationOptions.normal and self.acmg_classification.criterion.is_pathogenic - or self.classification is FunctionalClassification.abnormal + or self.functional_classification is FunctionalClassifcationOptions.abnormal and self.acmg_classification.criterion.is_benign ): raise ValidationError( - f"The ACMG classification criterion ({self.acmg_classification.criterion}) must agree with the functional range classification ({self.classification})." + f"The ACMG classification criterion ({self.acmg_classification.criterion}) must agree with the functional range classification ({self.functional_classification})." 
) return self @model_validator(mode="after") - def oddspaths_ratio_agrees_with_acmg_classification(self: "FunctionalRangeBase") -> "FunctionalRangeBase": + def oddspaths_ratio_agrees_with_acmg_classification( + self: "FunctionalClassificationBase", + ) -> "FunctionalClassificationBase": """If both oddspaths and acmg_classification are provided, ensure they agree.""" if self.oddspaths_ratio is None or self.acmg_classification is None: return self @@ -168,20 +216,30 @@ def is_contained_by_range(self, score: float) -> bool: return lower_check and upper_check + @property + def class_based(self) -> bool: + """Determine if this functional classification is class-based.""" + return self.class_ is not None + + @property + def range_based(self) -> bool: + """Determine if this functional classification is range-based.""" + return self.range is not None + -class FunctionalRangeModify(FunctionalRangeBase): +class FunctionalClassificationModify(FunctionalClassificationBase): """Model used to modify an existing functional range.""" acmg_classification: Optional[ACMGClassificationModify] = None -class FunctionalRangeCreate(FunctionalRangeModify): +class FunctionalClassificationCreate(FunctionalClassificationModify): """Model used to create a new functional range.""" acmg_classification: Optional[ACMGClassificationCreate] = None -class SavedFunctionalRange(FunctionalRangeBase): +class SavedFunctionalClassification(FunctionalClassificationBase): """Persisted functional range model (includes record type metadata).""" record_type: str = None # type: ignore @@ -197,7 +255,7 @@ class Config: arbitrary_types_allowed = True -class FunctionalRange(SavedFunctionalRange): +class FunctionalClassification(SavedFunctionalClassification): """Complete functional range model returned by the API.""" acmg_classification: Optional[ACMGClassification] = None @@ -220,23 +278,23 @@ class ScoreCalibrationBase(BaseModel): baseline_score_description: Optional[str] = None notes: Optional[str] = None - functional_ranges: Optional[Sequence[FunctionalRangeBase]] = None + functional_classifications: Optional[Sequence[FunctionalClassificationBase]] = None threshold_sources: Optional[Sequence[PublicationIdentifierBase]] = None classification_sources: Optional[Sequence[PublicationIdentifierBase]] = None method_sources: Optional[Sequence[PublicationIdentifierBase]] = None calibration_metadata: Optional[dict] = None - @field_validator("functional_ranges") + @field_validator("functional_classifications") def ranges_do_not_overlap( - cls, field_value: Optional[Sequence[FunctionalRangeBase]] - ) -> Optional[Sequence[FunctionalRangeBase]]: + cls, field_value: Optional[Sequence[FunctionalClassificationBase]] + ) -> Optional[Sequence[FunctionalClassificationBase]]: """Ensure that no two functional ranges overlap (respecting inclusivity).""" - def test_overlap(range_test: FunctionalRangeBase, range_check: FunctionalRangeBase) -> bool: + def test_overlap(range_test: FunctionalClassificationBase, range_check: FunctionalClassificationBase) -> bool: # Allow 'not_specified' classifications to overlap with anything. 
if ( - range_test.classification is FunctionalClassification.not_specified - or range_check.classification is FunctionalClassification.not_specified + range_test.functional_classification is FunctionalClassifcationOptions.not_specified + or range_check.functional_classification is FunctionalClassifcationOptions.not_specified or range_test.range is None or range_check.range is None ): @@ -275,23 +333,34 @@ def test_overlap(range_test: FunctionalRangeBase, range_check: FunctionalRangeBa return field_value @model_validator(mode="after") - def functional_range_labels_must_be_unique(self: "ScoreCalibrationBase") -> "ScoreCalibrationBase": - """Enforce uniqueness (post-strip) of functional range labels.""" - if not self.functional_ranges: + def functional_range_labels_classes_must_be_unique(self: "ScoreCalibrationBase") -> "ScoreCalibrationBase": + """Enforce uniqueness (post-strip) of functional range labels and classes.""" + if not self.functional_classifications: return self - seen, dupes = set(), set() - for i, fr in enumerate(self.functional_ranges): - fr.label = fr.label.strip() - if fr.label in seen: - dupes.add((fr.label, i)) + seen_l, dupes_l = set(), set() + seen_c, dupes_c = set(), set() + for i, fr in enumerate(self.functional_classifications): + if fr.label in seen_l: + dupes_l.add((fr.label, i)) else: - seen.add(fr.label) + seen_l.add(fr.label) - if dupes: + if fr.class_ is not None: + if fr.class_ in seen_c: + dupes_c.add((fr.class_, i)) + else: + seen_c.add(fr.class_) + + if dupes_l: + raise ValidationError( + f"Detected repeated label(s): {', '.join(label for label, _ in dupes_l)}. Functional range labels must be unique.", + custom_loc=["body", "functionalClassifications", dupes_l.pop()[1], "label"], + ) + if dupes_c: raise ValidationError( - f"Detected repeated label(s): {', '.join(label for label, _ in dupes)}. Functional range labels must be unique.", - custom_loc=["body", "functionalRanges", dupes.pop()[1], "label"], + f"Detected repeated class name(s): {', '.join(class_name for class_name, _ in dupes_c)}. Functional range class names must be unique.", + custom_loc=["body", "functionalClassifications", dupes_c.pop()[1], "class"], ) return self @@ -299,16 +368,16 @@ def functional_range_labels_must_be_unique(self: "ScoreCalibrationBase") -> "Sco @model_validator(mode="after") def validate_baseline_score(self: "ScoreCalibrationBase") -> "ScoreCalibrationBase": """If a baseline score is provided and it falls within a functional range, it may only be contained in a normal range.""" - if not self.functional_ranges: + if not self.functional_classifications: return self if self.baseline_score is None: return self - for fr in self.functional_ranges: + for fr in self.functional_classifications: if ( fr.is_contained_by_range(self.baseline_score) - and fr.classification is not FunctionalClassification.normal + and fr.functional_classification is not FunctionalClassifcationOptions.normal ): raise ValidationError( f"The provided baseline score of {self.baseline_score} falls within a non-normal range ({fr.label}). 
Baseline scores may not fall within non-normal ranges.", @@ -317,13 +386,48 @@ def validate_baseline_score(self: "ScoreCalibrationBase") -> "ScoreCalibrationBa return self + @model_validator(mode="after") + def functional_classifications_must_be_of_same_type( + self: "ScoreCalibrationBase", + ) -> "ScoreCalibrationBase": + """All functional classifications must be either range-based or class-based.""" + if not self.functional_classifications: + return self + + range_based_count = sum(1 for fc in self.functional_classifications if fc.range_based) + class_based_count = sum(1 for fc in self.functional_classifications if fc.class_based) + + if range_based_count > 0 and class_based_count > 0: + raise ValidationError( + "All functional classifications within a score calibration must be of the same type (either all range-based or all class-based).", + custom_loc=["body", "functionalClassifications"], + ) + + return self + + @property + def range_based(self) -> bool: + """Determine if this score calibration is range-based.""" + if not self.functional_classifications: + return False + + return self.functional_classifications[0].range_based + + @property + def class_based(self) -> bool: + """Determine if this score calibration is class-based.""" + if not self.functional_classifications: + return False + + return self.functional_classifications[0].class_based + class ScoreCalibrationModify(ScoreCalibrationBase): """Model used to modify an existing score calibration.""" score_set_urn: Optional[str] = None - functional_ranges: Optional[Sequence[FunctionalRangeModify]] = None + functional_classifications: Optional[Sequence[FunctionalClassificationModify]] = None threshold_sources: Optional[Sequence[PublicationIdentifierCreate]] = None classification_sources: Optional[Sequence[PublicationIdentifierCreate]] = None method_sources: Optional[Sequence[PublicationIdentifierCreate]] = None @@ -332,7 +436,7 @@ class ScoreCalibrationModify(ScoreCalibrationBase): class ScoreCalibrationCreate(ScoreCalibrationModify): """Model used to create a new score calibration.""" - functional_ranges: Optional[Sequence[FunctionalRangeCreate]] = None + functional_classifications: Optional[Sequence[FunctionalClassificationCreate]] = None threshold_sources: Optional[Sequence[PublicationIdentifierCreate]] = None classification_sources: Optional[Sequence[PublicationIdentifierCreate]] = None method_sources: Optional[Sequence[PublicationIdentifierCreate]] = None @@ -352,7 +456,7 @@ class SavedScoreCalibration(ScoreCalibrationBase): primary: bool = False private: bool = True - functional_ranges: Optional[Sequence[SavedFunctionalRange]] = None + functional_classifications: Optional[Sequence[SavedFunctionalClassification]] = None threshold_sources: Optional[Sequence[SavedPublicationIdentifier]] = None classification_sources: Optional[Sequence[SavedPublicationIdentifier]] = None method_sources: Optional[Sequence[SavedPublicationIdentifier]] = None @@ -430,7 +534,7 @@ def generate_threshold_classification_and_method_sources(cls, data: Any): # typ class ScoreCalibration(SavedScoreCalibration): """Complete score calibration model returned by the API.""" - functional_ranges: Optional[Sequence[FunctionalRange]] = None + functional_classifications: Optional[Sequence[FunctionalClassification]] = None threshold_sources: Optional[Sequence[PublicationIdentifier]] = None classification_sources: Optional[Sequence[PublicationIdentifier]] = None method_sources: Optional[Sequence[PublicationIdentifier]] = None diff --git a/tests/conftest.py 
b/tests/conftest.py index b11f728c..33e709e9 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -25,7 +25,7 @@ from tests.helpers.constants import ( ADMIN_USER, EXTRA_USER, - TEST_BRNICH_SCORE_CALIBRATION, + TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED, TEST_INACTIVE_LICENSE, TEST_LICENSE, TEST_PATHOGENICITY_SCORE_CALIBRATION, @@ -143,7 +143,7 @@ def mock_experiment(): def mock_score_set(mock_user, mock_experiment, mock_publication_associations): score_set = mock.Mock(spec=ScoreSet) score_set.urn = VALID_SCORE_SET_URN - score_set.score_calibrations = [TEST_BRNICH_SCORE_CALIBRATION, TEST_PATHOGENICITY_SCORE_CALIBRATION] + score_set.score_calibrations = [TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED, TEST_PATHOGENICITY_SCORE_CALIBRATION] score_set.license.short_name = "MIT" score_set.created_by = mock_user score_set.modified_by = mock_user diff --git a/tests/helpers/constants.py b/tests/helpers/constants.py index d38955b4..c7b17b1d 100644 --- a/tests/helpers/constants.py +++ b/tests/helpers/constants.py @@ -1411,7 +1411,7 @@ TEST_FUNCTIONAL_RANGE_NORMAL = { "label": "test normal functional range", "description": "A normal functional range", - "classification": "normal", + "functional_classification": "normal", "range": [1.0, 5.0], "acmg_classification": TEST_ACMG_BS3_STRONG_CLASSIFICATION, "oddspaths_ratio": TEST_BS3_STRONG_ODDS_PATH_RATIO, @@ -1421,7 +1421,7 @@ TEST_SAVED_FUNCTIONAL_RANGE_NORMAL = { - "recordType": "FunctionalRange", + "recordType": "FunctionalClassification", **{camelize(k): v for k, v in TEST_FUNCTIONAL_RANGE_NORMAL.items() if k not in ("acmg_classification",)}, "acmgClassification": TEST_SAVED_ACMG_BS3_STRONG_CLASSIFICATION, "variants": [], @@ -1431,7 +1431,7 @@ TEST_FUNCTIONAL_RANGE_ABNORMAL = { "label": "test abnormal functional range", "description": "An abnormal functional range", - "classification": "abnormal", + "functional_classification": "abnormal", "range": [-5.0, -1.0], "acmg_classification": TEST_ACMG_PS3_STRONG_CLASSIFICATION, "oddspaths_ratio": TEST_PS3_STRONG_ODDS_PATH_RATIO, @@ -1441,7 +1441,7 @@ TEST_SAVED_FUNCTIONAL_RANGE_ABNORMAL = { - "recordType": "FunctionalRange", + "recordType": "FunctionalClassification", **{camelize(k): v for k, v in TEST_FUNCTIONAL_RANGE_ABNORMAL.items() if k not in ("acmg_classification",)}, "acmgClassification": TEST_SAVED_ACMG_PS3_STRONG_CLASSIFICATION, "variants": [], @@ -1450,7 +1450,7 @@ TEST_FUNCTIONAL_RANGE_NOT_SPECIFIED = { "label": "test not specified functional range", - "classification": "not_specified", + "functional_classification": "not_specified", "range": [-1.0, 1.0], "inclusive_lower_bound": True, "inclusive_upper_bound": False, @@ -1458,16 +1458,66 @@ TEST_SAVED_FUNCTIONAL_RANGE_NOT_SPECIFIED = { - "recordType": "FunctionalRange", + "recordType": "FunctionalClassification", **{camelize(k): v for k, v in TEST_FUNCTIONAL_RANGE_NOT_SPECIFIED.items()}, "variants": [], } +TEST_FUNCTIONAL_CLASSIFICATION_NORMAL = { + "label": "test normal functional class", + "description": "A normal functional class", + "functional_classification": "normal", + "class": "normal_class", + "acmg_classification": TEST_ACMG_BS3_STRONG_CLASSIFICATION, + "oddspaths_ratio": TEST_BS3_STRONG_ODDS_PATH_RATIO, +} + + +TEST_SAVED_FUNCTIONAL_CLASSIFICATION_NORMAL = { + "recordType": "FunctionalClassification", + **{camelize(k): v for k, v in TEST_FUNCTIONAL_CLASSIFICATION_NORMAL.items() if k not in ("acmg_classification",)}, + "acmgClassification": TEST_SAVED_ACMG_BS3_STRONG_CLASSIFICATION, + "variants": [], +} + + 
+TEST_FUNCTIONAL_CLASSIFICATION_ABNORMAL = { + "label": "test abnormal functional class", + "description": "An abnormal functional class", + "functional_classification": "abnormal", + "class": "abnormal_class", + "acmg_classification": TEST_ACMG_PS3_STRONG_CLASSIFICATION, + "oddspaths_ratio": TEST_PS3_STRONG_ODDS_PATH_RATIO, +} + + +TEST_SAVED_FUNCTIONAL_CLASSIFICATION_ABNORMAL = { + "recordType": "FunctionalClassification", + **{camelize(k): v for k, v in TEST_FUNCTIONAL_CLASSIFICATION_ABNORMAL.items() if k not in ("acmg_classification",)}, + "acmgClassification": TEST_SAVED_ACMG_PS3_STRONG_CLASSIFICATION, + "variants": [], +} + + +TEST_FUNCTIONAL_CLASSIFICATION_NOT_SPECIFIED = { + "label": "test not specified functional class", + "functional_classification": "not_specified", + "class": "not_specified_class", +} + + +TEST_SAVED_FUNCTIONAL_CLASSIFICATION_NOT_SPECIFIED = { + "recordType": "FunctionalClassification", + **{camelize(k): v for k, v in TEST_FUNCTIONAL_CLASSIFICATION_NOT_SPECIFIED.items()}, + "variants": [], +} + + TEST_FUNCTIONAL_RANGE_INCLUDING_NEGATIVE_INFINITY = { "label": "test functional range including negative infinity", "description": "A functional range including negative infinity", - "classification": "not_specified", + "functional_classification": "not_specified", "range": [None, 0.0], "inclusive_lower_bound": False, "inclusive_upper_bound": False, @@ -1475,7 +1525,7 @@ TEST_SAVED_FUNCTIONAL_RANGE_INCLUDING_NEGATIVE_INFINITY = { - "recordType": "FunctionalRange", + "recordType": "FunctionalClassification", **{camelize(k): v for k, v in TEST_FUNCTIONAL_RANGE_INCLUDING_NEGATIVE_INFINITY.items()}, } @@ -1483,7 +1533,7 @@ TEST_FUNCTIONAL_RANGE_INCLUDING_POSITIVE_INFINITY = { "label": "test functional range including positive infinity", "description": "A functional range including positive infinity", - "classification": "not_specified", + "functional_classification": "not_specified", "range": [0.0, None], "inclusive_lower_bound": False, "inclusive_upper_bound": False, @@ -1494,7 +1544,7 @@ "title": "Test BRNICH Score Calibration", "research_use_only": False, "investigator_provided": False, - "functional_ranges": [ + "functional_classifications": [ TEST_FUNCTIONAL_RANGE_NORMAL, TEST_FUNCTIONAL_RANGE_ABNORMAL, TEST_FUNCTIONAL_RANGE_NOT_SPECIFIED, @@ -1506,12 +1556,12 @@ } -TEST_BRNICH_SCORE_CALIBRATION = { +TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED = { "title": "Test BRNICH Score Calibration", "research_use_only": False, "baseline_score": TEST_BASELINE_SCORE, "baseline_score_description": "Test baseline score description", - "functional_ranges": [ + "functional_classifications": [ TEST_FUNCTIONAL_RANGE_NORMAL, TEST_FUNCTIONAL_RANGE_ABNORMAL, TEST_FUNCTIONAL_RANGE_NOT_SPECIFIED, @@ -1525,14 +1575,14 @@ "calibration_metadata": {}, } -TEST_SAVED_BRNICH_SCORE_CALIBRATION = { +TEST_SAVED_BRNICH_SCORE_CALIBRATION_RANGE_BASED = { "recordType": "ScoreCalibration", **{ camelize(k): v - for k, v in TEST_BRNICH_SCORE_CALIBRATION.items() - if k not in ("functional_ranges", "classification_sources", "threshold_sources", "method_sources") + for k, v in TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED.items() + if k not in ("functional_classifications", "classification_sources", "threshold_sources", "method_sources") }, - "functionalRanges": [ + "functionalClassifications": [ TEST_SAVED_FUNCTIONAL_RANGE_NORMAL, TEST_SAVED_FUNCTIONAL_RANGE_ABNORMAL, TEST_SAVED_FUNCTIONAL_RANGE_NOT_SPECIFIED, @@ -1562,12 +1612,31 @@ "modificationDate": date.today().isoformat(), } + 
+TEST_BRNICH_SCORE_CALIBRATION_CLASS_BASED = { + **TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED, + "functional_classifications": [ + TEST_FUNCTIONAL_CLASSIFICATION_NORMAL, + TEST_FUNCTIONAL_CLASSIFICATION_ABNORMAL, + TEST_FUNCTIONAL_CLASSIFICATION_NOT_SPECIFIED, + ], +} + +TEST_SAVED_BRNICH_SCORE_CALIBRATION_CLASS_BASED = { + **TEST_SAVED_BRNICH_SCORE_CALIBRATION_RANGE_BASED, + "functionalClassifications": [ + TEST_SAVED_FUNCTIONAL_CLASSIFICATION_NORMAL, + TEST_SAVED_FUNCTIONAL_CLASSIFICATION_ABNORMAL, + TEST_SAVED_FUNCTIONAL_CLASSIFICATION_NOT_SPECIFIED, + ], +} + TEST_PATHOGENICITY_SCORE_CALIBRATION = { "title": "Test Pathogenicity Score Calibration", "research_use_only": False, "baseline_score": TEST_BASELINE_SCORE, "baseline_score_description": "Test baseline score description", - "functional_ranges": [ + "functional_classifications": [ TEST_FUNCTIONAL_RANGE_NORMAL, TEST_FUNCTIONAL_RANGE_ABNORMAL, ], @@ -1582,9 +1651,9 @@ **{ camelize(k): v for k, v in TEST_PATHOGENICITY_SCORE_CALIBRATION.items() - if k not in ("functional_ranges", "classification_sources", "threshold_sources", "method_sources") + if k not in ("functional_classifications", "classification_sources", "threshold_sources", "method_sources") }, - "functionalRanges": [ + "functionalClassifications": [ TEST_SAVED_FUNCTIONAL_RANGE_NORMAL, TEST_SAVED_FUNCTIONAL_RANGE_ABNORMAL, ], diff --git a/tests/helpers/util/score_calibration.py b/tests/helpers/util/score_calibration.py index 8c432e8f..a535096c 100644 --- a/tests/helpers/util/score_calibration.py +++ b/tests/helpers/util/score_calibration.py @@ -6,16 +6,19 @@ from mavedb.models.score_calibration import ScoreCalibration from mavedb.models.user import User from mavedb.view_models.score_calibration import ScoreCalibrationCreate, ScoreCalibrationWithScoreSetUrn - -from tests.helpers.constants import TEST_BRNICH_SCORE_CALIBRATION +from tests.helpers.constants import TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED if TYPE_CHECKING: - from sqlalchemy.orm import Session from fastapi.testclient import TestClient + from sqlalchemy.orm import Session -async def create_test_score_calibration_in_score_set(db: "Session", score_set_urn: str, user: User) -> ScoreCalibration: - calibration_create = ScoreCalibrationCreate(**TEST_BRNICH_SCORE_CALIBRATION, score_set_urn=score_set_urn) +async def create_test_range_based_score_calibration_in_score_set( + db: "Session", score_set_urn: str, user: User +) -> ScoreCalibration: + calibration_create = ScoreCalibrationCreate( + **TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED, score_set_urn=score_set_urn + ) created_score_calibration = await create_score_calibration_in_score_set(db, calibration_create, user) assert created_score_calibration is not None diff --git a/tests/lib/annotation/test_annotate.py b/tests/lib/annotation/test_annotate.py index 9c1846cb..3a664d7e 100644 --- a/tests/lib/annotation/test_annotate.py +++ b/tests/lib/annotation/test_annotate.py @@ -1,8 +1,10 @@ from copy import deepcopy -from mavedb.lib.annotation.annotate import variant_study_result -from mavedb.lib.annotation.annotate import variant_functional_impact_statement -from mavedb.lib.annotation.annotate import variant_pathogenicity_evidence +from mavedb.lib.annotation.annotate import ( + variant_functional_impact_statement, + variant_pathogenicity_evidence, + variant_study_result, +) # The contents of these results are tested elsewhere. These tests focus on object structure. 
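# Not part of this patch: a hypothetical companion to the renamed helper above,
# sketched to show how a class based calibration could be created for tests. It
# assumes the TEST_BRNICH_SCORE_CALIBRATION_CLASS_BASED constant introduced in
# tests/helpers/constants.py and the same imports already used by
# tests/helpers/util/score_calibration.py.
async def create_test_class_based_score_calibration_in_score_set(
    db: "Session", score_set_urn: str, user: User
) -> ScoreCalibration:
    calibration_create = ScoreCalibrationCreate(
        **TEST_BRNICH_SCORE_CALIBRATION_CLASS_BASED, score_set_urn=score_set_urn
    )
    created_score_calibration = await create_score_calibration_in_score_set(db, calibration_create, user)

    assert created_score_calibration is not None
    return created_score_calibration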
@@ -81,8 +83,8 @@ def test_variant_pathogenicity_evidence_with_no_acmg_classifications( for ( calibration ) in mock_mapped_variant_with_pathogenicity_calibration_score_set.variant.score_set.score_calibrations: - calibration.functional_ranges = [ - {**deepcopy(r), "acmgClassification": None} for r in calibration.functional_ranges + calibration.functional_classifications = [ + {**deepcopy(r), "acmgClassification": None} for r in calibration.functional_classifications ] result = variant_pathogenicity_evidence(mock_mapped_variant_with_pathogenicity_calibration_score_set) diff --git a/tests/lib/annotation/test_classification.py b/tests/lib/annotation/test_classification.py index 39865241..bab685e7 100644 --- a/tests/lib/annotation/test_classification.py +++ b/tests/lib/annotation/test_classification.py @@ -87,7 +87,7 @@ def test_functional_classification_of_variant_without_ranges_in_primary_calibrat None, ) assert primary_cal is not None - primary_cal.functional_ranges = None + primary_cal.functional_classifications = None with pytest.raises(ValueError) as exc: functional_classification_of_variant(mock_mapped_variant_with_functional_calibration_score_set) @@ -171,7 +171,7 @@ def test_pathogenicity_classification_of_variant_without_ranges_in_primary_calib None, ) assert primary_cal is not None - primary_cal.functional_ranges = None + primary_cal.functional_classifications = None with pytest.raises(ValueError) as exc: pathogenicity_classification_of_variant(mock_mapped_variant_with_pathogenicity_calibration_score_set) @@ -194,7 +194,7 @@ def test_pathogenicity_classification_of_variant_without_acmg_classification_in_ None, ) assert primary_cal is not None - for r in primary_cal.functional_ranges: + for r in primary_cal.functional_classifications: r["acmgClassification"] = None criterion, strength = pathogenicity_classification_of_variant( @@ -217,7 +217,7 @@ def test_pathogenicity_classification_of_variant_with_invalid_evidence_strength_ None, ) assert primary_cal is not None - for r in primary_cal.functional_ranges: + for r in primary_cal.functional_classifications: r["acmgClassification"]["evidenceStrength"] = "MODERATE_PLUS" r["oddspathsRatio"] = None diff --git a/tests/lib/annotation/test_util.py b/tests/lib/annotation/test_util.py index afb19cbe..572a0489 100644 --- a/tests/lib/annotation/test_util.py +++ b/tests/lib/annotation/test_util.py @@ -1,17 +1,17 @@ from copy import deepcopy +from unittest.mock import patch + import pytest from mavedb.lib.annotation.exceptions import MappingDataDoesntExistException from mavedb.lib.annotation.util import ( - variation_from_mapped_variant, _can_annotate_variant_base_assumptions, _variant_score_calibrations_have_required_calibrations_and_ranges_for_annotation, can_annotate_variant_for_functional_statement, can_annotate_variant_for_pathogenicity_evidence, + variation_from_mapped_variant, ) - -from tests.helpers.constants import TEST_VALID_POST_MAPPED_VRS_ALLELE, TEST_SEQUENCE_LOCATION_ACCESSION -from unittest.mock import patch +from tests.helpers.constants import TEST_SEQUENCE_LOCATION_ACCESSION, TEST_VALID_POST_MAPPED_VRS_ALLELE @pytest.mark.parametrize( @@ -87,7 +87,7 @@ def test_score_range_check_returns_false_when_calibrations_present_with_empty_ra mock_mapped_variant = request.getfixturevalue(variant_fixture) for calibration in mock_mapped_variant.variant.score_set.score_calibrations: - calibration.functional_ranges = None + calibration.functional_classifications = None assert ( 
_variant_score_calibrations_have_required_calibrations_and_ranges_for_annotation(mock_mapped_variant, kind) @@ -101,11 +101,11 @@ def test_pathogenicity_range_check_returns_false_when_no_acmg_calibration( for ( calibration ) in mock_mapped_variant_with_pathogenicity_calibration_score_set.variant.score_set.score_calibrations: - acmg_classification_removed = [deepcopy(r) for r in calibration.functional_ranges] + acmg_classification_removed = [deepcopy(r) for r in calibration.functional_classifications] for fr in acmg_classification_removed: fr["acmgClassification"] = None - calibration.functional_ranges = acmg_classification_removed + calibration.functional_classifications = acmg_classification_removed assert ( _variant_score_calibrations_have_required_calibrations_and_ranges_for_annotation( @@ -121,10 +121,10 @@ def test_pathogenicity_range_check_returns_true_when_some_acmg_calibration( for ( calibration ) in mock_mapped_variant_with_pathogenicity_calibration_score_set.variant.score_set.score_calibrations: - acmg_classification_removed = [deepcopy(r) for r in calibration.functional_ranges] + acmg_classification_removed = [deepcopy(r) for r in calibration.functional_classifications] acmg_classification_removed[0]["acmgClassification"] = None - calibration.functional_ranges = acmg_classification_removed + calibration.functional_classifications = acmg_classification_removed assert ( _variant_score_calibrations_have_required_calibrations_and_ranges_for_annotation( @@ -193,7 +193,7 @@ def test_functional_range_check_returns_false_when_base_assumptions_fail(mock_ma assert result is False -def test_functional_range_check_returns_false_when_functional_ranges_check_fails(mock_mapped_variant): +def test_functional_range_check_returns_false_when_functional_classifications_check_fails(mock_mapped_variant): with patch( "mavedb.lib.annotation.util._variant_score_calibrations_have_required_calibrations_and_ranges_for_annotation", return_value=False, diff --git a/tests/lib/conftest.py b/tests/lib/conftest.py index efdb254b..c281f5eb 100644 --- a/tests/lib/conftest.py +++ b/tests/lib/conftest.py @@ -36,7 +36,7 @@ TEST_MINIMAL_MAPPED_VARIANT, TEST_MINIMAL_VARIANT, TEST_PUBMED_IDENTIFIER, - TEST_SAVED_BRNICH_SCORE_CALIBRATION, + TEST_SAVED_BRNICH_SCORE_CALIBRATION_RANGE_BASED, TEST_SAVED_PATHOGENICITY_SCORE_CALIBRATION, TEST_SAVED_TAXONOMY, TEST_SEQ_SCORESET, @@ -187,7 +187,7 @@ def mock_experiment(): def mock_functional_calibration(mock_user): calibration = mock.Mock(spec=ScoreCalibration) - for key, value in TEST_SAVED_BRNICH_SCORE_CALIBRATION.items(): + for key, value in TEST_SAVED_BRNICH_SCORE_CALIBRATION_RANGE_BASED.items(): setattr(calibration, decamelize(key), deepcopy(value)) calibration.primary = True # Ensure functional calibration is primary for tests diff --git a/tests/lib/test_acmg.py b/tests/lib/test_acmg.py index faef40f0..cc5dfac0 100644 --- a/tests/lib/test_acmg.py +++ b/tests/lib/test_acmg.py @@ -1,6 +1,10 @@ +# ruff: noqa: E402 + import pytest from sqlalchemy import select +pytest.importorskip("psycopg2") + from mavedb.lib.acmg import ( ACMGCriterion, StrengthOfEvidenceProvided, diff --git a/tests/lib/test_score_calibrations.py b/tests/lib/test_score_calibrations.py index 286072a3..db9f9c7b 100644 --- a/tests/lib/test_score_calibrations.py +++ b/tests/lib/test_score_calibrations.py @@ -2,6 +2,8 @@ import pytest +from mavedb.models.score_calibration_functional_classification import ScoreCalibrationFunctionalClassification + pytest.importorskip("psycopg2") from unittest import mock @@ -19,16 
+21,19 @@ modify_score_calibration, promote_score_calibration_to_primary, publish_score_calibration, + variants_for_functional_classification, ) from mavedb.models.enums.score_calibration_relation import ScoreCalibrationRelation from mavedb.models.score_calibration import ScoreCalibration from mavedb.models.score_set import ScoreSet from mavedb.models.user import User +from mavedb.models.variant import Variant from mavedb.view_models.score_calibration import ScoreCalibrationCreate, ScoreCalibrationModify from tests.helpers.constants import ( EXTRA_USER, TEST_BIORXIV_IDENTIFIER, - TEST_BRNICH_SCORE_CALIBRATION, + TEST_BRNICH_SCORE_CALIBRATION_CLASS_BASED, + TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED, TEST_CROSSREF_IDENTIFIER, TEST_LICENSE, TEST_PATHOGENICITY_SCORE_CALIBRATION, @@ -37,7 +42,7 @@ VALID_SCORE_SET_URN, ) from tests.helpers.util.contributor import add_contributor -from tests.helpers.util.score_calibration import create_test_score_calibration_in_score_set +from tests.helpers.util.score_calibration import create_test_range_based_score_calibration_in_score_set ################################################################################ # Tests for create_functional_classification @@ -49,26 +54,26 @@ def test_create_functional_classification_without_acmg_classification(setup_lib_ calibration = ScoreCalibration() # Create mock functional range without ACMG classification - MockFunctionalRangeCreate = create_model( - "MockFunctionalRangeCreate", + MockFunctionalClassificationCreate = create_model( + "MockFunctionalClassificationCreate", label=(str, "Test Label"), description=(str, "Test Description"), range=(list, [0.0, 1.0]), inclusive_lower_bound=(bool, True), inclusive_upper_bound=(bool, False), - classification=(str, "pathogenic"), + functional_classification=(str, "pathogenic"), oddspaths_ratio=(float, 1.5), positive_likelihood_ratio=(float, 2.0), acmg_classification=(type(None), None), ) - result = create_functional_classification(session, MockFunctionalRangeCreate(), calibration) + result = create_functional_classification(session, MockFunctionalClassificationCreate(), calibration) assert result.description == "Test Description" assert result.range == [0.0, 1.0] assert result.inclusive_lower_bound is True assert result.inclusive_upper_bound is False - assert result.classification == "pathogenic" + assert result.functional_classification == "pathogenic" assert result.oddspaths_ratio == 1.5 assert result.positive_likelihood_ratio == 2.0 assert result.acmg_classification is None @@ -92,20 +97,20 @@ def test_create_functional_classification_with_acmg_classification(setup_lib_db, ) # Create mock functional range with ACMG classification - MockFunctionalRangeCreate = create_model( - "MockFunctionalRangeCreate", + MockFunctionalClassificationCreate = create_model( + "MockFunctionalClassificationCreate", label=(str, "Test Label"), description=(str, "Test Description"), range=(list, [0.0, 1.0]), inclusive_lower_bound=(bool, True), inclusive_upper_bound=(bool, False), - classification=(str, "pathogenic"), + functional_classification=(str, "pathogenic"), oddspaths_ratio=(float, 1.5), positive_likelihood_ratio=(float, 2.0), acmg_classification=(MockAcmgClassification, MockAcmgClassification()), ) - functional_range_create = MockFunctionalRangeCreate() + functional_range_create = MockFunctionalClassificationCreate() with mock.patch("mavedb.lib.score_calibrations.find_or_create_acmg_classification") as mock_find_or_create: # Mock the ACMG classification with an ID @@ -132,7 +137,7 @@ def 
test_create_functional_classification_with_acmg_classification(setup_lib_db, assert result.range == [0.0, 1.0] assert result.inclusive_lower_bound is True assert result.inclusive_upper_bound is False - assert result.classification == "pathogenic" + assert result.functional_classification == "pathogenic" assert result.oddspaths_ratio == 1.5 assert result.positive_likelihood_ratio == 2.0 assert result.acmg_classification == mocked_persisted_acmg_classification @@ -153,20 +158,20 @@ def test_create_functional_classification_propagates_acmg_errors(setup_lib_db, s ) # Create mock functional range with ACMG classification - MockFunctionalRangeCreate = create_model( - "MockFunctionalRangeCreate", + MockFunctionalClassificationCreate = create_model( + "MockFunctionalClassificationCreate", label=(str, "Test Label"), description=(str, "Test Description"), range=(list, [0.0, 1.0]), inclusive_lower_bound=(bool, True), inclusive_upper_bound=(bool, False), - classification=(str, "pathogenic"), + functional_classification=(str, "pathogenic"), oddspaths_ratio=(float, 1.5), positive_likelihood_ratio=(float, 2.0), acmg_classification=(MockAcmgClassification, MockAcmgClassification()), ) - functional_range_create = MockFunctionalRangeCreate() + functional_range_create = MockFunctionalClassificationCreate() with ( pytest.raises(ValueError, match="ACMG error"), @@ -183,21 +188,21 @@ def test_create_functional_classification_does_not_commit_transaction(setup_lib_ calibration = ScoreCalibration() # Create mock functional range without ACMG classification - MockFunctionalRangeCreate = create_model( - "MockFunctionalRangeCreate", + MockFunctionalClassificationCreate = create_model( + "MockFunctionalClassificationCreate", label=(str, "Test Label"), description=(str, "Test Description"), range=(list, [0.0, 1.0]), inclusive_lower_bound=(bool, True), inclusive_upper_bound=(bool, False), - classification=(str, "pathogenic"), + functional_classification=(str, "pathogenic"), oddspaths_ratio=(float, 1.5), positive_likelihood_ratio=(float, 2.0), acmg_classification=(type(None), None), ) with mock.patch.object(session, "commit") as mock_commit: - create_functional_classification(session, MockFunctionalRangeCreate(), calibration) + create_functional_classification(session, MockFunctionalClassificationCreate(), calibration) mock_commit.assert_not_called() @@ -245,7 +250,7 @@ async def test_create_score_calibration_in_score_set_creates_score_calibration_w threshold_sources=(list, []), classification_sources=(list, []), method_sources=(list, []), - functional_ranges=(list, []), + functional_classifications=(list, []), ) calibration = await create_score_calibration_in_score_set(session, MockCalibrationCreate(), test_user) @@ -265,7 +270,7 @@ async def test_create_score_calibration_in_score_set_investigator_provided_set_w threshold_sources=(list, []), classification_sources=(list, []), method_sources=(list, []), - functional_ranges=(list, []), + functional_classifications=(list, []), ) calibration = await create_score_calibration_in_score_set(session, MockCalibrationCreate(), test_user) @@ -297,7 +302,7 @@ async def test_create_score_calibration_in_score_set_investigator_provided_set_w threshold_sources=(list, []), classification_sources=(list, []), method_sources=(list, []), - functional_ranges=(list, []), + functional_classifications=(list, []), ) calibration = await create_score_calibration_in_score_set(session, MockCalibrationCreate(), extra_user) @@ -318,7 +323,7 @@ async def 
test_create_score_calibration_in_score_set_investigator_provided_not_s threshold_sources=(list, []), classification_sources=(list, []), method_sources=(list, []), - functional_ranges=(list, []), + functional_classifications=(list, []), ) # invoke from a different user context @@ -357,7 +362,7 @@ async def test_create_score_calibration_creates_score_calibration_when_score_set threshold_sources=(list, []), classification_sources=(list, []), method_sources=(list, []), - functional_ranges=(list, []), + functional_classifications=(list, []), ) calibration = await create_score_calibration(session, MockCalibrationCreate(), test_user) @@ -392,7 +397,7 @@ async def test_create_score_calibration_propagates_errors_from_publication_find_ ), classification_sources=(list, []), method_sources=(list, []), - functional_ranges=(list, []), + functional_classifications=(list, []), ) with ( pytest.raises( @@ -445,7 +450,7 @@ async def test_create_score_calibration_publication_identifier_associations_crea threshold_sources=(list, []), classification_sources=(list, []), method_sources=(list, []), - functional_ranges=(list, []), + functional_classifications=(list, []), ) test_user = session.execute(select(User)).scalars().first() @@ -481,7 +486,7 @@ async def test_create_score_calibration_user_is_set_as_creator_and_modifier( threshold_sources=(list, []), classification_sources=(list, []), method_sources=(list, []), - functional_ranges=(list, []), + functional_classifications=(list, []), ) test_user = session.execute(select(User)).scalars().first() @@ -509,20 +514,32 @@ async def test_create_score_calibration_user_is_set_as_creator_and_modifier( ], indirect=["mock_publication_fetch"], ) +@pytest.mark.parametrize( + "valid_score_calibration_data", + [ + TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED, + TEST_BRNICH_SCORE_CALIBRATION_CLASS_BASED, + ], +) async def test_create_score_calibration_fully_valid_calibration( - setup_lib_db_with_score_set, session, create_function_to_call, score_set_urn, mock_publication_fetch + setup_lib_db_with_score_set, + session, + create_function_to_call, + score_set_urn, + mock_publication_fetch, + valid_score_calibration_data, ): - calibration_create = ScoreCalibrationCreate(**TEST_BRNICH_SCORE_CALIBRATION, score_set_urn=score_set_urn) + calibration_create = ScoreCalibrationCreate(**valid_score_calibration_data, score_set_urn=score_set_urn) test_user = session.execute(select(User)).scalars().first() calibration = await create_function_to_call(session, calibration_create, test_user) - for field in TEST_BRNICH_SCORE_CALIBRATION: + for field in valid_score_calibration_data: # Sources are tested elsewhere # XXX: Ranges are a pain to compare between JSONB and dict input, so are assumed correct - if "sources" not in field and "functional_ranges" not in field: - assert getattr(calibration, field) == TEST_BRNICH_SCORE_CALIBRATION[field] + if "sources" not in field and "functional_classifications" not in field: + assert getattr(calibration, field) == valid_score_calibration_data[field] ################################################################################ @@ -570,7 +587,7 @@ async def test_modify_score_calibration_modifies_score_calibration_when_score_se ): test_user = session.execute(select(User)).scalars().first() - existing_calibration = await create_test_score_calibration_in_score_set( + existing_calibration = await create_test_range_based_score_calibration_in_score_set( session, setup_lib_db_with_score_set.urn, test_user ) @@ -581,7 +598,7 @@ async def 
test_modify_score_calibration_modifies_score_calibration_when_score_se threshold_sources=(list, []), classification_sources=(list, []), method_sources=(list, []), - functional_ranges=(list, []), + functional_classifications=(list, []), ) modified_calibration = await modify_score_calibration( @@ -608,7 +625,7 @@ async def test_modify_score_calibration_clears_existing_publication_identifier_a ): test_user = session.execute(select(User)).scalars().first() - existing_calibration = await create_test_score_calibration_in_score_set( + existing_calibration = await create_test_range_based_score_calibration_in_score_set( session, setup_lib_db_with_score_set.urn, test_user ) @@ -618,7 +635,7 @@ async def test_modify_score_calibration_clears_existing_publication_identifier_a threshold_sources=(list, []), classification_sources=(list, []), method_sources=(list, []), - functional_ranges=(list, []), + functional_classifications=(list, []), ) mocked_calibration = MockCalibrationModify() @@ -655,7 +672,7 @@ async def test_modify_score_calibration_publication_identifier_associations_crea ): test_user = session.execute(select(User)).scalars().first() - existing_calibration = await create_test_score_calibration_in_score_set( + existing_calibration = await create_test_range_based_score_calibration_in_score_set( session, setup_lib_db_with_score_set.urn, test_user ) @@ -665,7 +682,7 @@ async def test_modify_score_calibration_publication_identifier_associations_crea threshold_sources=(list, []), classification_sources=(list, []), method_sources=(list, []), - functional_ranges=(list, []), + functional_classifications=(list, []), ) mocked_calibration = MockCalibrationModify() @@ -698,7 +715,7 @@ async def test_modify_score_calibration_retains_existing_publication_relationshi ): test_user = session.execute(select(User)).scalars().first() - existing_calibration = await create_test_score_calibration_in_score_set( + existing_calibration = await create_test_range_based_score_calibration_in_score_set( session, setup_lib_db_with_score_set.urn, test_user ) calibration_publication_relations = existing_calibration.publication_identifier_associations.copy() @@ -714,7 +731,7 @@ async def test_modify_score_calibration_retains_existing_publication_relationshi db_name=(str, pub_dict["db_name"]), identifier=(str, pub_dict["identifier"]), )() - for pub_dict in TEST_BRNICH_SCORE_CALIBRATION["threshold_sources"] + for pub_dict in TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED["threshold_sources"] ], ), classification_sources=( @@ -725,7 +742,7 @@ async def test_modify_score_calibration_retains_existing_publication_relationshi db_name=(str, pub_dict["db_name"]), identifier=(str, pub_dict["identifier"]), )() - for pub_dict in TEST_BRNICH_SCORE_CALIBRATION["classification_sources"] + for pub_dict in TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED["classification_sources"] ], ), method_sources=( @@ -736,10 +753,10 @@ async def test_modify_score_calibration_retains_existing_publication_relationshi db_name=(str, pub_dict["db_name"]), identifier=(str, pub_dict["identifier"]), )() - for pub_dict in TEST_BRNICH_SCORE_CALIBRATION["method_sources"] + for pub_dict in TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED["method_sources"] ], ), - functional_ranges=(list, []), + functional_classifications=(list, []), ) modified_calibration = await modify_score_calibration( @@ -766,7 +783,7 @@ async def test_modify_score_calibration_adds_new_publication_association( ): test_user = session.execute(select(User)).scalars().first() - existing_calibration = await 
create_test_score_calibration_in_score_set( + existing_calibration = await create_test_range_based_score_calibration_in_score_set( session, setup_lib_db_with_score_set.urn, test_user ) @@ -785,7 +802,7 @@ async def test_modify_score_calibration_adds_new_publication_association( ), classification_sources=(list, []), method_sources=(list, []), - functional_ranges=(list, []), + functional_classifications=(list, []), ) modified_calibration = await modify_score_calibration( @@ -816,7 +833,7 @@ async def test_modify_score_calibration_user_is_set_as_modifier( ): test_user = session.execute(select(User)).scalars().first() - existing_calibration = await create_test_score_calibration_in_score_set( + existing_calibration = await create_test_range_based_score_calibration_in_score_set( session, setup_lib_db_with_score_set.urn, test_user ) @@ -826,7 +843,7 @@ async def test_modify_score_calibration_user_is_set_as_modifier( threshold_sources=(list, []), classification_sources=(list, []), method_sources=(list, []), - functional_ranges=(list, []), + functional_classifications=(list, []), ) modify_user = session.execute(select(User).where(User.id != test_user.id)).scalars().first() @@ -866,7 +883,7 @@ async def test_modify_score_calibration_new_score_set(setup_lib_db_with_score_se session.refresh(new_containing_score_set) test_user = session.execute(select(User)).scalars().first() - existing_calibration = await create_test_score_calibration_in_score_set( + existing_calibration = await create_test_range_based_score_calibration_in_score_set( session, new_containing_score_set.urn, test_user ) @@ -876,7 +893,7 @@ async def test_modify_score_calibration_new_score_set(setup_lib_db_with_score_se threshold_sources=(list, []), classification_sources=(list, []), method_sources=(list, []), - functional_ranges=(list, []), + functional_classifications=(list, []), ) modified_calibration = await modify_score_calibration( @@ -897,12 +914,12 @@ async def test_modify_score_calibration_new_score_set(setup_lib_db_with_score_se ], indirect=["mock_publication_fetch"], ) -async def test_modify_score_calibration_clears_functional_ranges( +async def test_modify_score_calibration_clears_functional_classifications( setup_lib_db_with_score_set, session, mock_publication_fetch ): test_user = session.execute(select(User)).scalars().first() - existing_calibration = await create_test_score_calibration_in_score_set( + existing_calibration = await create_test_range_based_score_calibration_in_score_set( session, setup_lib_db_with_score_set.urn, test_user ) @@ -912,14 +929,14 @@ async def test_modify_score_calibration_clears_functional_ranges( threshold_sources=(list, []), classification_sources=(list, []), method_sources=(list, []), - functional_ranges=(list, []), + functional_classifications=(list, []), ) modified_calibration = await modify_score_calibration( session, existing_calibration, MockCalibrationModify(), test_user ) assert modified_calibration is not None - assert len(modified_calibration.functional_ranges) == 0 + assert len(modified_calibration.functional_classifications) == 0 @pytest.mark.asyncio @@ -938,7 +955,7 @@ async def test_modify_score_calibration_fully_valid_calibration( ): test_user = session.execute(select(User)).scalars().first() - existing_calibration = await create_test_score_calibration_in_score_set( + existing_calibration = await create_test_range_based_score_calibration_in_score_set( session, setup_lib_db_with_score_set.urn, test_user ) @@ -950,7 +967,7 @@ async def 
test_modify_score_calibration_fully_valid_calibration( for field in TEST_PATHOGENICITY_SCORE_CALIBRATION: # Sources are tested elsewhere # XXX: Ranges are a pain to compare between JSONB and dict input, so are assumed correct - if "sources" not in field and "functional_ranges" not in field: + if "sources" not in field and "functional_classifications" not in field: assert getattr(modified_calibration, field) == TEST_PATHOGENICITY_SCORE_CALIBRATION[field] @@ -975,7 +992,7 @@ async def test_cannot_publish_already_published_calibration( ): test_user = session.execute(select(User)).scalars().first() - existing_calibration = await create_test_score_calibration_in_score_set( + existing_calibration = await create_test_range_based_score_calibration_in_score_set( session, setup_lib_db_with_score_set.urn, test_user ) existing_calibration.private = False @@ -1003,7 +1020,7 @@ async def test_publish_score_calibration_marks_calibration_public( ): test_user = session.execute(select(User)).scalars().first() - existing_calibration = await create_test_score_calibration_in_score_set( + existing_calibration = await create_test_range_based_score_calibration_in_score_set( session, setup_lib_db_with_score_set.urn, test_user ) assert existing_calibration.private is True @@ -1028,7 +1045,7 @@ async def test_publish_score_calibration_user_is_set_as_modifier( ): test_user = session.execute(select(User)).scalars().first() - existing_calibration = await create_test_score_calibration_in_score_set( + existing_calibration = await create_test_range_based_score_calibration_in_score_set( session, setup_lib_db_with_score_set.urn, test_user ) @@ -1058,7 +1075,7 @@ async def test_publish_score_calibration_user_is_set_as_modifier( async def test_cannot_promote_already_primary_calibration(setup_lib_db_with_score_set, session, mock_publication_fetch): test_user = session.execute(select(User)).scalars().first() - existing_calibration = await create_test_score_calibration_in_score_set( + existing_calibration = await create_test_range_based_score_calibration_in_score_set( session, setup_lib_db_with_score_set.urn, test_user ) existing_calibration.primary = True @@ -1086,7 +1103,7 @@ async def test_cannot_promote_calibration_when_calibration_is_research_use_only( ): test_user = session.execute(select(User)).scalars().first() - existing_calibration = await create_test_score_calibration_in_score_set( + existing_calibration = await create_test_range_based_score_calibration_in_score_set( session, setup_lib_db_with_score_set.urn, test_user ) existing_calibration.research_use_only = True @@ -1114,7 +1131,7 @@ async def test_cannot_promote_calibration_when_calibration_is_private( ): test_user = session.execute(select(User)).scalars().first() - existing_calibration = await create_test_score_calibration_in_score_set( + existing_calibration = await create_test_range_based_score_calibration_in_score_set( session, setup_lib_db_with_score_set.urn, test_user ) existing_calibration.private = True @@ -1142,10 +1159,10 @@ async def test_cannot_promote_calibration_when_another_primary_exists( ): test_user = session.execute(select(User)).scalars().first() - existing_primary_calibration = await create_test_score_calibration_in_score_set( + existing_primary_calibration = await create_test_range_based_score_calibration_in_score_set( session, setup_lib_db_with_score_set.urn, test_user ) - existing_calibration = await create_test_score_calibration_in_score_set( + existing_calibration = await create_test_range_based_score_calibration_in_score_set( session, 
setup_lib_db_with_score_set.urn, test_user ) existing_primary_calibration.private = False @@ -1179,7 +1196,7 @@ async def test_promote_score_calibration_to_primary_marks_calibration_primary( ): test_user = session.execute(select(User)).scalars().first() - existing_calibration = await create_test_score_calibration_in_score_set( + existing_calibration = await create_test_range_based_score_calibration_in_score_set( session, setup_lib_db_with_score_set.urn, test_user ) existing_calibration.private = False @@ -1208,10 +1225,10 @@ async def test_promote_score_calibration_to_primary_demotes_existing_primary_whe ): test_user = session.execute(select(User)).scalars().first() - existing_primary_calibration = await create_test_score_calibration_in_score_set( + existing_primary_calibration = await create_test_range_based_score_calibration_in_score_set( session, setup_lib_db_with_score_set.urn, test_user ) - existing_calibration = await create_test_score_calibration_in_score_set( + existing_calibration = await create_test_range_based_score_calibration_in_score_set( session, setup_lib_db_with_score_set.urn, test_user ) existing_primary_calibration.private = False @@ -1251,7 +1268,7 @@ async def test_promote_score_calibration_to_primary_user_is_set_as_modifier( ): test_user = session.execute(select(User)).scalars().first() - existing_calibration = await create_test_score_calibration_in_score_set( + existing_calibration = await create_test_range_based_score_calibration_in_score_set( session, setup_lib_db_with_score_set.urn, test_user ) existing_calibration.private = False @@ -1285,10 +1302,10 @@ async def test_promote_score_calibration_to_primary_demoted_existing_primary_use ): test_user = session.execute(select(User)).scalars().first() - existing_primary_calibration = await create_test_score_calibration_in_score_set( + existing_primary_calibration = await create_test_range_based_score_calibration_in_score_set( session, setup_lib_db_with_score_set.urn, test_user ) - existing_calibration = await create_test_score_calibration_in_score_set( + existing_calibration = await create_test_range_based_score_calibration_in_score_set( session, setup_lib_db_with_score_set.urn, test_user ) existing_primary_calibration.private = False @@ -1334,7 +1351,7 @@ async def test_promote_score_calibration_to_primary_demoted_existing_primary_use async def test_cannot_demote_non_primary_calibration(setup_lib_db_with_score_set, session, mock_publication_fetch): test_user = session.execute(select(User)).scalars().first() - existing_calibration = await create_test_score_calibration_in_score_set( + existing_calibration = await create_test_range_based_score_calibration_in_score_set( session, setup_lib_db_with_score_set.urn, test_user ) existing_calibration.primary = False @@ -1362,7 +1379,7 @@ async def test_demote_score_calibration_from_primary_marks_calibration_non_prima ): test_user = session.execute(select(User)).scalars().first() - existing_calibration = await create_test_score_calibration_in_score_set( + existing_calibration = await create_test_range_based_score_calibration_in_score_set( session, setup_lib_db_with_score_set.urn, test_user ) existing_calibration.primary = True @@ -1391,7 +1408,7 @@ async def test_demote_score_calibration_from_primary_user_is_set_as_modifier( ): test_user = session.execute(select(User)).scalars().first() - existing_calibration = await create_test_score_calibration_in_score_set( + existing_calibration = await create_test_range_based_score_calibration_in_score_set( session, 
setup_lib_db_with_score_set.urn, test_user ) existing_calibration.primary = True @@ -1425,7 +1442,7 @@ async def test_demote_score_calibration_from_primary_user_is_set_as_modifier( async def test_cannot_delete_primary_calibration(setup_lib_db_with_score_set, session, mock_publication_fetch): test_user = session.execute(select(User)).scalars().first() - existing_calibration = await create_test_score_calibration_in_score_set( + existing_calibration = await create_test_range_based_score_calibration_in_score_set( session, setup_lib_db_with_score_set.urn, test_user ) existing_calibration.primary = True @@ -1453,7 +1470,7 @@ async def test_delete_score_calibration_deletes_calibration( ): test_user = session.execute(select(User)).scalars().first() - existing_calibration = await create_test_score_calibration_in_score_set( + existing_calibration = await create_test_range_based_score_calibration_in_score_set( session, setup_lib_db_with_score_set.urn, test_user ) calibration_id = existing_calibration.id @@ -1463,3 +1480,415 @@ async def test_delete_score_calibration_deletes_calibration( with pytest.raises(NoResultFound, match="No row was found when one was required"): session.execute(select(ScoreCalibration).where(ScoreCalibration.id == calibration_id)).scalars().one() + + +################################################################################ +# Tests for variants_for_functional_classification +################################################################################ + + +def test_variants_for_functional_classification_returns_empty_list_when_range_is_none(setup_lib_db, session): + mock_calibration = mock.Mock(spec=ScoreCalibration) + mock_functional_calibration = mock.Mock(spec=ScoreCalibrationFunctionalClassification) + mock_functional_calibration.range = None + mock_functional_calibration.calibration = mock_calibration + + result = variants_for_functional_classification(session, mock_functional_calibration) + + assert result == [] + + +def test_variants_for_functional_classification_returns_empty_list_when_range_is_empty_list(setup_lib_db, session): + mock_calibration = mock.Mock(spec=ScoreCalibration) + mock_functional_calibration = mock.Mock(spec=ScoreCalibrationFunctionalClassification) + mock_functional_calibration.range = [] + mock_functional_calibration.calibration = mock_calibration + + result = variants_for_functional_classification(session, mock_functional_calibration) + + assert result == [] + + +def test_variants_for_functional_classification_python_filtering_with_valid_variants( + setup_lib_db_with_score_set, session +): + variant_1 = Variant( + data={"score_data": {"score": 0.5}}, + score_set_id=setup_lib_db_with_score_set.id, + urn="urn:mavedb:variant-1", + ) + variant_2 = Variant( + data={"score_data": {"score": 1.5}}, + score_set_id=setup_lib_db_with_score_set.id, + urn="urn:mavedb:variant-2", + ) + variant_3 = Variant( + data={"score_data": {"score": 2.5}}, + score_set_id=setup_lib_db_with_score_set.id, + urn="urn:mavedb:variant-3", + ) + + session.add_all([variant_1, variant_2, variant_3]) + session.commit() + + mock_calibration = mock.Mock(spec=ScoreCalibration) + mock_calibration.score_set_id = setup_lib_db_with_score_set.id + mock_functional_classification = mock.Mock(spec=ScoreCalibrationFunctionalClassification) + mock_functional_classification.range = [1.0, 2.0] + mock_functional_classification.calibration = mock_calibration + mock_functional_classification.score_is_contained_in_range = mock.Mock(side_effect=lambda x: 1.0 <= x <= 2.0) + + result = 
variants_for_functional_classification(session, mock_functional_classification, use_sql=False) + + assert len(result) == 1 + assert result[0].data["score_data"]["score"] == 1.5 + + +def test_variants_for_functional_classification_python_filtering_skips_variants_without_score_data( + setup_lib_db_with_score_set, session, mock_functional_calibration +): + # Create variant without score_data + variant_without_score_data = Variant( + data={"other_data": {"value": 1.0}}, + score_set_id=setup_lib_db_with_score_set.id, + urn="urn:mavedb:variant-1", + ) + + # Create variant with valid score + variant_with_score = Variant( + data={"score_data": {"score": 1.5}}, + score_set_id=setup_lib_db_with_score_set.id, + urn="urn:mavedb:variant-2", + ) + + session.add_all([variant_without_score_data, variant_with_score]) + session.commit() + + mock_calibration = mock.Mock(spec=ScoreCalibration) + mock_calibration.score_set_id = setup_lib_db_with_score_set.id + mock_functional_classification = mock.Mock(spec=ScoreCalibrationFunctionalClassification) + mock_functional_classification.range = [1.0, 2.0] + mock_functional_classification.calibration = mock_calibration + mock_functional_classification.score_is_contained_in_range = mock.Mock(side_effect=lambda x: 1.0 <= x <= 2.0) + + result = variants_for_functional_classification(session, mock_functional_classification, use_sql=False) + + assert len(result) == 1 + assert result[0].data["score_data"]["score"] == 1.5 + + +def test_variants_for_functional_classification_python_filtering_skips_variants_with_non_dict_score_data( + setup_lib_db_with_score_set, session +): + # Create variant with non-dict score_data + variant_invalid_score_data = Variant( + data={"score_data": "not_a_dict"}, + score_set_id=setup_lib_db_with_score_set.id, + urn="urn:mavedb:variant-1", + ) + + # Create variant with valid score + variant_with_score = Variant( + data={"score_data": {"score": 1.5}}, + score_set_id=setup_lib_db_with_score_set.id, + urn="urn:mavedb:variant-2", + ) + + session.add_all([variant_invalid_score_data, variant_with_score]) + session.commit() + + mock_calibration = mock.Mock(spec=ScoreCalibration) + mock_calibration.score_set_id = setup_lib_db_with_score_set.id + mock_functional_classification = mock.Mock(spec=ScoreCalibrationFunctionalClassification) + mock_functional_classification.range = [1.0, 2.0] + mock_functional_classification.calibration = mock_calibration + mock_functional_classification.score_is_contained_in_range = mock.Mock(side_effect=lambda x: 1.0 <= x <= 2.0) + + result = variants_for_functional_classification(session, mock_functional_classification, use_sql=False) + + assert len(result) == 1 + assert result[0].data["score_data"]["score"] == 1.5 + + +def test_variants_for_functional_classification_python_filtering_skips_variants_with_none_score( + setup_lib_db_with_score_set, session +): + # Create variant with None score + variant_none_score = Variant( + data={"score_data": {"score": None}}, + score_set_id=setup_lib_db_with_score_set.id, + urn="urn:mavedb:variant-1", + ) + + # Create variant with valid score + variant_with_score = Variant( + data={"score_data": {"score": 1.5}}, + score_set_id=setup_lib_db_with_score_set.id, + urn="urn:mavedb:variant-2", + ) + + session.add_all([variant_none_score, variant_with_score]) + session.commit() + + mock_calibration = mock.Mock(spec=ScoreCalibration) + mock_calibration.score_set_id = setup_lib_db_with_score_set.id + mock_functional_classification = mock.Mock(spec=ScoreCalibrationFunctionalClassification) + 
mock_functional_classification.range = [1.0, 2.0] + mock_functional_classification.calibration = mock_calibration + mock_functional_classification.score_is_contained_in_range = mock.Mock(side_effect=lambda x: 1.0 <= x <= 2.0) + + result = variants_for_functional_classification(session, mock_functional_classification, use_sql=False) + + assert len(result) == 1 + assert result[0].data["score_data"]["score"] == 1.5 + + +def test_variants_for_functional_classification_python_filtering_skips_variants_with_non_numeric_score( + setup_lib_db_with_score_set, session +): + # Create variant with non-numeric score + variant_string_score = Variant( + data={"score_data": {"score": "not_a_number"}}, + score_set_id=setup_lib_db_with_score_set.id, + urn="urn:mavedb:variant-1", + ) + + # Create variant with valid score + variant_with_score = Variant( + data={"score_data": {"score": 1.5}}, + score_set_id=setup_lib_db_with_score_set.id, + urn="urn:mavedb:variant-2", + ) + + session.add_all([variant_string_score, variant_with_score]) + session.commit() + + mock_calibration = mock.Mock(spec=ScoreCalibration) + mock_calibration.score_set_id = setup_lib_db_with_score_set.id + mock_functional_classification = mock.Mock(spec=ScoreCalibrationFunctionalClassification) + mock_functional_classification.range = [1.0, 2.0] + mock_functional_classification.calibration = mock_calibration + mock_functional_classification.score_is_contained_in_range = mock.Mock(side_effect=lambda x: 1.0 <= x <= 2.0) + + result = variants_for_functional_classification(session, mock_functional_classification, use_sql=False) + + assert len(result) == 1 + assert result[0].data["score_data"]["score"] == 1.5 + + +def test_variants_for_functional_classification_python_filtering_skips_variants_with_non_dict_data( + setup_lib_db_with_score_set, session +): + # Create variant with non-dict data + variant_invalid_data = Variant( + data="not_a_dict", score_set_id=setup_lib_db_with_score_set.id, urn="urn:mavedb:variant-1" + ) + + # Create variant with valid score + variant_with_score = Variant( + data={"score_data": {"score": 1.5}}, + score_set_id=setup_lib_db_with_score_set.id, + urn="urn:mavedb:variant-2", + ) + + session.add_all([variant_invalid_data, variant_with_score]) + session.commit() + + mock_calibration = mock.Mock(spec=ScoreCalibration) + mock_calibration.score_set_id = setup_lib_db_with_score_set.id + mock_functional_classification = mock.Mock(spec=ScoreCalibrationFunctionalClassification) + mock_functional_classification.range = [1.0, 2.0] + mock_functional_classification.calibration = mock_calibration + mock_functional_classification.score_is_contained_in_range = mock.Mock(side_effect=lambda x: 1.0 <= x <= 2.0) + + result = variants_for_functional_classification(session, mock_functional_classification, use_sql=False) + + assert len(result) == 1 + assert result[0].data["score_data"]["score"] == 1.5 + + +@pytest.mark.parametrize( + "use_sql", + [True, False], +) +def test_variants_for_functional_classification_filters_by_score_range(setup_lib_db_with_score_set, session, use_sql): + # Create variants with different scores + variants = [] + scores = [0.5, 1.0, 1.5, 2.0, 2.5] + for i, score in enumerate(scores): + variant = Variant( + data={"score_data": {"score": score}}, + score_set_id=setup_lib_db_with_score_set.id, + urn=f"urn:mavedb:variant-{i}", + ) + variants.append(variant) + + session.add_all(variants) + session.commit() + + mock_calibration = mock.Mock(spec=ScoreCalibration) + mock_calibration.score_set_id = 
setup_lib_db_with_score_set.id + mock_functional_classification = mock.Mock(spec=ScoreCalibrationFunctionalClassification) + mock_functional_classification.range = [1.0, 2.0] + mock_functional_classification.inclusive_lower_bound = True + mock_functional_classification.inclusive_upper_bound = True + mock_functional_classification.calibration = mock_calibration + mock_functional_classification.score_is_contained_in_range = mock.Mock(side_effect=lambda x: 1.0 <= x <= 2.0) + + with mock.patch("mavedb.lib.score_calibrations.inf_or_float", side_effect=lambda x, lower: float(x)): + result = variants_for_functional_classification(session, mock_functional_classification, use_sql=use_sql) + + # Should return variants with scores 1.0, 1.5, 2.0 + result_scores = [v.data["score_data"]["score"] for v in result] + expected_scores = [1.0, 1.5, 2.0] + assert sorted(result_scores) == sorted(expected_scores) + + +def test_variants_for_functional_classification_sql_fallback_on_exception(setup_lib_db_with_score_set, session): + # Create a variant + variant = Variant( + data={"score_data": {"score": 1.5}}, + score_set_id=setup_lib_db_with_score_set.id, + urn="urn:mavedb:variant-1", + ) + session.add(variant) + session.commit() + + mock_calibration = mock.Mock(spec=ScoreCalibration) + mock_calibration.score_set_id = setup_lib_db_with_score_set.id + mock_functional_classification = mock.Mock(spec=ScoreCalibrationFunctionalClassification) + mock_functional_classification.range = [1.0, 2.0] + mock_functional_classification.calibration = mock_calibration + mock_functional_classification.score_is_contained_in_range = mock.Mock(side_effect=lambda x: 1.0 <= x <= 2.0) + + # Mock db.execute to raise an exception during SQL execution + with mock.patch.object( + session, + "execute", + side_effect=[ + Exception("SQL error"), + session.execute(select(Variant).where(Variant.score_set_id == setup_lib_db_with_score_set.id)), + ], + ) as mocked_execute: + result = variants_for_functional_classification(session, mock_functional_classification, use_sql=True) + mocked_execute.assert_called() + + # Should fall back to Python filtering and return the matching variant + assert len(result) == 1 + assert result[0].data["score_data"]["score"] == 1.5 + + +def test_variants_for_functional_classification_sql_with_infinite_bound(setup_lib_db_with_score_set, session): + # Create variants with different scores + variants = [] + scores = [0.5, 1.5, 2.5] + for i, score in enumerate(scores): + variant = Variant( + data={"score_data": {"score": score}}, + score_set_id=setup_lib_db_with_score_set.id, + urn=f"urn:mavedb:variant-{i}", + ) + variants.append(variant) + + session.add_all(variants) + session.commit() + + # Mock functional classification with infinite upper bound + mock_calibration = mock.Mock(spec=ScoreCalibration) + mock_calibration.score_set_id = setup_lib_db_with_score_set.id + mock_functional_classification = mock.Mock(spec=ScoreCalibrationFunctionalClassification) + mock_functional_classification.range = [1.0, float("inf")] + mock_functional_classification.calibration = mock_calibration + mock_functional_classification.inclusive_lower_bound = True + mock_functional_classification.inclusive_upper_bound = False + + with mock.patch( + "mavedb.lib.score_calibrations.inf_or_float", + side_effect=lambda x, lower: float("inf") if x == float("inf") else float(x), + ): + with mock.patch("math.isinf", side_effect=lambda x: x == float("inf")): + result = variants_for_functional_classification(session, mock_functional_classification, 
use_sql=True) + + # Should return variants with scores >= 1.0 + result_scores = [v.data["score_data"]["score"] for v in result] + expected_scores = [1.5, 2.5] + assert sorted(result_scores) == sorted(expected_scores) + + +def test_variants_for_functional_classification_sql_with_exclusive_bounds(setup_lib_db_with_score_set, session): + # Create variants with boundary scores + variants = [] + scores = [1.0, 1.5, 2.0] + for i, score in enumerate(scores): + variant = Variant( + data={"score_data": {"score": score}}, + score_set_id=setup_lib_db_with_score_set.id, + urn=f"urn:mavedb:variant-{i}", + ) + variants.append(variant) + + session.add_all(variants) + session.commit() + + # Mock functional classification with exclusive bounds + mock_calibration = mock.Mock(spec=ScoreCalibration) + mock_calibration.score_set_id = setup_lib_db_with_score_set.id + mock_functional_classification = mock.Mock(spec=ScoreCalibrationFunctionalClassification) + mock_functional_classification.range = [1.0, 2.0] + mock_functional_classification.calibration = mock_calibration + mock_functional_classification.inclusive_lower_bound = False + mock_functional_classification.inclusive_upper_bound = False + + with mock.patch("mavedb.lib.score_calibrations.inf_or_float", side_effect=lambda x, lower: float(x)): + result = variants_for_functional_classification(session, mock_functional_classification, use_sql=True) + + # Should return only variant with score 1.5 (exclusive bounds) + result_scores = [v.data["score_data"]["score"] for v in result] + assert result_scores == [1.5] + + +def test_variants_for_functional_classification_only_returns_variants_from_correct_score_set( + setup_lib_db_with_score_set, session +): + # Create another score set + other_score_set = ScoreSet( + urn="urn:mavedb:00000000-B-0", + experiment_id=setup_lib_db_with_score_set.experiment_id, + licence_id=TEST_LICENSE["id"], + title="Other Score Set", + method_text="Other method", + abstract_text="Other abstract", + short_description="Other description", + created_by=setup_lib_db_with_score_set.created_by, + modified_by=setup_lib_db_with_score_set.modified_by, + extra_metadata={}, + ) + session.add(other_score_set) + session.commit() + + # Create variants in both score sets + variant_in_target_set = Variant( + data={"score_data": {"score": 1.5}}, + score_set_id=setup_lib_db_with_score_set.id, + urn="urn:mavedb:variant-target", + ) + variant_in_other_set = Variant( + data={"score_data": {"score": 1.5}}, score_set_id=other_score_set.id, urn="urn:mavedb:variant-other" + ) + + session.add_all([variant_in_target_set, variant_in_other_set]) + session.commit() + + mock_calibration = mock.Mock(spec=ScoreCalibration) + mock_calibration.score_set_id = setup_lib_db_with_score_set.id + mock_functional_classification = mock.Mock(spec=ScoreCalibrationFunctionalClassification) + mock_functional_classification.range = [1.0, 2.0] + mock_functional_classification.calibration = mock_calibration + mock_functional_classification.score_is_contained_in_range = mock.Mock(side_effect=lambda x: 1.0 <= x <= 2.0) + + result = variants_for_functional_classification(session, mock_functional_classification, use_sql=False) + + # Should only return variant from the target score set + assert len(result) == 1 + assert result[0].score_set_id == setup_lib_db_with_score_set.id + assert result[0].urn == "urn:mavedb:variant-target" diff --git a/tests/routers/test_mapped_variants.py b/tests/routers/test_mapped_variants.py index 81bd62e1..b071dcfd 100644 --- 
a/tests/routers/test_mapped_variants.py +++ b/tests/routers/test_mapped_variants.py @@ -21,7 +21,11 @@ from mavedb.models.score_set import ScoreSet as ScoreSetDbModel from mavedb.models.variant import Variant from mavedb.view_models.mapped_variant import SavedMappedVariant -from tests.helpers.constants import TEST_BIORXIV_IDENTIFIER, TEST_BRNICH_SCORE_CALIBRATION, TEST_PUBMED_IDENTIFIER +from tests.helpers.constants import ( + TEST_BIORXIV_IDENTIFIER, + TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED, + TEST_PUBMED_IDENTIFIER, +) from tests.helpers.util.common import deepcamelize from tests.helpers.util.experiment import create_experiment from tests.helpers.util.score_calibration import create_publish_and_promote_score_calibration @@ -209,7 +213,9 @@ def test_show_mapped_variant_functional_impact_statement( experiment["urn"], data_files / "scores.csv", ) - create_publish_and_promote_score_calibration(client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION)) + create_publish_and_promote_score_calibration( + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) + ) response = client.get(f"/api/v1/mapped-variants/{quote_plus(score_set['urn'] + '#1')}/va/functional-impact") response_data = response.json() @@ -288,7 +294,9 @@ def test_cannot_show_mapped_variant_functional_impact_statement_when_no_mapping_ experiment["urn"], data_files / "scores.csv", ) - create_publish_and_promote_score_calibration(client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION)) + create_publish_and_promote_score_calibration( + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) + ) item = session.scalar(select(MappedVariant).join(Variant).where(Variant.urn == f'{score_set["urn"]}#1')) assert item is not None @@ -352,7 +360,9 @@ def test_show_mapped_variant_clinical_evidence_line( experiment["urn"], data_files / "scores.csv", ) - create_publish_and_promote_score_calibration(client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION)) + create_publish_and_promote_score_calibration( + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) + ) response = client.get(f"/api/v1/mapped-variants/{quote_plus(score_set['urn'] + '#2')}/va/clinical-evidence") response_data = response.json() @@ -431,7 +441,9 @@ def test_cannot_show_mapped_variant_clinical_evidence_line_when_no_mapping_data_ experiment["urn"], data_files / "scores.csv", ) - create_publish_and_promote_score_calibration(client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION)) + create_publish_and_promote_score_calibration( + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) + ) item = session.scalar(select(MappedVariant).join(Variant).where(Variant.urn == f'{score_set["urn"]}#1')) assert item is not None diff --git a/tests/routers/test_score_calibrations.py b/tests/routers/test_score_calibrations.py index 5235decb..9949b639 100644 --- a/tests/routers/test_score_calibrations.py +++ b/tests/routers/test_score_calibrations.py @@ -16,7 +16,7 @@ from tests.helpers.constants import ( EXTRA_USER, TEST_BIORXIV_IDENTIFIER, - TEST_BRNICH_SCORE_CALIBRATION, + TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED, TEST_PATHOGENICITY_SCORE_CALIBRATION, TEST_PUBMED_IDENTIFIER, VALID_CALIBRATION_URN, @@ -67,7 +67,7 @@ def test_anonymous_user_cannot_get_score_calibration_when_private( data_files / "scores.csv", ) calibration = create_test_score_calibration_in_score_set_via_client( - client, score_set["urn"], 
deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) with DependencyOverrider(anonymous_app_overrides): @@ -100,7 +100,7 @@ def test_other_user_cannot_get_score_calibration_when_private( data_files / "scores.csv", ) calibration = create_test_score_calibration_in_score_set_via_client( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) with DependencyOverrider(extra_user_app_overrides): @@ -133,7 +133,7 @@ def test_creating_user_can_get_score_calibration_when_private( data_files / "scores.csv", ) calibration = create_test_score_calibration_in_score_set_via_client( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) response = client.get(f"/api/v1/score-calibrations/{calibration['urn']}") @@ -166,7 +166,7 @@ def test_contributing_user_can_get_score_calibration_when_private_and_investigat data_files / "scores.csv", ) calibration = create_test_score_calibration_in_score_set_via_client( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) add_contributor( @@ -218,7 +218,7 @@ def test_contributing_user_cannot_get_score_calibration_when_private_and_not_inv with DependencyOverrider(admin_app_overrides): calibration = create_test_score_calibration_in_score_set_via_client( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) add_contributor( @@ -260,7 +260,7 @@ def test_admin_user_can_get_score_calibration_when_private( data_files / "scores.csv", ) calibration = create_test_score_calibration_in_score_set_via_client( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) with DependencyOverrider(admin_app_overrides): @@ -294,7 +294,7 @@ def test_anonymous_user_can_get_score_calibration_when_public( data_files / "scores.csv", ) calibration = create_test_score_calibration_in_score_set_via_client( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) calibration = publish_test_score_calibration_via_client(client, calibration["urn"]) @@ -329,7 +329,7 @@ def test_other_user_can_get_score_calibration_when_public( data_files / "scores.csv", ) calibration = create_test_score_calibration_in_score_set_via_client( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) calibration = publish_test_score_calibration_via_client(client, calibration["urn"]) @@ -364,7 +364,7 @@ def test_creating_user_can_get_score_calibration_when_public( data_files / "scores.csv", ) calibration = create_test_score_calibration_in_score_set_via_client( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) calibration = publish_test_score_calibration_via_client(client, calibration["urn"]) @@ -398,7 +398,7 @@ def test_contributing_user_can_get_score_calibration_when_public( data_files / "scores.csv", ) calibration = 
create_test_score_calibration_in_score_set_via_client( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) calibration = publish_test_score_calibration_via_client(client, calibration["urn"]) @@ -442,7 +442,7 @@ def test_admin_user_can_get_score_calibration_when_public( data_files / "scores.csv", ) calibration = create_test_score_calibration_in_score_set_via_client( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) calibration = publish_test_score_calibration_via_client(client, calibration["urn"]) @@ -511,7 +511,7 @@ def test_anonymous_user_cannot_get_score_calibrations_for_score_set_when_private data_files / "scores.csv", ) create_test_score_calibration_in_score_set_via_client( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) with DependencyOverrider(anonymous_app_overrides): @@ -544,7 +544,7 @@ def test_other_user_cannot_get_score_calibrations_for_score_set_when_private( data_files / "scores.csv", ) create_test_score_calibration_in_score_set_via_client( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) with DependencyOverrider(extra_user_app_overrides): @@ -577,7 +577,7 @@ def test_anonymous_user_cannot_get_score_calibrations_for_score_set_when_publish data_files / "scores.csv", ) create_test_score_calibration_in_score_set_via_client( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) with patch.object(ArqRedis, "enqueue_job", return_value=None): @@ -613,7 +613,7 @@ def test_other_user_cannot_get_score_calibrations_for_score_set_when_published_b data_files / "scores.csv", ) create_test_score_calibration_in_score_set_via_client( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) with patch.object(ArqRedis, "enqueue_job", return_value=None): @@ -649,7 +649,7 @@ def test_creating_user_can_get_score_calibrations_for_score_set_when_private( data_files / "scores.csv", ) calibration = create_test_score_calibration_in_score_set_via_client( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) response = client.get(f"/api/v1/score-calibrations/score-set/{score_set['urn']}") @@ -692,11 +692,11 @@ def test_contributing_user_can_get_investigator_provided_score_calibrations_for_ with DependencyOverrider(admin_app_overrides): create_test_score_calibration_in_score_set_via_client( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) investigator_calibration = create_test_score_calibration_in_score_set_via_client( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) add_contributor( @@ -740,7 +740,7 @@ def test_admin_user_can_get_score_calibrations_for_score_set_when_private( data_files / "scores.csv", ) calibration = 
create_test_score_calibration_in_score_set_via_client( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) with DependencyOverrider(admin_app_overrides): @@ -775,12 +775,12 @@ def test_anonymous_user_can_get_score_calibrations_for_score_set_when_public( data_files / "scores.csv", ) calibration = create_test_score_calibration_in_score_set_via_client( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) # add another calibration that will remain private. The anonymous user should not see this one calibration = create_test_score_calibration_in_score_set_via_client( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) publish_test_score_calibration_via_client(client, calibration["urn"]) @@ -820,12 +820,12 @@ def test_other_user_can_get_score_calibrations_for_score_set_when_public( data_files / "scores.csv", ) calibration = create_test_score_calibration_in_score_set_via_client( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) # add another calibration that will remain private. The other user should not see this one create_test_score_calibration_in_score_set_via_client( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) publish_test_score_calibration_via_client(client, calibration["urn"]) @@ -865,12 +865,12 @@ def test_anonymous_user_cannot_get_score_calibrations_for_score_set_when_calibra data_files / "scores.csv", ) calibration = create_test_score_calibration_in_score_set_via_client( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) # add another calibration that will remain private. The anonymous user should not see this one calibration = create_test_score_calibration_in_score_set_via_client( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) publish_test_score_calibration_via_client(client, calibration["urn"]) @@ -905,12 +905,12 @@ def test_other_user_cannot_get_score_calibrations_for_score_set_when_calibration data_files / "scores.csv", ) calibration = create_test_score_calibration_in_score_set_via_client( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) # add another calibration that will remain private. 
The other user should not see this one create_test_score_calibration_in_score_set_via_client( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) publish_test_score_calibration_via_client(client, calibration["urn"]) @@ -945,13 +945,13 @@ def test_creating_user_can_get_score_calibrations_for_score_set_when_public( data_files / "scores.csv", ) calibration = create_test_score_calibration_in_score_set_via_client( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) publish_test_score_calibration_via_client(client, calibration["urn"]) # add another calibration that is private. The creating user should see this one too create_test_score_calibration_in_score_set_via_client( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) response = client.get(f"/api/v1/score-calibrations/score-set/{score_set['urn']}") @@ -985,13 +985,13 @@ def test_contributing_user_can_get_score_calibrations_for_score_set_when_public( data_files / "scores.csv", ) calibration = create_test_score_calibration_in_score_set_via_client( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) publish_test_score_calibration_via_client(client, calibration["urn"]) # add another calibration that is private. The contributing user should see this one too create_test_score_calibration_in_score_set_via_client( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) add_contributor( @@ -1035,13 +1035,13 @@ def test_admin_user_can_get_score_calibrations_for_score_set_when_public( data_files / "scores.csv", ) calibration = create_test_score_calibration_in_score_set_via_client( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) publish_test_score_calibration_via_client(client, calibration["urn"]) # add another calibration that is private. 
The admin user should see this one too create_test_score_calibration_in_score_set_via_client( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) with DependencyOverrider(admin_app_overrides): @@ -1110,7 +1110,7 @@ def test_cannot_get_primary_score_calibration_for_score_set_when_none_exist( data_files / "scores.csv", ) create_test_score_calibration_in_score_set_via_client( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) response = client.get(f"/api/v1/score-calibrations/score-set/{score_set['urn']}/primary") @@ -1145,7 +1145,7 @@ def test_get_primary_score_calibration_for_score_set_when_exists( data_files / "scores.csv", ) calibration = create_publish_and_promote_score_calibration( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) response = client.get(f"/api/v1/score-calibrations/score-set/{score_set['urn']}/primary") @@ -1180,9 +1180,11 @@ def test_get_primary_score_calibration_for_score_set_when_multiple_exist( data_files / "scores.csv", ) - create_publish_and_promote_score_calibration(client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION)) + create_publish_and_promote_score_calibration( + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) + ) calibration2 = create_test_score_calibration_in_score_set_via_client( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) publish_test_score_calibration_via_client(client, calibration2["urn"]) @@ -1208,7 +1210,7 @@ def test_get_primary_score_calibration_for_score_set_when_multiple_exist( def test_cannot_create_score_calibration_when_missing_score_set_urn(client, setup_router_db): response = client.post( "/api/v1/score-calibrations", - json={**deepcamelize(TEST_BRNICH_SCORE_CALIBRATION)}, + json={**deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED)}, ) assert response.status_code == 422 @@ -1221,7 +1223,7 @@ def test_cannot_create_score_calibration_when_score_set_does_not_exist(client, s "/api/v1/score-calibrations", json={ "scoreSetUrn": "urn:ngs:score-set:nonexistent", - **deepcamelize(TEST_BRNICH_SCORE_CALIBRATION), + **deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED), }, ) @@ -1257,7 +1259,7 @@ def test_cannot_create_score_calibration_when_score_set_not_owned_by_user( "/api/v1/score-calibrations", json={ "scoreSetUrn": score_set["urn"], - **deepcamelize(TEST_BRNICH_SCORE_CALIBRATION), + **deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED), }, ) @@ -1296,7 +1298,7 @@ def test_cannot_create_score_calibration_in_public_score_set_when_score_set_not_ "/api/v1/score-calibrations", json={ "scoreSetUrn": score_set["urn"], - **deepcamelize(TEST_BRNICH_SCORE_CALIBRATION), + **deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED), }, ) @@ -1332,7 +1334,7 @@ def test_cannot_create_score_calibration_as_anonymous_user( "/api/v1/score-calibrations", json={ "scoreSetUrn": score_set["urn"], - **deepcamelize(TEST_BRNICH_SCORE_CALIBRATION), + **deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED), }, ) @@ -1367,7 +1369,7 @@ def test_can_create_score_calibration_as_score_set_owner( "/api/v1/score-calibrations", json={ "scoreSetUrn": score_set["urn"], - 
**deepcamelize(TEST_BRNICH_SCORE_CALIBRATION), + **deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED), }, ) @@ -1413,7 +1415,7 @@ def test_can_create_score_calibration_as_score_set_contributor( "/api/v1/score-calibrations", json={ "scoreSetUrn": score_set["urn"], - **deepcamelize(TEST_BRNICH_SCORE_CALIBRATION), + **deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED), }, ) @@ -1450,7 +1452,7 @@ def test_can_create_score_calibration_as_admin_user( "/api/v1/score-calibrations", json={ "scoreSetUrn": score_set["urn"], - **deepcamelize(TEST_BRNICH_SCORE_CALIBRATION), + **deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED), }, ) @@ -1487,7 +1489,7 @@ def test_cannot_update_score_calibration_when_score_set_not_exists( data_files / "scores.csv", ) calibration = create_test_score_calibration_in_score_set_via_client( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) response = client.put( @@ -1560,7 +1562,7 @@ def test_cannot_update_score_calibration_as_anonymous_user( data_files / "scores.csv", ) calibration = create_test_score_calibration_in_score_set_via_client( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) with DependencyOverrider(anonymous_app_overrides): @@ -1599,7 +1601,7 @@ def test_cannot_update_score_calibration_when_score_set_not_owned_by_user( data_files / "scores.csv", ) calibration = create_test_score_calibration_in_score_set_via_client( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) with DependencyOverrider(extra_user_app_overrides): @@ -1638,7 +1640,7 @@ def test_cannot_update_score_calibration_in_published_score_set_when_score_set_n data_files / "scores.csv", ) calibration = create_test_score_calibration_in_score_set_via_client( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) with patch.object(ArqRedis, "enqueue_job", return_value=None): @@ -1680,7 +1682,7 @@ def test_can_update_score_calibration_as_score_set_owner( data_files / "scores.csv", ) calibration = create_test_score_calibration_in_score_set_via_client( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) response = client.put( @@ -1720,7 +1722,7 @@ def test_cannot_update_published_score_calibration_as_score_set_owner( data_files / "scores.csv", ) calibration = create_test_score_calibration_in_score_set_via_client( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) publish_test_score_calibration_via_client(client, calibration["urn"]) @@ -1760,7 +1762,7 @@ def test_can_update_investigator_provided_score_calibration_as_score_set_contrib data_files / "scores.csv", ) calibration = create_test_score_calibration_in_score_set_via_client( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) add_contributor( @@ -1819,7 +1821,7 @@ def test_cannot_update_non_investigator_score_calibration_as_score_set_contribut with DependencyOverrider(admin_app_overrides): calibration = 
create_test_score_calibration_in_score_set_via_client( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) add_contributor( @@ -1867,7 +1869,7 @@ def test_can_update_score_calibration_as_admin_user( data_files / "scores.csv", ) calibration = create_test_score_calibration_in_score_set_via_client( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) with DependencyOverrider(admin_app_overrides): @@ -1908,7 +1910,7 @@ def test_can_update_published_score_calibration_as_admin_user( data_files / "scores.csv", ) calibration = create_test_score_calibration_in_score_set_via_client( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) publish_test_score_calibration_via_client(client, calibration["urn"]) @@ -1958,7 +1960,7 @@ def test_anonymous_user_may_not_move_calibration_to_another_score_set( data_files / "scores.csv", ) calibration = create_test_score_calibration_in_score_set_via_client( - client, score_set1["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set1["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) with DependencyOverrider(anonymous_app_overrides): @@ -1966,7 +1968,7 @@ def test_anonymous_user_may_not_move_calibration_to_another_score_set( f"/api/v1/score-calibrations/{calibration['urn']}", json={ "scoreSetUrn": score_set2["urn"], - **deepcamelize(TEST_BRNICH_SCORE_CALIBRATION), + **deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED), }, ) @@ -2004,7 +2006,7 @@ def test_user_may_not_move_investigator_calibration_when_lacking_permissions_on_ data_files / "scores.csv", ) calibration = create_test_score_calibration_in_score_set_via_client( - client, score_set1["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set1["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) # Give user permissions on the first score set only @@ -2022,7 +2024,7 @@ def test_user_may_not_move_investigator_calibration_when_lacking_permissions_on_ f"/api/v1/score-calibrations/{calibration['urn']}", json={ "scoreSetUrn": score_set2["urn"], - **deepcamelize(TEST_BRNICH_SCORE_CALIBRATION), + **deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED), }, ) @@ -2060,7 +2062,7 @@ def test_user_may_move_investigator_calibration_when_has_permissions_on_destinat data_files / "scores.csv", ) calibration = create_test_score_calibration_in_score_set_via_client( - client, score_set1["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set1["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) # Give user permissions on both score sets @@ -2090,7 +2092,7 @@ def test_user_may_move_investigator_calibration_when_has_permissions_on_destinat f"/api/v1/score-calibrations/{calibration['urn']}", json={ "scoreSetUrn": score_set2["urn"], - **deepcamelize(TEST_BRNICH_SCORE_CALIBRATION), + **deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED), }, ) @@ -2129,7 +2131,7 @@ def test_admin_user_may_move_calibration_to_another_score_set( data_files / "scores.csv", ) calibration = create_test_score_calibration_in_score_set_via_client( - client, score_set1["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set1["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) with DependencyOverrider(admin_app_overrides): @@ 
-2137,7 +2139,7 @@ def test_admin_user_may_move_calibration_to_another_score_set( f"/api/v1/score-calibrations/{calibration['urn']}", json={ "scoreSetUrn": score_set2["urn"], - **deepcamelize(TEST_BRNICH_SCORE_CALIBRATION), + **deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED), }, ) @@ -2182,7 +2184,7 @@ def test_cannot_delete_score_calibration_as_anonymous_user( data_files / "scores.csv", ) calibration = create_test_score_calibration_in_score_set_via_client( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) with DependencyOverrider(anonymous_app_overrides): @@ -2215,7 +2217,7 @@ def test_cannot_delete_score_calibration_when_score_set_not_owned_by_user( data_files / "scores.csv", ) calibration = create_test_score_calibration_in_score_set_via_client( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) with DependencyOverrider(extra_user_app_overrides): @@ -2248,7 +2250,7 @@ def test_can_delete_score_calibration_as_score_set_owner( data_files / "scores.csv", ) calibration = create_test_score_calibration_in_score_set_via_client( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) response = client.delete(f"/api/v1/score-calibrations/{calibration['urn']}") @@ -2282,7 +2284,7 @@ def test_cannot_delete_published_score_calibration_as_owner( data_files / "scores.csv", ) calibration = create_test_score_calibration_in_score_set_via_client( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) publish_test_score_calibration_via_client(client, calibration["urn"]) @@ -2315,7 +2317,7 @@ def test_cannot_delete_investigator_score_calibration_as_score_set_contributor( data_files / "scores.csv", ) calibration = create_test_score_calibration_in_score_set_via_client( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) add_contributor( @@ -2366,7 +2368,7 @@ def test_cannot_delete_non_investigator_calibration_as_score_set_contributor( with DependencyOverrider(admin_app_overrides): calibration = create_test_score_calibration_in_score_set_via_client( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) add_contributor( @@ -2406,7 +2408,7 @@ def test_can_delete_score_calibration_as_admin_user( data_files / "scores.csv", ) calibration = create_test_score_calibration_in_score_set_via_client( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) with DependencyOverrider(admin_app_overrides): @@ -2441,7 +2443,7 @@ def test_can_delete_published_score_calibration_as_admin_user( data_files / "scores.csv", ) calibration = create_test_score_calibration_in_score_set_via_client( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) publish_test_score_calibration_via_client(client, calibration["urn"]) @@ -2477,7 +2479,7 @@ def test_cannot_delete_primary_score_calibration( data_files / 
"scores.csv", ) calibration = create_publish_and_promote_score_calibration( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) response = client.delete(f"/api/v1/score-calibrations/{calibration['urn']}") @@ -2525,7 +2527,7 @@ def test_cannot_promote_score_calibration_as_anonymous_user( data_files / "scores.csv", ) calibration = create_test_score_calibration_in_score_set_via_client( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) publish_test_score_calibration_via_client(client, calibration["urn"]) @@ -2559,7 +2561,7 @@ def test_cannot_promote_score_calibration_when_score_calibration_not_owned_by_us data_files / "scores.csv", ) calibration = create_test_score_calibration_in_score_set_via_client( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) publish_test_score_calibration_via_client(client, calibration["urn"]) @@ -2595,7 +2597,7 @@ def test_can_promote_score_calibration_as_score_set_owner( data_files / "scores.csv", ) calibration = create_test_score_calibration_in_score_set_via_client( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) publish_test_score_calibration_via_client(client, calibration["urn"]) response = client.post(f"/api/v1/score-calibrations/{calibration['urn']}/promote-to-primary") @@ -2629,7 +2631,7 @@ def test_can_promote_score_calibration_as_score_set_contributor( data_files / "scores.csv", ) calibration = create_test_score_calibration_in_score_set_via_client( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) publish_test_score_calibration_via_client(client, calibration["urn"]) @@ -2674,7 +2676,7 @@ def test_can_promote_score_calibration_as_admin_user( data_files / "scores.csv", ) calibration = create_test_score_calibration_in_score_set_via_client( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) publish_test_score_calibration_via_client(client, calibration["urn"]) @@ -2710,7 +2712,7 @@ def test_can_promote_existing_primary_to_primary( data_files / "scores.csv", ) primary_calibration = create_publish_and_promote_score_calibration( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) response = client.post(f"/api/v1/score-calibrations/{primary_calibration['urn']}/promote-to-primary") @@ -2746,7 +2748,7 @@ def test_cannot_promote_research_use_only_to_primary( calibration = create_test_score_calibration_in_score_set_via_client( client, score_set["urn"], - deepcamelize({**TEST_BRNICH_SCORE_CALIBRATION, "researchUseOnly": True}), + deepcamelize({**TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED, "researchUseOnly": True}), ) publish_test_score_calibration_via_client(client, calibration["urn"]) @@ -2781,7 +2783,7 @@ def test_cannot_promote_private_calibration_to_primary( calibration = create_test_score_calibration_in_score_set_via_client( client, score_set["urn"], - deepcamelize({**TEST_BRNICH_SCORE_CALIBRATION, "private": True}), + 
deepcamelize({**TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED, "private": True}), ) response = client.post(f"/api/v1/score-calibrations/{calibration['urn']}/promote-to-primary") @@ -2812,7 +2814,9 @@ def test_cannot_promote_to_primary_if_primary_exists( experiment["urn"], data_files / "scores.csv", ) - create_publish_and_promote_score_calibration(client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION)) + create_publish_and_promote_score_calibration( + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) + ) secondary_calibration = create_test_score_calibration_in_score_set_via_client( client, score_set["urn"], deepcamelize(TEST_PATHOGENICITY_SCORE_CALIBRATION) ) @@ -2847,7 +2851,7 @@ def test_can_promote_to_primary_if_primary_exists_when_demote_existing_is_true( data_files / "scores.csv", ) primary_calibration = create_publish_and_promote_score_calibration( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) secondary_calibration = create_test_score_calibration_in_score_set_via_client( client, score_set["urn"], deepcamelize(TEST_PATHOGENICITY_SCORE_CALIBRATION) @@ -2894,7 +2898,7 @@ def test_cannot_promote_to_primary_with_demote_existing_flag_if_user_does_not_ha ) with DependencyOverrider(admin_app_overrides): primary_calibration = create_publish_and_promote_score_calibration( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) secondary_calibration = create_test_score_calibration_in_score_set_via_client( client, score_set["urn"], deepcamelize(TEST_PATHOGENICITY_SCORE_CALIBRATION) @@ -2954,7 +2958,7 @@ def test_cannot_demote_score_calibration_as_anonymous_user( data_files / "scores.csv", ) calibration = create_publish_and_promote_score_calibration( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) with DependencyOverrider(anonymous_app_overrides): @@ -2989,7 +2993,7 @@ def test_cannot_demote_score_calibration_when_score_calibration_not_owned_by_use data_files / "scores.csv", ) calibration = create_publish_and_promote_score_calibration( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) with DependencyOverrider(extra_user_app_overrides): @@ -3024,7 +3028,7 @@ def test_can_demote_score_calibration_as_score_set_contributor( data_files / "scores.csv", ) calibration = create_publish_and_promote_score_calibration( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) add_contributor( @@ -3070,7 +3074,7 @@ def test_can_demote_score_calibration_as_score_set_owner( data_files / "scores.csv", ) calibration = create_publish_and_promote_score_calibration( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) response = client.post( @@ -3106,7 +3110,7 @@ def test_can_demote_score_calibration_as_admin_user( data_files / "scores.csv", ) calibration = create_publish_and_promote_score_calibration( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], 
deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) with DependencyOverrider(admin_app_overrides): @@ -3142,7 +3146,9 @@ def test_can_demote_non_primary_score_calibration( experiment["urn"], data_files / "scores.csv", ) - create_publish_and_promote_score_calibration(client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION)) + create_publish_and_promote_score_calibration( + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) + ) secondary_calibration = create_test_score_calibration_in_score_set_via_client( client, score_set["urn"], deepcamelize(TEST_PATHOGENICITY_SCORE_CALIBRATION) ) @@ -3207,7 +3213,7 @@ def test_cannot_publish_score_calibration_as_anonymous_user( data_files / "scores.csv", ) calibration = create_test_score_calibration_in_score_set_via_client( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) with DependencyOverrider(anonymous_app_overrides): @@ -3242,7 +3248,7 @@ def test_cannot_publish_score_calibration_when_score_calibration_not_owned_by_us data_files / "scores.csv", ) calibration = create_test_score_calibration_in_score_set_via_client( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) with DependencyOverrider(extra_user_app_overrides): @@ -3277,7 +3283,7 @@ def test_can_publish_score_calibration_as_score_set_owner( data_files / "scores.csv", ) calibration = create_test_score_calibration_in_score_set_via_client( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) response = client.post( @@ -3313,7 +3319,7 @@ def test_can_publish_score_calibration_as_admin_user( data_files / "scores.csv", ) calibration = create_test_score_calibration_in_score_set_via_client( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) with DependencyOverrider(admin_app_overrides): @@ -3350,7 +3356,7 @@ def test_can_publish_already_published_calibration( data_files / "scores.csv", ) calibration = create_test_score_calibration_in_score_set_via_client( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) # publish it first diff --git a/tests/routers/test_score_set.py b/tests/routers/test_score_set.py index 09a2c25b..a20f47fc 100644 --- a/tests/routers/test_score_set.py +++ b/tests/routers/test_score_set.py @@ -35,7 +35,7 @@ SAVED_PUBMED_PUBLICATION, SAVED_SHORT_EXTRA_LICENSE, TEST_BIORXIV_IDENTIFIER, - TEST_BRNICH_SCORE_CALIBRATION, + TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED, TEST_CROSSREF_IDENTIFIER, TEST_GNOMAD_DATA_VERSION, TEST_INACTIVE_LICENSE, @@ -48,7 +48,7 @@ TEST_ORCID_ID, TEST_PATHOGENICITY_SCORE_CALIBRATION, TEST_PUBMED_IDENTIFIER, - TEST_SAVED_BRNICH_SCORE_CALIBRATION, + TEST_SAVED_BRNICH_SCORE_CALIBRATION_RANGE_BASED, TEST_SAVED_CLINVAR_CONTROL, TEST_SAVED_GENERIC_CLINICAL_CONTROL, TEST_SAVED_GNOMAD_VARIANT, @@ -204,7 +204,7 @@ def test_create_score_set_with_score_calibration(client, mock_publication_fetch, score_set["experimentUrn"] = experiment["urn"] score_set.update( { - "scoreCalibrations": [deepcamelize(TEST_BRNICH_SCORE_CALIBRATION)], + "scoreCalibrations": 
[deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED)], } ) @@ -219,7 +219,7 @@ def test_create_score_set_with_score_calibration(client, mock_publication_fetch, deepcopy(TEST_MINIMAL_SEQ_SCORESET_RESPONSE), experiment, response_data ) expected_response["experiment"].update({"numScoreSets": 1}) - expected_calibration = deepcopy(TEST_SAVED_BRNICH_SCORE_CALIBRATION) + expected_calibration = deepcopy(TEST_SAVED_BRNICH_SCORE_CALIBRATION_RANGE_BASED) expected_calibration["urn"] = response_data["scoreCalibrations"][0]["urn"] expected_calibration["private"] = True expected_calibration["primary"] = False @@ -815,12 +815,12 @@ def test_extra_user_can_only_view_published_score_calibrations_in_score_set( worker_queue.assert_called_once() create_test_score_calibration_in_score_set_via_client( - client, published_score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, published_score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) public_calibration = create_publish_and_promote_score_calibration( client, published_score_set["urn"], - deepcamelize(TEST_BRNICH_SCORE_CALIBRATION), + deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED), ) with DependencyOverrider(extra_user_app_overrides): @@ -848,12 +848,12 @@ def test_creating_user_can_view_all_score_calibrations_in_score_set(client, setu experiment = create_experiment(client) score_set = create_seq_score_set(client, experiment["urn"]) private_calibration = create_test_score_calibration_in_score_set_via_client( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) public_calibration = create_publish_and_promote_score_calibration( client, score_set["urn"], - deepcamelize(TEST_BRNICH_SCORE_CALIBRATION), + deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED), ) response = client.get(f"/api/v1/score-sets/{score_set['urn']}") @@ -1346,7 +1346,7 @@ def test_score_calibrations_remain_private_when_score_set_is_published( ) score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") create_test_score_calibration_in_score_set_via_client( - client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION) + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: @@ -3054,7 +3054,9 @@ def test_get_annotated_pathogenicity_evidence_lines_for_score_set( experiment["urn"], data_files / "scores.csv", ) - create_publish_and_promote_score_calibration(client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION)) + create_publish_and_promote_score_calibration( + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) + ) # The contents of the annotated variants objects should be tested in more detail elsewhere. 
response = client.get(f"/api/v1/score-sets/{score_set['urn']}/annotated-variants/pathogenicity-evidence-line") @@ -3141,7 +3143,9 @@ def test_get_annotated_pathogenicity_evidence_lines_for_score_set_when_some_vari experiment["urn"], data_files / "scores.csv", ) - create_publish_and_promote_score_calibration(client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION)) + create_publish_and_promote_score_calibration( + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) + ) first_var = clear_first_mapped_variant_post_mapped(session, score_set["urn"]) @@ -3181,7 +3185,9 @@ def test_get_annotated_functional_impact_statement_for_score_set( experiment["urn"], data_files / "scores.csv", ) - create_publish_and_promote_score_calibration(client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION)) + create_publish_and_promote_score_calibration( + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) + ) response = client.get(f"/api/v1/score-sets/{score_set['urn']}/annotated-variants/functional-impact-statement") response_data = parse_ndjson_response(response) @@ -3211,7 +3217,7 @@ def test_nonetype_annotated_functional_impact_statement_for_score_set_when_calib data_files / "scores.csv", update={ "secondaryPublicationIdentifiers": [{"dbName": "PubMed", "identifier": f"{TEST_PUBMED_IDENTIFIER}"}], - "scoreRanges": camelize([TEST_BRNICH_SCORE_CALIBRATION, TEST_PATHOGENICITY_SCORE_CALIBRATION]), + "scoreRanges": camelize([TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED, TEST_PATHOGENICITY_SCORE_CALIBRATION]), }, ) @@ -3270,7 +3276,9 @@ def test_get_annotated_functional_impact_statement_for_score_set_when_some_varia experiment["urn"], data_files / "scores.csv", ) - create_publish_and_promote_score_calibration(client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION)) + create_publish_and_promote_score_calibration( + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) + ) first_var = clear_first_mapped_variant_post_mapped(session, score_set["urn"]) @@ -3334,7 +3342,7 @@ def test_annotated_functional_study_result_exists_for_score_set_when_thresholds_ data_files / "scores.csv", update={ "secondaryPublicationIdentifiers": [{"dbName": "PubMed", "identifier": f"{TEST_PUBMED_IDENTIFIER}"}], - "scoreRanges": camelize([TEST_BRNICH_SCORE_CALIBRATION, TEST_PATHOGENICITY_SCORE_CALIBRATION]), + "scoreRanges": camelize([TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED, TEST_PATHOGENICITY_SCORE_CALIBRATION]), }, ) @@ -3366,7 +3374,7 @@ def test_annotated_functional_study_result_exists_for_score_set_when_ranges_not_ data_files / "scores.csv", update={ "secondaryPublicationIdentifiers": [{"dbName": "PubMed", "identifier": f"{TEST_PUBMED_IDENTIFIER}"}], - "scoreRanges": camelize([TEST_BRNICH_SCORE_CALIBRATION, TEST_PATHOGENICITY_SCORE_CALIBRATION]), + "scoreRanges": camelize([TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED, TEST_PATHOGENICITY_SCORE_CALIBRATION]), }, ) @@ -3421,7 +3429,7 @@ def test_annotated_functional_study_result_exists_for_score_set_when_some_varian data_files / "scores.csv", update={ "secondaryPublicationIdentifiers": [{"dbName": "PubMed", "identifier": f"{TEST_PUBMED_IDENTIFIER}"}], - "scoreRanges": camelize([TEST_BRNICH_SCORE_CALIBRATION, TEST_PATHOGENICITY_SCORE_CALIBRATION]), + "scoreRanges": camelize([TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED, TEST_PATHOGENICITY_SCORE_CALIBRATION]), }, ) diff --git a/tests/view_models/test_score_calibration.py b/tests/view_models/test_score_calibration.py index 
11985f26..1c600b26 100644 --- a/tests/view_models/test_score_calibration.py +++ b/tests/view_models/test_score_calibration.py @@ -6,26 +6,31 @@ from mavedb.lib.acmg import ACMGCriterion from mavedb.models.enums.score_calibration_relation import ScoreCalibrationRelation from mavedb.view_models.score_calibration import ( - FunctionalRangeCreate, + FunctionalClassificationCreate, ScoreCalibration, ScoreCalibrationCreate, ScoreCalibrationWithScoreSetUrn, ) from tests.helpers.constants import ( - TEST_BRNICH_SCORE_CALIBRATION, + TEST_BRNICH_SCORE_CALIBRATION_CLASS_BASED, + TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED, + TEST_FUNCTIONAL_CLASSIFICATION_ABNORMAL, + TEST_FUNCTIONAL_CLASSIFICATION_NORMAL, + TEST_FUNCTIONAL_CLASSIFICATION_NOT_SPECIFIED, TEST_FUNCTIONAL_RANGE_ABNORMAL, TEST_FUNCTIONAL_RANGE_INCLUDING_NEGATIVE_INFINITY, TEST_FUNCTIONAL_RANGE_INCLUDING_POSITIVE_INFINITY, TEST_FUNCTIONAL_RANGE_NORMAL, TEST_FUNCTIONAL_RANGE_NOT_SPECIFIED, TEST_PATHOGENICITY_SCORE_CALIBRATION, - TEST_SAVED_BRNICH_SCORE_CALIBRATION, + TEST_SAVED_BRNICH_SCORE_CALIBRATION_CLASS_BASED, + TEST_SAVED_BRNICH_SCORE_CALIBRATION_RANGE_BASED, TEST_SAVED_PATHOGENICITY_SCORE_CALIBRATION, ) from tests.helpers.util.common import dummy_attributed_object_from_dict ############################################################################## -# Tests for FunctionalRange view models +# Tests for FunctionalClassification view models ############################################################################## @@ -33,38 +38,78 @@ @pytest.mark.parametrize( - "functional_range", + "functional_classification", [ TEST_FUNCTIONAL_RANGE_NORMAL, TEST_FUNCTIONAL_RANGE_ABNORMAL, TEST_FUNCTIONAL_RANGE_NOT_SPECIFIED, + TEST_FUNCTIONAL_CLASSIFICATION_NORMAL, + TEST_FUNCTIONAL_CLASSIFICATION_ABNORMAL, + TEST_FUNCTIONAL_CLASSIFICATION_NOT_SPECIFIED, TEST_FUNCTIONAL_RANGE_INCLUDING_POSITIVE_INFINITY, TEST_FUNCTIONAL_RANGE_INCLUDING_NEGATIVE_INFINITY, ], ) -def test_can_create_valid_functional_range(functional_range): - fr = FunctionalRangeCreate.model_validate(functional_range) +def test_can_create_valid_functional_classification(functional_classification): + fr = FunctionalClassificationCreate.model_validate(functional_classification) - assert fr.label == functional_range["label"] - assert fr.description == functional_range.get("description") - assert fr.classification.value == functional_range["classification"] - assert fr.range == tuple(functional_range["range"]) - assert fr.inclusive_lower_bound == functional_range.get("inclusive_lower_bound", True) - assert fr.inclusive_upper_bound == functional_range.get("inclusive_upper_bound", False) + assert fr.label == functional_classification["label"] + assert fr.description == functional_classification.get("description") + assert fr.functional_classification.value == functional_classification["functional_classification"] + assert fr.inclusive_lower_bound == functional_classification.get("inclusive_lower_bound") + assert fr.inclusive_upper_bound == functional_classification.get("inclusive_upper_bound") + if "range" in functional_classification: + assert fr.range == tuple(functional_classification["range"]) + assert fr.range_based is True + assert fr.class_based is False + elif "class" in functional_classification: + assert fr.class_ == functional_classification["class"] + assert fr.range_based is False + assert fr.class_based is True -def test_cannot_create_functional_range_with_reversed_range(): + +@pytest.mark.parametrize( + "property_name", + [ + "label", + "class", + ], +) +def 
test_cannot_create_functional_classification_when_string_fields_empty(property_name): + invalid_data = deepcopy(TEST_FUNCTIONAL_CLASSIFICATION_NORMAL) + invalid_data[property_name] = " " + with pytest.raises(ValidationError, match="This field may not be empty or contain only whitespace."): + FunctionalClassificationCreate.model_validate(invalid_data) + + +def test_cannot_create_functional_classification_without_range_or_class(): + invalid_data = deepcopy(TEST_FUNCTIONAL_RANGE_NORMAL) + invalid_data["range"] = None + invalid_data["class"] = None + with pytest.raises(ValidationError, match="A functional range must specify either a numeric range or a class."): + FunctionalClassificationCreate.model_validate(invalid_data) + + +def test_cannot_create_functional_classification_with_both_range_and_class(): + invalid_data = deepcopy(TEST_FUNCTIONAL_RANGE_NORMAL) + invalid_data["class"] = "some_class" + with pytest.raises(ValidationError, match="A functional range may not specify both a numeric range and a class."): + FunctionalClassificationCreate.model_validate(invalid_data) + + +def test_cannot_create_functional_classification_with_reversed_range(): invalid_data = deepcopy(TEST_FUNCTIONAL_RANGE_NORMAL) invalid_data["range"] = (2, 1) with pytest.raises(ValidationError, match="The lower bound cannot exceed the upper bound."): - FunctionalRangeCreate.model_validate(invalid_data) + FunctionalClassificationCreate.model_validate(invalid_data) -def test_cannot_create_functional_range_with_equal_bounds(): +def test_cannot_create_functional_classification_with_equal_bounds(): invalid_data = deepcopy(TEST_FUNCTIONAL_RANGE_NORMAL) invalid_data["range"] = (1, 1) with pytest.raises(ValidationError, match="The lower and upper bounds cannot be identical."): - FunctionalRangeCreate.model_validate(invalid_data) + FunctionalClassificationCreate.model_validate(invalid_data) def test_can_create_range_with_infinity_bounds(): @@ -73,71 +118,71 @@ def test_can_create_range_with_infinity_bounds(): valid_data["inclusive_upper_bound"] = False valid_data["range"] = (None, None) - fr = FunctionalRangeCreate.model_validate(valid_data) + fr = FunctionalClassificationCreate.model_validate(valid_data) assert fr.range == (None, None) @pytest.mark.parametrize("ratio_property", ["oddspaths_ratio", "positive_likelihood_ratio"]) -def test_cannot_create_functional_range_with_negative_ratios(ratio_property): +def test_cannot_create_functional_classification_with_negative_ratios(ratio_property): invalid_data = deepcopy(TEST_FUNCTIONAL_RANGE_NORMAL) invalid_data[ratio_property] = -1.0 with pytest.raises(ValidationError, match="The ratio must be greater than or equal to 0."): - FunctionalRangeCreate.model_validate(invalid_data) + FunctionalClassificationCreate.model_validate(invalid_data) -def test_cannot_create_functional_range_with_inclusive_bounds_at_infinity(): +def test_cannot_create_functional_classification_with_inclusive_bounds_at_infinity(): invalid_data = deepcopy(TEST_FUNCTIONAL_RANGE_INCLUDING_POSITIVE_INFINITY) invalid_data["inclusive_upper_bound"] = True with pytest.raises(ValidationError, match="An inclusive upper bound may not include positive infinity."): - FunctionalRangeCreate.model_validate(invalid_data) + FunctionalClassificationCreate.model_validate(invalid_data) invalid_data = deepcopy(TEST_FUNCTIONAL_RANGE_INCLUDING_NEGATIVE_INFINITY) invalid_data["inclusive_lower_bound"] = True with pytest.raises(ValidationError, match="An inclusive lower bound may not include negative infinity."): - 
FunctionalRangeCreate.model_validate(invalid_data) + FunctionalClassificationCreate.model_validate(invalid_data) @pytest.mark.parametrize( - "functional_range, opposite_criterion", + "functional_classification, opposite_criterion", [(TEST_FUNCTIONAL_RANGE_NORMAL, ACMGCriterion.PS3), (TEST_FUNCTIONAL_RANGE_ABNORMAL, ACMGCriterion.BS3)], ) -def test_cannot_create_functional_range_when_classification_disagrees_with_acmg_criterion( - functional_range, opposite_criterion +def test_cannot_create_functional_classification_when_classification_disagrees_with_acmg_criterion( + functional_classification, opposite_criterion ): - invalid_data = deepcopy(functional_range) + invalid_data = deepcopy(functional_classification) invalid_data["acmg_classification"]["criterion"] = opposite_criterion.value with pytest.raises(ValidationError, match="must agree with the functional range classification"): - FunctionalRangeCreate.model_validate(invalid_data) + FunctionalClassificationCreate.model_validate(invalid_data) def test_none_type_classification_and_evidence_strength_count_as_agreement(): valid_data = deepcopy(TEST_FUNCTIONAL_RANGE_NORMAL) valid_data["acmg_classification"] = {"criterion": None, "evidence_strength": None} - fr = FunctionalRangeCreate.model_validate(valid_data) + fr = FunctionalClassificationCreate.model_validate(valid_data) assert fr.acmg_classification.criterion is None assert fr.acmg_classification.evidence_strength is None -def test_cannot_create_functional_range_when_oddspaths_evidence_disagrees_with_classification(): +def test_cannot_create_functional_classification_when_oddspaths_evidence_disagrees_with_classification(): invalid_data = deepcopy(TEST_FUNCTIONAL_RANGE_NORMAL) # Abnormal evidence strength for a normal range invalid_data["oddspaths_ratio"] = 350 with pytest.raises(ValidationError, match="implies criterion"): - FunctionalRangeCreate.model_validate(invalid_data) + FunctionalClassificationCreate.model_validate(invalid_data) invalid_data = deepcopy(TEST_FUNCTIONAL_RANGE_ABNORMAL) # Normal evidence strength for an abnormal range invalid_data["oddspaths_ratio"] = 0.1 with pytest.raises(ValidationError, match="implies criterion"): - FunctionalRangeCreate.model_validate(invalid_data) + FunctionalClassificationCreate.model_validate(invalid_data) def test_is_contained_by_range(): - fr = FunctionalRangeCreate.model_validate( + fr = FunctionalClassificationCreate.model_validate( { "label": "test range", - "classification": "abnormal", + "functional_classification": "abnormal", "range": (0.0, 1.0), "inclusive_lower_bound": True, "inclusive_upper_bound": True, @@ -157,10 +202,10 @@ def test_is_contained_by_range(): def test_inclusive_bounds_get_default_when_unset_and_range_exists(): - fr = FunctionalRangeCreate.model_validate( + fr = FunctionalClassificationCreate.model_validate( { "label": "test range", - "classification": "abnormal", + "functional_classification": "abnormal", "range": (0.0, 1.0), } ) @@ -170,11 +215,11 @@ def test_inclusive_bounds_get_default_when_unset_and_range_exists(): def test_inclusive_bounds_remain_none_when_range_is_none(): - fr = FunctionalRangeCreate.model_validate( + fr = FunctionalClassificationCreate.model_validate( { "label": "test range", - "classification": "abnormal", - "range": None, + "functional_classification": "abnormal", + "class": "some_class", } ) @@ -185,19 +230,27 @@ def test_inclusive_bounds_remain_none_when_range_is_none(): @pytest.mark.parametrize( "bound_property, bound_value, match_text", [ - ("inclusive_lower_bound", True, "An inclusive 
lower bound requires a defined range."), - ("inclusive_upper_bound", True, "An inclusive upper bound requires a defined range."), + ( + "inclusive_lower_bound", + True, + "An inclusive lower bound may not be set on a class based functional classification.", + ), + ( + "inclusive_upper_bound", + True, + "An inclusive upper bound may not be set on a class based functional classification.", + ), ], ) def test_cant_set_inclusive_bounds_when_range_is_none(bound_property, bound_value, match_text): invalid_data = { "label": "test range", - "classification": "abnormal", - "range": None, + "functional_classification": "abnormal", + "class": "some_class", bound_property: bound_value, } with pytest.raises(ValidationError, match=match_text): - FunctionalRangeCreate.model_validate(invalid_data) + FunctionalClassificationCreate.model_validate(invalid_data) ############################################################################## @@ -209,7 +262,11 @@ def test_cant_set_inclusive_bounds_when_range_is_none(bound_property, bound_valu @pytest.mark.parametrize( "valid_calibration", - [TEST_BRNICH_SCORE_CALIBRATION, TEST_PATHOGENICITY_SCORE_CALIBRATION], + [ + TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED, + TEST_BRNICH_SCORE_CALIBRATION_CLASS_BASED, + TEST_PATHOGENICITY_SCORE_CALIBRATION, + ], ) def test_can_create_valid_score_calibration(valid_calibration): sc = ScoreCalibrationCreate.model_validate(valid_calibration) @@ -219,11 +276,11 @@ def test_can_create_valid_score_calibration(valid_calibration): assert sc.baseline_score == valid_calibration.get("baseline_score") assert sc.baseline_score_description == valid_calibration.get("baseline_score_description") - if valid_calibration.get("functional_ranges") is not None: - assert len(sc.functional_ranges) == len(valid_calibration["functional_ranges"]) + if valid_calibration.get("functional_classifications") is not None: + assert len(sc.functional_classifications) == len(valid_calibration["functional_classifications"]) # functional range validation is presumed to be well tested separately. else: - assert sc.functional_ranges is None + assert sc.functional_classifications is None if valid_calibration.get("threshold_sources") is not None: assert len(sc.threshold_sources) == len(valid_calibration["threshold_sources"]) @@ -256,11 +313,11 @@ def test_can_create_valid_score_calibration(valid_calibration): # because of the large number of model validators that need to play nice with this case. 
@pytest.mark.parametrize( "valid_calibration", - [TEST_BRNICH_SCORE_CALIBRATION, TEST_PATHOGENICITY_SCORE_CALIBRATION], + [TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED, TEST_PATHOGENICITY_SCORE_CALIBRATION], ) -def test_can_create_valid_score_calibration_without_functional_ranges(valid_calibration): +def test_can_create_valid_score_calibration_without_functional_classifications(valid_calibration): valid_calibration = deepcopy(valid_calibration) - valid_calibration["functional_ranges"] = None + valid_calibration["functional_classifications"] = None sc = ScoreCalibrationCreate.model_validate(valid_calibration) @@ -269,11 +326,11 @@ def test_can_create_valid_score_calibration_without_functional_ranges(valid_cali assert sc.baseline_score == valid_calibration.get("baseline_score") assert sc.baseline_score_description == valid_calibration.get("baseline_score_description") - if valid_calibration.get("functional_ranges") is not None: - assert len(sc.functional_ranges) == len(valid_calibration["functional_ranges"]) + if valid_calibration.get("functional_classifications") is not None: + assert len(sc.functional_classifications) == len(valid_calibration["functional_classifications"]) # functional range validation is presumed to be well tested separately. else: - assert sc.functional_ranges is None + assert sc.functional_classifications is None if valid_calibration.get("threshold_sources") is not None: assert len(sc.threshold_sources) == len(valid_calibration["threshold_sources"]) @@ -303,50 +360,59 @@ def test_can_create_valid_score_calibration_without_functional_ranges(valid_cali def test_cannot_create_score_calibration_when_classification_ranges_overlap(): - invalid_data = deepcopy(TEST_BRNICH_SCORE_CALIBRATION) + invalid_data = deepcopy(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) # Make the first two ranges overlap - invalid_data["functional_ranges"][0]["range"] = [1.0, 3.0] - invalid_data["functional_ranges"][1]["range"] = [2.0, 4.0] + invalid_data["functional_classifications"][0]["range"] = [1.0, 3.0] + invalid_data["functional_classifications"][1]["range"] = [2.0, 4.0] with pytest.raises(ValidationError, match="Classified score ranges may not overlap; `"): ScoreCalibrationCreate.model_validate(invalid_data) def test_can_create_score_calibration_when_unclassified_ranges_overlap_with_classified_ranges(): - valid_data = deepcopy(TEST_BRNICH_SCORE_CALIBRATION) + valid_data = deepcopy(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) # Make the first two ranges overlap, one being 'not_specified' - valid_data["functional_ranges"][0]["range"] = [1.5, 3.0] - valid_data["functional_ranges"][1]["range"] = [2.0, 4.0] - valid_data["functional_ranges"][0]["classification"] = "not_specified" + valid_data["functional_classifications"][0]["range"] = [1.5, 3.0] + valid_data["functional_classifications"][1]["range"] = [2.0, 4.0] + valid_data["functional_classifications"][0]["functional_classification"] = "not_specified" sc = ScoreCalibrationCreate.model_validate(valid_data) - assert len(sc.functional_ranges) == len(valid_data["functional_ranges"]) + assert len(sc.functional_classifications) == len(valid_data["functional_classifications"]) def test_can_create_score_calibration_when_unclassified_ranges_overlap_with_each_other(): - valid_data = deepcopy(TEST_BRNICH_SCORE_CALIBRATION) + valid_data = deepcopy(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) # Make the first two ranges overlap, both being 'not_specified' - valid_data["functional_ranges"][0]["range"] = [1.5, 3.0] - valid_data["functional_ranges"][1]["range"] = [2.0, 
4.0] - valid_data["functional_ranges"][0]["classification"] = "not_specified" - valid_data["functional_ranges"][1]["classification"] = "not_specified" + valid_data["functional_classifications"][0]["range"] = [1.5, 3.0] + valid_data["functional_classifications"][1]["range"] = [2.0, 4.0] + valid_data["functional_classifications"][0]["functional_classification"] = "not_specified" + valid_data["functional_classifications"][1]["functional_classification"] = "not_specified" sc = ScoreCalibrationCreate.model_validate(valid_data) - assert len(sc.functional_ranges) == len(valid_data["functional_ranges"]) + assert len(sc.functional_classifications) == len(valid_data["functional_classifications"]) def test_cannot_create_score_calibration_when_ranges_touch_with_inclusive_ranges(): - invalid_data = deepcopy(TEST_BRNICH_SCORE_CALIBRATION) + invalid_data = deepcopy(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) # Make the first two ranges touch - invalid_data["functional_ranges"][0]["range"] = [1.0, 2.0] - invalid_data["functional_ranges"][1]["range"] = [2.0, 4.0] - invalid_data["functional_ranges"][0]["inclusive_upper_bound"] = True + invalid_data["functional_classifications"][0]["range"] = [1.0, 2.0] + invalid_data["functional_classifications"][1]["range"] = [2.0, 4.0] + invalid_data["functional_classifications"][0]["inclusive_upper_bound"] = True with pytest.raises(ValidationError, match="Classified score ranges may not overlap; `"): ScoreCalibrationCreate.model_validate(invalid_data) def test_cannot_create_score_calibration_with_duplicate_range_labels(): - invalid_data = deepcopy(TEST_BRNICH_SCORE_CALIBRATION) + invalid_data = deepcopy(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) # Make the first two ranges have the same label - invalid_data["functional_ranges"][0]["label"] = "duplicate label" - invalid_data["functional_ranges"][1]["label"] = "duplicate label" + invalid_data["functional_classifications"][0]["label"] = "duplicate label" + invalid_data["functional_classifications"][1]["label"] = "duplicate label" + with pytest.raises(ValidationError, match="Functional range labels must be unique"): + ScoreCalibrationCreate.model_validate(invalid_data) + + +def test_cannot_create_score_calibration_with_duplicate_range_classes(): + invalid_data = deepcopy(TEST_BRNICH_SCORE_CALIBRATION_CLASS_BASED) + # Make the first two ranges have the same label + invalid_data["functional_classifications"][0]["label"] = "duplicate label" + invalid_data["functional_classifications"][1]["label"] = "duplicate label" with pytest.raises(ValidationError, match="Functional range labels must be unique"): ScoreCalibrationCreate.model_validate(invalid_data) @@ -354,7 +420,7 @@ def test_cannot_create_score_calibration_with_duplicate_range_labels(): # Making an exception to usually not testing the ability to create models without optional fields, # since model validators sometimes rely on their absence. 
def test_can_create_score_calibration_without_baseline_score(): - valid_data = deepcopy(TEST_BRNICH_SCORE_CALIBRATION) + valid_data = deepcopy(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) valid_data["baseline_score"] = None sc = ScoreCalibrationCreate.model_validate(valid_data) @@ -362,7 +428,7 @@ def test_can_create_score_calibration_without_baseline_score(): def test_can_create_score_calibration_with_baseline_score_when_outside_all_ranges(): - valid_data = deepcopy(TEST_BRNICH_SCORE_CALIBRATION) + valid_data = deepcopy(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) valid_data["baseline_score"] = 10.0 sc = ScoreCalibrationCreate.model_validate(valid_data) @@ -370,7 +436,7 @@ def test_can_create_score_calibration_with_baseline_score_when_outside_all_range def test_can_create_score_calibration_with_baseline_score_when_inside_normal_range(): - valid_data = deepcopy(TEST_BRNICH_SCORE_CALIBRATION) + valid_data = deepcopy(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) valid_data["baseline_score"] = 3.0 sc = ScoreCalibrationCreate.model_validate(valid_data) @@ -378,7 +444,7 @@ def test_can_create_score_calibration_with_baseline_score_when_inside_normal_ran def test_cannot_create_score_calibration_with_baseline_score_when_inside_non_normal_range(): - invalid_data = deepcopy(TEST_BRNICH_SCORE_CALIBRATION) + invalid_data = deepcopy(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) invalid_data["baseline_score"] = -3.0 with pytest.raises(ValueError, match="Baseline scores may not fall within non-normal ranges"): ScoreCalibrationCreate.model_validate(invalid_data) @@ -389,7 +455,11 @@ def test_cannot_create_score_calibration_with_baseline_score_when_inside_non_nor @pytest.mark.parametrize( "valid_calibration", - [TEST_SAVED_BRNICH_SCORE_CALIBRATION, TEST_SAVED_PATHOGENICITY_SCORE_CALIBRATION], + [ + TEST_SAVED_BRNICH_SCORE_CALIBRATION_RANGE_BASED, + TEST_SAVED_BRNICH_SCORE_CALIBRATION_CLASS_BASED, + TEST_SAVED_PATHOGENICITY_SCORE_CALIBRATION, + ], ) def test_can_create_valid_score_calibration_from_attributed_object(valid_calibration): sc = ScoreCalibration.model_validate(dummy_attributed_object_from_dict(valid_calibration)) @@ -401,11 +471,11 @@ def test_can_create_valid_score_calibration_from_attributed_object(valid_calibra assert sc.baseline_score == valid_calibration.get("baselineScore") assert sc.baseline_score_description == valid_calibration.get("baselineScoreDescription") - if valid_calibration.get("functionalRanges") is not None: - assert len(sc.functional_ranges) == len(valid_calibration["functionalRanges"]) + if valid_calibration.get("functionalClassifications") is not None: + assert len(sc.functional_classifications) == len(valid_calibration["functionalClassifications"]) # functional range validation is presumed to be well tested separately. 
else: - assert sc.functional_ranges is None + assert sc.functional_classifications is None if valid_calibration.get("thresholdSources") is not None: assert len(sc.threshold_sources) == len(valid_calibration["thresholdSources"]) @@ -435,7 +505,7 @@ def test_can_create_valid_score_calibration_from_attributed_object(valid_calibra def test_cannot_create_score_calibration_when_publication_information_is_missing(): - invalid_data = deepcopy(TEST_SAVED_BRNICH_SCORE_CALIBRATION) + invalid_data = deepcopy(TEST_SAVED_BRNICH_SCORE_CALIBRATION_RANGE_BASED) # Add publication identifiers with missing information invalid_data.pop("thresholdSources", None) invalid_data.pop("classificationSources", None) @@ -445,7 +515,7 @@ def test_cannot_create_score_calibration_when_publication_information_is_missing def test_can_create_score_calibration_from_association_style_publication_identifiers_against_attributed_object(): - orig_data = TEST_SAVED_BRNICH_SCORE_CALIBRATION + orig_data = TEST_SAVED_BRNICH_SCORE_CALIBRATION_RANGE_BASED data = deepcopy(orig_data) threshold_sources = [ @@ -475,11 +545,11 @@ def test_can_create_score_calibration_from_association_style_publication_identif assert sc.baseline_score == orig_data.get("baselineScore") assert sc.baseline_score_description == orig_data.get("baselineScoreDescription") - if orig_data.get("functionalRanges") is not None: - assert len(sc.functional_ranges) == len(orig_data["functionalRanges"]) + if orig_data.get("functionalClassifications") is not None: + assert len(sc.functional_classifications) == len(orig_data["functionalClassifications"]) # functional range validation is presumed to be well tested separately. else: - assert sc.functional_ranges is None + assert sc.functional_classifications is None if orig_data.get("thresholdSources") is not None: assert len(sc.threshold_sources) == len(orig_data["thresholdSources"]) @@ -509,7 +579,7 @@ def test_can_create_score_calibration_from_association_style_publication_identif def test_primary_score_calibration_cannot_be_research_use_only(): - invalid_data = deepcopy(TEST_SAVED_BRNICH_SCORE_CALIBRATION) + invalid_data = deepcopy(TEST_SAVED_BRNICH_SCORE_CALIBRATION_RANGE_BASED) invalid_data["primary"] = True invalid_data["researchUseOnly"] = True with pytest.raises(ValidationError, match="Primary score calibrations may not be marked as research use only"): @@ -517,7 +587,7 @@ def test_primary_score_calibration_cannot_be_research_use_only(): def test_primary_score_calibration_cannot_be_private(): - invalid_data = deepcopy(TEST_SAVED_BRNICH_SCORE_CALIBRATION) + invalid_data = deepcopy(TEST_SAVED_BRNICH_SCORE_CALIBRATION_RANGE_BASED) invalid_data["primary"] = True invalid_data["private"] = True with pytest.raises(ValidationError, match="Primary score calibrations may not be marked as private"): @@ -525,7 +595,7 @@ def test_primary_score_calibration_cannot_be_private(): def test_score_calibration_with_score_set_urn_can_be_created_from_attributed_object(): - data = deepcopy(TEST_SAVED_BRNICH_SCORE_CALIBRATION) + data = deepcopy(TEST_SAVED_BRNICH_SCORE_CALIBRATION_RANGE_BASED) data["score_set"] = dummy_attributed_object_from_dict({"urn": "urn:mavedb:00000000-0000-0000-0000-000000000001"}) sc = ScoreCalibrationWithScoreSetUrn.model_validate(dummy_attributed_object_from_dict(data)) @@ -535,7 +605,47 @@ def test_score_calibration_with_score_set_urn_can_be_created_from_attributed_obj def test_score_calibration_with_score_set_urn_cannot_be_created_without_score_set_urn(): - invalid_data = deepcopy(TEST_SAVED_BRNICH_SCORE_CALIBRATION) 
+ invalid_data = deepcopy(TEST_SAVED_BRNICH_SCORE_CALIBRATION_RANGE_BASED) invalid_data["score_set"] = dummy_attributed_object_from_dict({}) with pytest.raises(ValidationError, match="Unable to create ScoreCalibrationWithScoreSetUrn without attribute"): ScoreCalibrationWithScoreSetUrn.model_validate(dummy_attributed_object_from_dict(invalid_data)) + + +def test_cannot_create_score_calibration_with_mixed_range_and_class_based_functional_classifications(): + """Test that score calibrations cannot have both range-based and class-based functional classifications.""" + invalid_data = deepcopy(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) + # Add a class-based functional classification to a range-based calibration + invalid_data["functional_classifications"].append( + {"label": "class based classification", "functional_classification": "abnormal", "class": "some_class"} + ) + + with pytest.raises( + ValidationError, match="All functional classifications within a score calibration must be of the same type" + ): + ScoreCalibrationCreate.model_validate(invalid_data) + + +def test_score_calibration_range_based_property(): + """Test the range_based property works correctly.""" + range_based_data = deepcopy(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) + sc = ScoreCalibrationCreate.model_validate(range_based_data) + assert sc.range_based is True + assert sc.class_based is False + + +def test_score_calibration_class_based_property(): + """Test the class_based property works correctly.""" + class_based_data = deepcopy(TEST_BRNICH_SCORE_CALIBRATION_CLASS_BASED) + sc = ScoreCalibrationCreate.model_validate(class_based_data) + assert sc.class_based is True + assert sc.range_based is False + + +def test_score_calibration_properties_when_no_functional_classifications(): + """Test that properties return False when no functional classifications exist.""" + valid_data = deepcopy(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) + valid_data["functional_classifications"] = None + + sc = ScoreCalibrationCreate.model_validate(valid_data) + assert sc.range_based is False + assert sc.class_based is False diff --git a/tests/view_models/test_score_set.py b/tests/view_models/test_score_set.py index 754b8657..88da2c12 100644 --- a/tests/view_models/test_score_set.py +++ b/tests/view_models/test_score_set.py @@ -10,7 +10,7 @@ EXTRA_USER, SAVED_PUBMED_PUBLICATION, TEST_BIORXIV_IDENTIFIER, - TEST_BRNICH_SCORE_CALIBRATION, + TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED, TEST_CROSSREF_IDENTIFIER, TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_SEQ_SCORESET, @@ -231,7 +231,7 @@ def test_cannot_create_score_set_with_an_empty_method(): @pytest.mark.parametrize( - "calibration", [deepcopy(TEST_BRNICH_SCORE_CALIBRATION), deepcopy(TEST_PATHOGENICITY_SCORE_CALIBRATION)] + "calibration", [deepcopy(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED), deepcopy(TEST_PATHOGENICITY_SCORE_CALIBRATION)] ) def test_can_create_score_set_with_complete_and_valid_provided_calibrations(calibration): score_set_test = TEST_MINIMAL_SEQ_SCORESET.copy() @@ -247,8 +247,8 @@ def test_can_create_score_set_with_multiple_valid_calibrations(): score_set_test = TEST_MINIMAL_SEQ_SCORESET.copy() score_set_test["experiment_urn"] = VALID_EXPERIMENT_URN score_set_test["score_calibrations"] = [ - deepcopy(TEST_BRNICH_SCORE_CALIBRATION), - deepcopy(TEST_BRNICH_SCORE_CALIBRATION), + deepcopy(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED), + deepcopy(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED), deepcopy(TEST_PATHOGENICITY_SCORE_CALIBRATION), ] From 9dafe453ed4b82ae03633fc068992fce66fccc20 Mon Sep 17 
00:00:00 2001 From: Benjamin Capodanno Date: Thu, 20 Nov 2025 12:09:58 -0800 Subject: [PATCH 08/24] feat: update standardize_dataframe to accept custom standard columns and adjust related tests --- .../lib/validation/dataframe/dataframe.py | 8 ++++---- tests/validation/dataframe/test_dataframe.py | 20 ++++++++++++------- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/src/mavedb/lib/validation/dataframe/dataframe.py b/src/mavedb/lib/validation/dataframe/dataframe.py index 75a07db6..dcf6c8e5 100644 --- a/src/mavedb/lib/validation/dataframe/dataframe.py +++ b/src/mavedb/lib/validation/dataframe/dataframe.py @@ -82,8 +82,8 @@ def validate_and_standardize_dataframe_pair( if not targets: raise ValueError("Can't validate provided file with no targets.") - standardized_scores_df = standardize_dataframe(scores_df) - standardized_counts_df = standardize_dataframe(counts_df) if counts_df is not None else None + standardized_scores_df = standardize_dataframe(scores_df, STANDARD_COLUMNS) + standardized_counts_df = standardize_dataframe(counts_df, STANDARD_COLUMNS) if counts_df is not None else None validate_dataframe(standardized_scores_df, "scores", targets, hdp) @@ -224,7 +224,7 @@ def standardize_dict_keys(d: dict[str, Any]) -> dict[str, Any]: return {clean_col_name(k): v for k, v in d.items()} -def standardize_dataframe(df: pd.DataFrame) -> pd.DataFrame: +def standardize_dataframe(df: pd.DataFrame, standard_columns: tuple[str, ...]) -> pd.DataFrame: """Standardize a dataframe by sorting the columns and changing the standard column names to lowercase. Also strips leading and trailing whitespace from column names and removes any quoted strings from column names. @@ -250,7 +250,7 @@ def standardize_dataframe(df: pd.DataFrame) -> pd.DataFrame: cleaned_columns = {c: clean_col_name(c) for c in df.columns} df.rename(columns=cleaned_columns, inplace=True) - column_mapper = {x: x.lower() for x in df.columns if x.lower() in STANDARD_COLUMNS} + column_mapper = {x: x.lower() for x in df.columns if x.lower() in standard_columns} df.rename(columns=column_mapper, inplace=True) return sort_dataframe_columns(df) diff --git a/tests/validation/dataframe/test_dataframe.py b/tests/validation/dataframe/test_dataframe.py index 4c8334de..daf3fd63 100644 --- a/tests/validation/dataframe/test_dataframe.py +++ b/tests/validation/dataframe/test_dataframe.py @@ -13,6 +13,7 @@ required_score_column, ) from mavedb.lib.validation.dataframe.dataframe import ( + STANDARD_COLUMNS, choose_dataframe_index_column, sort_dataframe_columns, standardize_dataframe, @@ -93,32 +94,36 @@ def test_sort_dataframe_preserves_extras_order(self): class TestStandardizeDataframe(DfTestCase): def test_preserve_standardized(self): - standardized_df = standardize_dataframe(self.dataframe) + standardized_df = standardize_dataframe(self.dataframe, STANDARD_COLUMNS) pd.testing.assert_frame_equal(self.dataframe, standardized_df) def test_standardize_changes_case_variants(self): - standardized_df = standardize_dataframe(self.dataframe.rename(columns={hgvs_nt_column: hgvs_nt_column.upper()})) + standardized_df = standardize_dataframe( + self.dataframe.rename(columns={hgvs_nt_column: hgvs_nt_column.upper()}), STANDARD_COLUMNS + ) pd.testing.assert_frame_equal(self.dataframe, standardized_df) def test_standardize_changes_case_scores(self): standardized_df = standardize_dataframe( - self.dataframe.rename(columns={required_score_column: required_score_column.title()}) + self.dataframe.rename(columns={required_score_column: 
required_score_column.title()}), STANDARD_COLUMNS ) pd.testing.assert_frame_equal(self.dataframe, standardized_df) def test_standardize_preserves_extras_case(self): - standardized_df = standardize_dataframe(self.dataframe.rename(columns={"extra": "extra".upper()})) + standardized_df = standardize_dataframe( + self.dataframe.rename(columns={"extra": "extra".upper()}), STANDARD_COLUMNS + ) pd.testing.assert_frame_equal(self.dataframe.rename(columns={"extra": "extra".upper()}), standardized_df) def test_standardize_removes_quotes(self): standardized_df = standardize_dataframe( - self.dataframe.rename(columns={"extra": "'extra'", "extra2": '"extra2"'}) + self.dataframe.rename(columns={"extra": "'extra'", "extra2": '"extra2"'}), STANDARD_COLUMNS ) pd.testing.assert_frame_equal(self.dataframe, standardized_df) def test_standardize_removes_whitespace(self): standardized_df = standardize_dataframe( - self.dataframe.rename(columns={"extra": " extra ", "extra2": " extra2"}) + self.dataframe.rename(columns={"extra": " extra ", "extra2": " extra2"}), STANDARD_COLUMNS ) pd.testing.assert_frame_equal(self.dataframe, standardized_df) @@ -135,7 +140,8 @@ def test_standardize_sorts_columns(self): "count1", "extra", ], - ] + ], + STANDARD_COLUMNS, ) pd.testing.assert_frame_equal( self.dataframe[ From 605f7466719d3be00c1a1546827193eeedbe5ba4 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Mon, 24 Nov 2025 10:35:07 -0800 Subject: [PATCH 09/24] fix: update inclusive bound checks to allow None values in FunctionalClassificationBase --- src/mavedb/view_models/score_calibration.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mavedb/view_models/score_calibration.py b/src/mavedb/view_models/score_calibration.py index e8696943..4579a1f5 100644 --- a/src/mavedb/view_models/score_calibration.py +++ b/src/mavedb/view_models/score_calibration.py @@ -129,11 +129,11 @@ def class_and_range_mutually_exclusive( def inclusive_bounds_require_range(self: "FunctionalClassificationBase") -> "FunctionalClassificationBase": """Inclusive bounds may only be set if a range is provided. If they are unset, default them.""" if self.class_ is not None: - if self.inclusive_lower_bound: + if self.inclusive_lower_bound is not None: raise ValidationError( "An inclusive lower bound may not be set on a class based functional classification." ) - if self.inclusive_upper_bound: + if self.inclusive_upper_bound is not None: raise ValidationError( "An inclusive upper bound may not be set on a class based functional classification." 
) From 407377b6a05f3da1c62fcab28f50fc583aaec8ad Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Mon, 24 Nov 2025 11:47:15 -0800 Subject: [PATCH 10/24] refactor: remove default values for inclusive bounds in ScoreCalibrationFunctionalClassification --- .../models/score_calibration_functional_classification.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mavedb/models/score_calibration_functional_classification.py b/src/mavedb/models/score_calibration_functional_classification.py index 5afd4f69..1975310a 100644 --- a/src/mavedb/models/score_calibration_functional_classification.py +++ b/src/mavedb/models/score_calibration_functional_classification.py @@ -41,8 +41,8 @@ class ScoreCalibrationFunctionalClassification(Base): range = Column(JSONB(none_as_null=True), nullable=True) # (lower_bound, upper_bound) class_ = Column(String, nullable=True) - inclusive_lower_bound = Column(Boolean, nullable=True, default=True) - inclusive_upper_bound = Column(Boolean, nullable=True, default=False) + inclusive_lower_bound = Column(Boolean, nullable=True) + inclusive_upper_bound = Column(Boolean, nullable=True) oddspaths_ratio = Column(Float, nullable=True) positive_likelihood_ratio = Column(Float, nullable=True) From 3fda888b804e3c3ec045eb8a5046ab386a5c4dc8 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Mon, 24 Nov 2025 11:48:07 -0800 Subject: [PATCH 11/24] feat: add validation and standardization for calibration classes dataframe --- .../lib/validation/constants/general.py | 3 + .../lib/validation/dataframe/calibration.py | 190 +++++ .../validation/dataframe/test_calibration.py | 743 ++++++++++++++++++ 3 files changed, 936 insertions(+) create mode 100644 src/mavedb/lib/validation/dataframe/calibration.py create mode 100644 tests/validation/dataframe/test_calibration.py diff --git a/src/mavedb/lib/validation/constants/general.py b/src/mavedb/lib/validation/constants/general.py index 92b4fd5b..22ca4cbf 100644 --- a/src/mavedb/lib/validation/constants/general.py +++ b/src/mavedb/lib/validation/constants/general.py @@ -44,6 +44,9 @@ variant_count_data = "count_data" required_score_column = "score" +calibration_variant_column_name = "variant_urn" +calibration_class_column_name = "class_name" + valid_dataset_columns = [score_columns, count_columns] valid_variant_columns = [variant_score_data, variant_count_data] diff --git a/src/mavedb/lib/validation/dataframe/calibration.py b/src/mavedb/lib/validation/dataframe/calibration.py new file mode 100644 index 00000000..c7db74a6 --- /dev/null +++ b/src/mavedb/lib/validation/dataframe/calibration.py @@ -0,0 +1,190 @@ +import pandas as pd +from sqlalchemy import select +from sqlalchemy.orm import Session + +from mavedb.lib.validation.constants.general import ( + calibration_class_column_name, + calibration_variant_column_name, +) +from mavedb.lib.validation.dataframe.column import validate_data_column, validate_variant_column +from mavedb.lib.validation.dataframe.dataframe import standardize_dataframe, validate_no_null_rows +from mavedb.lib.validation.exceptions import ValidationError +from mavedb.models.score_set import ScoreSet +from mavedb.models.variant import Variant +from mavedb.view_models import score_calibration + +STANDARD_CALIBRATION_COLUMNS = (calibration_variant_column_name, calibration_class_column_name) + + +def validate_and_standardize_calibration_classes_dataframe( + db: Session, + score_set: ScoreSet, + calibration: score_calibration.ScoreCalibrationCreate | score_calibration.ScoreCalibrationModify, + 
classes_df: pd.DataFrame, +) -> pd.DataFrame: + """ + Validate and standardize a calibration classes dataframe for functional classification calibrations. + + This function performs comprehensive validation of a calibration classes dataframe, ensuring + it meets the requirements for functional classification calibrations. It standardizes column + names, validates data integrity, and checks that variants and classes are properly formatted. + + Args: + db (Session): Database session for validation queries. + score_set (ScoreSet): The score set associated with the calibration. + calibration (ScoreCalibrationCreate | ScoreCalibrationModify): The calibration object + containing configuration details. Must be class-based. + classes_df (pd.DataFrame): The input dataframe containing calibration classes data. + + Returns: + pd.DataFrame: The standardized and validated calibration classes dataframe. + + Raises: + ValueError: If the calibration is not class-based. + ValidationError: If the dataframe contains invalid data, unexpected columns, + invalid variant URNs, or improperly formatted classes. + + Note: + The function expects the dataframe to contain specific columns for variants and + calibration classes, and performs strict validation on both column structure + and data content. + """ + if not calibration.class_based: + raise ValueError("Calibration classes file can only be provided for functional classification calibrations.") + + standardized_classes_df = standardize_dataframe(classes_df, STANDARD_CALIBRATION_COLUMNS) + validate_calibration_df_column_names(standardized_classes_df) + validate_no_null_rows(standardized_classes_df) + + column_mapping = {c.lower(): c for c in standardized_classes_df.columns} + index_column = column_mapping[calibration_variant_column_name] + + for c in column_mapping: + if c == calibration_variant_column_name: + validate_variant_column(standardized_classes_df[c], column_mapping[c] == index_column) + validate_calibration_variant_urns(db, score_set, standardized_classes_df[c]) + elif c == calibration_class_column_name: + validate_data_column(standardized_classes_df[c], force_numeric=False) + validate_calibration_classes(calibration, standardized_classes_df[c]) + + # handle unexpected columns. These should have already been caught by + # validate_calibration_df_column_names, but we include this for completeness. + else: # pragma: no cover + raise ValidationError(f"unexpected column in calibration classes file: '{c}'") + + return standardized_classes_df + + +def validate_calibration_df_column_names(df: pd.DataFrame) -> None: + """ + Validate the column names of a calibration DataFrame. + + This function performs comprehensive validation of DataFrame column names to ensure + they meet the required format and structure for calibration data processing. + + Args: + df (pd.DataFrame): The DataFrame whose columns need to be validated. + + Raises: + ValidationError: If any of the following validation checks fail: + - Column names are not strings + - Column names are empty or contain only whitespace + - Required calibration variant column is missing + - Required calibration class column is missing + - DataFrame contains unexpected columns (must match STANDARD_CALIBRATION_COLUMNS exactly) + + Returns: + None: This function performs validation only and returns nothing on success. + + Note: + Column name comparison is case-insensitive. The function converts all column + names to lowercase before performing validation checks. 
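+
+    Example:
+        A minimal sketch of a frame that passes this check (cell values are
+        placeholders; only the column names matter here, and matching is
+        case-insensitive):
+
+            df = pd.DataFrame({"Variant_URN": ["urn:variant:1"], "class_name": ["abnormal"]})
+            validate_calibration_df_column_names(df)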
+ """ + if any(type(c) is not str for c in df.columns): + raise ValidationError("column names must be strings") + + if any(c.isspace() for c in df.columns) or any(len(c) == 0 for c in df.columns): + raise ValidationError("column names cannot be empty or whitespace") + + if len(df.columns) != len(set(c.lower() for c in df.columns)): + raise ValidationError("duplicate column names are not allowed (case-insensitive)") + + columns = [c.lower() for c in df.columns] + + if calibration_variant_column_name not in columns: + raise ValidationError(f"missing required column: '{calibration_variant_column_name}'") + + if calibration_class_column_name not in columns: + raise ValidationError(f"missing required column: '{calibration_class_column_name}'") + + if set(STANDARD_CALIBRATION_COLUMNS) != set(columns): + raise ValidationError( + f"unexpected column(s) in calibration classes file: {', '.join(sorted(set(columns) - set(STANDARD_CALIBRATION_COLUMNS)))}" + ) + + +def validate_calibration_variant_urns(db: Session, score_set: ScoreSet, variant_urns: pd.Series) -> None: + """ + Validate that all provided variant URNs exist in the given score set. + + Args: + db (Session): Database session for querying variants. + score_set (ScoreSet): The score set to validate variants against. + variant_urns (pd.Series): Series of variant URNs to validate. + + Raises: + ValidationError: If any variant URNs do not exist in the score set. + + Returns: + None: Function returns nothing if validation passes. + """ + existing_variant_urns = set( + db.scalars( + select(Variant.urn).where(Variant.score_set_id == score_set.id, Variant.urn.in_(variant_urns.tolist())) + ).all() + ) + + missing_variant_urns = set(variant_urns.tolist()) - existing_variant_urns + if missing_variant_urns: + raise ValidationError( + f"The following variant URNs do not exist in the score set: {', '.join(sorted(missing_variant_urns))}" + ) + + +def validate_calibration_classes( + calibration: score_calibration.ScoreCalibrationCreate | score_calibration.ScoreCalibrationModify, classes: pd.Series +) -> None: + """ + Validate that the functional classifications in a calibration match the provided classes. + + This function ensures that: + 1. The calibration has functional classifications defined + 2. All classes in the provided series are defined in the calibration + 3. All classes defined in the calibration are present in the provided series + + Args: + calibration: A ScoreCalibrationCreate or ScoreCalibrationModify object containing + functional classifications to validate against. + classes: A pandas Series containing class labels to validate. + + Raises: + ValueError: If the calibration does not have functional classifications defined. + ValidationError: If there are classes in the series that are not defined in the + calibration, or if there are classes defined in the calibration + that are missing from the series. 
+ """ + if not calibration.functional_classifications: + raise ValueError("Calibration must have functional classifications defined for class validation.") + + defined_classes = {c.class_ for c in calibration.functional_classifications} + provided_classes = set(classes.tolist()) + + undefined_classes = provided_classes - defined_classes + if undefined_classes: + raise ValidationError( + f"The following classes are not defined in the calibration: {', '.join(sorted(undefined_classes))}" + ) + + unprovided_classes = defined_classes - provided_classes + if unprovided_classes: + raise ValidationError("Some defined classes in the calibration are missing from the classes file.") diff --git a/tests/validation/dataframe/test_calibration.py b/tests/validation/dataframe/test_calibration.py new file mode 100644 index 00000000..6a7a7676 --- /dev/null +++ b/tests/validation/dataframe/test_calibration.py @@ -0,0 +1,743 @@ +# ruff: noqa: E402 + +from unittest.mock import Mock, patch + +import pytest + +pytest.importorskip("psycopg2") + +import pandas as pd + +from mavedb.lib.validation.constants.general import calibration_class_column_name, calibration_variant_column_name +from mavedb.lib.validation.dataframe.calibration import ( + validate_and_standardize_calibration_classes_dataframe, + validate_calibration_classes, + validate_calibration_df_column_names, + validate_calibration_variant_urns, +) +from mavedb.lib.validation.exceptions import ValidationError +from mavedb.view_models import score_calibration + + +class TestValidateAndStandardizeCalibrationClassesDataframe: + """Test suite for validate_and_standardize_calibration_classes_dataframe function.""" + + @pytest.fixture + def mock_dependencies(self): + """Mock all external dependencies for the function.""" + with ( + patch("mavedb.lib.validation.dataframe.calibration.standardize_dataframe") as mock_standardize, + patch("mavedb.lib.validation.dataframe.calibration.validate_no_null_rows") as mock_validate_no_null, + patch("mavedb.lib.validation.dataframe.calibration.validate_variant_column") as mock_validate_variant, + patch("mavedb.lib.validation.dataframe.calibration.validate_data_column") as mock_validate_data, + ): + yield { + "standardize_dataframe": mock_standardize, + "validate_no_null_rows": mock_validate_no_null, + "validate_variant_column": mock_validate_variant, + "validate_data_column": mock_validate_data, + } + + def test_validate_and_standardize_calibration_classes_dataframe_success(self, mock_dependencies): + """Test successful validation and standardization.""" + mock_db = Mock() + mock_score_set = Mock() + mock_score_set.id = 123 + + mock_calibration = Mock() + mock_calibration.class_based = True + + input_df = pd.DataFrame( + { + calibration_variant_column_name.upper(): ["var1", "var2"], + calibration_class_column_name.upper(): ["A", "B"], + } + ) + standardized_df = pd.DataFrame( + {calibration_variant_column_name: ["var1", "var2"], calibration_class_column_name: ["A", "B"]} + ) + + mock_dependencies["standardize_dataframe"].return_value = standardized_df + + mock_scalars = Mock() + mock_scalars.all.return_value = ["var1", "var2"] + mock_db.scalars.return_value = mock_scalars + + mock_classification1 = Mock() + mock_classification1.class_ = "A" + mock_classification2 = Mock() + mock_classification2.class_ = "B" + mock_calibration.functional_classifications = [mock_classification1, mock_classification2] + + result = validate_and_standardize_calibration_classes_dataframe( + mock_db, mock_score_set, mock_calibration, input_df + ) + + 
assert result.equals(standardized_df) + mock_dependencies["standardize_dataframe"].assert_called_once() + mock_dependencies["validate_no_null_rows"].assert_called_once_with(standardized_df) + mock_dependencies["validate_variant_column"].assert_called_once() + mock_dependencies["validate_data_column"].assert_called_once() + + def test_validate_and_standardize_calibration_classes_dataframe_not_class_based(self): + """Test ValueError when calibration is not class-based.""" + mock_db = Mock() + mock_score_set = Mock() + mock_calibration = Mock() + mock_calibration.class_based = False + input_df = pd.DataFrame({"variant": ["var1"], "class": ["A"]}) + + with pytest.raises( + ValueError, + match="Calibration classes file can only be provided for functional classification calibrations.", + ): + validate_and_standardize_calibration_classes_dataframe(mock_db, mock_score_set, mock_calibration, input_df) + + def test_validate_and_standardize_calibration_classes_dataframe_invalid_column_names(self, mock_dependencies): + """Test ValidationError when column validation fails.""" + mock_db = Mock() + mock_score_set = Mock() + mock_calibration = Mock() + mock_calibration.class_based = True + + input_df = pd.DataFrame({calibration_variant_column_name: ["var1"], "invalid": ["A"]}) + standardized_df = pd.DataFrame({calibration_variant_column_name: ["var1"], "invalid": ["A"]}) + + mock_dependencies["standardize_dataframe"].return_value = standardized_df + + with pytest.raises(ValidationError, match=f"missing required column: '{calibration_class_column_name}'"): + validate_and_standardize_calibration_classes_dataframe(mock_db, mock_score_set, mock_calibration, input_df) + + def test_validate_and_standardize_calibration_classes_dataframe_null_rows(self, mock_dependencies): + """Test ValidationError when null rows validation fails.""" + mock_db = Mock() + mock_score_set = Mock() + mock_calibration = Mock() + mock_calibration.class_based = True + + input_df = pd.DataFrame({calibration_variant_column_name: ["var1"], calibration_class_column_name: ["A"]}) + standardized_df = pd.DataFrame( + {calibration_variant_column_name: ["var1"], calibration_class_column_name: ["A"]} + ) + + mock_dependencies["standardize_dataframe"].return_value = standardized_df + mock_dependencies["validate_no_null_rows"].side_effect = ValidationError("null rows detected") + + with pytest.raises(ValidationError, match="null rows detected"): + validate_and_standardize_calibration_classes_dataframe(mock_db, mock_score_set, mock_calibration, input_df) + + def test_validate_and_standardize_calibration_classes_dataframe_invalid_variants(self, mock_dependencies): + """Test ValidationError when variant URN validation fails.""" + mock_db = Mock() + mock_score_set = Mock() + mock_score_set.id = 123 + mock_calibration = Mock() + mock_calibration.class_based = True + + input_df = pd.DataFrame({calibration_variant_column_name: ["var1"], calibration_class_column_name: ["A"]}) + standardized_df = pd.DataFrame( + {calibration_variant_column_name: ["var1"], calibration_class_column_name: ["A"]} + ) + + mock_dependencies["standardize_dataframe"].return_value = standardized_df + + mock_scalars = Mock() + mock_scalars.all.return_value = [] + mock_db.scalars.return_value = mock_scalars + + with pytest.raises(ValidationError, match="The following variant URNs do not exist in the score set: var1"): + validate_and_standardize_calibration_classes_dataframe(mock_db, mock_score_set, mock_calibration, input_df) + + def 
test_validate_and_standardize_calibration_classes_dataframe_invalid_classes(self, mock_dependencies): + """Test ValidationError when class validation fails.""" + mock_db = Mock() + mock_score_set = Mock() + mock_score_set.id = 123 + mock_calibration = Mock() + mock_calibration.class_based = True + + input_df = pd.DataFrame({calibration_variant_column_name: ["var1"], calibration_class_column_name: ["A"]}) + standardized_df = pd.DataFrame( + {calibration_variant_column_name: ["var1"], calibration_class_column_name: ["A"]} + ) + + mock_dependencies["standardize_dataframe"].return_value = standardized_df + + mock_scalars = Mock() + mock_scalars.all.return_value = ["var1"] + mock_db.scalars.return_value = mock_scalars + + mock_calibration.functional_classifications = None + + with pytest.raises( + ValueError, match="Calibration must have functional classifications defined for class validation." + ): + validate_and_standardize_calibration_classes_dataframe(mock_db, mock_score_set, mock_calibration, input_df) + + def test_validate_and_standardize_calibration_classes_dataframe_unexpected_column(self, mock_dependencies): + """Test ValidationError when unexpected column is present.""" + mock_db = Mock() + mock_score_set = Mock() + mock_calibration = Mock() + mock_calibration.class_based = True + + input_df = pd.DataFrame( + { + calibration_variant_column_name: ["var1"], + calibration_class_column_name: ["A"], + "extra1": ["X"], + "extra2": ["Y"], + } + ) + standardized_df = pd.DataFrame( + { + calibration_variant_column_name: ["var1"], + calibration_class_column_name: ["A"], + "extra1": ["X"], + "extra2": ["Y"], + } + ) + + mock_dependencies["standardize_dataframe"].return_value = standardized_df + + with pytest.raises(ValidationError, match="unexpected column\(s\) in calibration classes file: extra1, extra2"): + validate_and_standardize_calibration_classes_dataframe(mock_db, mock_score_set, mock_calibration, input_df) + + def test_validate_and_standardize_calibration_classes_dataframe_variant_column_validation_fails( + self, mock_dependencies + ): + """Test ValidationError when variant column validation fails.""" + mock_db = Mock() + mock_score_set = Mock() + mock_calibration = Mock() + mock_calibration.class_based = True + + input_df = pd.DataFrame({calibration_variant_column_name: ["var1"], calibration_class_column_name: ["A"]}) + standardized_df = pd.DataFrame( + {calibration_variant_column_name: ["var1"], calibration_class_column_name: ["A"]} + ) + + mock_dependencies["standardize_dataframe"].return_value = standardized_df + mock_dependencies["validate_variant_column"].side_effect = ValidationError("invalid variant column") + + with pytest.raises(ValidationError, match="invalid variant column"): + validate_and_standardize_calibration_classes_dataframe(mock_db, mock_score_set, mock_calibration, input_df) + + def test_validate_and_standardize_calibration_classes_dataframe_data_column_validation_fails( + self, mock_dependencies + ): + """Test ValidationError when data column validation fails.""" + mock_db = Mock() + mock_score_set = Mock() + mock_score_set.id = 123 + mock_calibration = Mock() + mock_calibration.class_based = True + + input_df = pd.DataFrame({calibration_variant_column_name: ["var1"], calibration_class_column_name: ["A"]}) + standardized_df = pd.DataFrame( + {calibration_variant_column_name: ["var1"], calibration_class_column_name: ["A"]} + ) + + mock_dependencies["standardize_dataframe"].return_value = standardized_df + mock_dependencies["validate_data_column"].side_effect = 
ValidationError("invalid data column") + + mock_scalars = Mock() + mock_scalars.all.return_value = ["var1"] + mock_db.scalars.return_value = mock_scalars + + with pytest.raises(ValidationError, match="invalid data column"): + validate_and_standardize_calibration_classes_dataframe(mock_db, mock_score_set, mock_calibration, input_df) + + def test_validate_and_standardize_calibration_classes_dataframe_mixed_case_columns(self, mock_dependencies): + """Test successful validation with mixed case column names.""" + mock_db = Mock() + mock_score_set = Mock() + mock_score_set.id = 123 + mock_calibration = Mock() + mock_calibration.class_based = True + + input_df = pd.DataFrame( + {calibration_variant_column_name: ["var1"], calibration_class_column_name.upper(): ["A"]} + ) + standardized_df = pd.DataFrame( + {calibration_variant_column_name: ["var1"], calibration_class_column_name: ["A"]} + ) + + mock_dependencies["standardize_dataframe"].return_value = standardized_df + + mock_scalars = Mock() + mock_scalars.all.return_value = ["var1"] + mock_db.scalars.return_value = mock_scalars + + mock_classification = Mock() + mock_classification.class_ = "A" + mock_calibration.functional_classifications = [mock_classification] + + result = validate_and_standardize_calibration_classes_dataframe( + mock_db, mock_score_set, mock_calibration, input_df + ) + + assert result.equals(standardized_df) + mock_dependencies["validate_data_column"].assert_called_once_with( + standardized_df[calibration_class_column_name], force_numeric=False + ) + + def test_validate_and_standardize_calibration_classes_dataframe_with_score_calibration_modify( + self, mock_dependencies + ): + """Test function works with ScoreCalibrationModify object.""" + mock_db = Mock() + mock_score_set = Mock() + mock_score_set.id = 123 + mock_calibration = Mock(spec=score_calibration.ScoreCalibrationModify) + mock_calibration.class_based = True + + input_df = pd.DataFrame({calibration_variant_column_name: ["var1"], calibration_class_column_name: ["A"]}) + standardized_df = pd.DataFrame( + {calibration_variant_column_name: ["var1"], calibration_class_column_name: ["A"]} + ) + + mock_dependencies["standardize_dataframe"].return_value = standardized_df + + mock_scalars = Mock() + mock_scalars.all.return_value = ["var1"] + mock_db.scalars.return_value = mock_scalars + + mock_classification = Mock() + mock_classification.class_ = "A" + mock_calibration.functional_classifications = [mock_classification] + + result = validate_and_standardize_calibration_classes_dataframe( + mock_db, mock_score_set, mock_calibration, input_df + ) + + assert result.equals(standardized_df) + + def test_validate_and_standardize_calibration_classes_dataframe_empty_dataframe(self, mock_dependencies): + """Test ValidationError with empty dataframe.""" + mock_db = Mock() + mock_score_set = Mock() + mock_calibration = Mock() + mock_calibration.class_based = True + + input_df = pd.DataFrame() + standardized_df = pd.DataFrame() + + mock_dependencies["standardize_dataframe"].return_value = standardized_df + + with pytest.raises(ValidationError, match=f"missing required column: '{calibration_variant_column_name}'"): + validate_and_standardize_calibration_classes_dataframe(mock_db, mock_score_set, mock_calibration, input_df) + + +class TestValidateCalibrationDfColumnNames: + """Test suite for validate_calibration_df_column_names function.""" + + def test_validate_calibration_df_column_names_success(self): + """Test successful validation with correct column names.""" + df = pd.DataFrame( + 
{calibration_variant_column_name: ["var1", "var2"], calibration_class_column_name: ["A", "B"]} + ) + + validate_calibration_df_column_names(df) + + def test_validate_calibration_df_column_names_case_insensitive(self): + """Test successful validation with different case column names.""" + df = pd.DataFrame( + { + calibration_variant_column_name.upper(): ["var1", "var2"], + calibration_class_column_name.upper(): ["A", "B"], + } + ) + + validate_calibration_df_column_names(df) + + def test_validate_calibration_df_column_names_mixed_case(self): + """Test successful validation with mixed case column names.""" + df = pd.DataFrame( + { + calibration_variant_column_name.capitalize(): ["var1", "var2"], + calibration_class_column_name.capitalize(): ["A", "B"], + } + ) + + validate_calibration_df_column_names(df) + + def test_validate_calibration_df_column_names_non_string_columns(self): + """Test ValidationError when column names are not strings.""" + df = pd.DataFrame({123: ["var1", "var2"], calibration_class_column_name: ["A", "B"]}) + + # Act & Assert + with pytest.raises(ValidationError, match="column names must be strings"): + validate_calibration_df_column_names(df) + + def test_validate_calibration_df_column_names_empty_column_name(self): + """Test ValidationError when column names are empty.""" + df = pd.DataFrame(columns=["", calibration_variant_column_name]) + + # Act & Assert + with pytest.raises(ValidationError, match="column names cannot be empty or whitespace"): + validate_calibration_df_column_names(df) + + def test_validate_calibration_df_column_names_whitespace_column_name(self): + """Test ValidationError when column names contain only whitespace.""" + df = pd.DataFrame(columns=[" ", calibration_class_column_name]) + + # Act & Assert + with pytest.raises(ValidationError, match="column names cannot be empty or whitespace"): + validate_calibration_df_column_names(df) + + def test_validate_calibration_df_column_names_tab_whitespace(self): + """Test ValidationError when column names contain only tab characters.""" + df = pd.DataFrame(columns=["\t\t", calibration_class_column_name]) + + # Act & Assert + with pytest.raises(ValidationError, match="column names cannot be empty or whitespace"): + validate_calibration_df_column_names(df) + + def test_validate_calibration_df_column_names_missing_variant_column(self): + """Test ValidationError when variant column is missing.""" + df = pd.DataFrame({calibration_class_column_name: ["A", "B"], "other": ["X", "Y"]}) + + # Act & Assert + with pytest.raises(ValidationError, match=f"missing required column: '{calibration_variant_column_name}'"): + validate_calibration_df_column_names(df) + + def test_validate_calibration_df_column_names_missing_class_column(self): + """Test ValidationError when class column is missing.""" + df = pd.DataFrame({calibration_variant_column_name: ["var1", "var2"], "other": ["X", "Y"]}) + + # Act & Assert + with pytest.raises(ValidationError, match=f"missing required column: '{calibration_class_column_name}'"): + validate_calibration_df_column_names(df) + + def test_validate_calibration_df_column_names_missing_both_required_columns(self): + """Test ValidationError when both required columns are missing.""" + df = pd.DataFrame({"other1": ["X", "Y"], "other2": ["A", "B"]}) + + # Act & Assert + with pytest.raises(ValidationError, match=f"missing required column: '{calibration_variant_column_name}'"): + validate_calibration_df_column_names(df) + + def test_validate_calibration_df_column_names_unexpected_extra_columns(self): + 
"""Test ValidationError when unexpected columns are present.""" + df = pd.DataFrame( + { + calibration_variant_column_name: ["var1", "var2"], + calibration_class_column_name: ["A", "B"], + "extra_column": ["X", "Y"], + } + ) + + # Act & Assert + with pytest.raises(ValidationError, match="unexpected column\(s\) in calibration classes file: extra_column"): + validate_calibration_df_column_names(df) + + def test_validate_calibration_df_column_names_fewer_than_expected_columns(self): + """Test ValidationError when fewer columns than expected are present.""" + df = pd.DataFrame({calibration_variant_column_name: ["var1", "var2"]}) + + # Act & Assert + with pytest.raises(ValidationError, match=f"missing required column: '{calibration_class_column_name}'"): + validate_calibration_df_column_names(df) + + def test_validate_calibration_df_column_names_duplicate_columns_different_case(self): + """Test ValidationError when duplicate columns exist with different cases.""" + df = pd.DataFrame( + columns=[ + calibration_variant_column_name, + calibration_variant_column_name.upper(), + calibration_class_column_name, + ] + ) + + # Act & Assert + with pytest.raises(ValidationError, match="duplicate column names are not allowed \(case-insensitive\)"): + validate_calibration_df_column_names(df) + + def test_validate_calibration_df_column_names_empty_dataframe(self): + """Test ValidationError when dataframe has no columns.""" + df = pd.DataFrame() + + # Act & Assert + with pytest.raises(ValidationError, match=f"missing required column: '{calibration_variant_column_name}'"): + validate_calibration_df_column_names(df) + + def test_validate_calibration_df_column_names_with_numeric_and_string_mix(self): + """Test ValidationError when columns mix numeric and string types.""" + df = pd.DataFrame(columns=["variant", 42.5]) + + # Act & Assert + with pytest.raises(ValidationError, match="column names must be strings"): + validate_calibration_df_column_names(df) + + def test_validate_calibration_df_column_names_newline_in_whitespace(self): + """Test ValidationError when column names contain newline characters.""" + df = pd.DataFrame(columns=["\n\n", "class"]) + + # Act & Assert + with pytest.raises(ValidationError, match="column names cannot be empty or whitespace"): + validate_calibration_df_column_names(df) + + +class TestValidateCalibrationVariantUrns: + """Test suite for validate_calibration_variant_urns function.""" + + def test_validate_calibration_variant_urns_success(self): + """Test successful validation when all variant URNs exist in score set.""" + mock_db = Mock() + mock_scalars = Mock() + mock_scalars.all.return_value = ["urn:variant:1", "urn:variant:2", "urn:variant:3"] + mock_db.scalars.return_value = mock_scalars + + mock_score_set = Mock() + mock_score_set.id = 123 + + variant_urns = pd.Series(["urn:variant:1", "urn:variant:2", "urn:variant:3"]) + + validate_calibration_variant_urns(mock_db, mock_score_set, variant_urns) + + mock_db.scalars.assert_called_once() + + def test_validate_calibration_variant_urns_missing_variants(self): + """Test ValidationError when some variant URNs don't exist in score set.""" + mock_db = Mock() + mock_scalars = Mock() + mock_scalars.all.return_value = ["urn:variant:1", "urn:variant:2"] + mock_db.scalars.return_value = mock_scalars + + mock_score_set = Mock() + mock_score_set.id = 123 + + variant_urns = pd.Series(["urn:variant:1", "urn:variant:2", "urn:variant:3"]) + + # Act & Assert + with pytest.raises( + ValidationError, match="The following variant URNs do not exist in 
the score set: urn:variant:3" + ): + validate_calibration_variant_urns(mock_db, mock_score_set, variant_urns) + + def test_validate_calibration_variant_urns_multiple_missing_variants(self): + """Test ValidationError when multiple variant URNs don't exist in score set.""" + mock_db = Mock() + mock_scalars = Mock() + mock_scalars.all.return_value = ["urn:variant:1"] + mock_db.scalars.return_value = mock_scalars + + mock_score_set = Mock() + mock_score_set.id = 456 + + variant_urns = pd.Series(["urn:variant:1", "urn:variant:2", "urn:variant:3"]) + + # Act & Assert + with pytest.raises( + ValidationError, + match="The following variant URNs do not exist in the score set: urn:variant:2, urn:variant:3", + ): + validate_calibration_variant_urns(mock_db, mock_score_set, variant_urns) + + def test_validate_calibration_variant_urns_all_missing(self): + """Test ValidationError when all variant URNs are missing from score set.""" + mock_db = Mock() + mock_scalars = Mock() + mock_scalars.all.return_value = [] + mock_db.scalars.return_value = mock_scalars + + mock_score_set = Mock() + mock_score_set.id = 789 + + variant_urns = pd.Series(["urn:variant:1", "urn:variant:2"]) + + # Act & Assert + with pytest.raises( + ValidationError, + match="The following variant URNs do not exist in the score set: urn:variant:1, urn:variant:2", + ): + validate_calibration_variant_urns(mock_db, mock_score_set, variant_urns) + + def test_validate_calibration_variant_urns_empty_series(self): + """Test successful validation with empty variant URNs series.""" + mock_db = Mock() + mock_scalars = Mock() + mock_scalars.all.return_value = [] + mock_db.scalars.return_value = mock_scalars + + mock_score_set = Mock() + mock_score_set.id = 123 + + variant_urns = pd.Series([], dtype=object) + + # Act & Assert - should not raise any exception + validate_calibration_variant_urns(mock_db, mock_score_set, variant_urns) + + def test_validate_calibration_variant_urns_single_variant(self): + """Test successful validation with single variant URN.""" + mock_db = Mock() + mock_scalars = Mock() + mock_scalars.all.return_value = ["urn:variant:single"] + mock_db.scalars.return_value = mock_scalars + + mock_score_set = Mock() + mock_score_set.id = 123 + + variant_urns = pd.Series(["urn:variant:single"]) + + # Act & Assert - should not raise any exception + validate_calibration_variant_urns(mock_db, mock_score_set, variant_urns) + + def test_validate_calibration_variant_urns_duplicate_urns_in_series(self): + """Test validation with duplicate URNs in input series.""" + mock_db = Mock() + mock_scalars = Mock() + mock_scalars.all.return_value = ["urn:variant:1", "urn:variant:2"] + mock_db.scalars.return_value = mock_scalars + + mock_score_set = Mock() + mock_score_set.id = 123 + + variant_urns = pd.Series(["urn:variant:1", "urn:variant:2", "urn:variant:1", "urn:variant:2"]) + + # Act & Assert - should not raise any exception + validate_calibration_variant_urns(mock_db, mock_score_set, variant_urns) + + def test_validate_calibration_variant_urns_database_query_parameters(self): + """Test that database query is constructed with correct parameters.""" + mock_db = Mock() + mock_scalars = Mock() + mock_scalars.all.return_value = ["urn:variant:1", "urn:variant:2"] + mock_db.scalars.return_value = mock_scalars + + mock_score_set = Mock() + mock_score_set.id = 999 + + variant_urns = pd.Series(["urn:variant:1", "urn:variant:2"]) + + validate_calibration_variant_urns(mock_db, mock_score_set, variant_urns) + + mock_db.scalars.assert_called_once() + + +class 
TestValidateCalibrationClasses: + """Test suite for validate_calibration_classes function.""" + + def test_validate_calibration_classes_success(self): + """Test successful validation when all classes match.""" + mock_classification1 = Mock() + mock_classification1.class_ = "class_a" + mock_classification2 = Mock() + mock_classification2.class_ = "class_b" + + calibration = Mock(spec=score_calibration.ScoreCalibrationCreate) + calibration.functional_classifications = [mock_classification1, mock_classification2] + + classes = pd.Series(["class_a", "class_b", "class_a"]) + + validate_calibration_classes(calibration, classes) + + def test_validate_calibration_classes_no_functional_classifications(self): + """Test ValueError when calibration has no functional classifications.""" + calibration = Mock(spec=score_calibration.ScoreCalibrationCreate) + calibration.functional_classifications = None + classes = pd.Series(["class_a", "class_b"]) + + with pytest.raises( + ValueError, match="Calibration must have functional classifications defined for class validation." + ): + validate_calibration_classes(calibration, classes) + + def test_validate_calibration_classes_empty_functional_classifications(self): + """Test ValueError when calibration has empty functional classifications.""" + calibration = Mock(spec=score_calibration.ScoreCalibrationCreate) + calibration.functional_classifications = [] + classes = pd.Series(["class_a", "class_b"]) + + with pytest.raises( + ValueError, match="Calibration must have functional classifications defined for class validation." + ): + validate_calibration_classes(calibration, classes) + + def test_validate_calibration_classes_undefined_classes_in_series(self): + """Test ValidationError when series contains undefined classes.""" + mock_classification = Mock() + mock_classification.class_ = "class_a" + + calibration = Mock(spec=score_calibration.ScoreCalibrationCreate) + calibration.functional_classifications = [mock_classification] + + classes = pd.Series(["class_a", "class_b", "class_c"]) + + with pytest.raises( + ValidationError, match="The following classes are not defined in the calibration: class_b, class_c" + ): + validate_calibration_classes(calibration, classes) + + def test_validate_calibration_classes_missing_defined_classes(self): + """Test ValidationError when defined classes are missing from series.""" + mock_classification1 = Mock() + mock_classification1.class_ = "class_a" + mock_classification2 = Mock() + mock_classification2.class_ = "class_b" + mock_classification3 = Mock() + mock_classification3.class_ = "class_c" + + calibration = Mock(spec=score_calibration.ScoreCalibrationCreate) + calibration.functional_classifications = [mock_classification1, mock_classification2, mock_classification3] + + classes = pd.Series(["class_a", "class_b"]) + + with pytest.raises( + ValidationError, match="Some defined classes in the calibration are missing from the classes file." 
+ ): + validate_calibration_classes(calibration, classes) + + def test_validate_calibration_classes_with_modify_object(self): + """Test function works with ScoreCalibrationModify object.""" + mock_classification = Mock() + mock_classification.class_ = "class_a" + + calibration = Mock(spec=score_calibration.ScoreCalibrationModify) + calibration.functional_classifications = [mock_classification] + + classes = pd.Series(["class_a"]) + + validate_calibration_classes(calibration, classes) + + def test_validate_calibration_classes_empty_series(self): + """Test ValidationError when classes series is empty but calibration has classifications.""" + mock_classification = Mock() + mock_classification.class_ = "class_a" + + calibration = Mock(spec=score_calibration.ScoreCalibrationCreate) + calibration.functional_classifications = [mock_classification] + + classes = pd.Series([], dtype=object) + + with pytest.raises( + ValidationError, match="Some defined classes in the calibration are missing from the classes file." + ): + validate_calibration_classes(calibration, classes) + + def test_validate_calibration_classes_duplicate_classes_in_series(self): + """Test successful validation with duplicate classes in series.""" + mock_classification1 = Mock() + mock_classification1.class_ = "class_a" + mock_classification2 = Mock() + mock_classification2.class_ = "class_b" + + calibration = Mock(spec=score_calibration.ScoreCalibrationCreate) + calibration.functional_classifications = [mock_classification1, mock_classification2] + + classes = pd.Series(["class_a", "class_a", "class_b", "class_b", "class_a"]) + + validate_calibration_classes(calibration, classes) + + def test_validate_calibration_classes_single_class(self): + """Test successful validation with single class.""" + mock_classification = Mock() + mock_classification.class_ = "single_class" + + calibration = Mock(spec=score_calibration.ScoreCalibrationCreate) + calibration.functional_classifications = [mock_classification] + + classes = pd.Series(["single_class", "single_class"]) + + validate_calibration_classes(calibration, classes) From aec2010a7d26e4f59d9f573eaf72de73d5c58fc6 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Mon, 24 Nov 2025 11:49:41 -0800 Subject: [PATCH 12/24] feat: allow creation and modification of class based calibrations - Added router functionality for validation and standardization of class based calibration files. - Added lib functionality for creation/modification of class based calibrations. - Invoked lib functionality from routers to allow client creation/modification of class based calibrations. - Introduced a new CSV file `calibration_classes.csv` containing variant URNs and their corresponding class names. - Implemented tests for creating and updating score calibrations using class-based classifications. - Enhanced existing test suite with parameterized tests to validate score calibration creation and modification. - Ensured that the response includes correct functional classifications and variant counts. 
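An illustrative calibration classes file uses the two-column layout enforced by the new
validation module (headers taken from calibration_variant_column_name and
calibration_class_column_name; the URNs and class names below are placeholders, not the
contents of the bundled fixture):

    variant_urn,class_name
    urn:variant:1,normal
    urn:variant:2,abnormal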
--- src/mavedb/lib/score_calibrations.py | 307 +++++---- src/mavedb/lib/score_sets.py | 9 +- src/mavedb/routers/score_calibrations.py | 224 ++++++- tests/lib/test_score_calibrations.py | 724 +++++++++++++++++++-- tests/routers/data/calibration_classes.csv | 4 + tests/routers/test_score_calibrations.py | 176 +++++ 6 files changed, 1259 insertions(+), 185 deletions(-) create mode 100644 tests/routers/data/calibration_classes.csv diff --git a/src/mavedb/lib/score_calibrations.py b/src/mavedb/lib/score_calibrations.py index 1fa00068..82bff826 100644 --- a/src/mavedb/lib/score_calibrations.py +++ b/src/mavedb/lib/score_calibrations.py @@ -1,13 +1,15 @@ """Utilities for building and mutating score calibration ORM objects.""" import math -from typing import Union +from typing import Optional, Union +import pandas as pd from sqlalchemy import Float, and_, select from sqlalchemy.orm import Session from mavedb.lib.acmg import find_or_create_acmg_classification from mavedb.lib.identifiers import find_or_create_publication_identifier +from mavedb.lib.validation.constants.general import calibration_class_column_name, calibration_variant_column_name from mavedb.lib.validation.utilities import inf_or_float from mavedb.models.enums.score_calibration_relation import ScoreCalibrationRelation from mavedb.models.score_calibration import ScoreCalibration @@ -25,6 +27,7 @@ def create_functional_classification( score_calibration.FunctionalClassificationCreate, score_calibration.FunctionalClassificationModify ], containing_calibration: ScoreCalibration, + variant_classes: Optional[dict[str, list[str]]] = None, ) -> ScoreCalibrationFunctionalClassification: """ Create a functional classification entity for score calibration. @@ -38,6 +41,9 @@ def create_functional_classification( Input data containing the functional range parameters including label, description, range bounds, inclusivity flags, and optional ACMG classification information. + containing_calibration (ScoreCalibration): The ScoreCalibration instance. + variant_classes (Optional[dict[str, list[str]]]): Optional dictionary mapping variant classes + to their corresponding variant identifiers. 
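+            For example (an illustrative shape; the URNs below are placeholders), a class-based
+            calibration might receive {"normal": ["urn:variant:1"], "abnormal": ["urn:variant:2"]}.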
Returns: ScoreCalibrationFunctionalClassification: The newly created functional @@ -63,6 +69,7 @@ def create_functional_classification( label=functional_range_create.label, description=functional_range_create.description, range=functional_range_create.range, + class_=functional_range_create.class_, inclusive_lower_bound=functional_range_create.inclusive_lower_bound, inclusive_upper_bound=functional_range_create.inclusive_upper_bound, acmg_classification=acmg_classification, @@ -73,14 +80,20 @@ def create_functional_classification( calibration=containing_calibration, ) - contained_variants = variants_for_functional_classification(db, functional_classification, use_sql=True) + contained_variants = variants_for_functional_classification( + db, functional_classification, variant_classes=variant_classes, use_sql=True + ) functional_classification.variants = contained_variants return functional_classification async def _create_score_calibration( - db: Session, calibration_create: score_calibration.ScoreCalibrationCreate, user: User + db: Session, + calibration_create: score_calibration.ScoreCalibrationCreate, + user: User, + variant_classes: Optional[dict[str, list[str]]] = None, + containing_score_set: Optional[ScoreSet] = None, ) -> ScoreCalibration: """ Create a ScoreCalibration ORM instance (not yet persisted) together with its @@ -170,9 +183,13 @@ async def _create_score_calibration( modified_by=user, ) # type: ignore[call-arg] + if containing_score_set: + calibration.score_set = containing_score_set + calibration.score_set_id = containing_score_set.id + for functional_range_create in calibration_create.functional_classifications or []: persisted_functional_range = create_functional_classification( - db, functional_range_create, containing_calibration=calibration + db, functional_range_create, containing_calibration=calibration, variant_classes=variant_classes ) db.add(persisted_functional_range) calibration.functional_classifications.append(persisted_functional_range) @@ -181,7 +198,10 @@ async def _create_score_calibration( async def create_score_calibration_in_score_set( - db: Session, calibration_create: score_calibration.ScoreCalibrationCreate, user: User + db: Session, + calibration_create: score_calibration.ScoreCalibrationCreate, + user: User, + variant_classes: Optional[dict[str, list[str]]] = None, ) -> ScoreCalibration: """ Create a new score calibration and associate it with an existing score set. @@ -197,6 +217,8 @@ async def create_score_calibration_in_score_set( object containing the fields required to create a score calibration. Must include a non-empty score_set_urn. user (User): Authenticated user information used for auditing + variant_classes (Optional[dict[str, list[str]]]): Optional dictionary mapping variant classes + to their corresponding variant identifiers. 
Returns: ScoreCalibration: The newly created and persisted score calibration object with its @@ -219,8 +241,7 @@ async def create_score_calibration_in_score_set( raise ValueError("score_set_urn must be provided to create a score calibration within a score set.") containing_score_set = db.query(ScoreSet).where(ScoreSet.urn == calibration_create.score_set_urn).one() - calibration = await _create_score_calibration(db, calibration_create, user) - calibration.score_set = containing_score_set + calibration = await _create_score_calibration(db, calibration_create, user, variant_classes, containing_score_set) if user.username in [contributor.orcid_id for contributor in containing_score_set.contributors] + [ containing_score_set.created_by.username, @@ -235,7 +256,10 @@ async def create_score_calibration_in_score_set( async def create_score_calibration( - db: Session, calibration_create: score_calibration.ScoreCalibrationCreate, user: User + db: Session, + calibration_create: score_calibration.ScoreCalibrationCreate, + user: User, + variant_classes: Optional[dict[str, list[str]]] = None, ) -> ScoreCalibration: """ Asynchronously create and persist a new ScoreCalibration record. @@ -253,6 +277,8 @@ async def create_score_calibration( score set identifiers). user : User Authenticated user context; the user to be recorded for audit + variant_classes (Optional[dict[str, list[str]]]): Optional dictionary mapping variant classes + to their corresponding variant identifiers. Returns ------- @@ -284,7 +310,9 @@ async def create_score_calibration( if calibration_create.score_set_urn: raise ValueError("score_set_urn must not be provided to create a score calibration outside a score set.") - created_calibration = await _create_score_calibration(db, calibration_create, user) + created_calibration = await _create_score_calibration( + db, calibration_create, user, variant_classes, containing_score_set=None + ) db.add(created_calibration) return created_calibration @@ -295,76 +323,79 @@ async def modify_score_calibration( calibration: ScoreCalibration, calibration_update: score_calibration.ScoreCalibrationModify, user: User, + variant_classes: Optional[dict[str, list[str]]] = None, ) -> ScoreCalibration: """ - Asynchronously modify an existing ScoreCalibration record and its related publication - identifier associations. - - This function: - 1. Validates that a score_set_urn is provided in the update model (raises ValueError if absent). - 2. Loads (via SELECT ... WHERE urn = :score_set_urn) the ScoreSet that will contain the calibration. - 3. Reconciles publication identifier associations for three relation categories: - - threshold_sources -> ScoreCalibrationRelation.threshold - - classification_sources -> ScoreCalibrationRelation.classification - - method_sources -> ScoreCalibrationRelation.method - For each provided source identifier: - * Calls find_or_create_publication_identifier to obtain (or persist) the identifier row. - * Preserves an existing association if already present. - * Creates a new association if missing. - Any previously existing associations not referenced in the update are deleted from the session. - 4. Updates mutable scalar fields on the calibration instance from calibration_update, excluding: - threshold_sources, classification_sources, method_sources, created_at, created_by, - modified_at, modified_by. - 5. Reassigns the calibration to the resolved ScoreSet, replaces its association collection, - and stamps modified_by with the requesting user. - 6. 
Adds the modified calibration back into the SQLAlchemy session and returns it (no commit). - - Parameters - ---------- - db : Session - An active SQLAlchemy session (synchronous engine session used within an async context). - calibration : ScoreCalibration - The existing calibration ORM instance to be modified (must be persistent or pending). - del carrying updated field values plus source identifier lists: - - score_set_urn (required) - - threshold_sources, classification_sources, method_sources (iterables of identifier objects) - - Additional mutable calibration attributes. - user : User - Context for the authenticated user; the user to be recorded for audit. - - Returns - ------- - ScoreCalibration - The in-memory (and session-added) updated calibration instance. Changes are not committed. - - Raises - ------ - ValueError - If score_set_urn is missing in the update model. - sqlalchemy.orm.exc.NoResultFound - If no ScoreSet exists with the provided URN. - sqlalchemy.orm.exc.MultipleResultsFound - If more than one ScoreSet matches the provided URN. - Any exception raised by find_or_create_publication_identifier - If identifier resolution/creation fails. - - Side Effects - ------------ - - Issues SELECT statements for the ScoreSet and publication identifiers. - - May INSERT new publication identifiers and association rows. - - May DELETE association rows no longer referenced. - - Mutates the provided calibration object in-place. - - Concurrency / Consistency Notes - ------------------------------- - The reconciliation of associations assumes no concurrent modification of the same calibration's - association set within the active transaction. To prevent races leading to duplicate associations, - enforce appropriate transaction isolation or unique constraints at the database level. - - Commit Responsibility - --------------------- - This function does NOT call commit or flush explicitly; the caller is responsible for committing - the session to persist changes. + Asynchronously modify an existing ScoreCalibration record and its related publication + identifier associations. + + This function: + 1. Validates that a score_set_urn is provided in the update model (raises ValueError if absent). + 2. Loads (via SELECT ... WHERE urn = :score_set_urn) the ScoreSet that will contain the calibration. + 3. Reconciles publication identifier associations for three relation categories: + - threshold_sources -> ScoreCalibrationRelation.threshold + - classification_sources -> ScoreCalibrationRelation.classification + - method_sources -> ScoreCalibrationRelation.method + For each provided source identifier: + * Calls find_or_create_publication_identifier to obtain (or persist) the identifier row. + * Preserves an existing association if already present. + * Creates a new association if missing. + Any previously existing associations not referenced in the update are deleted from the session. + 4. Updates mutable scalar fields on the calibration instance from calibration_update, excluding: + threshold_sources, classification_sources, method_sources, created_at, created_by, + modified_at, modified_by. + 5. Reassigns the calibration to the resolved ScoreSet, replaces its association collection, + and stamps modified_by with the requesting user. + 6. Adds the modified calibration back into the SQLAlchemy session and returns it (no commit). + + Parameters + ---------- + db : Session + An active SQLAlchemy session (synchronous engine session used within an async context). 
+ calibration : ScoreCalibration + The existing calibration ORM instance to be modified (must be persistent or pending). + calibration_update : score_calibration.ScoreCalibrationModify + - score_set_urn (required) + - threshold_sources, classification_sources, method_sources (iterables of identifier objects) + - Additional mutable calibration attributes. + user : User + Context for the authenticated user; the user to be recorded for audit. + variant_classes (Optional[dict[str, list[str]]]): Optional dictionary mapping variant classes + to their corresponding variant identifiers. + + Returns + ------- + ScoreCalibration + The in-memory (and session-added) updated calibration instance. Changes are not committed. + + Raises + ------ + ValueError + If score_set_urn is missing in the update model. + sqlalchemy.orm.exc.NoResultFound + If no ScoreSet exists with the provided URN. + sqlalchemy.orm.exc.MultipleResultsFound + If more than one ScoreSet matches the provided URN. + Any exception raised by find_or_create_publication_identifier + If identifier resolution/creation fails. + + Side Effects + ------------ + - Issues SELECT statements for the ScoreSet and publication identifiers. + - May INSERT new publication identifiers and association rows. + - May DELETE association rows no longer referenced. + - Mutates the provided calibration object in-place. + + Concurrency / Consistency Notes + ------------------------------- + The reconciliation of associations assumes no concurrent modification of the same calibration's + association set within the active transaction. To prevent races leading to duplicate associations, + enforce appropriate transaction isolation or unique constraints at the database level. + + Commit Responsibility + --------------------- + This function does NOT call commit or flush explicitly; the caller is responsible for committing + the session to persist changes. """ if not calibration_update.score_set_urn: @@ -411,6 +442,7 @@ async def modify_score_calibration( for functional_classification in calibration.functional_classifications: db.delete(functional_classification) calibration.functional_classifications.clear() + db.flush() db.refresh(calibration) @@ -429,12 +461,13 @@ async def modify_score_calibration( setattr(calibration, attr, value) calibration.score_set = containing_score_set + calibration.score_set_id = containing_score_set.id calibration.publication_identifier_associations = updated_assocs calibration.modified_by = user for functional_range_update in calibration_update.functional_classifications or []: persisted_functional_range = create_functional_classification( - db, functional_range_update, containing_calibration=calibration + db, functional_range_update, variant_classes=variant_classes, containing_calibration=calibration ) db.add(persisted_functional_range) calibration.functional_classifications.append(persisted_functional_range) @@ -612,9 +645,11 @@ def delete_score_calibration(db: Session, calibration: ScoreCalibration) -> None def variants_for_functional_classification( db: Session, functional_classification: ScoreCalibrationFunctionalClassification, + variant_classes: Optional[dict[str, list[str]]] = None, use_sql: bool = False, ) -> list[Variant]: - """Return variants in the parent score set whose numeric score falls inside the + """ + Return variants in the parent score set whose numeric score falls inside the functional classification's range. 
The variant score is extracted from the JSONB ``Variant.data`` field using @@ -629,6 +664,9 @@ def variants_for_functional_classification( Active SQLAlchemy session. functional_classification : ScoreCalibrationFunctionalClassification The ORM row defining the interval to test against. + variant_classes : Optional[dict[str, list[str]]] + If provided, a dictionary mapping variant classes to their corresponding variant identifiers + to use for classification rather than the range property of the functional_classification. use_sql : bool When True, perform filtering in the database using JSONB extraction and range predicates; falls back to Python filtering if an error occurs. @@ -648,34 +686,39 @@ def variants_for_functional_classification( * If ``functional_classification.range`` is ``None`` an empty list is returned immediately. """ - if not functional_classification.range: - return [] - # Resolve score set id from attached calibration (relationship may be lazy) score_set_id = functional_classification.calibration.score_set_id # type: ignore[attr-defined] - if use_sql: try: # Build score extraction expression: data['score_data']['score']::text::float score_expr = Variant.data["score_data"]["score"].astext.cast(Float) - lower_raw, upper_raw = functional_classification.range - - # Convert 'inf' sentinels (or None) to float infinities for condition omission. - lower_bound = inf_or_float(lower_raw, lower=True) - upper_bound = inf_or_float(upper_raw, lower=False) - conditions = [Variant.score_set_id == score_set_id] - if not math.isinf(lower_bound): - if functional_classification.inclusive_lower_bound: - conditions.append(score_expr >= lower_bound) - else: - conditions.append(score_expr > lower_bound) - if not math.isinf(upper_bound): - if functional_classification.inclusive_upper_bound: - conditions.append(score_expr <= upper_bound) - else: - conditions.append(score_expr < upper_bound) + if variant_classes is not None and functional_classification.class_ is not None: + variant_urns = variant_classes.get(functional_classification.class_, []) + conditions.append(Variant.urn.in_(variant_urns)) + + elif functional_classification.range is not None and len(functional_classification.range) == 2: + lower_raw, upper_raw = functional_classification.range + + # Convert 'inf' sentinels (or None) to float infinities for condition omission. + lower_bound = inf_or_float(lower_raw, lower=True) + upper_bound = inf_or_float(upper_raw, lower=False) + + if not math.isinf(lower_bound): + if functional_classification.inclusive_lower_bound: + conditions.append(score_expr >= lower_bound) + else: + conditions.append(score_expr > lower_bound) + if not math.isinf(upper_bound): + if functional_classification.inclusive_upper_bound: + conditions.append(score_expr <= upper_bound) + else: + conditions.append(score_expr < upper_bound) + + else: + # No usable classification mechanism; return empty list. 
+ return [] stmt = select(Variant).where(and_(*conditions)) return list(db.execute(stmt).scalars()) @@ -688,21 +731,63 @@ def variants_for_functional_classification( variants = db.execute(select(Variant).where(Variant.score_set_id == score_set_id)).scalars().all() matches: list[Variant] = [] for v in variants: - try: - container = v.data.get("score_data") if isinstance(v.data, dict) else None - if not container or not isinstance(container, dict): - continue + if variant_classes is not None and functional_classification.class_ is not None: + variant_urns = variant_classes.get(functional_classification.class_, []) + if v.urn in variant_urns: + matches.append(v) - raw = container.get("score") - if raw is None: - continue + elif functional_classification.range is not None and len(functional_classification.range) == 2: + try: + container = v.data.get("score_data") if isinstance(v.data, dict) else None + if not container or not isinstance(container, dict): + continue - score = float(raw) + raw = container.get("score") + if raw is None: + continue - except Exception: # noqa: BLE001 - continue + score = float(raw) - if functional_classification.score_is_contained_in_range(score): - matches.append(v) + except Exception: # noqa: BLE001 + continue + + if functional_classification.score_is_contained_in_range(score): + matches.append(v) return matches + + +def variant_classification_df_to_dict( + df: pd.DataFrame, +) -> dict[str, list[str]]: + """ + Convert a DataFrame of variant classifications into a dictionary mapping + functional class labels to lists of distinct variant URNs. + + The input DataFrame is expected to have at least two columns: + - The unique identifier for each variant (given by calibration_variant_column_name). + - The functional classification label for each variant (given by calibration_class_column_name). + + Parameters + ---------- + df : pd.DataFrame + DataFrame containing variant classifications with 'variant_urn' and + 'functional_class' columns. + + Returns + ------- + dict[str, list[str]] + A dictionary where keys are functional class labels and values are lists + of distinct variant URNs belonging to each class. 
+ """ + classification_dict: dict[str, list[str]] = {} + for _, row in df.iterrows(): + variant_urn = row[calibration_variant_column_name] + functional_class = row[calibration_class_column_name] + + if functional_class not in classification_dict: + classification_dict[functional_class] = [] + + classification_dict[functional_class].append(variant_urn) + + return {k: list(set(v)) for k, v in classification_dict.items()} diff --git a/src/mavedb/lib/score_sets.py b/src/mavedb/lib/score_sets.py index 190d7b42..c019e8fc 100644 --- a/src/mavedb/lib/score_sets.py +++ b/src/mavedb/lib/score_sets.py @@ -1100,7 +1100,7 @@ def bulk_create_urns(n, score_set, reset_counter=False) -> list[str]: return child_urns -def csv_data_to_df(file_data: BinaryIO) -> pd.DataFrame: +def csv_data_to_df(file_data: BinaryIO, induce_hgvs_cols: bool = True) -> pd.DataFrame: extra_na_values = list( set( list(null_values_list) @@ -1121,9 +1121,10 @@ def csv_data_to_df(file_data: BinaryIO) -> pd.DataFrame: dtype={**{col: str for col in HGVSColumns.options()}, "scores": float}, ) - for c in HGVSColumns.options(): - if c not in ingested_df.columns: - ingested_df[c] = np.NaN + if induce_hgvs_cols: + for c in HGVSColumns.options(): + if c not in ingested_df.columns: + ingested_df[c] = np.NaN return ingested_df diff --git a/src/mavedb/routers/score_calibrations.py b/src/mavedb/routers/score_calibrations.py index d5bceb88..f5c30875 100644 --- a/src/mavedb/routers/score_calibrations.py +++ b/src/mavedb/routers/score_calibrations.py @@ -1,12 +1,13 @@ import logging from typing import Optional -from fastapi import APIRouter, Depends, HTTPException, Query +from fastapi import APIRouter, Depends, File, HTTPException, Query, UploadFile from sqlalchemy.orm import Session, selectinload from mavedb import deps from mavedb.lib.authentication import UserData, get_current_user from mavedb.lib.authorization import require_current_user +from mavedb.lib.flexible_model_loader import json_or_form_loader from mavedb.lib.logging import LoggedRoute from mavedb.lib.logging.context import ( logging_context, @@ -20,7 +21,11 @@ modify_score_calibration, promote_score_calibration_to_primary, publish_score_calibration, + variant_classification_df_to_dict, ) +from mavedb.lib.score_sets import csv_data_to_df +from mavedb.lib.validation.constants.general import calibration_class_column_name, calibration_variant_column_name +from mavedb.lib.validation.dataframe.calibration import validate_and_standardize_calibration_classes_dataframe from mavedb.models.score_calibration import ScoreCalibration from mavedb.models.score_set import ScoreSet from mavedb.view_models import score_calibration @@ -29,11 +34,22 @@ router = APIRouter( prefix="/api/v1/score-calibrations", - tags=["score-calibrations"], + tags=["Score Calibrations"], responses={404: {"description": "Not found"}}, route_class=LoggedRoute, ) +# Create dependency loaders for flexible JSON/form parsing +calibration_create_loader = json_or_form_loader( + score_calibration.ScoreCalibrationCreate, + field_name="calibration_json", +) + +calibration_modify_loader = json_or_form_loader( + score_calibration.ScoreCalibrationModify, + field_name="calibration_json", +) + @router.get( "/{urn}", @@ -162,19 +178,95 @@ async def get_primary_score_calibrations_for_score_set( @router.post( "/", response_model=score_calibration.ScoreCalibrationWithScoreSetUrn, - responses={404: {}}, + responses={404: {}, 422: {"description": "Validation Error"}}, + openapi_extra={ + "requestBody": { + "content": { + "application/json": { + 
"schema": {"$ref": "#/components/schemas/ScoreCalibrationCreate"}, + }, + "multipart/form-data": { + "schema": { + "type": "object", + "properties": { + "calibration_json": { + "type": "string", + "description": "JSON string containing the calibration data", + "example": '{"score_set_urn":"urn:mavedb:0000000X-X-X","title":"My Calibration","description":"Functional score calibration","baseline_score":1.0}', + }, + "classes_file": { + "type": "string", + "format": "binary", + "description": "CSV file containing variant classifications", + }, + }, + } + }, + }, + "description": "Score calibration data. Can be sent as JSON body or multipart form data", + } + }, ) async def create_score_calibration_route( *, - calibration: score_calibration.ScoreCalibrationCreate, + calibration: score_calibration.ScoreCalibrationCreate = Depends(calibration_create_loader), + classes_file: Optional[UploadFile] = File( + None, + description=f"CSV file containing variant classifications. This file must contain two columns: '{calibration_variant_column_name}' and '{calibration_class_column_name}'.", + ), db: Session = Depends(deps.get_db), user_data: UserData = Depends(require_current_user), ) -> ScoreCalibration: """ Create a new score calibration. - The score set URN must be provided to associate the calibration with an existing score set. - The user must have write permission on the associated score set. + This endpoint supports two different request formats to accommodate various client needs: + + ## Method 1: JSON Request Body (application/json) + Send calibration data as a standard JSON request body. This method is ideal for + creating calibrations without file uploads. + + **Content-Type**: `application/json` + + **Example**: + ```json + { + "score_set_urn": "urn:mavedb:0000000X-X-X", + "title": "My Calibration", + "description": "Functional score calibration", + "baseline_score": 1.0 + } + ``` + + ## Method 2: Multipart Form Data (multipart/form-data) + Send calibration data as JSON in a form field, optionally with file uploads. + This method is required when uploading classification files. + + **Content-Type**: `multipart/form-data` + + **Form Fields**: + - `calibration_json` (string, required): JSON string containing the calibration data + - `classes_file` (file, optional): CSV file containing variant classifications + + **Example**: + ```bash + curl -X POST "/api/v1/score-calibrations/" \\ + -H "Authorization: Bearer your-token" \\ + -F 'calibration_json={"score_set_urn":"urn:mavedb:0000000X-X-X","title":"My Calibration","description":"Functional score calibration","baseline_score":"1.0"}' \\ + -F 'classes_file=@variant_classes.csv' + ``` + + ## Requirements + - The score set URN must be provided to associate the calibration with an existing score set + - User must have write permission on the associated score set + - If uploading a classes_file, it must be a valid CSV with variant classification data + + ## File Upload Details + The `classes_file` parameter accepts CSV files containing variant classification data. + The file should have appropriate headers and contain columns for variant urns and class names. + + ## Response + Returns the created score calibration with its generated URN and associated score set information. """ if not calibration.score_set_urn: raise HTTPException(status_code=422, detail="score_set_urn must be provided to create a score calibration.") @@ -190,7 +282,22 @@ async def create_score_calibration_route( # permission to update the score set itself. 
assert_permission(user_data, score_set, Action.UPDATE) - created_calibration = await create_score_calibration_in_score_set(db, calibration, user_data.user) + if classes_file: + try: + classes_df = csv_data_to_df(classes_file.file, induce_hgvs_cols=False) + except UnicodeDecodeError as e: + raise HTTPException( + status_code=400, detail=f"Error decoding file: {e}. Ensure the file has correct values." + ) + + standardized_classes_df = validate_and_standardize_calibration_classes_dataframe( + db, score_set, calibration, classes_df + ) + variant_classes = variant_classification_df_to_dict(standardized_classes_df) + + created_calibration = await create_score_calibration_in_score_set( + db, calibration, user_data.user, variant_classes if classes_file else None + ) db.commit() db.refresh(created_calibration) @@ -201,17 +308,99 @@ async def create_score_calibration_route( @router.put( "/{urn}", response_model=score_calibration.ScoreCalibrationWithScoreSetUrn, - responses={404: {}}, + responses={404: {}, 422: {"description": "Validation Error"}}, + openapi_extra={ + "requestBody": { + "content": { + "application/json": { + "schema": {"$ref": "#/components/schemas/ScoreCalibrationModify"}, + }, + "multipart/form-data": { + "schema": { + "type": "object", + "properties": { + "calibration_json": { + "type": "string", + "description": "JSON string containing the calibration update data", + "example": '{"title":"Updated Calibration","description":"Updated description","baseline_score":2.0}', + }, + "classes_file": { + "type": "string", + "format": "binary", + "description": "CSV file containing updated variant classifications", + }, + }, + } + }, + }, + "description": "Score calibration update data. Can be sent as JSON body or multipart form data", + } + }, ) async def modify_score_calibration_route( *, urn: str, - calibration_update: score_calibration.ScoreCalibrationModify, + calibration_update: score_calibration.ScoreCalibrationModify = Depends(calibration_modify_loader), + classes_file: Optional[UploadFile] = File( + None, + description=f"CSV file containing variant classifications. This file must contain two columns: '{calibration_variant_column_name}' and '{calibration_class_column_name}'.", + ), db: Session = Depends(deps.get_db), user_data: UserData = Depends(require_current_user), ) -> ScoreCalibration: """ Modify an existing score calibration by its URN. + + This endpoint supports two different request formats to accommodate various client needs: + + ## Method 1: JSON Request Body (application/json) + Send calibration update data as a standard JSON request body. This method is ideal for + modifying calibrations without file uploads. + + **Content-Type**: `application/json` + + **Example**: + ```json + { + "score_set_urn": "urn:mavedb:0000000X-X-X", + "title": "Updated Calibration Title", + "description": "Updated functional score calibration", + "baseline_score": 1.0 + } + ``` + + ## Method 2: Multipart Form Data (multipart/form-data) + Send calibration update data as JSON in a form field, optionally with file uploads. + This method is required when uploading new classification files. 
+ + **Content-Type**: `multipart/form-data` + + **Form Fields**: + - `calibration_json` (string, required): JSON string containing the calibration update data + - `classes_file` (file, optional): CSV file containing updated variant classifications + + **Example**: + ```bash + curl -X PUT "/api/v1/score-calibrations/{urn}" \\ + -H "Authorization: Bearer your-token" \\ + -F 'calibration_json={"score_set_urn":"urn:mavedb:0000000X-X-X","title":"My Calibration","description":"Functional score calibration","baseline_score":"1.0"}' \\ + -F 'classes_file=@updated_variant_classes.csv' + ``` + + ## Requirements + - User must have update permission on the calibration + - If changing the score_set_urn, user must have permission on the new score set + - All fields in the update are optional - only provided fields will be modified + + ## File Upload Details + The `classes_file` parameter accepts CSV files containing updated variant classification data. + If provided, this will replace the existing classification data for the calibration. + The file should have appropriate headers and follow the expected format for variant + classifications within the associated score set. + + ## Response + Returns the updated score calibration with all modifications applied and any new + classification data from the uploaded file. """ save_to_logging_context({"requested_resource": urn}) @@ -241,7 +430,22 @@ async def modify_score_calibration_route( assert_permission(user_data, item, Action.UPDATE) - updated_calibration = await modify_score_calibration(db, item, calibration_update, user_data.user) + if classes_file: + try: + classes_df = csv_data_to_df(classes_file.file, induce_hgvs_cols=False) + except UnicodeDecodeError as e: + raise HTTPException( + status_code=400, detail=f"Error decoding file: {e}. Ensure the file has correct values." 
+ ) + + standardized_classes_df = validate_and_standardize_calibration_classes_dataframe( + db, score_set, calibration_update, classes_df + ) + variant_classes = variant_classification_df_to_dict(standardized_classes_df) + + updated_calibration = await modify_score_calibration( + db, item, calibration_update, user_data.user, variant_classes if classes_file else None + ) db.commit() db.refresh(updated_calibration) diff --git a/tests/lib/test_score_calibrations.py b/tests/lib/test_score_calibrations.py index db9f9c7b..110633ab 100644 --- a/tests/lib/test_score_calibrations.py +++ b/tests/lib/test_score_calibrations.py @@ -2,12 +2,11 @@ import pytest -from mavedb.models.score_calibration_functional_classification import ScoreCalibrationFunctionalClassification - pytest.importorskip("psycopg2") from unittest import mock +import pandas as pd from pydantic import create_model from sqlalchemy import select from sqlalchemy.exc import NoResultFound @@ -21,10 +20,13 @@ modify_score_calibration, promote_score_calibration_to_primary, publish_score_calibration, + variant_classification_df_to_dict, variants_for_functional_classification, ) +from mavedb.lib.validation.constants.general import calibration_class_column_name, calibration_variant_column_name from mavedb.models.enums.score_calibration_relation import ScoreCalibrationRelation from mavedb.models.score_calibration import ScoreCalibration +from mavedb.models.score_calibration_functional_classification import ScoreCalibrationFunctionalClassification from mavedb.models.score_set import ScoreSet from mavedb.models.user import User from mavedb.models.variant import Variant @@ -59,6 +61,7 @@ def test_create_functional_classification_without_acmg_classification(setup_lib_ label=(str, "Test Label"), description=(str, "Test Description"), range=(list, [0.0, 1.0]), + class_=(type(None), None), inclusive_lower_bound=(bool, True), inclusive_upper_bound=(bool, False), functional_classification=(str, "pathogenic"), @@ -102,6 +105,7 @@ def test_create_functional_classification_with_acmg_classification(setup_lib_db, label=(str, "Test Label"), description=(str, "Test Description"), range=(list, [0.0, 1.0]), + class_=(type(None), None), inclusive_lower_bound=(bool, True), inclusive_upper_bound=(bool, False), functional_classification=(str, "pathogenic"), @@ -131,18 +135,71 @@ def test_create_functional_classification_with_acmg_classification(setup_lib_db, points=mock_points, ) - # Verify the result - assert result.label == "Test Label" - assert result.description == "Test Description" - assert result.range == [0.0, 1.0] - assert result.inclusive_lower_bound is True - assert result.inclusive_upper_bound is False - assert result.functional_classification == "pathogenic" - assert result.oddspaths_ratio == 1.5 - assert result.positive_likelihood_ratio == 2.0 - assert result.acmg_classification == mocked_persisted_acmg_classification - assert result.acmg_classification_id == 123 - assert result.calibration == calibration + # Verify the result + assert result.label == "Test Label" + assert result.description == "Test Description" + assert result.range == [0.0, 1.0] + assert result.inclusive_lower_bound is True + assert result.inclusive_upper_bound is False + assert result.functional_classification == "pathogenic" + assert result.oddspaths_ratio == 1.5 + assert result.positive_likelihood_ratio == 2.0 + assert result.acmg_classification == mocked_persisted_acmg_classification + assert result.acmg_classification_id == 123 + assert result.calibration == calibration + + 
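+# Illustrative sketch only (assumed shape, not an exhaustive contract): the ``variant_classes``
+# mapping threaded through the tests below is the dictionary produced by
+# ``variant_classification_df_to_dict``, keyed by class label with the distinct variant URNs
+# assigned to that class, e.g.
+#     {"pathogenic": ["urn:mavedb:00000001-a-1#1"], "benign": ["urn:mavedb:00000001-a-1#2"]}
+# It is built from a CSV whose columns are named by ``calibration_variant_column_name`` and
+# ``calibration_class_column_name`` (``variant_urn`` / ``class_name`` in the bundled test fixture).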
+def test_create_functional_classification_with_variant_classes(setup_lib_db, session): + # Create a mock calibration + calibration = ScoreCalibration() + + # Create mock functional range with variant classes + MockFunctionalClassificationCreate = create_model( + "MockFunctionalClassificationCreate", + label=(str, "Test Label"), + description=(str, "Test Description"), + range=(type(None), None), + class_=(str, "test_class"), + inclusive_lower_bound=(type(None), None), + inclusive_upper_bound=(type(None), None), + functional_classification=(str, "pathogenic"), + oddspaths_ratio=(float, 1.5), + positive_likelihood_ratio=(float, 2.0), + acmg_classification=(type(None), None), + ) + + functional_range_create = MockFunctionalClassificationCreate() + + with mock.patch("mavedb.lib.score_calibrations.variants_for_functional_classification") as mock_classified_variants: + MockedClassifiedVariant = create_model( + "MockedVariant", + urn=(str, "variant_urn_3"), + ) + mock_classified_variants.return_value = [MockedClassifiedVariant()] + + result = create_functional_classification( + session, + functional_range_create, + calibration, + variant_classes={ + "pathogenic": ["variant_urn_1", "variant_urn_2"], + "benign": ["variant_urn_3"], + }, + ) + + mock_classified_variants.assert_called() + + assert result.description == "Test Description" + assert result.range is None + assert result.inclusive_lower_bound is None + assert result.inclusive_upper_bound is None + assert result.functional_classification == "pathogenic" + assert result.oddspaths_ratio == 1.5 + assert result.positive_likelihood_ratio == 2.0 + assert result.acmg_classification is None + assert result.acmg_classification_id is None + assert result.calibration == calibration + assert result.variants == [MockedClassifiedVariant()] def test_create_functional_classification_propagates_acmg_errors(setup_lib_db, session): @@ -163,6 +220,7 @@ def test_create_functional_classification_propagates_acmg_errors(setup_lib_db, s label=(str, "Test Label"), description=(str, "Test Description"), range=(list, [0.0, 1.0]), + class_=(type(None), None), inclusive_lower_bound=(bool, True), inclusive_upper_bound=(bool, False), functional_classification=(str, "pathogenic"), @@ -183,6 +241,37 @@ def test_create_functional_classification_propagates_acmg_errors(setup_lib_db, s create_functional_classification(session, functional_range_create, calibration) +def test_create_functional_classification_propagates_functional_classification_errors(setup_lib_db, session): + # Create a mock calibration + calibration = ScoreCalibration() + + # Create mock functional range + MockFunctionalClassificationCreate = create_model( + "MockFunctionalClassificationCreate", + label=(str, "Test Label"), + description=(str, "Test Description"), + range=(list, [0.0, 1.0]), + class_=(type(None), None), + inclusive_lower_bound=(bool, True), + inclusive_upper_bound=(bool, False), + functional_classification=(str, "pathogenic"), + oddspaths_ratio=(float, 1.5), + positive_likelihood_ratio=(float, 2.0), + acmg_classification=(type(None), None), + ) + + functional_range_create = MockFunctionalClassificationCreate() + + with ( + pytest.raises(ValueError, match="Functional classification error"), + mock.patch( + "mavedb.lib.score_calibrations.ScoreCalibrationFunctionalClassification", + side_effect=ValueError("Functional classification error"), + ), + ): + create_functional_classification(session, functional_range_create, calibration) + + def 
test_create_functional_classification_does_not_commit_transaction(setup_lib_db, session): # Create a mock calibration calibration = ScoreCalibration() @@ -193,6 +282,7 @@ def test_create_functional_classification_does_not_commit_transaction(setup_lib_ label=(str, "Test Label"), description=(str, "Test Description"), range=(list, [0.0, 1.0]), + class_=(type(None), None), inclusive_lower_bound=(bool, True), inclusive_upper_bound=(bool, False), functional_classification=(str, "pathogenic"), @@ -207,7 +297,7 @@ def test_create_functional_classification_does_not_commit_transaction(setup_lib_ ################################################################################ -# Tests for create_score_calibration +# Tests for _create_score_calibration (tested indirectly via the following tests to its callers) ################################################################################ @@ -536,11 +626,48 @@ async def test_create_score_calibration_fully_valid_calibration( calibration = await create_function_to_call(session, calibration_create, test_user) for field in valid_score_calibration_data: - # Sources are tested elsewhere - # XXX: Ranges are a pain to compare between JSONB and dict input, so are assumed correct + # Sources are tested elsewhere. if "sources" not in field and "functional_classifications" not in field: assert getattr(calibration, field) == valid_score_calibration_data[field] + # Verify functional classifications length. Assume the returned value of created classifications is correct, + # and test the content elsewhere. + if field == "functional_classifications": + assert len(calibration.functional_classifications) == len( + valid_score_calibration_data["functional_classifications"] + ) + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "create_function_to_call,score_set_urn", + [ + (create_score_calibration_in_score_set, VALID_SCORE_SET_URN), + (create_score_calibration, None), + ], +) +@pytest.mark.parametrize( + "mock_publication_fetch", + [ + [ + {"dbName": "PubMed", "identifier": TEST_PUBMED_IDENTIFIER}, + {"dbName": "bioRxiv", "identifier": TEST_BIORXIV_IDENTIFIER}, + ], + ], + indirect=["mock_publication_fetch"], +) +async def test_create_score_calibration_does_not_commit_transaction( + setup_lib_db_with_score_set, session, mock_user, create_function_to_call, score_set_urn, mock_publication_fetch +): + calibration_create = ScoreCalibrationCreate( + **TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED, score_set_urn=score_set_urn + ) + test_user = session.execute(select(User)).scalars().first() + + with mock.patch.object(session, "commit") as mock_commit: + await create_function_to_call(session, calibration_create, test_user) + mock_commit.assert_not_called() + ################################################################################ # Tests for modify_score_calibration @@ -965,11 +1092,46 @@ async def test_modify_score_calibration_fully_valid_calibration( modified_calibration = await modify_score_calibration(session, existing_calibration, modify_calibration, test_user) for field in TEST_PATHOGENICITY_SCORE_CALIBRATION: - # Sources are tested elsewhere - # XXX: Ranges are a pain to compare between JSONB and dict input, so are assumed correct + # Sources are tested elsewhere. if "sources" not in field and "functional_classifications" not in field: assert getattr(modified_calibration, field) == TEST_PATHOGENICITY_SCORE_CALIBRATION[field] + # Verify functional classifications length. 
Assume the returned value of created classifications is correct, + # and test the content elsewhere. + if field == "functional_classifications": + assert len(modified_calibration.functional_classifications) == len( + TEST_PATHOGENICITY_SCORE_CALIBRATION["functional_classifications"] + ) + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "mock_publication_fetch", + [ + [ + {"dbName": "PubMed", "identifier": TEST_PUBMED_IDENTIFIER}, + {"dbName": "bioRxiv", "identifier": TEST_BIORXIV_IDENTIFIER}, + ], + ], + indirect=["mock_publication_fetch"], +) +async def test_modify_score_calibration_does_not_commit_transaction( + setup_lib_db_with_score_set, session, mock_publication_fetch +): + test_user = session.execute(select(User)).scalars().first() + + existing_calibration = await create_test_range_based_score_calibration_in_score_set( + session, setup_lib_db_with_score_set.urn, test_user + ) + + modify_calibration = ScoreCalibrationModify( + **TEST_PATHOGENICITY_SCORE_CALIBRATION, score_set_urn=setup_lib_db_with_score_set.urn + ) + + with mock.patch.object(session, "commit") as mock_commit: + await modify_score_calibration(session, existing_calibration, modify_calibration, test_user) + mock_commit.assert_not_called() + ################################################################################ # Tests for publish_score_calibration ################################################################################ @@ -1332,6 +1494,36 @@ async def test_promote_score_calibration_to_primary_demoted_existing_primary_use assert promoted_calibration.created_by == test_user +@pytest.mark.asyncio +@pytest.mark.parametrize( + "mock_publication_fetch", + [ + [ + {"dbName": "PubMed", "identifier": TEST_PUBMED_IDENTIFIER}, + {"dbName": "bioRxiv", "identifier": TEST_BIORXIV_IDENTIFIER}, + ], + ], + indirect=["mock_publication_fetch"], +) +async def test_promote_score_calibration_to_primary_does_not_commit_transaction( + setup_lib_db_with_score_set, session, mock_publication_fetch +): + test_user = session.execute(select(User)).scalars().first() + + existing_calibration = await create_test_range_based_score_calibration_in_score_set( + session, setup_lib_db_with_score_set.urn, test_user + ) + existing_calibration.private = False + existing_calibration.primary = False + session.add(existing_calibration) +
session.commit() + session.refresh(existing_calibration) + + with mock.patch.object(session, "commit") as mock_commit: + demote_score_calibration_from_primary(session, existing_calibration, test_user) + mock_commit.assert_not_called() + + ################################################################################ # Test delete_score_calibration ################################################################################ @@ -1482,35 +1703,96 @@ async def test_delete_score_calibration_deletes_calibration( session.execute(select(ScoreCalibration).where(ScoreCalibration.id == calibration_id)).scalars().one() +@pytest.mark.asyncio +@pytest.mark.parametrize( + "mock_publication_fetch", + [ + [ + {"dbName": "PubMed", "identifier": TEST_PUBMED_IDENTIFIER}, + {"dbName": "bioRxiv", "identifier": TEST_BIORXIV_IDENTIFIER}, + ], + ], + indirect=["mock_publication_fetch"], +) +async def test_delete_score_calibration_does_not_commit_transaction( + setup_lib_db_with_score_set, session, mock_publication_fetch +): + test_user = session.execute(select(User)).scalars().first() + + existing_calibration = await create_test_range_based_score_calibration_in_score_set( + session, setup_lib_db_with_score_set.urn, test_user + ) + + with mock.patch.object(session, "commit") as mock_commit: + delete_score_calibration(session, existing_calibration) + mock_commit.assert_not_called() + + ################################################################################ # Tests for variants_for_functional_classification ################################################################################ -def test_variants_for_functional_classification_returns_empty_list_when_range_is_none(setup_lib_db, session): +def test_variants_for_functional_classification_returns_empty_list_when_range_and_classes_is_none( + setup_lib_db, session +): mock_calibration = mock.Mock(spec=ScoreCalibration) + mock_calibration.score_set_id = 1 mock_functional_calibration = mock.Mock(spec=ScoreCalibrationFunctionalClassification) mock_functional_calibration.range = None + mock_functional_calibration.class_ = None mock_functional_calibration.calibration = mock_calibration - result = variants_for_functional_classification(session, mock_functional_calibration) + result = variants_for_functional_classification( + session, mock_functional_calibration, variant_classes=None, use_sql=False + ) assert result == [] -def test_variants_for_functional_classification_returns_empty_list_when_range_is_empty_list(setup_lib_db, session): +def test_variants_for_functional_classification_returns_empty_list_when_range_is_empty_list_and_classes_is_none( + setup_lib_db, session +): mock_calibration = mock.Mock(spec=ScoreCalibration) + mock_calibration.score_set_id = 1 mock_functional_calibration = mock.Mock(spec=ScoreCalibrationFunctionalClassification) mock_functional_calibration.range = [] + mock_functional_calibration.class_ = None mock_functional_calibration.calibration = mock_calibration - result = variants_for_functional_classification(session, mock_functional_calibration) + result = variants_for_functional_classification( + session, mock_functional_calibration, variant_classes=None, use_sql=False + ) assert result == [] +@pytest.mark.parametrize( + "range_,class_,variant_classes", + [ + ([1.0, 2.0], None, None), + ( + None, + "benign", + pd.DataFrame( + { + calibration_variant_column_name: [ + "urn:mavedb:variant-1", + "urn:mavedb:variant-2", + "urn:mavedb:variant-3", + ], + calibration_class_column_name: [ + "pathogenic", + "benign", + "pathogenic", + ], 
+ } + ), + ), + ], +) def test_variants_for_functional_classification_python_filtering_with_valid_variants( - setup_lib_db_with_score_set, session + setup_lib_db_with_score_set, session, range_, class_, variant_classes ): variant_1 = Variant( data={"score_data": {"score": 0.5}}, @@ -1534,18 +1816,31 @@ def test_variants_for_functional_classification_python_filtering_with_valid_vari mock_calibration = mock.Mock(spec=ScoreCalibration) mock_calibration.score_set_id = setup_lib_db_with_score_set.id mock_functional_classification = mock.Mock(spec=ScoreCalibrationFunctionalClassification) - mock_functional_classification.range = [1.0, 2.0] + mock_functional_classification.range = range_ + mock_functional_classification.class_ = class_ mock_functional_classification.calibration = mock_calibration mock_functional_classification.score_is_contained_in_range = mock.Mock(side_effect=lambda x: 1.0 <= x <= 2.0) - result = variants_for_functional_classification(session, mock_functional_classification, use_sql=False) + result = variants_for_functional_classification( + session, + mock_functional_classification, + variant_classes=variant_classification_df_to_dict(variant_classes) if variant_classes is not None else None, + use_sql=False, + ) assert len(result) == 1 assert result[0].data["score_data"]["score"] == 1.5 +@pytest.mark.parametrize( + "range_,class_,variant_classes", + [ + ([1.0, 2.0], None, None), + # not applicable when filtering by class + ], +) def test_variants_for_functional_classification_python_filtering_skips_variants_without_score_data( - setup_lib_db_with_score_set, session, mock_functional_calibration + setup_lib_db_with_score_set, session, range_, class_, variant_classes ): # Create variant without score_data variant_without_score_data = Variant( @@ -1567,18 +1862,31 @@ def test_variants_for_functional_classification_python_filtering_skips_variants_ mock_calibration = mock.Mock(spec=ScoreCalibration) mock_calibration.score_set_id = setup_lib_db_with_score_set.id mock_functional_classification = mock.Mock(spec=ScoreCalibrationFunctionalClassification) - mock_functional_classification.range = [1.0, 2.0] + mock_functional_classification.range = range_ + mock_functional_classification.class_ = class_ mock_functional_classification.calibration = mock_calibration mock_functional_classification.score_is_contained_in_range = mock.Mock(side_effect=lambda x: 1.0 <= x <= 2.0) - result = variants_for_functional_classification(session, mock_functional_classification, use_sql=False) + result = variants_for_functional_classification( + session, + mock_functional_classification, + variant_classes=variant_classification_df_to_dict(variant_classes) if variant_classes is not None else None, + use_sql=False, + ) assert len(result) == 1 assert result[0].data["score_data"]["score"] == 1.5 +@pytest.mark.parametrize( + "range_,class_,variant_classes", + [ + ([1.0, 2.0], None, None), + # not applicable when filtering by class + ], +) def test_variants_for_functional_classification_python_filtering_skips_variants_with_non_dict_score_data( - setup_lib_db_with_score_set, session + setup_lib_db_with_score_set, session, range_, class_, variant_classes ): # Create variant with non-dict score_data variant_invalid_score_data = Variant( @@ -1600,18 +1908,30 @@ def test_variants_for_functional_classification_python_filtering_skips_variants_ mock_calibration = mock.Mock(spec=ScoreCalibration) mock_calibration.score_set_id = setup_lib_db_with_score_set.id mock_functional_classification = 
mock.Mock(spec=ScoreCalibrationFunctionalClassification) - mock_functional_classification.range = [1.0, 2.0] + mock_functional_classification.range = range_ + mock_functional_classification.class_ = class_ mock_functional_classification.calibration = mock_calibration mock_functional_classification.score_is_contained_in_range = mock.Mock(side_effect=lambda x: 1.0 <= x <= 2.0) - result = variants_for_functional_classification(session, mock_functional_classification, use_sql=False) - + result = variants_for_functional_classification( + session, + mock_functional_classification, + variant_classes=variant_classification_df_to_dict(variant_classes) if variant_classes is not None else None, + use_sql=False, + ) assert len(result) == 1 assert result[0].data["score_data"]["score"] == 1.5 +@pytest.mark.parametrize( + "range_,class_,variant_classes", + [ + ([1.0, 2.0], None, None), + # not applicable when filtering by class + ], +) def test_variants_for_functional_classification_python_filtering_skips_variants_with_none_score( - setup_lib_db_with_score_set, session + setup_lib_db_with_score_set, session, range_, class_, variant_classes ): # Create variant with None score variant_none_score = Variant( @@ -1637,14 +1957,26 @@ def test_variants_for_functional_classification_python_filtering_skips_variants_ mock_functional_classification.calibration = mock_calibration mock_functional_classification.score_is_contained_in_range = mock.Mock(side_effect=lambda x: 1.0 <= x <= 2.0) - result = variants_for_functional_classification(session, mock_functional_classification, use_sql=False) + result = variants_for_functional_classification( + session, + mock_functional_classification, + variant_classes=variant_classification_df_to_dict(variant_classes) if variant_classes is not None else None, + use_sql=False, + ) assert len(result) == 1 assert result[0].data["score_data"]["score"] == 1.5 +@pytest.mark.parametrize( + "range_,class_,variant_classes", + [ + ([1.0, 2.0], None, None), + # not applicable when filtering by class + ], +) def test_variants_for_functional_classification_python_filtering_skips_variants_with_non_numeric_score( - setup_lib_db_with_score_set, session + setup_lib_db_with_score_set, session, range_, class_, variant_classes ): # Create variant with non-numeric score variant_string_score = Variant( @@ -1666,18 +1998,30 @@ def test_variants_for_functional_classification_python_filtering_skips_variants_ mock_calibration = mock.Mock(spec=ScoreCalibration) mock_calibration.score_set_id = setup_lib_db_with_score_set.id mock_functional_classification = mock.Mock(spec=ScoreCalibrationFunctionalClassification) - mock_functional_classification.range = [1.0, 2.0] + mock_functional_classification.range = range_ + mock_functional_classification.class_ = class_ mock_functional_classification.calibration = mock_calibration mock_functional_classification.score_is_contained_in_range = mock.Mock(side_effect=lambda x: 1.0 <= x <= 2.0) - result = variants_for_functional_classification(session, mock_functional_classification, use_sql=False) - + result = variants_for_functional_classification( + session, + mock_functional_classification, + variant_classes=variant_classification_df_to_dict(variant_classes) if variant_classes is not None else None, + use_sql=False, + ) assert len(result) == 1 assert result[0].data["score_data"]["score"] == 1.5 +@pytest.mark.parametrize( + "range_,class_,variant_classes", + [ + ([1.0, 2.0], None, None), + # not applicable when filtering by class + ], +) def 
test_variants_for_functional_classification_python_filtering_skips_variants_with_non_dict_data( - setup_lib_db_with_score_set, session + setup_lib_db_with_score_set, session, range_, class_, variant_classes ): # Create variant with non-dict data variant_invalid_data = Variant( @@ -1697,12 +2041,17 @@ def test_variants_for_functional_classification_python_filtering_skips_variants_ mock_calibration = mock.Mock(spec=ScoreCalibration) mock_calibration.score_set_id = setup_lib_db_with_score_set.id mock_functional_classification = mock.Mock(spec=ScoreCalibrationFunctionalClassification) - mock_functional_classification.range = [1.0, 2.0] + mock_functional_classification.range = range_ + mock_functional_classification.class_ = class_ mock_functional_classification.calibration = mock_calibration mock_functional_classification.score_is_contained_in_range = mock.Mock(side_effect=lambda x: 1.0 <= x <= 2.0) - result = variants_for_functional_classification(session, mock_functional_classification, use_sql=False) - + result = variants_for_functional_classification( + session, + mock_functional_classification, + variant_classes=variant_classification_df_to_dict(variant_classes) if variant_classes is not None else None, + use_sql=False, + ) assert len(result) == 1 assert result[0].data["score_data"]["score"] == 1.5 @@ -1711,11 +2060,41 @@ def test_variants_for_functional_classification_python_filtering_skips_variants_ "use_sql", [True, False], ) -def test_variants_for_functional_classification_filters_by_score_range(setup_lib_db_with_score_set, session, use_sql): +@pytest.mark.parametrize( + "range_,class_,variant_classes", + [ + ([1.0, 2.0], None, None), + ( + None, + "benign", + pd.DataFrame( + { + calibration_variant_column_name: [ + "urn:mavedb:variant-1", + "urn:mavedb:variant-2", + "urn:mavedb:variant-3", + "urn:mavedb:variant-4", + "urn:mavedb:variant-5", + ], + calibration_class_column_name: [ + "pathogenic", + "benign", + "benign", + "benign", + "pathogenic", + ], + } + ), + ), + ], +) +def test_variants_for_functional_classification_filters_by_conditions( + setup_lib_db_with_score_set, session, use_sql, range_, class_, variant_classes +): # Create variants with different scores variants = [] scores = [0.5, 1.0, 1.5, 2.0, 2.5] - for i, score in enumerate(scores): + for i, score in enumerate(scores, 1): variant = Variant( data={"score_data": {"score": score}}, score_set_id=setup_lib_db_with_score_set.id, @@ -1729,14 +2108,20 @@ def test_variants_for_functional_classification_filters_by_score_range(setup_lib mock_calibration = mock.Mock(spec=ScoreCalibration) mock_calibration.score_set_id = setup_lib_db_with_score_set.id mock_functional_classification = mock.Mock(spec=ScoreCalibrationFunctionalClassification) - mock_functional_classification.range = [1.0, 2.0] + mock_functional_classification.range = range_ + mock_functional_classification.class_ = class_ mock_functional_classification.inclusive_lower_bound = True mock_functional_classification.inclusive_upper_bound = True mock_functional_classification.calibration = mock_calibration mock_functional_classification.score_is_contained_in_range = mock.Mock(side_effect=lambda x: 1.0 <= x <= 2.0) with mock.patch("mavedb.lib.score_calibrations.inf_or_float", side_effect=lambda x, lower: float(x)): - result = variants_for_functional_classification(session, mock_functional_classification, use_sql=use_sql) + result = variants_for_functional_classification( + session, + mock_functional_classification, + 
variant_classes=variant_classification_df_to_dict(variant_classes) if variant_classes is not None else None, + use_sql=use_sql, + ) # Should return variants with scores 1.0, 1.5, 2.0 result_scores = [v.data["score_data"]["score"] for v in result] @@ -1744,7 +2129,33 @@ def test_variants_for_functional_classification_filters_by_score_range(setup_lib assert sorted(result_scores) == sorted(expected_scores) -def test_variants_for_functional_classification_sql_fallback_on_exception(setup_lib_db_with_score_set, session): +@pytest.mark.parametrize( + "range_,class_,variant_classes", + [ + ([1.0, 2.0], None, None), + ( + None, + "benign", + pd.DataFrame( + { + calibration_variant_column_name: [ + "urn:mavedb:variant-1", + "urn:mavedb:variant-2", + "urn:mavedb:variant-3", + ], + calibration_class_column_name: [ + "benign", + "pathogenic", + "pathogenic", + ], + } + ), + ), + ], +) +def test_variants_for_functional_classification_sql_fallback_on_exception( + setup_lib_db_with_score_set, session, range_, class_, variant_classes +): # Create a variant variant = Variant( data={"score_data": {"score": 1.5}}, @@ -1757,7 +2168,8 @@ def test_variants_for_functional_classification_sql_fallback_on_exception(setup_ mock_calibration = mock.Mock(spec=ScoreCalibration) mock_calibration.score_set_id = setup_lib_db_with_score_set.id mock_functional_classification = mock.Mock(spec=ScoreCalibrationFunctionalClassification) - mock_functional_classification.range = [1.0, 2.0] + mock_functional_classification.range = range_ + mock_functional_classification.class_ = class_ mock_functional_classification.calibration = mock_calibration mock_functional_classification.score_is_contained_in_range = mock.Mock(side_effect=lambda x: 1.0 <= x <= 2.0) @@ -1770,7 +2182,12 @@ def test_variants_for_functional_classification_sql_fallback_on_exception(setup_ session.execute(select(Variant).where(Variant.score_set_id == setup_lib_db_with_score_set.id)), ], ) as mocked_execute: - result = variants_for_functional_classification(session, mock_functional_classification, use_sql=True) + result = variants_for_functional_classification( + session, + mock_functional_classification, + variant_classes=variant_classification_df_to_dict(variant_classes) if variant_classes is not None else None, + use_sql=True, + ) mocked_execute.assert_called() # Should fall back to Python filtering and return the matching variant @@ -1778,7 +2195,16 @@ def test_variants_for_functional_classification_sql_fallback_on_exception(setup_ assert result[0].data["score_data"]["score"] == 1.5 -def test_variants_for_functional_classification_sql_with_infinite_bound(setup_lib_db_with_score_set, session): +@pytest.mark.parametrize( + "range_,class_,variant_classes", + [ + ([1.0, float("inf")], None, None), + # not applicable when filtering by class + ], +) +def test_variants_for_functional_classification_sql_with_infinite_bound( + setup_lib_db_with_score_set, session, range_, class_, variant_classes +): # Create variants with different scores variants = [] scores = [0.5, 1.5, 2.5] @@ -1797,7 +2223,8 @@ def test_variants_for_functional_classification_sql_with_infinite_bound(setup_li mock_calibration = mock.Mock(spec=ScoreCalibration) mock_calibration.score_set_id = setup_lib_db_with_score_set.id mock_functional_classification = mock.Mock(spec=ScoreCalibrationFunctionalClassification) - mock_functional_classification.range = [1.0, float("inf")] + mock_functional_classification.range = range_ + mock_functional_classification.class_ = class_ 
mock_functional_classification.calibration = mock_calibration mock_functional_classification.inclusive_lower_bound = True mock_functional_classification.inclusive_upper_bound = False @@ -1807,7 +2234,14 @@ def test_variants_for_functional_classification_sql_with_infinite_bound(setup_li side_effect=lambda x, lower: float("inf") if x == float("inf") else float(x), ): with mock.patch("math.isinf", side_effect=lambda x: x == float("inf")): - result = variants_for_functional_classification(session, mock_functional_classification, use_sql=True) + result = variants_for_functional_classification( + session, + mock_functional_classification, + variant_classes=variant_classification_df_to_dict(variant_classes) + if variant_classes is not None + else None, + use_sql=True, + ) # Should return variants with scores >= 1.0 result_scores = [v.data["score_data"]["score"] for v in result] @@ -1815,7 +2249,16 @@ def test_variants_for_functional_classification_sql_with_infinite_bound(setup_li assert sorted(result_scores) == sorted(expected_scores) -def test_variants_for_functional_classification_sql_with_exclusive_bounds(setup_lib_db_with_score_set, session): +@pytest.mark.parametrize( + "range_,class_,variant_classes", + [ + ([1.0, 2.0], None, None), + # not applicable when filtering by class + ], +) +def test_variants_for_functional_classification_sql_with_exclusive_bounds( + setup_lib_db_with_score_set, session, range_, class_, variant_classes +): # Create variants with boundary scores variants = [] scores = [1.0, 1.5, 2.0] @@ -1834,21 +2277,34 @@ def test_variants_for_functional_classification_sql_with_exclusive_bounds(setup_ mock_calibration = mock.Mock(spec=ScoreCalibration) mock_calibration.score_set_id = setup_lib_db_with_score_set.id mock_functional_classification = mock.Mock(spec=ScoreCalibrationFunctionalClassification) - mock_functional_classification.range = [1.0, 2.0] + mock_functional_classification.range = range_ + mock_functional_classification.class_ = class_ mock_functional_classification.calibration = mock_calibration mock_functional_classification.inclusive_lower_bound = False mock_functional_classification.inclusive_upper_bound = False with mock.patch("mavedb.lib.score_calibrations.inf_or_float", side_effect=lambda x, lower: float(x)): - result = variants_for_functional_classification(session, mock_functional_classification, use_sql=True) + result = variants_for_functional_classification( + session, + mock_functional_classification, + variant_classes=variant_classification_df_to_dict(variant_classes) if variant_classes is not None else None, + use_sql=True, + ) # Should return only variant with score 1.5 (exclusive bounds) result_scores = [v.data["score_data"]["score"] for v in result] assert result_scores == [1.5] +@pytest.mark.parametrize( + "range_,class_,variant_classes", + [ + ([1.0, 2.0], None, None), + # not applicable when filtering by class + ], +) def test_variants_for_functional_classification_only_returns_variants_from_correct_score_set( - setup_lib_db_with_score_set, session + setup_lib_db_with_score_set, session, range_, class_, variant_classes ): # Create another score set other_score_set = ScoreSet( @@ -1882,13 +2338,161 @@ def test_variants_for_functional_classification_only_returns_variants_from_corre mock_calibration = mock.Mock(spec=ScoreCalibration) mock_calibration.score_set_id = setup_lib_db_with_score_set.id mock_functional_classification = mock.Mock(spec=ScoreCalibrationFunctionalClassification) - mock_functional_classification.range = [1.0, 2.0] + 
mock_functional_classification.range = range_ + mock_functional_classification.class_ = class_ mock_functional_classification.calibration = mock_calibration mock_functional_classification.score_is_contained_in_range = mock.Mock(side_effect=lambda x: 1.0 <= x <= 2.0) - result = variants_for_functional_classification(session, mock_functional_classification, use_sql=False) - + result = variants_for_functional_classification( + session, + mock_functional_classification, + variant_classes=variant_classification_df_to_dict(variant_classes) if variant_classes is not None else None, + use_sql=False, + ) # Should only return variant from the target score set assert len(result) == 1 assert result[0].score_set_id == setup_lib_db_with_score_set.id assert result[0].urn == "urn:mavedb:variant-target" + + +################################################################################ +# Tests for variant_classification_df_to_dict +################################################################################ + + +def test_variant_classification_df_to_dict_with_single_class(): + """Test conversion with DataFrame containing variants of a single functional class.""" + df = pd.DataFrame( + { + calibration_variant_column_name: ["var1", "var2", "var3"], + calibration_class_column_name: ["pathogenic", "pathogenic", "pathogenic"], + } + ) + + result = variant_classification_df_to_dict(df) + + expected = {"pathogenic": sorted(["var1", "var2", "var3"])} + assert {k: sorted(v) for k, v in result.items()} == expected + + +def test_variant_classification_df_to_dict_with_multiple_classes(): + """Test conversion with DataFrame containing variants of multiple functional classes.""" + df = pd.DataFrame( + { + calibration_variant_column_name: ["var1", "var2", "var3", "var4", "var5"], + calibration_class_column_name: ["pathogenic", "benign", "pathogenic", "uncertain", "benign"], + } + ) + + result = variant_classification_df_to_dict(df) + + expected = {"pathogenic": ["var1", "var3"], "benign": sorted(["var2", "var5"]), "uncertain": ["var4"]} + assert {k: sorted(v) for k, v in result.items()} == expected + + +def test_variant_classification_df_to_dict_with_empty_dataframe(): + """Test conversion with empty DataFrame.""" + df = pd.DataFrame(columns=[calibration_variant_column_name, calibration_class_column_name]) + + result = variant_classification_df_to_dict(df) + + assert result == {} + + +def test_variant_classification_df_to_dict_with_single_row(): + """Test conversion with DataFrame containing single row.""" + df = pd.DataFrame({calibration_variant_column_name: ["var1"], calibration_class_column_name: ["pathogenic"]}) + + result = variant_classification_df_to_dict(df) + + expected = {"pathogenic": ["var1"]} + assert result == expected + + +def test_variant_classification_df_to_dict_preserves_order_within_classes(): + """Test that variant order is preserved within each functional class.""" + df = pd.DataFrame( + { + calibration_variant_column_name: ["var1", "var2", "var3", "var4"], + calibration_class_column_name: ["pathogenic", "pathogenic", "benign", "pathogenic"], + } + ) + + result = variant_classification_df_to_dict(df) + + expected = {"pathogenic": sorted(["var1", "var2", "var4"]), "benign": ["var3"]} + assert {k: sorted(v) for k, v in result.items()} == expected + + +def test_variant_classification_df_to_dict_with_extra_columns(): + """Test conversion ignores extra columns in DataFrame.""" + df = pd.DataFrame( + { + calibration_variant_column_name: ["var1", "var2"], + calibration_class_column_name: 
["pathogenic", "benign"], + "extra_column": ["value1", "value2"], + "another_column": [1, 2], + } + ) + + result = variant_classification_df_to_dict(df) + + expected = {"pathogenic": ["var1"], "benign": ["var2"]} + assert {k: sorted(v) for k, v in result.items()} == expected + + +def test_variant_classification_df_to_dict_with_duplicate_variants_in_same_class(): + """Test handling of duplicate variant URNs in the same functional class.""" + df = pd.DataFrame( + { + calibration_variant_column_name: ["var1", "var1", "var2"], + calibration_class_column_name: ["pathogenic", "pathogenic", "benign"], + } + ) + + result = variant_classification_df_to_dict(df) + + expected = {"pathogenic": ["var1"], "benign": ["var2"]} + assert {k: sorted(v) for k, v in result.items()} == expected + + +def test_variant_classification_df_to_dict_with_none_values(): + """Test handling of None values in functional class column.""" + df = pd.DataFrame( + { + calibration_variant_column_name: ["var1", "var2", "var3"], + calibration_class_column_name: ["pathogenic", None, "benign"], + } + ) + + result = variant_classification_df_to_dict(df) + + expected = {"pathogenic": ["var1"], None: ["var2"], "benign": ["var3"]} + assert {k: sorted(v) for k, v in result.items()} == expected + + +def test_variant_classification_df_to_dict_with_numeric_classes(): + """Test handling of numeric functional class labels.""" + df = pd.DataFrame( + {calibration_variant_column_name: ["var1", "var2", "var3"], calibration_class_column_name: [1, 2, 1]} + ) + + result = variant_classification_df_to_dict(df) + + expected = {1: sorted(["var1", "var3"]), 2: ["var2"]} + assert {k: sorted(v) for k, v in result.items()} == expected + + +def test_variant_classification_df_to_dict_with_mixed_type_classes(): + """Test handling of mixed data types in functional class column.""" + df = pd.DataFrame( + { + calibration_variant_column_name: ["var1", "var2", "var3", "var4"], + calibration_class_column_name: ["pathogenic", 1, "benign", 1], + } + ) + + result = variant_classification_df_to_dict(df) + + expected = {"pathogenic": ["var1"], 1: sorted(["var2", "var4"]), "benign": ["var3"]} + assert {k: sorted(v) for k, v in result.items()} == expected diff --git a/tests/routers/data/calibration_classes.csv b/tests/routers/data/calibration_classes.csv new file mode 100644 index 00000000..d7654e67 --- /dev/null +++ b/tests/routers/data/calibration_classes.csv @@ -0,0 +1,4 @@ +variant_urn,class_name +urn:mavedb:00000001-a-1#1,normal_class +urn:mavedb:00000001-a-1#2,abnormal_class +urn:mavedb:00000001-a-1#3,not_specified_class \ No newline at end of file diff --git a/tests/routers/test_score_calibrations.py b/tests/routers/test_score_calibrations.py index 9949b639..90d0d564 100644 --- a/tests/routers/test_score_calibrations.py +++ b/tests/routers/test_score_calibrations.py @@ -6,6 +6,7 @@ cdot = pytest.importorskip("cdot") fastapi = pytest.importorskip("fastapi") +import json from unittest.mock import patch from arq import ArqRedis @@ -16,6 +17,7 @@ from tests.helpers.constants import ( EXTRA_USER, TEST_BIORXIV_IDENTIFIER, + TEST_BRNICH_SCORE_CALIBRATION_CLASS_BASED, TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED, TEST_PATHOGENICITY_SCORE_CALIBRATION, TEST_PUBMED_IDENTIFIER, @@ -1379,6 +1381,43 @@ def test_can_create_score_calibration_as_score_set_owner( assert calibration_response["private"] is True +@pytest.mark.parametrize( + "mock_publication_fetch", + [ + [ + {"dbName": "PubMed", "identifier": TEST_PUBMED_IDENTIFIER}, + {"dbName": "bioRxiv", "identifier": 
TEST_BIORXIV_IDENTIFIER}, + ] + ], + indirect=["mock_publication_fetch"], +) +def test_can_create_score_calibration_as_score_set_owner_form( + client, setup_router_db, mock_publication_fetch, session, data_provider, data_files +): + experiment = create_experiment(client) + score_set = create_seq_score_set_with_mapped_variants( + client, + session, + data_provider, + experiment["urn"], + data_files / "scores.csv", + ) + + response = client.post( + "/api/v1/score-calibrations", + data={ + "calibration_json": json.dumps( + {"scoreSetUrn": score_set["urn"], **deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED)} + ), + }, + ) + + assert response.status_code == 200 + calibration_response = response.json() + assert calibration_response["scoreSetUrn"] == score_set["urn"] + assert calibration_response["private"] is True + + @pytest.mark.parametrize( "mock_publication_fetch", [ @@ -1462,6 +1501,53 @@ def test_can_create_score_calibration_as_admin_user( assert calibration_response["private"] is True +@pytest.mark.parametrize( + "mock_publication_fetch", + [ + [ + {"dbName": "PubMed", "identifier": TEST_PUBMED_IDENTIFIER}, + {"dbName": "bioRxiv", "identifier": TEST_BIORXIV_IDENTIFIER}, + ] + ], + indirect=["mock_publication_fetch"], +) +def test_can_create_class_based_score_calibration_form( + client, setup_router_db, mock_publication_fetch, session, data_provider, data_files +): + experiment = create_experiment(client) + score_set = create_seq_score_set_with_mapped_variants( + client, + session, + data_provider, + experiment["urn"], + data_files / "scores.csv", + ) + with patch.object(ArqRedis, "enqueue_job", return_value=None): + score_set = publish_score_set(client, score_set["urn"]) + + classification_csv_path = data_files / "calibration_classes.csv" + with open(classification_csv_path, "rb") as class_file: + response = client.post( + "/api/v1/score-calibrations", + files={"classes_file": (classification_csv_path.name, class_file, "text/csv")}, + data={ + "calibration_json": json.dumps( + {"scoreSetUrn": score_set["urn"], **deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_CLASS_BASED)} + ), + }, + ) + + print(response.text) + + assert response.status_code == 200 + calibration_response = response.json() + assert calibration_response["scoreSetUrn"] == score_set["urn"] + assert calibration_response["private"] is True + assert all( + len(classification["variants"]) == 1 for classification in calibration_response["functionalClassifications"] + ) + + ########################################################### # PUT /score-calibrations/{calibration_urn} ########################################################### @@ -1700,6 +1786,47 @@ def test_can_update_score_calibration_as_score_set_owner( assert calibration_response["private"] is True +@pytest.mark.parametrize( + "mock_publication_fetch", + [ + [ + {"dbName": "PubMed", "identifier": TEST_PUBMED_IDENTIFIER}, + {"dbName": "bioRxiv", "identifier": TEST_BIORXIV_IDENTIFIER}, + ] + ], + indirect=["mock_publication_fetch"], +) +def test_can_update_score_calibration_as_score_set_owner_form( + client, setup_router_db, mock_publication_fetch, session, data_provider, data_files +): + experiment = create_experiment(client) + score_set = create_seq_score_set_with_mapped_variants( + client, + session, + data_provider, + experiment["urn"], + data_files / "scores.csv", + ) + calibration = create_test_score_calibration_in_score_set_via_client( + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) + ) + + response = client.put( + 
f"/api/v1/score-calibrations/{calibration['urn']}", + data={ + "calibration_json": json.dumps( + {"scoreSetUrn": score_set["urn"], **deepcamelize(TEST_PATHOGENICITY_SCORE_CALIBRATION)} + ), + }, + ) + + assert response.status_code == 200 + calibration_response = response.json() + assert calibration_response["urn"] == calibration["urn"] + assert calibration_response["scoreSetUrn"] == score_set["urn"] + assert calibration_response["private"] is True + + @pytest.mark.parametrize( "mock_publication_fetch", [ @@ -2149,6 +2276,55 @@ def test_admin_user_may_move_calibration_to_another_score_set( assert calibration_response["scoreSetUrn"] == score_set2["urn"] +@pytest.mark.parametrize( + "mock_publication_fetch", + [ + [ + {"dbName": "PubMed", "identifier": TEST_PUBMED_IDENTIFIER}, + {"dbName": "bioRxiv", "identifier": TEST_BIORXIV_IDENTIFIER}, + ] + ], + indirect=["mock_publication_fetch"], +) +def test_can_modify_score_calibration_to_class_based( + client, setup_router_db, mock_publication_fetch, session, data_provider, data_files +): + experiment = create_experiment(client) + score_set = create_seq_score_set_with_mapped_variants( + client, + session, + data_provider, + experiment["urn"], + data_files / "scores.csv", + ) + with patch.object(ArqRedis, "enqueue_job", return_value=None): + score_set = publish_score_set(client, score_set["urn"]) + + calibration = create_test_score_calibration_in_score_set_via_client( + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) + ) + + classification_csv_path = data_files / "calibration_classes.csv" + updated_calibration_data = deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_CLASS_BASED) + + with open(classification_csv_path, "rb") as class_file: + response = client.put( + f"/api/v1/score-calibrations/{calibration['urn']}", + files={"classes_file": (classification_csv_path.name, class_file, "text/csv")}, + data={ + "calibration_json": json.dumps({"scoreSetUrn": score_set["urn"], **updated_calibration_data}), + }, + ) + print(response.text) + + assert response.status_code == 200 + calibration_response = response.json() + assert calibration_response["urn"] == calibration["urn"] + assert all( + len(classification["variants"]) == 1 for classification in calibration_response["functionalClassifications"] + ) + + ########################################################### # DELETE /score-calibrations/{calibration_urn} ########################################################### From 33d78cd7d8b78377f514dc94681d64149e7ee559 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Tue, 25 Nov 2025 15:40:38 -0800 Subject: [PATCH 13/24] refactor: replace ValueError with ValidationError for calibration class validation --- src/mavedb/lib/validation/dataframe/calibration.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/mavedb/lib/validation/dataframe/calibration.py b/src/mavedb/lib/validation/dataframe/calibration.py index c7db74a6..e31cc436 100644 --- a/src/mavedb/lib/validation/dataframe/calibration.py +++ b/src/mavedb/lib/validation/dataframe/calibration.py @@ -50,7 +50,9 @@ def validate_and_standardize_calibration_classes_dataframe( and data content. """ if not calibration.class_based: - raise ValueError("Calibration classes file can only be provided for functional classification calibrations.") + raise ValidationError( + "Calibration classes file can only be provided for functional classification calibrations." 
+ ) standardized_classes_df = standardize_dataframe(classes_df, STANDARD_CALIBRATION_COLUMNS) validate_calibration_df_column_names(standardized_classes_df) @@ -174,7 +176,7 @@ def validate_calibration_classes( that are missing from the series. """ if not calibration.functional_classifications: - raise ValueError("Calibration must have functional classifications defined for class validation.") + raise ValidationError("Calibration must have functional classifications defined for class validation.") defined_classes = {c.class_ for c in calibration.functional_classifications} provided_classes = set(classes.tolist()) From be4402b2d91b6ecf7ff891f2aafac4d8a5ca8315 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Tue, 25 Nov 2025 15:40:49 -0800 Subject: [PATCH 14/24] feat: add error handling for validation of class files in score calibration creation and modification --- src/mavedb/routers/score_calibrations.py | 29 ++++++++++++++----- .../validation/dataframe/test_calibration.py | 14 ++++----- 2 files changed, 28 insertions(+), 15 deletions(-) diff --git a/src/mavedb/routers/score_calibrations.py b/src/mavedb/routers/score_calibrations.py index f5c30875..caf8b448 100644 --- a/src/mavedb/routers/score_calibrations.py +++ b/src/mavedb/routers/score_calibrations.py @@ -26,6 +26,7 @@ from mavedb.lib.score_sets import csv_data_to_df from mavedb.lib.validation.constants.general import calibration_class_column_name, calibration_variant_column_name from mavedb.lib.validation.dataframe.calibration import validate_and_standardize_calibration_classes_dataframe +from mavedb.lib.validation.exceptions import ValidationError from mavedb.models.score_calibration import ScoreCalibration from mavedb.models.score_set import ScoreSet from mavedb.view_models import score_calibration @@ -290,10 +291,16 @@ async def create_score_calibration_route( status_code=400, detail=f"Error decoding file: {e}. Ensure the file has correct values." ) - standardized_classes_df = validate_and_standardize_calibration_classes_dataframe( - db, score_set, calibration, classes_df - ) - variant_classes = variant_classification_df_to_dict(standardized_classes_df) + try: + standardized_classes_df = validate_and_standardize_calibration_classes_dataframe( + db, score_set, calibration, classes_df + ) + variant_classes = variant_classification_df_to_dict(standardized_classes_df) + except ValidationError as e: + raise HTTPException( + status_code=422, + detail=[{"loc": [e.custom_loc or "classesFile"], "msg": str(e), "type": "value_error"}], + ) created_calibration = await create_score_calibration_in_score_set( db, calibration, user_data.user, variant_classes if classes_file else None @@ -438,10 +445,16 @@ async def modify_score_calibration_route( status_code=400, detail=f"Error decoding file: {e}. Ensure the file has correct values." 
) - standardized_classes_df = validate_and_standardize_calibration_classes_dataframe( - db, score_set, calibration_update, classes_df - ) - variant_classes = variant_classification_df_to_dict(standardized_classes_df) + try: + standardized_classes_df = validate_and_standardize_calibration_classes_dataframe( + db, score_set, calibration_update, classes_df + ) + variant_classes = variant_classification_df_to_dict(standardized_classes_df) + except ValidationError as e: + raise HTTPException( + status_code=422, + detail=[{"loc": [e.custom_loc or "classesFile"], "msg": str(e), "type": "value_error"}], + ) updated_calibration = await modify_score_calibration( db, item, calibration_update, user_data.user, variant_classes if classes_file else None diff --git a/tests/validation/dataframe/test_calibration.py b/tests/validation/dataframe/test_calibration.py index 6a7a7676..241abee4 100644 --- a/tests/validation/dataframe/test_calibration.py +++ b/tests/validation/dataframe/test_calibration.py @@ -80,7 +80,7 @@ def test_validate_and_standardize_calibration_classes_dataframe_success(self, mo mock_dependencies["validate_data_column"].assert_called_once() def test_validate_and_standardize_calibration_classes_dataframe_not_class_based(self): - """Test ValueError when calibration is not class-based.""" + """Test ValidationError when calibration is not class-based.""" mock_db = Mock() mock_score_set = Mock() mock_calibration = Mock() @@ -88,7 +88,7 @@ def test_validate_and_standardize_calibration_classes_dataframe_not_class_based( input_df = pd.DataFrame({"variant": ["var1"], "class": ["A"]}) with pytest.raises( - ValueError, + ValidationError, match="Calibration classes file can only be provided for functional classification calibrations.", ): validate_and_standardize_calibration_classes_dataframe(mock_db, mock_score_set, mock_calibration, input_df) @@ -170,7 +170,7 @@ def test_validate_and_standardize_calibration_classes_dataframe_invalid_classes( mock_calibration.functional_classifications = None with pytest.raises( - ValueError, match="Calibration must have functional classifications defined for class validation." + ValidationError, match="Calibration must have functional classifications defined for class validation." ): validate_and_standardize_calibration_classes_dataframe(mock_db, mock_score_set, mock_calibration, input_df) @@ -634,24 +634,24 @@ def test_validate_calibration_classes_success(self): validate_calibration_classes(calibration, classes) def test_validate_calibration_classes_no_functional_classifications(self): - """Test ValueError when calibration has no functional classifications.""" + """Test ValidationError when calibration has no functional classifications.""" calibration = Mock(spec=score_calibration.ScoreCalibrationCreate) calibration.functional_classifications = None classes = pd.Series(["class_a", "class_b"]) with pytest.raises( - ValueError, match="Calibration must have functional classifications defined for class validation." + ValidationError, match="Calibration must have functional classifications defined for class validation." 
): validate_calibration_classes(calibration, classes) def test_validate_calibration_classes_empty_functional_classifications(self): - """Test ValueError when calibration has empty functional classifications.""" + """Test ValidationError when calibration has empty functional classifications.""" calibration = Mock(spec=score_calibration.ScoreCalibrationCreate) calibration.functional_classifications = [] classes = pd.Series(["class_a", "class_b"]) with pytest.raises( - ValueError, match="Calibration must have functional classifications defined for class validation." + ValidationError, match="Calibration must have functional classifications defined for class validation." ): validate_calibration_classes(calibration, classes) From 126c5916519c37472edf0f7d48506052c858ff48 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Tue, 25 Nov 2025 23:31:15 -0800 Subject: [PATCH 15/24] feat: add file presence/absence checks in calibration creation and modification routes --- src/mavedb/routers/score_calibrations.py | 24 ++++ tests/routers/test_score_calibrations.py | 158 ++++++++++++++++++++++- 2 files changed, 181 insertions(+), 1 deletion(-) diff --git a/src/mavedb/routers/score_calibrations.py b/src/mavedb/routers/score_calibrations.py index caf8b448..a0a0dd31 100644 --- a/src/mavedb/routers/score_calibrations.py +++ b/src/mavedb/routers/score_calibrations.py @@ -283,7 +283,19 @@ async def create_score_calibration_route( # permission to update the score set itself. assert_permission(user_data, score_set, Action.UPDATE) + if calibration.class_based and not classes_file: + raise HTTPException( + status_code=422, + detail="A classes_file must be provided when creating a class-based calibration.", + ) + if classes_file: + if calibration.range_based: + raise HTTPException( + status_code=422, + detail="A classes_file should not be provided when creating a range-based calibration.", + ) + try: classes_df = csv_data_to_df(classes_file.file, induce_hgvs_cols=False) except UnicodeDecodeError as e: @@ -437,7 +449,19 @@ async def modify_score_calibration_route( assert_permission(user_data, item, Action.UPDATE) + if calibration_update.class_based and not classes_file: + raise HTTPException( + status_code=422, + detail="A classes_file must be provided when modifying a class-based calibration.", + ) + if classes_file: + if calibration_update.range_based: + raise HTTPException( + status_code=422, + detail="A classes_file should not be provided when modifying a range-based calibration.", + ) + try: classes_df = csv_data_to_df(classes_file.file, induce_hgvs_cols=False) except UnicodeDecodeError as e: diff --git a/tests/routers/test_score_calibrations.py b/tests/routers/test_score_calibrations.py index 90d0d564..1f8d0792 100644 --- a/tests/routers/test_score_calibrations.py +++ b/tests/routers/test_score_calibrations.py @@ -1309,6 +1309,80 @@ def test_cannot_create_score_calibration_in_public_score_set_when_score_set_not_ assert f"insufficient permissions on score set with URN '{score_set['urn']}'" in error["detail"] +@pytest.mark.parametrize( + "mock_publication_fetch", + [ + [ + {"dbName": "PubMed", "identifier": TEST_PUBMED_IDENTIFIER}, + {"dbName": "bioRxiv", "identifier": TEST_BIORXIV_IDENTIFIER}, + ] + ], + indirect=["mock_publication_fetch"], +) +def test_cannot_create_class_based_score_calibration_without_classes_file( + client, setup_router_db, mock_publication_fetch, session, data_provider, data_files +): + experiment = create_experiment(client) + score_set = create_seq_score_set_with_mapped_variants( + 
client, + session, + data_provider, + experiment["urn"], + data_files / "scores.csv", + ) + + response = client.post( + "/api/v1/score-calibrations", + json={ + "scoreSetUrn": score_set["urn"], + **deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_CLASS_BASED), + }, + ) + + assert response.status_code == 422 + error = response.json() + assert "A classes_file must be provided when creating a class-based calibration" in str(error["detail"]) + + +@pytest.mark.parametrize( + "mock_publication_fetch", + [ + [ + {"dbName": "PubMed", "identifier": TEST_PUBMED_IDENTIFIER}, + {"dbName": "bioRxiv", "identifier": TEST_BIORXIV_IDENTIFIER}, + ] + ], + indirect=["mock_publication_fetch"], +) +def test_cannot_create_range_based_score_calibration_with_classes_file( + client, setup_router_db, mock_publication_fetch, session, data_provider, data_files +): + experiment = create_experiment(client) + score_set = create_seq_score_set_with_mapped_variants( + client, + session, + data_provider, + experiment["urn"], + data_files / "scores.csv", + ) + + classification_csv_path = data_files / "calibration_classes.csv" + with open(classification_csv_path, "rb") as class_file: + response = client.post( + "/api/v1/score-calibrations", + files={"classes_file": (classification_csv_path.name, class_file, "text/csv")}, + data={ + "calibration_json": json.dumps( + {"scoreSetUrn": score_set["urn"], **deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED)} + ), + }, + ) + + assert response.status_code == 422 + error = response.json() + assert "A classes_file should not be provided when creating a range-based calibration" in str(error["detail"]) + + @pytest.mark.parametrize( "mock_publication_fetch", [ @@ -1626,6 +1700,89 @@ def test_cannot_update_score_calibration_when_calibration_not_exists( assert "The requested score calibration does not exist" in error["detail"] +@pytest.mark.parametrize( + "mock_publication_fetch", + [ + [ + {"dbName": "PubMed", "identifier": TEST_PUBMED_IDENTIFIER}, + {"dbName": "bioRxiv", "identifier": TEST_BIORXIV_IDENTIFIER}, + ] + ], + indirect=["mock_publication_fetch"], +) +def test_cannot_update_class_based_score_calibration_without_class_file( + client, setup_router_db, mock_publication_fetch, session, data_provider, data_files +): + experiment = create_experiment(client) + score_set = create_seq_score_set_with_mapped_variants( + client, + session, + data_provider, + experiment["urn"], + data_files / "scores.csv", + ) + calibration = create_test_score_calibration_in_score_set_via_client( + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) + ) + + response = client.put( + f"/api/v1/score-calibrations/{calibration['urn']}", + json={ + "scoreSetUrn": score_set["urn"], + **deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_CLASS_BASED), + }, + ) + + assert response.status_code == 422 + error = response.json() + assert "A classes_file must be provided when modifying a class-based calibration" in str(error["detail"]) + + +@pytest.mark.parametrize( + "mock_publication_fetch", + [ + [ + {"dbName": "PubMed", "identifier": TEST_PUBMED_IDENTIFIER}, + {"dbName": "bioRxiv", "identifier": TEST_BIORXIV_IDENTIFIER}, + ] + ], + indirect=["mock_publication_fetch"], +) +def test_cannot_update_range_based_score_calibration_with_class_file( + client, setup_router_db, mock_publication_fetch, session, data_provider, data_files +): + experiment = create_experiment(client) + score_set = create_seq_score_set_with_mapped_variants( + client, + session, + data_provider, + experiment["urn"], + data_files / 
"scores.csv", + ) + calibration = create_test_score_calibration_in_score_set_via_client( + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) + ) + + classification_csv_path = data_files / "calibration_classes.csv" + with open(classification_csv_path, "rb") as class_file: + response = client.put( + f"/api/v1/score-calibrations/{calibration['urn']}", + files={"classes_file": (classification_csv_path.name, class_file, "text/csv")}, + data={ + "calibration_json": json.dumps( + { + "scoreSetUrn": score_set["urn"], + **deepcamelize(TEST_PATHOGENICITY_SCORE_CALIBRATION), + } + ), + }, + ) + + assert response.status_code == 422 + error = response.json() + assert "A classes_file should not be provided when modifying a range-based calibration" in str(error["detail"]) + + @pytest.mark.parametrize( "mock_publication_fetch", [ @@ -2315,7 +2472,6 @@ def test_can_modify_score_calibration_to_class_based( "calibration_json": json.dumps({"scoreSetUrn": score_set["urn"], **updated_calibration_data}), }, ) - print(response.text) assert response.status_code == 200 calibration_response = response.json() From 968126a630e1abf2afb65dff722dc38ddf8fe923 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Wed, 26 Nov 2025 16:27:41 -0800 Subject: [PATCH 16/24] feat: allow class-based calibration definitions from hgvs_nt and hgvs_pro - Allow class-based calibration to be defined via hgvs strings - Introduced new test CSV files for calibration classes based on HGVS nucleotide, HGVS protein, and URN. - Enhanced test coverage for score calibration creation and updating, including scenarios for decoding errors and validation errors. - Refactored tests to utilize parameterization for different calibration class files. - Added validation checks for index column selection in calibration dataframes. - Improved error messages for missing or invalid calibration classes. 
--- src/mavedb/lib/score_calibrations.py | 92 +++- src/mavedb/lib/types/score_calibrations.py | 6 + .../lib/validation/dataframe/calibration.py | 106 +++- src/mavedb/routers/score_calibrations.py | 17 +- tests/lib/test_score_calibrations.py | 332 ++++++++++-- .../data/calibration_classes_by_hgvs_nt.csv | 4 + .../data/calibration_classes_by_hgvs_prot.csv | 4 + ...ses.csv => calibration_classes_by_urn.csv} | 0 tests/routers/test_score_calibrations.py | 228 ++++++++- .../validation/dataframe/test_calibration.py | 481 ++++++++++++++---- 10 files changed, 1036 insertions(+), 234 deletions(-) create mode 100644 src/mavedb/lib/types/score_calibrations.py create mode 100644 tests/routers/data/calibration_classes_by_hgvs_nt.csv create mode 100644 tests/routers/data/calibration_classes_by_hgvs_prot.csv rename tests/routers/data/{calibration_classes.csv => calibration_classes_by_urn.csv} (100%) diff --git a/src/mavedb/lib/score_calibrations.py b/src/mavedb/lib/score_calibrations.py index 82bff826..98c7708c 100644 --- a/src/mavedb/lib/score_calibrations.py +++ b/src/mavedb/lib/score_calibrations.py @@ -9,7 +9,13 @@ from mavedb.lib.acmg import find_or_create_acmg_classification from mavedb.lib.identifiers import find_or_create_publication_identifier -from mavedb.lib.validation.constants.general import calibration_class_column_name, calibration_variant_column_name +from mavedb.lib.types.score_calibrations import ClassificationDict +from mavedb.lib.validation.constants.general import ( + calibration_class_column_name, + calibration_variant_column_name, + hgvs_nt_column, + hgvs_pro_column, +) from mavedb.lib.validation.utilities import inf_or_float from mavedb.models.enums.score_calibration_relation import ScoreCalibrationRelation from mavedb.models.score_calibration import ScoreCalibration @@ -27,7 +33,7 @@ def create_functional_classification( score_calibration.FunctionalClassificationCreate, score_calibration.FunctionalClassificationModify ], containing_calibration: ScoreCalibration, - variant_classes: Optional[dict[str, list[str]]] = None, + variant_classes: Optional[ClassificationDict] = None, ) -> ScoreCalibrationFunctionalClassification: """ Create a functional classification entity for score calibration. @@ -42,7 +48,7 @@ def create_functional_classification( description, range bounds, inclusivity flags, and optional ACMG classification information. containing_calibration (ScoreCalibration): The ScoreCalibration instance. - variant_classes (Optional[dict[str, list[str]]]): Optional dictionary mapping variant classes + variant_classes (Optional[ClassificationDict]): Optional dictionary mapping variant classes to their corresponding variant identifiers. Returns: @@ -92,7 +98,7 @@ async def _create_score_calibration( db: Session, calibration_create: score_calibration.ScoreCalibrationCreate, user: User, - variant_classes: Optional[dict[str, list[str]]] = None, + variant_classes: Optional[ClassificationDict] = None, containing_score_set: Optional[ScoreSet] = None, ) -> ScoreCalibration: """ @@ -125,6 +131,10 @@ async def _create_score_calibration( optional lists of publication source identifiers grouped by relation type. user : User Authenticated user context; the user to be recorded for audit + variant_classes (Optional[ClassificationDict]): + Optional dictionary mapping variant classes to their corresponding variant identifiers. + containing_score_set : Optional[ScoreSet] + If provided, the ScoreSet instance to which the new calibration will belong. 
Returns ------- @@ -201,7 +211,7 @@ async def create_score_calibration_in_score_set( db: Session, calibration_create: score_calibration.ScoreCalibrationCreate, user: User, - variant_classes: Optional[dict[str, list[str]]] = None, + variant_classes: Optional[ClassificationDict] = None, ) -> ScoreCalibration: """ Create a new score calibration and associate it with an existing score set. @@ -217,7 +227,7 @@ async def create_score_calibration_in_score_set( object containing the fields required to create a score calibration. Must include a non-empty score_set_urn. user (User): Authenticated user information used for auditing - variant_classes (Optional[dict[str, list[str]]]): Optional dictionary mapping variant classes + variant_classes (Optional[ClassificationDict]): Optional dictionary mapping variant classes to their corresponding variant identifiers. Returns: @@ -259,7 +269,7 @@ async def create_score_calibration( db: Session, calibration_create: score_calibration.ScoreCalibrationCreate, user: User, - variant_classes: Optional[dict[str, list[str]]] = None, + variant_classes: Optional[ClassificationDict] = None, ) -> ScoreCalibration: """ Asynchronously create and persist a new ScoreCalibration record. @@ -277,7 +287,7 @@ async def create_score_calibration( score set identifiers). user : User Authenticated user context; the user to be recorded for audit - variant_classes (Optional[dict[str, list[str]]]): Optional dictionary mapping variant classes + variant_classes (Optional[ClassificationDict]): Optional dictionary mapping variant classes to their corresponding variant identifiers. Returns @@ -323,7 +333,7 @@ async def modify_score_calibration( calibration: ScoreCalibration, calibration_update: score_calibration.ScoreCalibrationModify, user: User, - variant_classes: Optional[dict[str, list[str]]] = None, + variant_classes: Optional[ClassificationDict] = None, ) -> ScoreCalibration: """ Asynchronously modify an existing ScoreCalibration record and its related publication @@ -360,7 +370,7 @@ async def modify_score_calibration( - Additional mutable calibration attributes. user : User Context for the authenticated user; the user to be recorded for audit. - variant_classes (Optional[dict[str, list[str]]]): Optional dictionary mapping variant classes + variant_classes (Optional[ClassificationDict]): Optional dictionary mapping variant classes to their corresponding variant identifiers. Returns @@ -645,7 +655,7 @@ def delete_score_calibration(db: Session, calibration: ScoreCalibration) -> None def variants_for_functional_classification( db: Session, functional_classification: ScoreCalibrationFunctionalClassification, - variant_classes: Optional[dict[str, list[str]]] = None, + variant_classes: Optional[ClassificationDict] = None, use_sql: bool = False, ) -> list[Variant]: """ @@ -664,7 +674,7 @@ def variants_for_functional_classification( Active SQLAlchemy session. functional_classification : ScoreCalibrationFunctionalClassification The ORM row defining the interval to test against. - variant_classes : Optional[dict[str, list[str]]] + variant_classes : Optional[ClassificationDict] If provided, a dictionary mapping variant classes to their corresponding variant identifiers to use for classification rather than the range property of the functional_classification. 
use_sql : bool @@ -688,6 +698,14 @@ def variants_for_functional_classification( """ # Resolve score set id from attached calibration (relationship may be lazy) score_set_id = functional_classification.calibration.score_set_id # type: ignore[attr-defined] + + if variant_classes and variant_classes["indexed_by"] not in [ + hgvs_nt_column, + hgvs_pro_column, + calibration_variant_column_name, + ]: + raise ValueError(f"Unsupported index column `{variant_classes['indexed_by']}` for variant classification.") + if use_sql: try: # Build score extraction expression: data['score_data']['score']::text::float @@ -695,8 +713,16 @@ def variants_for_functional_classification( conditions = [Variant.score_set_id == score_set_id] if variant_classes is not None and functional_classification.class_ is not None: - variant_urns = variant_classes.get(functional_classification.class_, []) - conditions.append(Variant.urn.in_(variant_urns)) + index_element = variant_classes["classifications"].get(functional_classification.class_, set()) + + if variant_classes["indexed_by"] == hgvs_nt_column: + conditions.append(Variant.hgvs_nt.in_(index_element)) + elif variant_classes["indexed_by"] == hgvs_pro_column: + conditions.append(Variant.hgvs_pro.in_(index_element)) + elif variant_classes["indexed_by"] == calibration_variant_column_name: + conditions.append(Variant.urn.in_(index_element)) + else: # pragma: no cover + return [] elif functional_classification.range is not None and len(functional_classification.range) == 2: lower_raw, upper_raw = functional_classification.range @@ -732,9 +758,19 @@ def variants_for_functional_classification( matches: list[Variant] = [] for v in variants: if variant_classes is not None and functional_classification.class_ is not None: - variant_urns = variant_classes.get(functional_classification.class_, []) - if v.urn in variant_urns: - matches.append(v) + index_element = variant_classes["classifications"].get(functional_classification.class_, set()) + + if variant_classes["indexed_by"] == hgvs_nt_column: + if v.hgvs_nt in index_element: + matches.append(v) + elif variant_classes["indexed_by"] == hgvs_pro_column: + if v.hgvs_pro in index_element: + matches.append(v) + elif variant_classes["indexed_by"] == calibration_variant_column_name: + if v.urn in index_element: + matches.append(v) + else: # pragma: no cover + continue elif functional_classification.range is not None and len(functional_classification.range) == 2: try: @@ -759,7 +795,8 @@ def variants_for_functional_classification( def variant_classification_df_to_dict( df: pd.DataFrame, -) -> dict[str, list[str]]: + index_column: str, +) -> ClassificationDict: """ Convert a DataFrame of variant classifications into a dictionary mapping functional class labels to lists of distinct variant URNs. @@ -776,18 +813,19 @@ def variant_classification_df_to_dict( Returns ------- - dict[str, list[str]] - A dictionary where keys are functional class labels and values are lists - of distinct variant URNs belonging to each class. + ClassificationDict + A dictionary with two keys: 'indexed_by' indicating the index column name, + and 'classifications' mapping each functional class label to a list of + distinct variant URNs. 
""" - classification_dict: dict[str, list[str]] = {} + classifications: dict[str, set[str]] = {} for _, row in df.iterrows(): - variant_urn = row[calibration_variant_column_name] + index_element = row[index_column] functional_class = row[calibration_class_column_name] - if functional_class not in classification_dict: - classification_dict[functional_class] = [] + if functional_class not in classifications: + classifications[functional_class] = set() - classification_dict[functional_class].append(variant_urn) + classifications[functional_class].add(index_element) - return {k: list(set(v)) for k, v in classification_dict.items()} + return {"indexed_by": index_column, "classifications": classifications} diff --git a/src/mavedb/lib/types/score_calibrations.py b/src/mavedb/lib/types/score_calibrations.py new file mode 100644 index 00000000..d40edaf2 --- /dev/null +++ b/src/mavedb/lib/types/score_calibrations.py @@ -0,0 +1,6 @@ +from typing import TypedDict + + +class ClassificationDict(TypedDict): + indexed_by: str + classifications: dict[str, set[str]] diff --git a/src/mavedb/lib/validation/dataframe/calibration.py b/src/mavedb/lib/validation/dataframe/calibration.py index e31cc436..6718faa3 100644 --- a/src/mavedb/lib/validation/dataframe/calibration.py +++ b/src/mavedb/lib/validation/dataframe/calibration.py @@ -5,6 +5,8 @@ from mavedb.lib.validation.constants.general import ( calibration_class_column_name, calibration_variant_column_name, + hgvs_nt_column, + hgvs_pro_column, ) from mavedb.lib.validation.dataframe.column import validate_data_column, validate_variant_column from mavedb.lib.validation.dataframe.dataframe import standardize_dataframe, validate_no_null_rows @@ -13,7 +15,12 @@ from mavedb.models.variant import Variant from mavedb.view_models import score_calibration -STANDARD_CALIBRATION_COLUMNS = (calibration_variant_column_name, calibration_class_column_name) +STANDARD_CALIBRATION_COLUMNS = ( + calibration_variant_column_name, + calibration_class_column_name, + hgvs_nt_column, + hgvs_pro_column, +) def validate_and_standardize_calibration_classes_dataframe( @@ -21,7 +28,7 @@ def validate_and_standardize_calibration_classes_dataframe( score_set: ScoreSet, calibration: score_calibration.ScoreCalibrationCreate | score_calibration.ScoreCalibrationModify, classes_df: pd.DataFrame, -) -> pd.DataFrame: +) -> tuple[pd.DataFrame, str]: """ Validate and standardize a calibration classes dataframe for functional classification calibrations. 
@@ -59,22 +66,22 @@ def validate_and_standardize_calibration_classes_dataframe( validate_no_null_rows(standardized_classes_df) column_mapping = {c.lower(): c for c in standardized_classes_df.columns} - index_column = column_mapping[calibration_variant_column_name] + index_column = choose_calibration_index_column(standardized_classes_df) + + # Drop rows where the calibration class column is NA + standardized_classes_df = standardized_classes_df.dropna( + subset=[column_mapping[calibration_class_column_name]] + ).reset_index(drop=True) for c in column_mapping: - if c == calibration_variant_column_name: + if c == index_column.lower(): validate_variant_column(standardized_classes_df[c], column_mapping[c] == index_column) - validate_calibration_variant_urns(db, score_set, standardized_classes_df[c]) + validate_index_existence_in_score_set(db, score_set, standardized_classes_df[c], index_column) elif c == calibration_class_column_name: validate_data_column(standardized_classes_df[c], force_numeric=False) validate_calibration_classes(calibration, standardized_classes_df[c]) - # handle unexpected columns. These should have already been caught by - # validate_calibration_df_column_names, but we include this for completeness. - else: # pragma: no cover - raise ValidationError(f"unexpected column in calibration classes file: '{c}'") - - return standardized_classes_df + return standardized_classes_df, index_column def validate_calibration_df_column_names(df: pd.DataFrame) -> None: @@ -113,21 +120,20 @@ def validate_calibration_df_column_names(df: pd.DataFrame) -> None: columns = [c.lower() for c in df.columns] - if calibration_variant_column_name not in columns: - raise ValidationError(f"missing required column: '{calibration_variant_column_name}'") - if calibration_class_column_name not in columns: raise ValidationError(f"missing required column: '{calibration_class_column_name}'") - if set(STANDARD_CALIBRATION_COLUMNS) != set(columns): + if set(columns).isdisjoint({hgvs_nt_column, hgvs_pro_column, calibration_variant_column_name}): raise ValidationError( - f"unexpected column(s) in calibration classes file: {', '.join(sorted(set(columns) - set(STANDARD_CALIBRATION_COLUMNS)))}" + f"at least one of {', '.join({hgvs_nt_column, hgvs_pro_column, calibration_variant_column_name})} must be present" ) -def validate_calibration_variant_urns(db: Session, score_set: ScoreSet, variant_urns: pd.Series) -> None: +def validate_index_existence_in_score_set( + db: Session, score_set: ScoreSet, index_column: pd.Series, index_column_name: str +) -> None: """ - Validate that all provided variant URNs exist in the given score set. + Validate that all provided resources in the index column exist in the given score set. Args: db (Session): Database session for querying variants. @@ -140,19 +146,65 @@ def validate_calibration_variant_urns(db: Session, score_set: ScoreSet, variant_ Returns: None: Function returns nothing if validation passes. 
""" - existing_variant_urns = set( - db.scalars( - select(Variant.urn).where(Variant.score_set_id == score_set.id, Variant.urn.in_(variant_urns.tolist())) - ).all() - ) - - missing_variant_urns = set(variant_urns.tolist()) - existing_variant_urns - if missing_variant_urns: + if index_column_name.lower() == calibration_variant_column_name: + existing_resources = set( + db.scalars( + select(Variant.urn).where(Variant.score_set_id == score_set.id, Variant.urn.in_(index_column.tolist())) + ).all() + ) + elif index_column_name.lower() == hgvs_nt_column: + existing_resources = set( + db.scalars( + select(Variant.hgvs_nt).where( + Variant.score_set_id == score_set.id, Variant.hgvs_nt.in_(index_column.tolist()) + ) + ).all() + ) + elif index_column_name.lower() == hgvs_pro_column: + existing_resources = set( + db.scalars( + select(Variant.hgvs_pro).where( + Variant.score_set_id == score_set.id, Variant.hgvs_pro.in_(index_column.tolist()) + ) + ).all() + ) + + missing_resources = set(index_column.tolist()) - existing_resources + if missing_resources: raise ValidationError( - f"The following variant URNs do not exist in the score set: {', '.join(sorted(missing_variant_urns))}" + f"The following resources do not exist in the score set: {', '.join(sorted(missing_resources))}" ) +def choose_calibration_index_column(df: pd.DataFrame) -> str: + """ + Choose the appropriate index column for a calibration DataFrame. + + This function selects the index column based on the presence of specific columns + in the DataFrame. It prioritizes the calibration variant column, followed by + HGVS notation columns. + + Args: + df (pd.DataFrame): The DataFrame from which to choose the index column. + + Returns: + str: The name of the chosen index column. + + Raises: + ValidationError: If no valid index column is found in the DataFrame. + """ + column_mapping = {c.lower(): c for c in df.columns if not df[c].isna().all()} + + if calibration_variant_column_name in column_mapping: + return column_mapping[calibration_variant_column_name] + elif hgvs_nt_column in column_mapping: + return column_mapping[hgvs_nt_column] + elif hgvs_pro_column in column_mapping: + return column_mapping[hgvs_pro_column] + else: + raise ValidationError("failed to find valid calibration index column") + + def validate_calibration_classes( calibration: score_calibration.ScoreCalibrationCreate | score_calibration.ScoreCalibrationModify, classes: pd.Series ) -> None: diff --git a/src/mavedb/routers/score_calibrations.py b/src/mavedb/routers/score_calibrations.py index a0a0dd31..1121e083 100644 --- a/src/mavedb/routers/score_calibrations.py +++ b/src/mavedb/routers/score_calibrations.py @@ -304,10 +304,10 @@ async def create_score_calibration_route( ) try: - standardized_classes_df = validate_and_standardize_calibration_classes_dataframe( + standardized_classes_df, index_column = validate_and_standardize_calibration_classes_dataframe( db, score_set, calibration, classes_df ) - variant_classes = variant_classification_df_to_dict(standardized_classes_df) + variant_classes = variant_classification_df_to_dict(standardized_classes_df, index_column) except ValidationError as e: raise HTTPException( status_code=422, @@ -425,9 +425,9 @@ async def modify_score_calibration_route( # If the user supplies a new score_set_urn, validate it exists and the user has permission to use it. 
if calibration_update.score_set_urn is not None: - score_set = db.query(ScoreSet).filter(ScoreSet.urn == calibration_update.score_set_urn).one_or_none() + score_set_update = db.query(ScoreSet).filter(ScoreSet.urn == calibration_update.score_set_urn).one_or_none() - if not score_set: + if not score_set_update: logger.debug("ScoreSet not found", extra=logging_context()) raise HTTPException( status_code=404, detail=f"score set with URN '{calibration_update.score_set_urn}' not found" @@ -435,7 +435,9 @@ async def modify_score_calibration_route( # TODO#539: Allow any authenticated user to upload a score calibration for a score set, not just those with # permission to update the score set itself. - assert_permission(user_data, score_set, Action.UPDATE) + assert_permission(user_data, score_set_update, Action.UPDATE) + else: + score_set_update = None item = ( db.query(ScoreCalibration) @@ -448,6 +450,7 @@ async def modify_score_calibration_route( raise HTTPException(status_code=404, detail="The requested score calibration does not exist") assert_permission(user_data, item, Action.UPDATE) + score_set = score_set_update or item.score_set if calibration_update.class_based and not classes_file: raise HTTPException( @@ -470,10 +473,10 @@ async def modify_score_calibration_route( ) try: - standardized_classes_df = validate_and_standardize_calibration_classes_dataframe( + standardized_classes_df, index_column = validate_and_standardize_calibration_classes_dataframe( db, score_set, calibration_update, classes_df ) - variant_classes = variant_classification_df_to_dict(standardized_classes_df) + variant_classes = variant_classification_df_to_dict(standardized_classes_df, index_column) except ValidationError as e: raise HTTPException( status_code=422, diff --git a/tests/lib/test_score_calibrations.py b/tests/lib/test_score_calibrations.py index 110633ab..ad6bb0ea 100644 --- a/tests/lib/test_score_calibrations.py +++ b/tests/lib/test_score_calibrations.py @@ -23,7 +23,12 @@ variant_classification_df_to_dict, variants_for_functional_classification, ) -from mavedb.lib.validation.constants.general import calibration_class_column_name, calibration_variant_column_name +from mavedb.lib.validation.constants.general import ( + calibration_class_column_name, + calibration_variant_column_name, + hgvs_nt_column, + hgvs_pro_column, +) from mavedb.models.enums.score_calibration_relation import ScoreCalibrationRelation from mavedb.models.score_calibration import ScoreCalibration from mavedb.models.score_calibration_functional_classification import ScoreCalibrationFunctionalClassification @@ -182,8 +187,11 @@ def test_create_functional_classification_with_variant_classes(setup_lib_db, ses functional_range_create, calibration, variant_classes={ - "pathogenic": ["variant_urn_1", "variant_urn_2"], - "benign": ["variant_urn_3"], + "indexed_by": calibration_variant_column_name, + "classifications": { + "pathogenic": ["variant_urn_1", "variant_urn_2"], + "benign": ["variant_urn_3"], + }, }, ) @@ -1768,9 +1776,47 @@ def test_variants_for_functional_classification_returns_empty_list_when_range_is @pytest.mark.parametrize( - "range_,class_,variant_classes", + "use_sql", + [True, False], +) +def test_variants_for_functional_classification_raises_error_when_index_column_not_found( + setup_lib_db, session, use_sql +): + mock_calibration = mock.Mock(spec=ScoreCalibration) + mock_calibration.score_set_id = 1 + mock_functional_calibration = mock.Mock(spec=ScoreCalibrationFunctionalClassification) + mock_functional_calibration.range = None 
+ mock_functional_calibration.class_ = "benign" + mock_functional_calibration.calibration = mock_calibration + + variant_classes = pd.DataFrame( + { + "some_other_column": [ + "urn:mavedb:variant-1", + "urn:mavedb:variant-2", + "urn:mavedb:variant-3", + ], + calibration_class_column_name: [ + "pathogenic", + "benign", + "pathogenic", + ], + } + ) + + with pytest.raises(ValueError, match="Unsupported index column `some_other_column` for variant classification."): + variants_for_functional_classification( + session, + mock_functional_calibration, + variant_classes=variant_classification_df_to_dict(variant_classes, index_column="some_other_column"), + use_sql=use_sql, + ) + + +@pytest.mark.parametrize( + "range_,class_,variant_classes,index_column", [ - ([1.0, 2.0], None, None), + ([1.0, 2.0], None, None, None), ( None, "benign", @@ -1788,26 +1834,71 @@ def test_variants_for_functional_classification_returns_empty_list_when_range_is ], } ), + calibration_variant_column_name, + ), + ( + None, + "benign", + pd.DataFrame( + { + hgvs_nt_column: [ + "NC_000001.11:g.1000A>T", + "NC_000001.11:g.1001G>C", + "NC_000001.11:g.1002T>A", + ], + calibration_class_column_name: [ + "pathogenic", + "benign", + "pathogenic", + ], + } + ), + hgvs_nt_column, + ), + ( + None, + "benign", + pd.DataFrame( + { + hgvs_pro_column: [ + "NP_000000.1:p.Lys100Asn", + "NP_000000.1:p.Gly101Arg", + "NP_000000.1:p.Ser102Thr", + ], + calibration_class_column_name: [ + "pathogenic", + "benign", + "pathogenic", + ], + } + ), + hgvs_pro_column, ), ], ) def test_variants_for_functional_classification_python_filtering_with_valid_variants( - setup_lib_db_with_score_set, session, range_, class_, variant_classes + setup_lib_db_with_score_set, session, range_, class_, variant_classes, index_column ): variant_1 = Variant( data={"score_data": {"score": 0.5}}, score_set_id=setup_lib_db_with_score_set.id, urn="urn:mavedb:variant-1", + hgvs_nt="NC_000001.11:g.1000A>T", + hgvs_pro="NP_000000.1:p.Lys100Asn", ) variant_2 = Variant( data={"score_data": {"score": 1.5}}, score_set_id=setup_lib_db_with_score_set.id, urn="urn:mavedb:variant-2", + hgvs_nt="NC_000001.11:g.1001G>C", + hgvs_pro="NP_000000.1:p.Gly101Arg", ) variant_3 = Variant( data={"score_data": {"score": 2.5}}, score_set_id=setup_lib_db_with_score_set.id, urn="urn:mavedb:variant-3", + hgvs_nt="NC_000001.11:g.1002T>A", + hgvs_pro="NP_000000.1:p.Ser102Thr", ) session.add_all([variant_1, variant_2, variant_3]) @@ -1824,7 +1915,9 @@ def test_variants_for_functional_classification_python_filtering_with_valid_vari result = variants_for_functional_classification( session, mock_functional_classification, - variant_classes=variant_classification_df_to_dict(variant_classes) if variant_classes is not None else None, + variant_classes=variant_classification_df_to_dict(variant_classes, index_column) + if variant_classes is not None + else None, use_sql=False, ) @@ -2061,9 +2154,9 @@ def test_variants_for_functional_classification_python_filtering_skips_variants_ [True, False], ) @pytest.mark.parametrize( - "range_,class_,variant_classes", + "range_,class_,variant_classes,index_column", [ - ([1.0, 2.0], None, None), + ([1.0, 2.0], None, None, None), ( None, "benign", @@ -2085,20 +2178,83 @@ def test_variants_for_functional_classification_python_filtering_skips_variants_ ], } ), + calibration_variant_column_name, + ), + ( + None, + "benign", + pd.DataFrame( + { + hgvs_nt_column: [ + "NC_000001.11:g.1000A>T", + "NC_000001.11:g.1001G>C", + "NC_000001.11:g.1002T>A", + "NC_000001.11:g.1003C>G", + 
"NC_000001.11:g.1004G>A", + ], + calibration_class_column_name: [ + "pathogenic", + "benign", + "benign", + "benign", + "pathogenic", + ], + } + ), + hgvs_nt_column, + ), + ( + None, + "benign", + pd.DataFrame( + { + hgvs_pro_column: [ + "NP_000000.1:p.Lys100Asn", + "NP_000000.1:p.Gly101Arg", + "NP_000000.1:p.Ser102Thr", + "NP_000000.1:p.Ala103Pro", + "NP_000000.1:p.Val104Met", + ], + calibration_class_column_name: [ + "pathogenic", + "benign", + "benign", + "benign", + "pathogenic", + ], + } + ), + hgvs_pro_column, ), ], ) def test_variants_for_functional_classification_filters_by_conditions( - setup_lib_db_with_score_set, session, use_sql, range_, class_, variant_classes + setup_lib_db_with_score_set, session, use_sql, range_, class_, variant_classes, index_column ): # Create variants with different scores variants = [] scores = [0.5, 1.0, 1.5, 2.0, 2.5] + hgvs_nts = [ + "NC_000001.11:g.1000A>T", + "NC_000001.11:g.1001G>C", + "NC_000001.11:g.1002T>A", + "NC_000001.11:g.1003C>G", + "NC_000001.11:g.1004G>A", + ] + hgvs_pros = [ + "NP_000000.1:p.Lys100Asn", + "NP_000000.1:p.Gly101Arg", + "NP_000000.1:p.Ser102Thr", + "NP_000000.1:p.Ala103Pro", + "NP_000000.1:p.Val104Met", + ] for i, score in enumerate(scores, 1): variant = Variant( data={"score_data": {"score": score}}, score_set_id=setup_lib_db_with_score_set.id, urn=f"urn:mavedb:variant-{i}", + hgvs_nt=hgvs_nts[i - 1], + hgvs_pro=hgvs_pros[i - 1], ) variants.append(variant) @@ -2119,7 +2275,9 @@ def test_variants_for_functional_classification_filters_by_conditions( result = variants_for_functional_classification( session, mock_functional_classification, - variant_classes=variant_classification_df_to_dict(variant_classes) if variant_classes is not None else None, + variant_classes=variant_classification_df_to_dict(variant_classes, index_column) + if variant_classes is not None + else None, use_sql=use_sql, ) @@ -2130,9 +2288,9 @@ def test_variants_for_functional_classification_filters_by_conditions( @pytest.mark.parametrize( - "range_,class_,variant_classes", + "range_,class_,variant_classes,index_column", [ - ([1.0, 2.0], None, None), + ([1.0, 2.0], None, None, None), ( None, "benign", @@ -2150,17 +2308,58 @@ def test_variants_for_functional_classification_filters_by_conditions( ], } ), + calibration_variant_column_name, + ), + ( + None, + "benign", + pd.DataFrame( + { + hgvs_nt_column: [ + "NC_000001.11:g.1000A>T", + "NC_000001.11:g.1001G>C", + "NC_000001.11:g.1002T>A", + ], + calibration_class_column_name: [ + "benign", + "pathogenic", + "pathogenic", + ], + } + ), + hgvs_nt_column, + ), + ( + None, + "benign", + pd.DataFrame( + { + hgvs_pro_column: [ + "NP_000000.1:p.Lys100Asn", + "NP_000000.1:p.Gly101Arg", + "NP_000000.1:p.Ser102Thr", + ], + calibration_class_column_name: [ + "benign", + "pathogenic", + "pathogenic", + ], + } + ), + hgvs_pro_column, ), ], ) def test_variants_for_functional_classification_sql_fallback_on_exception( - setup_lib_db_with_score_set, session, range_, class_, variant_classes + setup_lib_db_with_score_set, session, range_, class_, variant_classes, index_column ): # Create a variant variant = Variant( data={"score_data": {"score": 1.5}}, score_set_id=setup_lib_db_with_score_set.id, urn="urn:mavedb:variant-1", + hgvs_nt="NC_000001.11:g.1000A>T", + hgvs_pro="NP_000000.1:p.Lys100Asn", ) session.add(variant) session.commit() @@ -2185,7 +2384,9 @@ def test_variants_for_functional_classification_sql_fallback_on_exception( result = variants_for_functional_classification( session, mock_functional_classification, - 
variant_classes=variant_classification_df_to_dict(variant_classes) if variant_classes is not None else None, + variant_classes=variant_classification_df_to_dict(variant_classes, index_column) + if variant_classes is not None + else None, use_sql=True, ) mocked_execute.assert_called() @@ -2369,10 +2570,14 @@ def test_variant_classification_df_to_dict_with_single_class(): } ) - result = variant_classification_df_to_dict(df) + result = variant_classification_df_to_dict(df, calibration_variant_column_name) - expected = {"pathogenic": sorted(["var1", "var2", "var3"])} - assert {k: sorted(v) for k, v in result.items()} == expected + expected = { + "indexed_by": calibration_variant_column_name, + "classifications": {"pathogenic": set(["var1", "var2", "var3"])}, + } + assert result["classifications"] == expected["classifications"] + assert result["indexed_by"] == expected["indexed_by"] def test_variant_classification_df_to_dict_with_multiple_classes(): @@ -2384,44 +2589,39 @@ def test_variant_classification_df_to_dict_with_multiple_classes(): } ) - result = variant_classification_df_to_dict(df) + result = variant_classification_df_to_dict(df, calibration_variant_column_name) - expected = {"pathogenic": ["var1", "var3"], "benign": sorted(["var2", "var5"]), "uncertain": ["var4"]} - assert {k: sorted(v) for k, v in result.items()} == expected + expected = { + "indexed_by": calibration_variant_column_name, + "classifications": { + "pathogenic": set(["var1", "var3"]), + "benign": set(["var2", "var5"]), + "uncertain": set(["var4"]), + }, + } + assert result["classifications"] == expected["classifications"] + assert result["indexed_by"] == expected["indexed_by"] def test_variant_classification_df_to_dict_with_empty_dataframe(): """Test conversion with empty DataFrame.""" df = pd.DataFrame(columns=[calibration_variant_column_name, calibration_class_column_name]) - result = variant_classification_df_to_dict(df) + result = variant_classification_df_to_dict(df, calibration_variant_column_name) - assert result == {} + assert result["classifications"] == {} + assert result["indexed_by"] == calibration_variant_column_name def test_variant_classification_df_to_dict_with_single_row(): """Test conversion with DataFrame containing single row.""" df = pd.DataFrame({calibration_variant_column_name: ["var1"], calibration_class_column_name: ["pathogenic"]}) - result = variant_classification_df_to_dict(df) - - expected = {"pathogenic": ["var1"]} - assert result == expected - - -def test_variant_classification_df_to_dict_preserves_order_within_classes(): - """Test that variant order is preserved within each functional class.""" - df = pd.DataFrame( - { - calibration_variant_column_name: ["var1", "var2", "var3", "var4"], - calibration_class_column_name: ["pathogenic", "pathogenic", "benign", "pathogenic"], - } - ) - - result = variant_classification_df_to_dict(df) + result = variant_classification_df_to_dict(df, calibration_variant_column_name) - expected = {"pathogenic": sorted(["var1", "var2", "var4"]), "benign": ["var3"]} - assert {k: sorted(v) for k, v in result.items()} == expected + expected = {"indexed_by": calibration_variant_column_name, "classifications": {"pathogenic": set(["var1"])}} + assert result["classifications"] == expected["classifications"] + assert result["indexed_by"] == expected["indexed_by"] def test_variant_classification_df_to_dict_with_extra_columns(): @@ -2435,10 +2635,14 @@ def test_variant_classification_df_to_dict_with_extra_columns(): } ) - result = variant_classification_df_to_dict(df) + 
result = variant_classification_df_to_dict(df, calibration_variant_column_name) - expected = {"pathogenic": ["var1"], "benign": ["var2"]} - assert {k: sorted(v) for k, v in result.items()} == expected + expected = { + "indexed_by": calibration_variant_column_name, + "classifications": {"pathogenic": set(["var1"]), "benign": set(["var2"])}, + } + assert result["classifications"] == expected["classifications"] + assert result["indexed_by"] == expected["indexed_by"] def test_variant_classification_df_to_dict_with_duplicate_variants_in_same_class(): @@ -2450,10 +2654,14 @@ def test_variant_classification_df_to_dict_with_duplicate_variants_in_same_class } ) - result = variant_classification_df_to_dict(df) + result = variant_classification_df_to_dict(df, calibration_variant_column_name) - expected = {"pathogenic": ["var1"], "benign": ["var2"]} - assert {k: sorted(v) for k, v in result.items()} == expected + expected = { + "indexed_by": calibration_variant_column_name, + "classifications": {"pathogenic": set(["var1"]), "benign": set(["var2"])}, + } + assert result["classifications"] == expected["classifications"] + assert result["indexed_by"] == expected["indexed_by"] def test_variant_classification_df_to_dict_with_none_values(): @@ -2465,10 +2673,14 @@ def test_variant_classification_df_to_dict_with_none_values(): } ) - result = variant_classification_df_to_dict(df) + result = variant_classification_df_to_dict(df, calibration_variant_column_name) - expected = {"pathogenic": ["var1"], None: ["var2"], "benign": ["var3"]} - assert {k: sorted(v) for k, v in result.items()} == expected + expected = { + "indexed_by": calibration_variant_column_name, + "classifications": {"pathogenic": set(["var1"]), None: set(["var2"]), "benign": set(["var3"])}, + } + assert result["classifications"] == expected["classifications"] + assert result["indexed_by"] == expected["indexed_by"] def test_variant_classification_df_to_dict_with_numeric_classes(): @@ -2477,10 +2689,14 @@ def test_variant_classification_df_to_dict_with_numeric_classes(): {calibration_variant_column_name: ["var1", "var2", "var3"], calibration_class_column_name: [1, 2, 1]} ) - result = variant_classification_df_to_dict(df) + result = variant_classification_df_to_dict(df, calibration_variant_column_name) - expected = {1: sorted(["var1", "var3"]), 2: ["var2"]} - assert {k: sorted(v) for k, v in result.items()} == expected + expected = { + "indexed_by": calibration_variant_column_name, + "classifications": {1: set(["var1", "var3"]), 2: set(["var2"])}, + } + assert result["classifications"] == expected["classifications"] + assert result["indexed_by"] == expected["indexed_by"] def test_variant_classification_df_to_dict_with_mixed_type_classes(): @@ -2492,7 +2708,11 @@ def test_variant_classification_df_to_dict_with_mixed_type_classes(): } ) - result = variant_classification_df_to_dict(df) + result = variant_classification_df_to_dict(df, calibration_variant_column_name) - expected = {"pathogenic": ["var1"], 1: sorted(["var2", "var4"]), "benign": ["var3"]} - assert {k: sorted(v) for k, v in result.items()} == expected + expected = { + "indexed_by": calibration_variant_column_name, + "classifications": {"pathogenic": set(["var1"]), 1: set(["var2", "var4"]), "benign": set(["var3"])}, + } + assert result["classifications"] == expected["classifications"] + assert result["indexed_by"] == expected["indexed_by"] diff --git a/tests/routers/data/calibration_classes_by_hgvs_nt.csv b/tests/routers/data/calibration_classes_by_hgvs_nt.csv new file mode 100644 index 
00000000..07025f44 --- /dev/null +++ b/tests/routers/data/calibration_classes_by_hgvs_nt.csv @@ -0,0 +1,4 @@ +hgvs_nt,class_name +c.1A>T,normal_class +c.2C>T,abnormal_class +c.6T>A,not_specified_class \ No newline at end of file diff --git a/tests/routers/data/calibration_classes_by_hgvs_prot.csv b/tests/routers/data/calibration_classes_by_hgvs_prot.csv new file mode 100644 index 00000000..0a948cb8 --- /dev/null +++ b/tests/routers/data/calibration_classes_by_hgvs_prot.csv @@ -0,0 +1,4 @@ +hgvs_pro,class_name +p.Thr1Ser,normal_class +p.Thr1Met,abnormal_class +p.Phe2Leu,not_specified_class \ No newline at end of file diff --git a/tests/routers/data/calibration_classes.csv b/tests/routers/data/calibration_classes_by_urn.csv similarity index 100% rename from tests/routers/data/calibration_classes.csv rename to tests/routers/data/calibration_classes_by_urn.csv diff --git a/tests/routers/test_score_calibrations.py b/tests/routers/test_score_calibrations.py index 1f8d0792..8cdbeefe 100644 --- a/tests/routers/test_score_calibrations.py +++ b/tests/routers/test_score_calibrations.py @@ -2,6 +2,8 @@ import pytest +from mavedb.lib.validation.exceptions import ValidationError + arq = pytest.importorskip("arq") cdot = pytest.importorskip("cdot") fastapi = pytest.importorskip("fastapi") @@ -1234,6 +1236,96 @@ def test_cannot_create_score_calibration_when_score_set_does_not_exist(client, s assert "score set with URN 'urn:ngs:score-set:nonexistent' not found" in error["detail"] +@pytest.mark.parametrize( + "mock_publication_fetch", + [ + [ + {"dbName": "PubMed", "identifier": TEST_PUBMED_IDENTIFIER}, + {"dbName": "bioRxiv", "identifier": TEST_BIORXIV_IDENTIFIER}, + ] + ], + indirect=["mock_publication_fetch"], +) +def test_cannot_create_score_calibration_when_csv_file_fails_decoding( + client, setup_router_db, session, data_provider, data_files, mock_publication_fetch +): + experiment = create_experiment(client) + score_set = create_seq_score_set_with_mapped_variants( + client, + session, + data_provider, + experiment["urn"], + data_files / "scores.csv", + ) + + calibration_csv_path = data_files / "calibration_classes_by_urn.csv" + with ( + open(calibration_csv_path, "rb") as class_file, + patch( + "mavedb.routers.score_calibrations.csv_data_to_df", + side_effect=UnicodeDecodeError("utf-8", b"", 0, 1, "invalid start byte"), + ), + ): + response = client.post( + "/api/v1/score-calibrations", + files={"classes_file": (calibration_csv_path.name, class_file, "text/csv")}, + data={ + "calibration_json": json.dumps( + {"scoreSetUrn": score_set["urn"], **deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_CLASS_BASED)} + ), + }, + ) + + assert response.status_code == 400 + error = response.json() + assert "Error decoding file:" in str(error["detail"]) + + +@pytest.mark.parametrize( + "mock_publication_fetch", + [ + [ + {"dbName": "PubMed", "identifier": TEST_PUBMED_IDENTIFIER}, + {"dbName": "bioRxiv", "identifier": TEST_BIORXIV_IDENTIFIER}, + ] + ], + indirect=["mock_publication_fetch"], +) +def test_cannot_create_score_calibration_when_validation_error_is_raised_from_score_calibration_file_standardization( + client, setup_router_db, session, data_provider, data_files, mock_publication_fetch +): + experiment = create_experiment(client) + score_set = create_seq_score_set_with_mapped_variants( + client, + session, + data_provider, + experiment["urn"], + data_files / "scores.csv", + ) + + calibration_csv_path = data_files / "calibration_classes_by_urn.csv" + with ( + open(calibration_csv_path, "rb") as class_file, + patch( 
+ "mavedb.routers.score_calibrations.validate_and_standardize_calibration_classes_dataframe", + side_effect=ValidationError("Test validation error"), + ), + ): + response = client.post( + "/api/v1/score-calibrations", + files={"classes_file": (calibration_csv_path.name, class_file, "text/csv")}, + data={ + "calibration_json": json.dumps( + {"scoreSetUrn": score_set["urn"], **deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_CLASS_BASED)} + ), + }, + ) + + assert response.status_code == 422 + error = response.json() + assert "Test validation error" in str(error["detail"][0]["msg"]) + + @pytest.mark.parametrize( "mock_publication_fetch", [ @@ -1354,8 +1446,12 @@ def test_cannot_create_class_based_score_calibration_without_classes_file( ], indirect=["mock_publication_fetch"], ) +@pytest.mark.parametrize( + "calibration_csv_path", + ["calibration_classes_by_urn.csv", "calibration_classes_by_hgvs_nt.csv", "calibration_classes_by_hgvs_prot.csv"], +) def test_cannot_create_range_based_score_calibration_with_classes_file( - client, setup_router_db, mock_publication_fetch, session, data_provider, data_files + client, setup_router_db, mock_publication_fetch, session, data_provider, data_files, calibration_csv_path ): experiment = create_experiment(client) score_set = create_seq_score_set_with_mapped_variants( @@ -1366,7 +1462,7 @@ def test_cannot_create_range_based_score_calibration_with_classes_file( data_files / "scores.csv", ) - classification_csv_path = data_files / "calibration_classes.csv" + classification_csv_path = data_files / calibration_csv_path with open(classification_csv_path, "rb") as class_file: response = client.post( "/api/v1/score-calibrations", @@ -1585,8 +1681,12 @@ def test_can_create_score_calibration_as_admin_user( ], indirect=["mock_publication_fetch"], ) +@pytest.mark.parametrize( + "calibration_csv_path", + ["calibration_classes_by_urn.csv", "calibration_classes_by_hgvs_nt.csv", "calibration_classes_by_hgvs_prot.csv"], +) def test_can_create_class_based_score_calibration_form( - client, setup_router_db, mock_publication_fetch, session, data_provider, data_files + client, setup_router_db, mock_publication_fetch, session, data_provider, data_files, calibration_csv_path ): experiment = create_experiment(client) score_set = create_seq_score_set_with_mapped_variants( @@ -1599,7 +1699,7 @@ def test_can_create_class_based_score_calibration_form( with patch.object(ArqRedis, "enqueue_job", return_value=None): score_set = publish_score_set(client, score_set["urn"]) - classification_csv_path = data_files / "calibration_classes.csv" + classification_csv_path = data_files / calibration_csv_path with open(classification_csv_path, "rb") as class_file: response = client.post( "/api/v1/score-calibrations", @@ -1611,8 +1711,6 @@ def test_can_create_class_based_score_calibration_form( }, ) - print(response.text) - assert response.status_code == 200 calibration_response = response.json() assert calibration_response["scoreSetUrn"] == score_set["urn"] @@ -1700,6 +1798,108 @@ def test_cannot_update_score_calibration_when_calibration_not_exists( assert "The requested score calibration does not exist" in error["detail"] +@pytest.mark.parametrize( + "mock_publication_fetch", + [ + [ + {"dbName": "PubMed", "identifier": TEST_PUBMED_IDENTIFIER}, + {"dbName": "bioRxiv", "identifier": TEST_BIORXIV_IDENTIFIER}, + ] + ], + indirect=["mock_publication_fetch"], +) +def test_cannot_update_score_calibration_when_csv_file_fails_decoding( + client, setup_router_db, session, data_provider, data_files, 
mock_publication_fetch +): + experiment = create_experiment(client) + score_set = create_seq_score_set_with_mapped_variants( + client, + session, + data_provider, + experiment["urn"], + data_files / "scores.csv", + ) + calibration = create_test_score_calibration_in_score_set_via_client( + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) + ) + + calibration_csv_path = data_files / "calibration_classes_by_urn.csv" + with ( + open(calibration_csv_path, "rb") as class_file, + patch( + "mavedb.routers.score_calibrations.csv_data_to_df", + side_effect=UnicodeDecodeError("utf-8", b"", 0, 1, "invalid start byte"), + ), + ): + response = client.put( + f"/api/v1/score-calibrations/{calibration['urn']}", + files={"classes_file": (calibration_csv_path.name, class_file, "text/csv")}, + data={ + "calibration_json": json.dumps( + { + "scoreSetUrn": score_set["urn"], + **deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_CLASS_BASED), + } + ), + }, + ) + + assert response.status_code == 400 + error = response.json() + assert "Error decoding file:" in str(error["detail"]) + + +@pytest.mark.parametrize( + "mock_publication_fetch", + [ + [ + {"dbName": "PubMed", "identifier": TEST_PUBMED_IDENTIFIER}, + {"dbName": "bioRxiv", "identifier": TEST_BIORXIV_IDENTIFIER}, + ] + ], + indirect=["mock_publication_fetch"], +) +def test_cannot_update_score_calibration_when_validation_error_is_raised_from_score_calibration_file_standardization( + client, setup_router_db, session, data_provider, data_files, mock_publication_fetch +): + experiment = create_experiment(client) + score_set = create_seq_score_set_with_mapped_variants( + client, + session, + data_provider, + experiment["urn"], + data_files / "scores.csv", + ) + calibration = create_test_score_calibration_in_score_set_via_client( + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) + ) + + calibration_csv_path = data_files / "calibration_classes_by_urn.csv" + with ( + open(calibration_csv_path, "rb") as class_file, + patch( + "mavedb.routers.score_calibrations.validate_and_standardize_calibration_classes_dataframe", + side_effect=ValidationError("Test validation error"), + ), + ): + response = client.put( + f"/api/v1/score-calibrations/{calibration['urn']}", + files={"classes_file": (calibration_csv_path.name, class_file, "text/csv")}, + data={ + "calibration_json": json.dumps( + { + "scoreSetUrn": score_set["urn"], + **deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_CLASS_BASED), + } + ), + }, + ) + + assert response.status_code == 422 + error = response.json() + assert "Test validation error" in str(error["detail"][0]["msg"]) + + @pytest.mark.parametrize( "mock_publication_fetch", [ @@ -1748,8 +1948,12 @@ def test_cannot_update_class_based_score_calibration_without_class_file( ], indirect=["mock_publication_fetch"], ) +@pytest.mark.parametrize( + "calibration_csv_path", + ["calibration_classes_by_urn.csv", "calibration_classes_by_hgvs_nt.csv", "calibration_classes_by_hgvs_prot.csv"], +) def test_cannot_update_range_based_score_calibration_with_class_file( - client, setup_router_db, mock_publication_fetch, session, data_provider, data_files + client, setup_router_db, mock_publication_fetch, session, data_provider, data_files, calibration_csv_path ): experiment = create_experiment(client) score_set = create_seq_score_set_with_mapped_variants( @@ -1763,7 +1967,7 @@ def test_cannot_update_range_based_score_calibration_with_class_file( client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) 
) - classification_csv_path = data_files / "calibration_classes.csv" + classification_csv_path = data_files / calibration_csv_path with open(classification_csv_path, "rb") as class_file: response = client.put( f"/api/v1/score-calibrations/{calibration['urn']}", @@ -2443,8 +2647,12 @@ def test_admin_user_may_move_calibration_to_another_score_set( ], indirect=["mock_publication_fetch"], ) +@pytest.mark.parametrize( + "calibration_csv_path", + ["calibration_classes_by_urn.csv", "calibration_classes_by_hgvs_nt.csv", "calibration_classes_by_hgvs_prot.csv"], +) def test_can_modify_score_calibration_to_class_based( - client, setup_router_db, mock_publication_fetch, session, data_provider, data_files + client, setup_router_db, mock_publication_fetch, session, data_provider, data_files, calibration_csv_path ): experiment = create_experiment(client) score_set = create_seq_score_set_with_mapped_variants( @@ -2461,7 +2669,7 @@ def test_can_modify_score_calibration_to_class_based( client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) ) - classification_csv_path = data_files / "calibration_classes.csv" + classification_csv_path = data_files / calibration_csv_path updated_calibration_data = deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_CLASS_BASED) with open(classification_csv_path, "rb") as class_file: diff --git a/tests/validation/dataframe/test_calibration.py b/tests/validation/dataframe/test_calibration.py index 241abee4..4e7f2946 100644 --- a/tests/validation/dataframe/test_calibration.py +++ b/tests/validation/dataframe/test_calibration.py @@ -1,19 +1,25 @@ # ruff: noqa: E402 -from unittest.mock import Mock, patch - import pytest pytest.importorskip("psycopg2") +from unittest.mock import Mock, patch + import pandas as pd -from mavedb.lib.validation.constants.general import calibration_class_column_name, calibration_variant_column_name +from mavedb.lib.validation.constants.general import ( + calibration_class_column_name, + calibration_variant_column_name, + hgvs_nt_column, + hgvs_pro_column, +) from mavedb.lib.validation.dataframe.calibration import ( + choose_calibration_index_column, validate_and_standardize_calibration_classes_dataframe, validate_calibration_classes, validate_calibration_df_column_names, - validate_calibration_variant_urns, + validate_index_existence_in_score_set, ) from mavedb.lib.validation.exceptions import ValidationError from mavedb.view_models import score_calibration @@ -69,7 +75,7 @@ def test_validate_and_standardize_calibration_classes_dataframe_success(self, mo mock_classification2.class_ = "B" mock_calibration.functional_classifications = [mock_classification1, mock_classification2] - result = validate_and_standardize_calibration_classes_dataframe( + result, index_column = validate_and_standardize_calibration_classes_dataframe( mock_db, mock_score_set, mock_calibration, input_df ) @@ -93,19 +99,22 @@ def test_validate_and_standardize_calibration_classes_dataframe_not_class_based( ): validate_and_standardize_calibration_classes_dataframe(mock_db, mock_score_set, mock_calibration, input_df) - def test_validate_and_standardize_calibration_classes_dataframe_invalid_column_names(self, mock_dependencies): + def test_validate_and_standardize_calibration_classes_dataframe_missing_index_columns(self, mock_dependencies): """Test ValidationError when column validation fails.""" mock_db = Mock() mock_score_set = Mock() mock_calibration = Mock() mock_calibration.class_based = True - input_df = pd.DataFrame({calibration_variant_column_name: ["var1"], 
"invalid": ["A"]}) - standardized_df = pd.DataFrame({calibration_variant_column_name: ["var1"], "invalid": ["A"]}) + input_df = pd.DataFrame({calibration_class_column_name: ["c"], "invalid": ["A"]}) + standardized_df = pd.DataFrame({calibration_class_column_name: ["c"], "invalid": ["A"]}) mock_dependencies["standardize_dataframe"].return_value = standardized_df - with pytest.raises(ValidationError, match=f"missing required column: '{calibration_class_column_name}'"): + with pytest.raises( + ValidationError, + match=f"at least one of {', '.join({hgvs_nt_column, hgvs_pro_column, calibration_variant_column_name})} must be present", + ): validate_and_standardize_calibration_classes_dataframe(mock_db, mock_score_set, mock_calibration, input_df) def test_validate_and_standardize_calibration_classes_dataframe_null_rows(self, mock_dependencies): @@ -126,6 +135,52 @@ def test_validate_and_standardize_calibration_classes_dataframe_null_rows(self, with pytest.raises(ValidationError, match="null rows detected"): validate_and_standardize_calibration_classes_dataframe(mock_db, mock_score_set, mock_calibration, input_df) + def test_validate_and_standardize_calibration_classes_dataframe_drops_null_class_rows(self, mock_dependencies): + """Test that rows with null calibration class are dropped.""" + mock_db = Mock() + mock_score_set = Mock() + mock_score_set.id = 123 + mock_calibration = Mock() + mock_calibration.class_based = True + + input_df = pd.DataFrame( + { + calibration_variant_column_name: ["var1", "var2", "var3", "var4"], + calibration_class_column_name: ["A", None, "B", pd.NA], + } + ) + standardized_df = pd.DataFrame( + { + calibration_variant_column_name: ["var1", "var2", "var3", "var4"], + calibration_class_column_name: ["A", None, "B", pd.NA], + } + ) + + mock_dependencies["standardize_dataframe"].return_value = standardized_df + + mock_scalars = Mock() + mock_scalars.all.return_value = ["var1", "var3"] + mock_db.scalars.return_value = mock_scalars + + mock_classification1 = Mock() + mock_classification1.class_ = "A" + mock_classification2 = Mock() + mock_classification2.class_ = "B" + mock_calibration.functional_classifications = [mock_classification1, mock_classification2] + + result, index_column = validate_and_standardize_calibration_classes_dataframe( + mock_db, mock_score_set, mock_calibration, input_df + ) + + expected_df = pd.DataFrame( + { + calibration_variant_column_name: ["var1", "var3"], + calibration_class_column_name: ["A", "B"], + } + ) + + assert result.equals(expected_df) + def test_validate_and_standardize_calibration_classes_dataframe_invalid_variants(self, mock_dependencies): """Test ValidationError when variant URN validation fails.""" mock_db = Mock() @@ -145,7 +200,7 @@ def test_validate_and_standardize_calibration_classes_dataframe_invalid_variants mock_scalars.all.return_value = [] mock_db.scalars.return_value = mock_scalars - with pytest.raises(ValidationError, match="The following variant URNs do not exist in the score set: var1"): + with pytest.raises(ValidationError, match="The following resources do not exist in the score set: var1"): validate_and_standardize_calibration_classes_dataframe(mock_db, mock_score_set, mock_calibration, input_df) def test_validate_and_standardize_calibration_classes_dataframe_invalid_classes(self, mock_dependencies): @@ -174,35 +229,6 @@ def test_validate_and_standardize_calibration_classes_dataframe_invalid_classes( ): validate_and_standardize_calibration_classes_dataframe(mock_db, mock_score_set, mock_calibration, input_df) - def 
test_validate_and_standardize_calibration_classes_dataframe_unexpected_column(self, mock_dependencies): - """Test ValidationError when unexpected column is present.""" - mock_db = Mock() - mock_score_set = Mock() - mock_calibration = Mock() - mock_calibration.class_based = True - - input_df = pd.DataFrame( - { - calibration_variant_column_name: ["var1"], - calibration_class_column_name: ["A"], - "extra1": ["X"], - "extra2": ["Y"], - } - ) - standardized_df = pd.DataFrame( - { - calibration_variant_column_name: ["var1"], - calibration_class_column_name: ["A"], - "extra1": ["X"], - "extra2": ["Y"], - } - ) - - mock_dependencies["standardize_dataframe"].return_value = standardized_df - - with pytest.raises(ValidationError, match="unexpected column\(s\) in calibration classes file: extra1, extra2"): - validate_and_standardize_calibration_classes_dataframe(mock_db, mock_score_set, mock_calibration, input_df) - def test_validate_and_standardize_calibration_classes_dataframe_variant_column_validation_fails( self, mock_dependencies ): @@ -273,14 +299,11 @@ def test_validate_and_standardize_calibration_classes_dataframe_mixed_case_colum mock_classification.class_ = "A" mock_calibration.functional_classifications = [mock_classification] - result = validate_and_standardize_calibration_classes_dataframe( + result, index_column = validate_and_standardize_calibration_classes_dataframe( mock_db, mock_score_set, mock_calibration, input_df ) assert result.equals(standardized_df) - mock_dependencies["validate_data_column"].assert_called_once_with( - standardized_df[calibration_class_column_name], force_numeric=False - ) def test_validate_and_standardize_calibration_classes_dataframe_with_score_calibration_modify( self, mock_dependencies @@ -307,7 +330,7 @@ def test_validate_and_standardize_calibration_classes_dataframe_with_score_calib mock_classification.class_ = "A" mock_calibration.functional_classifications = [mock_classification] - result = validate_and_standardize_calibration_classes_dataframe( + result, index_column = validate_and_standardize_calibration_classes_dataframe( mock_db, mock_score_set, mock_calibration, input_df ) @@ -325,7 +348,7 @@ def test_validate_and_standardize_calibration_classes_dataframe_empty_dataframe( mock_dependencies["standardize_dataframe"].return_value = standardized_df - with pytest.raises(ValidationError, match=f"missing required column: '{calibration_variant_column_name}'"): + with pytest.raises(ValidationError, match=f"missing required column: '{calibration_class_column_name}'"): validate_and_standardize_calibration_classes_dataframe(mock_db, mock_score_set, mock_calibration, input_df) @@ -399,7 +422,12 @@ def test_validate_calibration_df_column_names_missing_variant_column(self): df = pd.DataFrame({calibration_class_column_name: ["A", "B"], "other": ["X", "Y"]}) # Act & Assert - with pytest.raises(ValidationError, match=f"missing required column: '{calibration_variant_column_name}'"): + with pytest.raises( + ValidationError, + match="at least one of {} must be present".format( + ", ".join({hgvs_nt_column, hgvs_pro_column, calibration_variant_column_name}) + ), + ): validate_calibration_df_column_names(df) def test_validate_calibration_df_column_names_missing_class_column(self): @@ -415,21 +443,7 @@ def test_validate_calibration_df_column_names_missing_both_required_columns(self df = pd.DataFrame({"other1": ["X", "Y"], "other2": ["A", "B"]}) # Act & Assert - with pytest.raises(ValidationError, match=f"missing required column: '{calibration_variant_column_name}'"): - 
validate_calibration_df_column_names(df) - - def test_validate_calibration_df_column_names_unexpected_extra_columns(self): - """Test ValidationError when unexpected columns are present.""" - df = pd.DataFrame( - { - calibration_variant_column_name: ["var1", "var2"], - calibration_class_column_name: ["A", "B"], - "extra_column": ["X", "Y"], - } - ) - - # Act & Assert - with pytest.raises(ValidationError, match="unexpected column\(s\) in calibration classes file: extra_column"): + with pytest.raises(ValidationError, match=f"missing required column: '{calibration_class_column_name}'"): validate_calibration_df_column_names(df) def test_validate_calibration_df_column_names_fewer_than_expected_columns(self): @@ -459,7 +473,7 @@ def test_validate_calibration_df_column_names_empty_dataframe(self): df = pd.DataFrame() # Act & Assert - with pytest.raises(ValidationError, match=f"missing required column: '{calibration_variant_column_name}'"): + with pytest.raises(ValidationError, match=f"missing required column: '{calibration_class_column_name}'"): validate_calibration_df_column_names(df) def test_validate_calibration_df_column_names_with_numeric_and_string_mix(self): @@ -479,139 +493,306 @@ def test_validate_calibration_df_column_names_newline_in_whitespace(self): validate_calibration_df_column_names(df) -class TestValidateCalibrationVariantUrns: - """Test suite for validate_calibration_variant_urns function.""" +class TestValidateIndexExistenceInScoreSet: + """Test suite for validate_index_existence_in_score_set function.""" - def test_validate_calibration_variant_urns_success(self): + @pytest.mark.parametrize( + "index_column_name,existing_resources_return_value,index_values", + [ + ( + calibration_variant_column_name, + ["urn:variant:1", "urn:variant:2", "urn:variant:3"], + ["urn:variant:1", "urn:variant:2", "urn:variant:3"], + ), + ( + hgvs_nt_column, + ["NM_000546.5:c.215C>G", "NM_000546.5:c.743G>A"], + ["NM_000546.5:c.215C>G", "NM_000546.5:c.743G>A"], + ), + ( + hgvs_pro_column, + ["NP_000537.3:p.Arg72Pro", "NP_000537.3:p.Gly248Trp"], + ["NP_000537.3:p.Arg72Pro", "NP_000537.3:p.Gly248Trp"], + ), + ], + ) + def test_validate_index_existence_in_score_set_success( + self, index_column_name, existing_resources_return_value, index_values + ): """Test successful validation when all variant URNs exist in score set.""" mock_db = Mock() mock_scalars = Mock() - mock_scalars.all.return_value = ["urn:variant:1", "urn:variant:2", "urn:variant:3"] + mock_scalars.all.return_value = existing_resources_return_value mock_db.scalars.return_value = mock_scalars mock_score_set = Mock() mock_score_set.id = 123 - variant_urns = pd.Series(["urn:variant:1", "urn:variant:2", "urn:variant:3"]) + variant_urns = pd.Series(index_values) - validate_calibration_variant_urns(mock_db, mock_score_set, variant_urns) + validate_index_existence_in_score_set(mock_db, mock_score_set, variant_urns, index_column_name) mock_db.scalars.assert_called_once() - def test_validate_calibration_variant_urns_missing_variants(self): + @pytest.mark.parametrize( + "index_column_name,existing_resources_return_value,index_values", + [ + ( + calibration_variant_column_name, + ["urn:variant:1", "urn:variant:2"], + ["urn:variant:1", "urn:variant:2", "urn:variant:3"], + ), + ( + hgvs_nt_column, + ["NM_000546.5:c.215C>G"], + ["NM_000546.5:c.215C>G", "NM_000546.5:c.743G>A"], + ), + ( + hgvs_pro_column, + ["NP_000537.3:p.Arg72Pro"], + ["NP_000537.3:p.Arg72Pro", "NP_000537.3:p.Gly248Trp"], + ), + ], + ) + def 
test_validate_index_existence_in_score_set_missing_variants( + self, index_column_name, existing_resources_return_value, index_values + ): """Test ValidationError when some variant URNs don't exist in score set.""" mock_db = Mock() mock_scalars = Mock() - mock_scalars.all.return_value = ["urn:variant:1", "urn:variant:2"] + mock_scalars.all.return_value = existing_resources_return_value mock_db.scalars.return_value = mock_scalars mock_score_set = Mock() mock_score_set.id = 123 - variant_urns = pd.Series(["urn:variant:1", "urn:variant:2", "urn:variant:3"]) + variant_urns = pd.Series(index_values) # Act & Assert with pytest.raises( - ValidationError, match="The following variant URNs do not exist in the score set: urn:variant:3" + ValidationError, + match="The following resources do not exist in the score set: {}".format( + ", ".join(sorted(set(index_values) - set(existing_resources_return_value))) + ), ): - validate_calibration_variant_urns(mock_db, mock_score_set, variant_urns) - - def test_validate_calibration_variant_urns_multiple_missing_variants(self): - """Test ValidationError when multiple variant URNs don't exist in score set.""" + validate_index_existence_in_score_set( + mock_db, + mock_score_set, + variant_urns, + index_column_name, + ) + + @pytest.mark.parametrize( + "index_column_name,existing_resources_return_value,index_values", + [ + ( + calibration_variant_column_name, + ["urn:variant:1"], + ["urn:variant:1", "urn:variant:2", "urn:variant:3"], + ), + ( + hgvs_nt_column, + ["NM_000546.5:c.215C>G"], + ["NM_000546.5:c.215C>G", "NM_000546.5:c.743G>A", "NM_000546.5:c.999A>T"], + ), + ( + hgvs_pro_column, + ["NP_000537.3:p.Arg72Pro"], + ["NP_000537.3:p.Arg72Pro", "NP_000537.3:p.Gly248Trp", "NP_000537.3:p.Ser215Ile"], + ), + ], + ) + def test_validate_index_existence_in_score_set_multiple_missing_variants( + self, index_column_name, existing_resources_return_value, index_values + ): + """Test ValidationError when multiple variant resources don't exist in score set.""" mock_db = Mock() mock_scalars = Mock() - mock_scalars.all.return_value = ["urn:variant:1"] + mock_scalars.all.return_value = existing_resources_return_value mock_db.scalars.return_value = mock_scalars mock_score_set = Mock() mock_score_set.id = 456 - variant_urns = pd.Series(["urn:variant:1", "urn:variant:2", "urn:variant:3"]) + variant_urns = pd.Series(index_values) # Act & Assert with pytest.raises( ValidationError, - match="The following variant URNs do not exist in the score set: urn:variant:2, urn:variant:3", + match="The following resources do not exist in the score set: {}".format( + ", ".join(sorted(set(index_values) - set(existing_resources_return_value))) + ), ): - validate_calibration_variant_urns(mock_db, mock_score_set, variant_urns) - - def test_validate_calibration_variant_urns_all_missing(self): + validate_index_existence_in_score_set(mock_db, mock_score_set, variant_urns, index_column_name) + + @pytest.mark.parametrize( + "index_column_name,existing_resources_return_value,index_values", + [ + (calibration_variant_column_name, [], ["urn:variant:1", "urn:variant:2", "urn:variant:3"]), + (hgvs_nt_column, [], ["NM_000546.5:c.215C>G", "NM_000546.5:c.743G>A"]), + (hgvs_pro_column, [], ["NP_000537.3:p.Arg72Pro", "NP_000537.3:p.Gly248Trp"]), + ], + ) + def test_validate_index_existence_in_score_set_all_missing( + self, index_column_name, existing_resources_return_value, index_values + ): """Test ValidationError when all variant URNs are missing from score set.""" mock_db = Mock() mock_scalars = Mock() - 
mock_scalars.all.return_value = [] + mock_scalars.all.return_value = existing_resources_return_value mock_db.scalars.return_value = mock_scalars mock_score_set = Mock() mock_score_set.id = 789 - variant_urns = pd.Series(["urn:variant:1", "urn:variant:2"]) + variant_urns = pd.Series(index_values) # Act & Assert with pytest.raises( ValidationError, - match="The following variant URNs do not exist in the score set: urn:variant:1, urn:variant:2", + match="The following resources do not exist in the score set: {}".format( + ", ".join(sorted(set(index_values) - set(existing_resources_return_value))) + ), ): - validate_calibration_variant_urns(mock_db, mock_score_set, variant_urns) - - def test_validate_calibration_variant_urns_empty_series(self): - """Test successful validation with empty variant URNs series.""" + validate_index_existence_in_score_set(mock_db, mock_score_set, variant_urns, index_column_name) + + @pytest.mark.parametrize( + "index_column_name,existing_resources_return_value,index_values", + [ + (calibration_variant_column_name, [], []), + (hgvs_nt_column, [], []), + (hgvs_pro_column, [], []), + ], + ) + def test_validate_index_existence_in_score_set_empty_series( + self, index_column_name, existing_resources_return_value, index_values + ): + """Test successful validation with empty index resources series.""" mock_db = Mock() mock_scalars = Mock() - mock_scalars.all.return_value = [] + mock_scalars.all.return_value = existing_resources_return_value mock_db.scalars.return_value = mock_scalars mock_score_set = Mock() mock_score_set.id = 123 - variant_urns = pd.Series([], dtype=object) + variant_urns = pd.Series(index_values, dtype=object) # Act & Assert - should not raise any exception - validate_calibration_variant_urns(mock_db, mock_score_set, variant_urns) - - def test_validate_calibration_variant_urns_single_variant(self): - """Test successful validation with single variant URN.""" + validate_index_existence_in_score_set(mock_db, mock_score_set, variant_urns, index_column_name) + + @pytest.mark.parametrize( + "index_column_name,existing_resources_return_value,index_values", + [ + (calibration_variant_column_name, ["urn:variant:1"], ["urn:variant:1"]), + (hgvs_nt_column, ["NM_000546.5:c.215C>G"], ["NM_000546.5:c.215C>G"]), + (hgvs_pro_column, ["NP_000537.3:p.Arg72Pro"], ["NP_000537.3:p.Arg72Pro"]), + ], + ) + def test_validate_calibration_index_existence_single_variant( + self, index_column_name, existing_resources_return_value, index_values + ): + """Test successful validation with single index value URN.""" mock_db = Mock() mock_scalars = Mock() - mock_scalars.all.return_value = ["urn:variant:single"] + mock_scalars.all.return_value = existing_resources_return_value mock_db.scalars.return_value = mock_scalars mock_score_set = Mock() mock_score_set.id = 123 - variant_urns = pd.Series(["urn:variant:single"]) + variant_urns = pd.Series(index_values) # Act & Assert - should not raise any exception - validate_calibration_variant_urns(mock_db, mock_score_set, variant_urns) + validate_index_existence_in_score_set(mock_db, mock_score_set, variant_urns, index_column_name) - def test_validate_calibration_variant_urns_duplicate_urns_in_series(self): - """Test validation with duplicate URNs in input series.""" + @pytest.mark.parametrize( + "index_column_name,existing_resources_return_value,index_values", + [ + ( + calibration_variant_column_name, + ["urn:variant:1", "urn:variant:2"], + [ + "urn:variant:1", + "urn:variant:2", + "urn:variant:1", + "urn:variant:2", + ], + ), + ( + 
hgvs_nt_column, + ["NM_000546.5:c.215C>G", "NM_000546.5:c.743G>A"], + [ + "NM_000546.5:c.215C>G", + "NM_000546.5:c.743G>A", + "NM_000546.5:c.215C>G", + "NM_000546.5:c.743G>A", + ], + ), + ( + hgvs_pro_column, + ["NP_000537.3:p.Arg72Pro", "NP_000537.3:p.Gly248Trp"], + [ + "NP_000537.3:p.Arg72Pro", + "NP_000537.3:p.Gly248Trp", + "NP_000537.3:p.Arg72Pro", + "NP_000537.3:p.Gly248Trp", + ], + ), + ], + ) + def test_validate_calibration_index_existence_duplicate_values_in_series( + self, index_column_name, existing_resources_return_value, index_values + ): + """Test validation with duplicate index values in input series.""" mock_db = Mock() mock_scalars = Mock() - mock_scalars.all.return_value = ["urn:variant:1", "urn:variant:2"] + mock_scalars.all.return_value = existing_resources_return_value mock_db.scalars.return_value = mock_scalars mock_score_set = Mock() mock_score_set.id = 123 - variant_urns = pd.Series(["urn:variant:1", "urn:variant:2", "urn:variant:1", "urn:variant:2"]) + variant_urns = pd.Series(index_values) # Act & Assert - should not raise any exception - validate_calibration_variant_urns(mock_db, mock_score_set, variant_urns) + validate_index_existence_in_score_set(mock_db, mock_score_set, variant_urns, index_column_name) - def test_validate_calibration_variant_urns_database_query_parameters(self): + @pytest.mark.parametrize( + "index_column_name,existing_resources_return_value,index_values", + [ + ( + calibration_variant_column_name, + ["urn:variant:1", "urn:variant:2", "urn:variant:3"], + ["urn:variant:1", "urn:variant:2", "urn:variant:3"], + ), + ( + hgvs_nt_column, + ["NM_000546.5:c.215C>G", "NM_000546.5:c.743G>A", "NM_000546.5:c.999A>T"], + ["NM_000546.5:c.215C>G", "NM_000546.5:c.743G>A", "NM_000546.5:c.999A>T"], + ), + ( + hgvs_pro_column, + ["NP_000537.3:p.Arg72Pro", "NP_000537.3:p.Gly248Trp", "NP_000537.3:p.Ser215Ile"], + ["NP_000537.3:p.Arg72Pro", "NP_000537.3:p.Gly248Trp", "NP_000537.3:p.Ser215Ile"], + ), + ], + ) + def test_validate_calibration_index_existence_database_query_parameters( + self, index_column_name, existing_resources_return_value, index_values + ): """Test that database query is constructed with correct parameters.""" mock_db = Mock() mock_scalars = Mock() - mock_scalars.all.return_value = ["urn:variant:1", "urn:variant:2"] + mock_scalars.all.return_value = existing_resources_return_value mock_db.scalars.return_value = mock_scalars mock_score_set = Mock() mock_score_set.id = 999 - variant_urns = pd.Series(["urn:variant:1", "urn:variant:2"]) + variant_urns = pd.Series(index_values) - validate_calibration_variant_urns(mock_db, mock_score_set, variant_urns) + validate_index_existence_in_score_set(mock_db, mock_score_set, variant_urns, index_column_name) mock_db.scalars.assert_called_once() @@ -741,3 +922,89 @@ def test_validate_calibration_classes_single_class(self): classes = pd.Series(["single_class", "single_class"]) validate_calibration_classes(calibration, classes) + + +class TestChooseCalibrationIndexColumn: + """Test suite for choose_calibration_index_column function.""" + + def test_choose_variant_column_priority(self): + """Should return the variant column if present and not all NaN.""" + df = pd.DataFrame( + { + calibration_variant_column_name: ["v1", "v2"], + calibration_class_column_name: ["A", "B"], + hgvs_nt_column: [None, None], + hgvs_pro_column: [None, None], + } + ) + result = choose_calibration_index_column(df) + assert result == calibration_variant_column_name + + def test_choose_hgvs_nt_column_if_variant_missing(self): + """Should return 
hgvs_nt_column if variant column is missing or all NaN."""
+        df = pd.DataFrame(
+            {
+                hgvs_nt_column: ["c.1A>G", "c.2T>C"],
+                calibration_class_column_name: ["A", "B"],
+            }
+        )
+        result = choose_calibration_index_column(df)
+        assert result == hgvs_nt_column
+
+    def test_choose_hgvs_pro_column_if_variant_and_nt_missing(self):
+        """Should return hgvs_pro_column if variant and hgvs_nt columns are missing or all NaN."""
+        df = pd.DataFrame(
+            {
+                hgvs_pro_column: ["p.A1G", "p.T2C"],
+                calibration_class_column_name: ["A", "B"],
+            }
+        )
+        result = choose_calibration_index_column(df)
+        assert result == hgvs_pro_column
+
+    def test_ignores_all_nan_columns(self):
+        """Should ignore columns that are all NaN when choosing index column."""
+        df = pd.DataFrame(
+            {
+                calibration_variant_column_name: [float("nan"), float("nan")],
+                hgvs_nt_column: ["c.1A>G", "c.2T>C"],
+                calibration_class_column_name: ["A", "B"],
+            }
+        )
+        result = choose_calibration_index_column(df)
+        assert result == hgvs_nt_column
+
+    def test_case_insensitive_column_names(self):
+        """Should handle column names in different cases."""
+        df = pd.DataFrame(
+            {
+                calibration_variant_column_name.upper(): ["v1", "v2"],
+                calibration_class_column_name.capitalize(): ["A", "B"],
+            }
+        )
+        result = choose_calibration_index_column(df)
+        assert result == calibration_variant_column_name.upper()
+
+    def test_raises_if_no_valid_index_column(self):
+        """Should raise ValidationError if no valid index column is found."""
+        df = pd.DataFrame(
+            {
+                calibration_class_column_name: ["A", "B"],
+                "other": ["x", "y"],
+            }
+        )
+        with pytest.raises(ValidationError, match="failed to find valid calibration index column"):
+            choose_calibration_index_column(df)
+
+    def test_raises_if_all_index_columns_are_nan(self):
+        """Should raise ValidationError if all possible index columns are all NaN."""
+        df = pd.DataFrame(
+            {
+                calibration_variant_column_name: [float("nan"), float("nan")],
+                hgvs_nt_column: [float("nan"), float("nan")],
+                hgvs_pro_column: [float("nan"), float("nan")],
+                calibration_class_column_name: ["A", "B"],
+            }
+        )
+        with pytest.raises(ValidationError, match="failed to find valid calibration index column"):
+            choose_calibration_index_column(df)

From c4f3a3f2c312d12a7f1f9138e107975fb70c0236 Mon Sep 17 00:00:00 2001
From: Benjamin Capodanno
Date: Wed, 26 Nov 2025 16:27:49 -0800
Subject: [PATCH 17/24] fixup

---
 src/mavedb/lib/validation/dataframe/calibration.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mavedb/lib/validation/dataframe/calibration.py b/src/mavedb/lib/validation/dataframe/calibration.py
index 6718faa3..d649455e 100644
--- a/src/mavedb/lib/validation/dataframe/calibration.py
+++ b/src/mavedb/lib/validation/dataframe/calibration.py
@@ -74,7 +74,7 @@ def validate_and_standardize_calibration_classes_dataframe(
     ).reset_index(drop=True)

     for c in column_mapping:
-        if c == index_column.lower():
+        if c in {calibration_variant_column_name, hgvs_nt_column, hgvs_pro_column}:
             validate_variant_column(standardized_classes_df[c], column_mapping[c] == index_column)
             validate_index_existence_in_score_set(db, score_set, standardized_classes_df[c], index_column)
         elif c == calibration_class_column_name:
             validate_data_column(standardized_classes_df[c], force_numeric=False)
             validate_calibration_classes(calibration, standardized_classes_df[c])

From fc20f40cce24f77a147a47bd2a2e21c4e5ca2ef7 Mon Sep 17 00:00:00 2001
From: Benjamin Capodanno
Date: Wed, 26 Nov 2025 16:49:02 -0800
Subject: [PATCH 18/24] feat: don't allow class-based calibrations during score set creation

---
 src/mavedb/routers/score_sets.py | 15 ++++++++++++++-
 tests/routers/test_score_set.py  | 29
+++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 1 deletion(-) diff --git a/src/mavedb/routers/score_sets.py b/src/mavedb/routers/score_sets.py index 959f9133..b9869a6d 100644 --- a/src/mavedb/routers/score_sets.py +++ b/src/mavedb/routers/score_sets.py @@ -1551,7 +1551,20 @@ async def create_score_set( score_calibrations: list[ScoreCalibration] = [] if item_create.score_calibrations: for calibration_create in item_create.score_calibrations: - created_calibration_item = await create_score_calibration(db, calibration_create, user_data.user) + # TODO#592: Support for class-based calibrations on score set creation + if calibration_create.class_based: + logger.info( + msg="Failed to create score set; Class-based calibrations are not supported on score set creation.", + extra=logging_context(), + ) + raise HTTPException( + status_code=409, + detail="Class-based calibrations are not supported on score set creation. Please create class-based calibrations after creating the score set.", + ) + + created_calibration_item = await create_score_calibration( + db, calibration_create, user_data.user, variant_classes=None + ) created_calibration_item.investigator_provided = True # necessarily true on score set creation score_calibrations.append(created_calibration_item) diff --git a/tests/routers/test_score_set.py b/tests/routers/test_score_set.py index a20f47fc..413f0d09 100644 --- a/tests/routers/test_score_set.py +++ b/tests/routers/test_score_set.py @@ -35,6 +35,7 @@ SAVED_PUBMED_PUBLICATION, SAVED_SHORT_EXTRA_LICENSE, TEST_BIORXIV_IDENTIFIER, + TEST_BRNICH_SCORE_CALIBRATION_CLASS_BASED, TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED, TEST_CROSSREF_IDENTIFIER, TEST_GNOMAD_DATA_VERSION, @@ -234,6 +235,34 @@ def test_create_score_set_with_score_calibration(client, mock_publication_fetch, assert response.status_code == 200 +@pytest.mark.parametrize( + "mock_publication_fetch", + [ + ( + [ + {"dbName": "PubMed", "identifier": f"{TEST_PUBMED_IDENTIFIER}"}, + {"dbName": "bioRxiv", "identifier": f"{TEST_BIORXIV_IDENTIFIER}"}, + ] + ) + ], + indirect=["mock_publication_fetch"], +) +def test_cannot_create_score_set_with_class_based_calibration(client, mock_publication_fetch, setup_router_db): + experiment = create_experiment(client) + score_set = deepcopy(TEST_MINIMAL_SEQ_SCORESET) + score_set["experimentUrn"] = experiment["urn"] + score_set.update( + { + "scoreCalibrations": [deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_CLASS_BASED)], + } + ) + + response = client.post("/api/v1/score-sets/", json=score_set) + assert response.status_code == 409 + response_data = response.json() + assert "Class-based calibrations are not supported on score set creation" in response_data["detail"] + + @pytest.mark.parametrize( "mock_publication_fetch", [ From edc2058e34ed8e48f4b67e1b721a81a5faf7d975 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Wed, 26 Nov 2025 17:07:20 -0800 Subject: [PATCH 19/24] fix: only check resource existence for index columns --- .../lib/validation/dataframe/calibration.py | 9 +++- .../validation/dataframe/test_calibration.py | 50 +++++++++++++++++++ 2 files changed, 58 insertions(+), 1 deletion(-) diff --git a/src/mavedb/lib/validation/dataframe/calibration.py b/src/mavedb/lib/validation/dataframe/calibration.py index d649455e..13909637 100644 --- a/src/mavedb/lib/validation/dataframe/calibration.py +++ b/src/mavedb/lib/validation/dataframe/calibration.py @@ -76,11 +76,15 @@ def validate_and_standardize_calibration_classes_dataframe( for c in column_mapping: if c in 
{calibration_variant_column_name, hgvs_nt_column, hgvs_pro_column}: validate_variant_column(standardized_classes_df[c], column_mapping[c] == index_column) - validate_index_existence_in_score_set(db, score_set, standardized_classes_df[c], index_column) elif c == calibration_class_column_name: validate_data_column(standardized_classes_df[c], force_numeric=False) validate_calibration_classes(calibration, standardized_classes_df[c]) + if c == index_column: + validate_index_existence_in_score_set( + db, score_set, standardized_classes_df[column_mapping[c]], column_mapping[c] + ) + return standardized_classes_df, index_column @@ -146,6 +150,9 @@ def validate_index_existence_in_score_set( Returns: None: Function returns nothing if validation passes. """ + print(index_column.tolist()) + print(index_column_name) + if index_column_name.lower() == calibration_variant_column_name: existing_resources = set( db.scalars( diff --git a/tests/validation/dataframe/test_calibration.py b/tests/validation/dataframe/test_calibration.py index 4e7f2946..6d0a0af0 100644 --- a/tests/validation/dataframe/test_calibration.py +++ b/tests/validation/dataframe/test_calibration.py @@ -36,12 +36,16 @@ def mock_dependencies(self): patch("mavedb.lib.validation.dataframe.calibration.validate_no_null_rows") as mock_validate_no_null, patch("mavedb.lib.validation.dataframe.calibration.validate_variant_column") as mock_validate_variant, patch("mavedb.lib.validation.dataframe.calibration.validate_data_column") as mock_validate_data, + patch( + "mavedb.lib.validation.dataframe.calibration.validate_index_existence_in_score_set" + ) as mock_validate_index_existence, ): yield { "standardize_dataframe": mock_standardize, "validate_no_null_rows": mock_validate_no_null, "validate_variant_column": mock_validate_variant, "validate_data_column": mock_validate_data, + "validate_index_existence_in_score_set": mock_validate_index_existence, } def test_validate_and_standardize_calibration_classes_dataframe_success(self, mock_dependencies): @@ -351,6 +355,52 @@ def test_validate_and_standardize_calibration_classes_dataframe_empty_dataframe( with pytest.raises(ValidationError, match=f"missing required column: '{calibration_class_column_name}'"): validate_and_standardize_calibration_classes_dataframe(mock_db, mock_score_set, mock_calibration, input_df) + def test_validate_and_standardize_calibration_classes_dataframe_multiple_candidate_index_columns( + self, mock_dependencies + ): + """Test successful validation when multiple candidate index columns are present.""" + mock_db = Mock() + mock_score_set = Mock() + mock_score_set.id = 123 + mock_calibration = Mock() + mock_calibration.class_based = True + + input_df = pd.DataFrame( + { + calibration_variant_column_name: ["var1", "var2"], + hgvs_nt_column: ["NM_000546.5:c.215C>G", "NM_000546.5:c.743G>A"], + calibration_class_column_name: ["A", "B"], + } + ) + standardized_df = pd.DataFrame( + { + calibration_variant_column_name: ["var1", "var2"], + hgvs_nt_column: ["NM_000546.5:c.215C>G", "NM_000546.5:c.743G>A"], + calibration_class_column_name: ["A", "B"], + } + ) + + mock_dependencies["standardize_dataframe"].return_value = standardized_df + mock_dependencies["validate_index_existence_in_score_set"].return_value = None + + mock_scalars = Mock() + mock_scalars.all.return_value = ["var1", "var2"] + mock_db.scalars.return_value = mock_scalars + + mock_classification1 = Mock() + mock_classification1.class_ = "A" + mock_classification2 = Mock() + mock_classification2.class_ = "B" + 
mock_calibration.functional_classifications = [mock_classification1, mock_classification2] + + result, index_column = validate_and_standardize_calibration_classes_dataframe( + mock_db, mock_score_set, mock_calibration, input_df + ) + + assert result.equals(standardized_df) + assert index_column == calibration_variant_column_name + mock_dependencies["validate_index_existence_in_score_set"].assert_called_once() + class TestValidateCalibrationDfColumnNames: """Test suite for validate_calibration_df_column_names function.""" From bd54cd2b52f6766141e88e6cd69edacb4c9fc432 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Wed, 26 Nov 2025 18:29:10 -0800 Subject: [PATCH 20/24] fix: improperly renamed functional_ranges property in alembic downgrade --- .../16beeb593513_add_acmg_classification_and_functional_.py | 1 + 1 file changed, 1 insertion(+) diff --git a/alembic/versions/16beeb593513_add_acmg_classification_and_functional_.py b/alembic/versions/16beeb593513_add_acmg_classification_and_functional_.py index 53e812bb..d77dea85 100644 --- a/alembic/versions/16beeb593513_add_acmg_classification_and_functional_.py +++ b/alembic/versions/16beeb593513_add_acmg_classification_and_functional_.py @@ -139,4 +139,5 @@ def downgrade(): op.drop_table("score_calibration_functional_classification_variants") op.drop_table("score_calibration_functional_classifications") op.drop_table("acmg_classifications") + op.alter_column("score_calibrations", "functional_ranges_deprecated_json", new_column_name="functional_ranges") # ### end Alembic commands ### From 969bf4517d3a706871d8dba98b3f95be500fabde Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Wed, 26 Nov 2025 19:22:42 -0800 Subject: [PATCH 21/24] fix: correct index column reference in validation function --- src/mavedb/lib/validation/dataframe/calibration.py | 5 +---- tests/validation/dataframe/test_calibration.py | 12 +++++++++++- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/mavedb/lib/validation/dataframe/calibration.py b/src/mavedb/lib/validation/dataframe/calibration.py index 13909637..1c46be46 100644 --- a/src/mavedb/lib/validation/dataframe/calibration.py +++ b/src/mavedb/lib/validation/dataframe/calibration.py @@ -80,7 +80,7 @@ def validate_and_standardize_calibration_classes_dataframe( validate_data_column(standardized_classes_df[c], force_numeric=False) validate_calibration_classes(calibration, standardized_classes_df[c]) - if c == index_column: + if column_mapping[c] == index_column: validate_index_existence_in_score_set( db, score_set, standardized_classes_df[column_mapping[c]], column_mapping[c] ) @@ -150,9 +150,6 @@ def validate_index_existence_in_score_set( Returns: None: Function returns nothing if validation passes. 
""" - print(index_column.tolist()) - print(index_column_name) - if index_column_name.lower() == calibration_variant_column_name: existing_resources = set( db.scalars( diff --git a/tests/validation/dataframe/test_calibration.py b/tests/validation/dataframe/test_calibration.py index 6d0a0af0..57c7d22b 100644 --- a/tests/validation/dataframe/test_calibration.py +++ b/tests/validation/dataframe/test_calibration.py @@ -185,7 +185,9 @@ def test_validate_and_standardize_calibration_classes_dataframe_drops_null_class assert result.equals(expected_df) - def test_validate_and_standardize_calibration_classes_dataframe_invalid_variants(self, mock_dependencies): + def test_validate_and_standardize_calibration_classes_dataframe_propagates_nonexistent_variants( + self, mock_dependencies + ): """Test ValidationError when variant URN validation fails.""" mock_db = Mock() mock_score_set = Mock() @@ -204,6 +206,14 @@ def test_validate_and_standardize_calibration_classes_dataframe_invalid_variants mock_scalars.all.return_value = [] mock_db.scalars.return_value = mock_scalars + mock_classification1 = Mock() + mock_classification1.class_ = "A" + mock_calibration.functional_classifications = [mock_classification1] + + mock_dependencies["validate_index_existence_in_score_set"].side_effect = ValidationError( + "The following resources do not exist in the score set: var1" + ) + with pytest.raises(ValidationError, match="The following resources do not exist in the score set: var1"): validate_and_standardize_calibration_classes_dataframe(mock_db, mock_score_set, mock_calibration, input_df) From 4962f4fc725a8cdb2495e682d54ddf68286fc411 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Thu, 18 Dec 2025 12:32:58 -0800 Subject: [PATCH 22/24] refactor: update calibrated variant effects script for new classification format --- .../scripts/calibrated_variant_effects.py | 87 ++++++------------- 1 file changed, 25 insertions(+), 62 deletions(-) diff --git a/src/mavedb/scripts/calibrated_variant_effects.py b/src/mavedb/scripts/calibrated_variant_effects.py index 652b251c..a8f47088 100644 --- a/src/mavedb/scripts/calibrated_variant_effects.py +++ b/src/mavedb/scripts/calibrated_variant_effects.py @@ -64,33 +64,13 @@ from sqlalchemy import select from sqlalchemy.orm import Session, joinedload +from mavedb.models.score_calibration_functional_classification import ScoreCalibrationFunctionalClassification from mavedb.models.score_set import ScoreSet from mavedb.scripts.environment import with_database_session -from mavedb.view_models.score_calibration import FunctionalRange logger = logging.getLogger(__name__) -def score_falls_within_range(score: float, functional_range: dict) -> bool: - """Check if a score falls within a functional range using the view model.""" - try: - range_obj = FunctionalRange.model_validate(functional_range) - return range_obj.is_contained_by_range(score) - except Exception as e: - logger.warning(f"Error validating functional range: {e}") - return False - - -def has_acmg_classification(functional_range: dict) -> bool: - """Check if a functional range has an ACMG classification.""" - acmg_data = functional_range.get("acmg_classification") - return acmg_data is not None and ( - acmg_data.get("criterion") is not None - or acmg_data.get("evidence_strength") is not None - or acmg_data.get("points") is not None - ) - - @click.command() @with_database_session def main(db: Session) -> None: @@ -106,28 +86,26 @@ def main(db: Session) -> None: score_sets = db.scalars(query).unique().all() - total_variants = 0 - 
classified_variants = 0 - score_sets_with_acmg = 0 - processed_variants: Set[int] = set() + total_variants_count = 0 + classified_variants_count = 0 + score_sets_with_acmg_count = 0 gene_list: Set[str] = set() click.echo(f"Found {len(score_sets)} non-superseded score sets with calibrations") for score_set in score_sets: # Collect all ACMG-classified ranges from this score set's calibrations - acmg_ranges = [] + acmg_ranges: list[ScoreCalibrationFunctionalClassification] = [] for calibration in score_set.score_calibrations: - if calibration.functional_ranges: - for func_range in calibration.functional_ranges: - if has_acmg_classification(func_range): - acmg_ranges.append(func_range) + if calibration.functional_classifications: + for func_classification in calibration.functional_classifications: + if func_classification.acmg_classification_id is not None: + acmg_ranges.append(func_classification) if not acmg_ranges: continue - score_sets_with_acmg += 1 - score_set_classified_variants = 0 + score_sets_with_acmg_count += 1 # Retain a list of unique target genes for reporting for target in score_set.target_genes: @@ -137,47 +115,32 @@ def main(db: Session) -> None: gene_list.add(target_name.strip().upper()) - for variant in score_set.variants: - if variant.id in processed_variants: - continue - - variant_data = variant.data - if not variant_data: - continue - - score_data = variant_data.get("score_data", {}) - score = score_data.get("score") - - total_variants += 1 - processed_variants.add(variant.id) # type: ignore - - if score is None: - continue - - # Check if score falls within any ACMG-classified range in this score set - for func_range in acmg_ranges: - if score_falls_within_range(float(score), func_range): - classified_variants += 1 - score_set_classified_variants += 1 - break # Count variant only once per score set + score_set_classified_variants: set[int] = set() + for classified_range in acmg_ranges: + variants_classified_by_range: list[int] = [ + variant.id for variant in classified_range.variants if variant.id is not None + ] + score_set_classified_variants.update(variants_classified_by_range) - if score_set_classified_variants > 0: + total_variants_count += score_set.num_variants or 0 + classified_variants_count += len(score_set_classified_variants) + if score_set_classified_variants: click.echo( - f"Score set {score_set.urn}: {score_set_classified_variants} classified variants ({score_set.num_variants} total variants)" + f"Score set {score_set.urn}: {len(score_set_classified_variants)} classified variants ({score_set.num_variants} total variants)" ) click.echo("\n" + "=" * 60) click.echo("SUMMARY") click.echo("=" * 60) - click.echo(f"Score sets with ACMG classifications: {score_sets_with_acmg}") - click.echo(f"Total unique variants processed: {total_variants}") - click.echo(f"Variants within ACMG-classified ranges: {classified_variants}") + click.echo(f"Score sets with ACMG classifications: {score_sets_with_acmg_count}") + click.echo(f"Total unique variants processed: {total_variants_count}") + click.echo(f"Variants within ACMG-classified ranges: {classified_variants_count}") click.echo(f"Unique target genes covered ({len(gene_list)}):") for gene in sorted(gene_list): click.echo(f" - {gene}") - if total_variants > 0: - percentage = (classified_variants / total_variants) * 100 + if total_variants_count > 0: + percentage = (classified_variants_count / total_variants_count) * 100 click.echo(f"Classification rate: {percentage:.1f}%") From 8ee4c1a985ab1319fff3d8272cc206195765c205 Mon 
Sep 17 00:00:00 2001
From: Benjamin Capodanno
Date: Tue, 30 Dec 2025 10:10:14 -0800
Subject: [PATCH 23/24] fix: use functional classification enum in place of old style strings

---
 src/mavedb/scripts/load_calibration_csv.py  | 17 ++++++------
 .../scripts/load_pp_style_calibration.py    |  5 +++-
 tests/helpers/constants.py                  | 17 ++++++------
 tests/view_models/test_score_calibration.py | 27 +++++++++++++------
 4 files changed, 40 insertions(+), 26 deletions(-)

diff --git a/src/mavedb/scripts/load_calibration_csv.py b/src/mavedb/scripts/load_calibration_csv.py
index 95da46fe..904f51e9 100644
--- a/src/mavedb/scripts/load_calibration_csv.py
+++ b/src/mavedb/scripts/load_calibration_csv.py
@@ -92,7 +92,7 @@
 import csv
 import re
 from pathlib import Path
-from typing import Any, Dict, List, Literal, Optional, Tuple
+from typing import Any, Dict, List, Optional, Tuple
 
 import click
 from sqlalchemy.orm import Session
@@ -101,6 +101,7 @@
 from mavedb.lib.oddspaths import oddspaths_evidence_strength_equivalent
 from mavedb.lib.score_calibrations import create_score_calibration_in_score_set
 from mavedb.models import score_calibration
+from mavedb.models.enums.functional_classification import FunctionalClassification as FunctionalClassificationOptions
 from mavedb.models.score_set import ScoreSet
 from mavedb.models.user import User
 from mavedb.scripts.environment import with_database_session
@@ -152,23 +153,21 @@ def parse_interval(text: str) -> Tuple[Optional[float], Optional[float], bool, b
     return lower, upper, inclusive_lower, inclusive_upper
 
 
-def normalize_classification(
-    raw: Optional[str], strength: Optional[str]
-) -> Literal["normal", "abnormal", "not_specified"]:
+def normalize_classification(raw: Optional[str], strength: Optional[str]) -> FunctionalClassificationOptions:
     if raw:
         r = raw.strip().lower()
         if r in {"normal", "abnormal", "not_specified"}:
-            return r  # type: ignore[return-value]
+            return FunctionalClassificationOptions[r]
         if r in {"indeterminate", "uncertain", "unknown"}:
-            return "not_specified"
+            return FunctionalClassificationOptions.not_specified
 
     if strength:
         if strength.upper().startswith("PS"):
-            return "abnormal"
+            return FunctionalClassificationOptions.abnormal
         if strength.upper().startswith("BS"):
-            return "normal"
+            return FunctionalClassificationOptions.normal
 
-    return "not_specified"
+    return FunctionalClassificationOptions.not_specified
 
 
 def build_publications(
diff --git a/src/mavedb/scripts/load_pp_style_calibration.py b/src/mavedb/scripts/load_pp_style_calibration.py
index 99862d6d..2592792f 100644
--- a/src/mavedb/scripts/load_pp_style_calibration.py
+++ b/src/mavedb/scripts/load_pp_style_calibration.py
@@ -84,6 +84,7 @@
 from sqlalchemy.orm import Session
 from mavedb.lib.score_calibrations import create_score_calibration_in_score_set
+from mavedb.models.enums.functional_classification import FunctionalClassification as FunctionalClassificationOptions
 from mavedb.models.score_calibration import ScoreCalibration
 from mavedb.models.score_set import ScoreSet
 from mavedb.models.user import User
 from mavedb.scripts.environment import with_database_session
@@ -214,7 +215,9 @@ def main(db: Session, archive_path: str, dataset_map: str, overwrite: bool) -> N
            functional_range = score_calibration.FunctionalClassificationCreate(
                label=f"{ps_or_bs} {strength_label} ({points})",
-                classification="abnormal" if points > 0 else "normal",
+                classification=FunctionalClassificationOptions.abnormal
+                if points > 0
+                else FunctionalClassificationOptions.normal,
                range=range_data,
                acmg_classification=acmg_classification.ACMGClassificationCreate(
                    points=int(points),
diff --git a/tests/helpers/constants.py b/tests/helpers/constants.py index c7b17b1d..48eb9ec3 100644 --- a/tests/helpers/constants.py +++ b/tests/helpers/constants.py @@ -2,6 +2,7 @@ from humps import camelize +from mavedb.models.enums.functional_classification import FunctionalClassification as FunctionalClassificationOptions from mavedb.models.enums.processing_state import ProcessingState VALID_EXPERIMENT_SET_URN = "urn:mavedb:01234567" @@ -1411,7 +1412,7 @@ TEST_FUNCTIONAL_RANGE_NORMAL = { "label": "test normal functional range", "description": "A normal functional range", - "functional_classification": "normal", + "functional_classification": FunctionalClassificationOptions.normal.value, "range": [1.0, 5.0], "acmg_classification": TEST_ACMG_BS3_STRONG_CLASSIFICATION, "oddspaths_ratio": TEST_BS3_STRONG_ODDS_PATH_RATIO, @@ -1431,7 +1432,7 @@ TEST_FUNCTIONAL_RANGE_ABNORMAL = { "label": "test abnormal functional range", "description": "An abnormal functional range", - "functional_classification": "abnormal", + "functional_classification": FunctionalClassificationOptions.abnormal.value, "range": [-5.0, -1.0], "acmg_classification": TEST_ACMG_PS3_STRONG_CLASSIFICATION, "oddspaths_ratio": TEST_PS3_STRONG_ODDS_PATH_RATIO, @@ -1450,7 +1451,7 @@ TEST_FUNCTIONAL_RANGE_NOT_SPECIFIED = { "label": "test not specified functional range", - "functional_classification": "not_specified", + "functional_classification": FunctionalClassificationOptions.not_specified.value, "range": [-1.0, 1.0], "inclusive_lower_bound": True, "inclusive_upper_bound": False, @@ -1467,7 +1468,7 @@ TEST_FUNCTIONAL_CLASSIFICATION_NORMAL = { "label": "test normal functional class", "description": "A normal functional class", - "functional_classification": "normal", + "functional_classification": FunctionalClassificationOptions.normal.value, "class": "normal_class", "acmg_classification": TEST_ACMG_BS3_STRONG_CLASSIFICATION, "oddspaths_ratio": TEST_BS3_STRONG_ODDS_PATH_RATIO, @@ -1485,7 +1486,7 @@ TEST_FUNCTIONAL_CLASSIFICATION_ABNORMAL = { "label": "test abnormal functional class", "description": "An abnormal functional class", - "functional_classification": "abnormal", + "functional_classification": FunctionalClassificationOptions.abnormal.value, "class": "abnormal_class", "acmg_classification": TEST_ACMG_PS3_STRONG_CLASSIFICATION, "oddspaths_ratio": TEST_PS3_STRONG_ODDS_PATH_RATIO, @@ -1502,7 +1503,7 @@ TEST_FUNCTIONAL_CLASSIFICATION_NOT_SPECIFIED = { "label": "test not specified functional class", - "functional_classification": "not_specified", + "functional_classification": FunctionalClassificationOptions.not_specified.value, "class": "not_specified_class", } @@ -1517,7 +1518,7 @@ TEST_FUNCTIONAL_RANGE_INCLUDING_NEGATIVE_INFINITY = { "label": "test functional range including negative infinity", "description": "A functional range including negative infinity", - "functional_classification": "not_specified", + "functional_classification": FunctionalClassificationOptions.not_specified.value, "range": [None, 0.0], "inclusive_lower_bound": False, "inclusive_upper_bound": False, @@ -1533,7 +1534,7 @@ TEST_FUNCTIONAL_RANGE_INCLUDING_POSITIVE_INFINITY = { "label": "test functional range including positive infinity", "description": "A functional range including positive infinity", - "functional_classification": "not_specified", + "functional_classification": FunctionalClassificationOptions.not_specified.value, "range": [0.0, None], "inclusive_lower_bound": False, "inclusive_upper_bound": False, diff --git 
a/tests/view_models/test_score_calibration.py b/tests/view_models/test_score_calibration.py index 1c600b26..55f1185c 100644 --- a/tests/view_models/test_score_calibration.py +++ b/tests/view_models/test_score_calibration.py @@ -4,6 +4,7 @@ from pydantic import ValidationError from mavedb.lib.acmg import ACMGCriterion +from mavedb.models.enums.functional_classification import FunctionalClassification as FunctionalClassificationOptions from mavedb.models.enums.score_calibration_relation import ScoreCalibrationRelation from mavedb.view_models.score_calibration import ( FunctionalClassificationCreate, @@ -182,7 +183,7 @@ def test_is_contained_by_range(): fr = FunctionalClassificationCreate.model_validate( { "label": "test range", - "functional_classification": "abnormal", + "functional_classification": FunctionalClassificationOptions.abnormal, "range": (0.0, 1.0), "inclusive_lower_bound": True, "inclusive_upper_bound": True, @@ -205,7 +206,7 @@ def test_inclusive_bounds_get_default_when_unset_and_range_exists(): fr = FunctionalClassificationCreate.model_validate( { "label": "test range", - "functional_classification": "abnormal", + "functional_classification": FunctionalClassificationOptions.abnormal, "range": (0.0, 1.0), } ) @@ -218,7 +219,7 @@ def test_inclusive_bounds_remain_none_when_range_is_none(): fr = FunctionalClassificationCreate.model_validate( { "label": "test range", - "functional_classification": "abnormal", + "functional_classification": FunctionalClassificationOptions.abnormal, "class": "some_class", } ) @@ -245,7 +246,7 @@ def test_inclusive_bounds_remain_none_when_range_is_none(): def test_cant_set_inclusive_bounds_when_range_is_none(bound_property, bound_value, match_text): invalid_data = { "label": "test range", - "functional_classification": "abnormal", + "functional_classification": FunctionalClassificationOptions.abnormal, "class": "some_class", bound_property: bound_value, } @@ -373,7 +374,9 @@ def test_can_create_score_calibration_when_unclassified_ranges_overlap_with_clas # Make the first two ranges overlap, one being 'not_specified' valid_data["functional_classifications"][0]["range"] = [1.5, 3.0] valid_data["functional_classifications"][1]["range"] = [2.0, 4.0] - valid_data["functional_classifications"][0]["functional_classification"] = "not_specified" + valid_data["functional_classifications"][0]["functional_classification"] = ( + FunctionalClassificationOptions.not_specified + ) sc = ScoreCalibrationCreate.model_validate(valid_data) assert len(sc.functional_classifications) == len(valid_data["functional_classifications"]) @@ -383,8 +386,12 @@ def test_can_create_score_calibration_when_unclassified_ranges_overlap_with_each # Make the first two ranges overlap, both being 'not_specified' valid_data["functional_classifications"][0]["range"] = [1.5, 3.0] valid_data["functional_classifications"][1]["range"] = [2.0, 4.0] - valid_data["functional_classifications"][0]["functional_classification"] = "not_specified" - valid_data["functional_classifications"][1]["functional_classification"] = "not_specified" + valid_data["functional_classifications"][0]["functional_classification"] = ( + FunctionalClassificationOptions.not_specified + ) + valid_data["functional_classifications"][1]["functional_classification"] = ( + FunctionalClassificationOptions.not_specified + ) sc = ScoreCalibrationCreate.model_validate(valid_data) assert len(sc.functional_classifications) == len(valid_data["functional_classifications"]) @@ -616,7 +623,11 @@ def 
test_cannot_create_score_calibration_with_mixed_range_and_class_based_functi invalid_data = deepcopy(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) # Add a class-based functional classification to a range-based calibration invalid_data["functional_classifications"].append( - {"label": "class based classification", "functional_classification": "abnormal", "class": "some_class"} + { + "label": "class based classification", + "functional_classification": FunctionalClassificationOptions.abnormal, + "class": "some_class", + } ) with pytest.raises( From 2b3249b537cad4e11805950ed29616925bd24726 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Wed, 14 Jan 2026 08:56:22 -0800 Subject: [PATCH 24/24] fix: remove duplicate score calibration urn idx in migration --- .../16beeb593513_add_acmg_classification_and_functional_.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/alembic/versions/16beeb593513_add_acmg_classification_and_functional_.py b/alembic/versions/16beeb593513_add_acmg_classification_and_functional_.py index d77dea85..41e86383 100644 --- a/alembic/versions/16beeb593513_add_acmg_classification_and_functional_.py +++ b/alembic/versions/16beeb593513_add_acmg_classification_and_functional_.py @@ -123,7 +123,6 @@ def upgrade(): sa.PrimaryKeyConstraint("functional_classification_id", "variant_id"), ) op.alter_column("score_calibrations", "functional_ranges", new_column_name="functional_ranges_deprecated_json") - op.create_index(op.f("ix_score_calibrations_created_by_id"), "score_calibrations", ["created_by_id"], unique=False) op.create_index( op.f("ix_score_calibrations_modified_by_id"), "score_calibrations", ["modified_by_id"], unique=False ) @@ -133,7 +132,6 @@ def upgrade(): def downgrade(): # ### commands auto generated by Alembic - please adjust! ### - op.drop_index(op.f("ix_score_calibrations_urn"), table_name="score_calibrations") op.drop_index(op.f("ix_score_calibrations_modified_by_id"), table_name="score_calibrations") op.drop_index(op.f("ix_score_calibrations_created_by_id"), table_name="score_calibrations") op.drop_table("score_calibration_functional_classification_variants")