From ccd93b333e770887a6861feccce2869f728ee4e0 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Wed, 31 Dec 2025 10:47:38 -0800 Subject: [PATCH 1/3] fix: enhance error handling in get_allele_registry_associations function --- src/mavedb/lib/clingen/services.py | 25 +++++++++++++------- src/mavedb/lib/types/clingen.py | 16 +++++++++++-- tests/lib/clingen/test_services.py | 38 ++++++++++++++++++++++++------ 3 files changed, 61 insertions(+), 18 deletions(-) diff --git a/src/mavedb/lib/clingen/services.py b/src/mavedb/lib/clingen/services.py index 1bcb7778..e4540709 100644 --- a/src/mavedb/lib/clingen/services.py +++ b/src/mavedb/lib/clingen/services.py @@ -1,19 +1,17 @@ import hashlib import logging -import requests import os import time from datetime import datetime -from typing import Optional +from typing import Optional, Union from urllib import parse - +import requests from jose import jwt -from mavedb.lib.logging.context import logging_context, save_to_logging_context, format_raised_exception_info_as_dict from mavedb.lib.clingen.constants import GENBOREE_ACCOUNT_NAME, GENBOREE_ACCOUNT_PASSWORD, LDH_MAVE_ACCESS_ENDPOINT - -from mavedb.lib.types.clingen import LdhSubmission, ClinGenAllele +from mavedb.lib.logging.context import format_raised_exception_info_as_dict, logging_context, save_to_logging_context +from mavedb.lib.types.clingen import ClinGenAllele, ClinGenSubmissionError, LdhSubmission from mavedb.lib.utils import batched logger = logging.getLogger(__name__) @@ -71,7 +69,9 @@ def construct_auth_url(self, url: str) -> str: token = hashlib.sha1((url + identity + gbTime).encode("utf-8")).hexdigest() return url + "&gbLogin=" + GENBOREE_ACCOUNT_NAME + "&gbTime=" + gbTime + "&gbToken=" + token - def dispatch_submissions(self, content_submissions: list[str]) -> list[ClinGenAllele]: + def dispatch_submissions( + self, content_submissions: list[str] + ) -> list[Union[ClinGenAllele, ClinGenSubmissionError]]: save_to_logging_context({"car_submission_count": len(content_submissions)}) try: @@ -89,7 +89,7 @@ def dispatch_submissions(self, content_submissions: list[str]) -> list[ClinGenAl logger.error(msg="Failed to dispatch CAR submission.", exc_info=exc, extra=logging_context()) return [] - response_data: list[ClinGenAllele] = response.json() + response_data: list[Union[ClinGenAllele, ClinGenSubmissionError]] = response.json() save_to_logging_context({"car_submission_response_count": len(response_data)}) logger.info(msg="Successfully dispatched CAR submission.", extra=logging_context()) @@ -324,7 +324,7 @@ def clingen_allele_id_from_ldh_variation(variation: Optional[dict]) -> Optional[ def get_allele_registry_associations( - content_submissions: list[str], submission_response: list[ClinGenAllele] + content_submissions: list[str], submission_response: list[Union[ClinGenAllele, ClinGenSubmissionError]] ) -> dict[str, str]: """ Links HGVS strings and ClinGen Canonoical Allele IDs (CAIDs) given a list of both. @@ -360,6 +360,13 @@ def get_allele_registry_associations( allele_registry_associations: dict[str, str] = {} for registration in submission_response: + if "errorType" in registration: + logger.warning( + msg=f"Skipping errored ClinGen Allele Registry HGVS {registration.get('hgvs', 'unknown')} ({registration.get('errorType', 'unknown')}): {registration.get('message', 'unknown error message')}", + extra=logging_context(), + ) + continue + # Extract the CAID from the URL (e.g., "http://reg.test.genome.network/allele/CA2513066" -> "CA2513066") caid = registration["@id"].split("/")[-1] alleles = registration.get("genomicAlleles", []) + registration.get("transcriptAlleles", []) diff --git a/src/mavedb/lib/types/clingen.py b/src/mavedb/lib/types/clingen.py index 9085a9da..708b6c17 100644 --- a/src/mavedb/lib/types/clingen.py +++ b/src/mavedb/lib/types/clingen.py @@ -1,6 +1,6 @@ -from typing import Any, Optional, TypedDict, Literal -from typing_extensions import NotRequired +from typing import Any, Literal, Optional, TypedDict +from typing_extensions import NotRequired # See: https://ldh.genome.network/docs/ldh/submit.html#content-submission-body @@ -152,3 +152,15 @@ class ClinGenAlleleDefinition(TypedDict): "aminoAcidAlleles": NotRequired[list[ClinGenAlleleDefinition]], }, ) + +ClinGenSubmissionError = TypedDict( + "ClinGenSubmissionError", + { + "description": str, + "errorType": str, + "hgvs": str, + "inputLine": str, + "message": str, + "position": str, + }, +) diff --git a/tests/lib/clingen/test_services.py b/tests/lib/clingen/test_services.py index 34828649..bafd4f7f 100644 --- a/tests/lib/clingen/test_services.py +++ b/tests/lib/clingen/test_services.py @@ -1,26 +1,26 @@ # ruff: noqa: E402 import os -import pytest -import requests from datetime import datetime -from unittest.mock import patch, MagicMock +from unittest.mock import MagicMock, patch from urllib import parse +import pytest +import requests + arq = pytest.importorskip("arq") cdot = pytest.importorskip("cdot") fastapi = pytest.importorskip("fastapi") -from mavedb.lib.clingen.constants import LDH_MAVE_ACCESS_ENDPOINT, GENBOREE_ACCOUNT_NAME, GENBOREE_ACCOUNT_PASSWORD -from mavedb.lib.utils import batched +from mavedb.lib.clingen.constants import GENBOREE_ACCOUNT_NAME, GENBOREE_ACCOUNT_PASSWORD, LDH_MAVE_ACCESS_ENDPOINT from mavedb.lib.clingen.services import ( ClinGenAlleleRegistryService, ClinGenLdhService, - get_clingen_variation, clingen_allele_id_from_ldh_variation, get_allele_registry_associations, + get_clingen_variation, ) - +from mavedb.lib.utils import batched from tests.helpers.constants import VALID_CLINGEN_CA_ID TEST_CLINGEN_URL = "https://pytest.clingen.com" @@ -365,3 +365,27 @@ def test_get_allele_registry_associations_no_match(): ] result = get_allele_registry_associations(content_submissions, submission_response) assert result == {} + + +def test_get_allele_registry_associations_mixed(): + content_submissions = ["NM_0001:c.1A>G", "NM_0002:c.2T>C", "NM_0003:c.3G>A"] + submission_response = [ + { + "@id": "http://reg.test.genome.network/allele/CA123", + "genomicAlleles": [{"hgvs": "NM_0001:c.1A>G"}], + "transcriptAlleles": [], + }, + { + "errorType": "InvalidHGVS", + "hgvs": "NM_0002:c.2T>C", + "message": "The HGVS string is invalid.", + }, + { + "@id": "http://reg.test.genome.network/allele/CA789", + "genomicAlleles": [], + "transcriptAlleles": [{"hgvs": "NM_0003:c.3G>A"}], + }, + ] + + result = get_allele_registry_associations(content_submissions, submission_response) + assert result == {"NM_0001:c.1A>G": "CA123", "NM_0003:c.3G>A": "CA789"} From bea83d30c7ce56b30c88a025e0176a8c75af34eb Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Wed, 31 Dec 2025 10:49:30 -0800 Subject: [PATCH 2/3] fix: include amino acid alleles in get_allele_registry_associations function --- src/mavedb/lib/clingen/services.py | 6 +++++- tests/lib/clingen/test_services.py | 10 ++++++++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/src/mavedb/lib/clingen/services.py b/src/mavedb/lib/clingen/services.py index e4540709..0450d61d 100644 --- a/src/mavedb/lib/clingen/services.py +++ b/src/mavedb/lib/clingen/services.py @@ -369,7 +369,11 @@ def get_allele_registry_associations( # Extract the CAID from the URL (e.g., "http://reg.test.genome.network/allele/CA2513066" -> "CA2513066") caid = registration["@id"].split("/")[-1] - alleles = registration.get("genomicAlleles", []) + registration.get("transcriptAlleles", []) + alleles = ( + registration.get("genomicAlleles", []) + + registration.get("transcriptAlleles", []) + + registration.get("aminoAcidAlleles", []) + ) for allele in alleles: for hgvs_string in content_submissions: diff --git a/tests/lib/clingen/test_services.py b/tests/lib/clingen/test_services.py index bafd4f7f..481c16d8 100644 --- a/tests/lib/clingen/test_services.py +++ b/tests/lib/clingen/test_services.py @@ -332,7 +332,7 @@ def test_dispatch_submissions_failure(self, mock_auth_url, mock_put, car_service def test_get_allele_registry_associations_success(): - content_submissions = ["NM_0001:c.1A>G", "NM_0002:c.2T>C"] + content_submissions = ["NM_0001:c.1A>G", "NM_0002:c.2T>C", "NM_0003:c.3G>A"] submission_response = [ { "@id": "http://reg.test.genome.network/allele/CA123", @@ -344,9 +344,15 @@ def test_get_allele_registry_associations_success(): "genomicAlleles": [], "transcriptAlleles": [{"hgvs": "NM_0002:c.2T>C"}], }, + { + "@id": "http://reg.test.genome.network/allele/CA789", + "genomicAlleles": [], + "transcriptAlleles": [], + "aminoAcidAlleles": [{"hgvs": "NM_0003:c.3G>A"}], + }, ] result = get_allele_registry_associations(content_submissions, submission_response) - assert result == {"NM_0001:c.1A>G": "CA123", "NM_0002:c.2T>C": "CA456"} + assert result == {"NM_0001:c.1A>G": "CA123", "NM_0002:c.2T>C": "CA456", "NM_0003:c.3G>A": "CA789"} def test_get_allele_registry_associations_empty(): From 1aae4966c3eb304cc740738ca2a176328d300ebb Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Wed, 31 Dec 2025 12:23:57 -0800 Subject: [PATCH 3/3] refactor: reorder import statements --- src/mavedb/scripts/clingen_car_submission.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/mavedb/scripts/clingen_car_submission.py b/src/mavedb/scripts/clingen_car_submission.py index 29ea5fd8..0c0e7bc4 100644 --- a/src/mavedb/scripts/clingen_car_submission.py +++ b/src/mavedb/scripts/clingen_car_submission.py @@ -1,16 +1,17 @@ -import click import logging from typing import Sequence + +import click from sqlalchemy import select from sqlalchemy.orm import Session +from mavedb.lib.clingen.constants import CAR_SUBMISSION_ENDPOINT +from mavedb.lib.clingen.services import ClinGenAlleleRegistryService, get_allele_registry_associations +from mavedb.lib.variants import get_hgvs_from_post_mapped +from mavedb.models.mapped_variant import MappedVariant from mavedb.models.score_set import ScoreSet from mavedb.models.variant import Variant -from mavedb.models.mapped_variant import MappedVariant from mavedb.scripts.environment import with_database_session -from mavedb.lib.clingen.services import ClinGenAlleleRegistryService, get_allele_registry_associations -from mavedb.lib.clingen.constants import CAR_SUBMISSION_ENDPOINT -from mavedb.lib.variants import get_hgvs_from_post_mapped logger = logging.getLogger(__name__)