Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 21 additions & 10 deletions src/mavedb/lib/clingen/services.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,17 @@
import hashlib
import logging
import requests
import os
import time
from datetime import datetime
from typing import Optional
from typing import Optional, Union
from urllib import parse


import requests
from jose import jwt

from mavedb.lib.logging.context import logging_context, save_to_logging_context, format_raised_exception_info_as_dict
from mavedb.lib.clingen.constants import GENBOREE_ACCOUNT_NAME, GENBOREE_ACCOUNT_PASSWORD, LDH_MAVE_ACCESS_ENDPOINT

from mavedb.lib.types.clingen import LdhSubmission, ClinGenAllele
from mavedb.lib.logging.context import format_raised_exception_info_as_dict, logging_context, save_to_logging_context
from mavedb.lib.types.clingen import ClinGenAllele, ClinGenSubmissionError, LdhSubmission
from mavedb.lib.utils import batched

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -71,7 +69,9 @@ def construct_auth_url(self, url: str) -> str:
token = hashlib.sha1((url + identity + gbTime).encode("utf-8")).hexdigest()
return url + "&gbLogin=" + GENBOREE_ACCOUNT_NAME + "&gbTime=" + gbTime + "&gbToken=" + token

def dispatch_submissions(self, content_submissions: list[str]) -> list[ClinGenAllele]:
def dispatch_submissions(
self, content_submissions: list[str]
) -> list[Union[ClinGenAllele, ClinGenSubmissionError]]:
save_to_logging_context({"car_submission_count": len(content_submissions)})

try:
Expand All @@ -89,7 +89,7 @@ def dispatch_submissions(self, content_submissions: list[str]) -> list[ClinGenAl
logger.error(msg="Failed to dispatch CAR submission.", exc_info=exc, extra=logging_context())
return []

response_data: list[ClinGenAllele] = response.json()
response_data: list[Union[ClinGenAllele, ClinGenSubmissionError]] = response.json()
save_to_logging_context({"car_submission_response_count": len(response_data)})
logger.info(msg="Successfully dispatched CAR submission.", extra=logging_context())

Expand Down Expand Up @@ -324,7 +324,7 @@ def clingen_allele_id_from_ldh_variation(variation: Optional[dict]) -> Optional[


def get_allele_registry_associations(
content_submissions: list[str], submission_response: list[ClinGenAllele]
content_submissions: list[str], submission_response: list[Union[ClinGenAllele, ClinGenSubmissionError]]
) -> dict[str, str]:
"""
Links HGVS strings and ClinGen Canonical Allele IDs (CAIDs) given a list of both.
Expand Down Expand Up @@ -360,9 +360,20 @@ def get_allele_registry_associations(

allele_registry_associations: dict[str, str] = {}
for registration in submission_response:
if "errorType" in registration:
logger.warning(
msg=f"Skipping errored ClinGen Allele Registry HGVS {registration.get('hgvs', 'unknown')} ({registration.get('errorType', 'unknown')}): {registration.get('message', 'unknown error message')}",
extra=logging_context(),
)
continue

# Extract the CAID from the URL (e.g., "http://reg.test.genome.network/allele/CA2513066" -> "CA2513066")
caid = registration["@id"].split("/")[-1]
alleles = registration.get("genomicAlleles", []) + registration.get("transcriptAlleles", [])
alleles = (
registration.get("genomicAlleles", [])
+ registration.get("transcriptAlleles", [])
+ registration.get("aminoAcidAlleles", [])
)

for allele in alleles:
for hgvs_string in content_submissions:
Expand Down
16 changes: 14 additions & 2 deletions src/mavedb/lib/types/clingen.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from typing import Any, Optional, TypedDict, Literal
from typing_extensions import NotRequired
from typing import Any, Literal, Optional, TypedDict

from typing_extensions import NotRequired

# See: https://ldh.genome.network/docs/ldh/submit.html#content-submission-body

Expand Down Expand Up @@ -152,3 +152,15 @@ class ClinGenAlleleDefinition(TypedDict):
"aminoAcidAlleles": NotRequired[list[ClinGenAlleleDefinition]],
},
)

class ClinGenSubmissionError(TypedDict):
    """Shape of an error entry in a ClinGen Allele Registry submission response.

    Entries of this shape are told apart from allele records by the presence
    of the ``errorType`` key. All six keys are declared as required strings.
    """

    # NOTE(review): the meaning of `description`, `inputLine` and `position`
    # is not shown by any consumer of this type -- confirm against the
    # Allele Registry API documentation before relying on them.
    description: str
    # Error category; consumers check for this key to detect an errored entry.
    errorType: str
    # HGVS string associated with the error (reported when the entry is skipped).
    hgvs: str
    inputLine: str
    # Message text reported alongside the error type.
    message: str
    position: str
11 changes: 6 additions & 5 deletions src/mavedb/scripts/clingen_car_submission.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,17 @@
import click
import logging
from typing import Sequence

import click
from sqlalchemy import select
from sqlalchemy.orm import Session

from mavedb.lib.clingen.constants import CAR_SUBMISSION_ENDPOINT
from mavedb.lib.clingen.services import ClinGenAlleleRegistryService, get_allele_registry_associations
from mavedb.lib.variants import get_hgvs_from_post_mapped
from mavedb.models.mapped_variant import MappedVariant
from mavedb.models.score_set import ScoreSet
from mavedb.models.variant import Variant
from mavedb.models.mapped_variant import MappedVariant
from mavedb.scripts.environment import with_database_session
from mavedb.lib.clingen.services import ClinGenAlleleRegistryService, get_allele_registry_associations
from mavedb.lib.clingen.constants import CAR_SUBMISSION_ENDPOINT
from mavedb.lib.variants import get_hgvs_from_post_mapped

logger = logging.getLogger(__name__)

Expand Down
48 changes: 39 additions & 9 deletions tests/lib/clingen/test_services.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,26 @@
# ruff: noqa: E402

import os
import pytest
import requests
from datetime import datetime
from unittest.mock import patch, MagicMock
from unittest.mock import MagicMock, patch
from urllib import parse

import pytest
import requests

arq = pytest.importorskip("arq")
cdot = pytest.importorskip("cdot")
fastapi = pytest.importorskip("fastapi")

from mavedb.lib.clingen.constants import LDH_MAVE_ACCESS_ENDPOINT, GENBOREE_ACCOUNT_NAME, GENBOREE_ACCOUNT_PASSWORD
from mavedb.lib.utils import batched
from mavedb.lib.clingen.constants import GENBOREE_ACCOUNT_NAME, GENBOREE_ACCOUNT_PASSWORD, LDH_MAVE_ACCESS_ENDPOINT
from mavedb.lib.clingen.services import (
ClinGenAlleleRegistryService,
ClinGenLdhService,
get_clingen_variation,
clingen_allele_id_from_ldh_variation,
get_allele_registry_associations,
get_clingen_variation,
)

from mavedb.lib.utils import batched
from tests.helpers.constants import VALID_CLINGEN_CA_ID

TEST_CLINGEN_URL = "https://pytest.clingen.com"
Expand Down Expand Up @@ -332,7 +332,7 @@ def test_dispatch_submissions_failure(self, mock_auth_url, mock_put, car_service


def test_get_allele_registry_associations_success():
content_submissions = ["NM_0001:c.1A>G", "NM_0002:c.2T>C"]
content_submissions = ["NM_0001:c.1A>G", "NM_0002:c.2T>C", "NM_0003:c.3G>A"]
submission_response = [
{
"@id": "http://reg.test.genome.network/allele/CA123",
Expand All @@ -344,9 +344,15 @@ def test_get_allele_registry_associations_success():
"genomicAlleles": [],
"transcriptAlleles": [{"hgvs": "NM_0002:c.2T>C"}],
},
{
"@id": "http://reg.test.genome.network/allele/CA789",
"genomicAlleles": [],
"transcriptAlleles": [],
"aminoAcidAlleles": [{"hgvs": "NM_0003:c.3G>A"}],
},
]
result = get_allele_registry_associations(content_submissions, submission_response)
assert result == {"NM_0001:c.1A>G": "CA123", "NM_0002:c.2T>C": "CA456"}
assert result == {"NM_0001:c.1A>G": "CA123", "NM_0002:c.2T>C": "CA456", "NM_0003:c.3G>A": "CA789"}


def test_get_allele_registry_associations_empty():
Expand All @@ -365,3 +371,27 @@ def test_get_allele_registry_associations_no_match():
]
result = get_allele_registry_associations(content_submissions, submission_response)
assert result == {}


def test_get_allele_registry_associations_mixed():
    """Errored entries are skipped while the surrounding successes are still linked."""
    # Successful registration matched through its genomic alleles.
    genomic_hit = {
        "@id": "http://reg.test.genome.network/allele/CA123",
        "genomicAlleles": [{"hgvs": "NM_0001:c.1A>G"}],
        "transcriptAlleles": [],
    }
    # Error entry: carries "errorType" and no "@id", so it must not be linked.
    rejected = {
        "errorType": "InvalidHGVS",
        "hgvs": "NM_0002:c.2T>C",
        "message": "The HGVS string is invalid.",
    }
    # Successful registration matched through its transcript alleles.
    transcript_hit = {
        "@id": "http://reg.test.genome.network/allele/CA789",
        "genomicAlleles": [],
        "transcriptAlleles": [{"hgvs": "NM_0003:c.3G>A"}],
    }

    associations = get_allele_registry_associations(
        ["NM_0001:c.1A>G", "NM_0002:c.2T>C", "NM_0003:c.3G>A"],
        [genomic_hit, rejected, transcript_hit],
    )

    # Only the two successful submissions appear; the errored HGVS is absent.
    assert associations == {"NM_0001:c.1A>G": "CA123", "NM_0003:c.3G>A": "CA789"}