Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions settings/.env.dev
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,9 @@ MAVEDB_API_KEY=
####################################################################################################

SEQREPO_ROOT_DIR=/usr/local/share/seqrepo/2024-12-20

####################################################################################################
# Environment variables for ensembl
####################################################################################################

ENSEMBL_API_URL=https://rest.ensembl.org
37 changes: 27 additions & 10 deletions src/api/routers/map.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
_get_mapped_reference_sequence,
_set_scoreset_layer,
annotate,
compute_target_gene_info,
)
from dcd_mapping.exceptions import (
AlignmentError,
Expand All @@ -34,6 +35,7 @@
from dcd_mapping.schemas import (
ScoreAnnotation,
ScoresetMapping,
TargetAnnotation,
TargetType,
TxSelectResult,
VrsVersion,
Expand Down Expand Up @@ -196,29 +198,41 @@ async def map_scoreset(urn: str, store_path: Path | None = None) -> JSONResponse

try:
raw_metadata = get_raw_scoreset_metadata(urn, store_path)
reference_sequences: dict[str, dict] = {}
reference_sequences: dict[str, TargetAnnotation] = {}
mapped_scores: list[ScoreAnnotation] = []
for target_gene in annotated_vrs_results:
preferred_layers = {
_set_scoreset_layer(urn, annotated_vrs_results[target_gene]),
}
target_gene_name = metadata.target_genes[target_gene].target_gene_name
reference_sequences[target_gene_name] = {
reference_sequences[target_gene_name] = TargetAnnotation()
reference_sequences[target_gene_name].layers = {
layer: {
"computed_reference_sequence": None,
"mapped_reference_sequence": None,
}
for layer in preferred_layers
}

# None sometimes shows up in the preferred layers set; remove it
preferred_layers.discard(None)

# Determine one gene symbol per target and its selection method
gene_info = await compute_target_gene_info(
target_key=target_gene,
transcripts=transcripts,
alignment_results=alignment_results,
metadata=metadata,
mapped_scores=annotated_vrs_results[target_gene],
)

for layer in preferred_layers:
reference_sequences[target_gene_name][layer][
reference_sequences[target_gene_name].layers[layer][
"computed_reference_sequence"
] = _get_computed_reference_sequence(
metadata.target_genes[target_gene], layer, transcripts[target_gene]
)
reference_sequences[target_gene_name][layer][
reference_sequences[target_gene_name].layers[layer][
"mapped_reference_sequence"
] = _get_mapped_reference_sequence(
metadata.target_genes[target_gene],
Expand All @@ -227,6 +241,9 @@ async def map_scoreset(urn: str, store_path: Path | None = None) -> JSONResponse
alignment_results[target_gene],
)

if gene_info is not None:
reference_sequences[target_gene_name].gene_info = gene_info

for m in annotated_vrs_results[target_gene]:
if m.pre_mapped is None:
mapped_scores.append(ScoreAnnotation(**m.model_dump()))
Expand All @@ -236,15 +253,15 @@ async def map_scoreset(urn: str, store_path: Path | None = None) -> JSONResponse

# if a genomic layer is present, the target is not accession-based, and the target gene is protein coding, add a cDNA entry (just the sequence accession) to this target's reference sequences
if (
AnnotationLayer.GENOMIC in reference_sequences[target_gene_name]
AnnotationLayer.GENOMIC in reference_sequences[target_gene_name].layers
and metadata.target_genes[target_gene].target_gene_category
== TargetType.PROTEIN_CODING
and metadata.target_genes[target_gene].target_accession_id is None
and transcripts[target_gene] is not None
and isinstance(transcripts[target_gene], TxSelectResult)
and transcripts[target_gene].nm is not None
):
reference_sequences[target_gene_name][AnnotationLayer.CDNA] = {
reference_sequences[target_gene_name].layers[AnnotationLayer.CDNA] = {
"computed_reference_sequence": None,
"mapped_reference_sequence": {
"sequence_accessions": [transcripts[target_gene].nm]
Expand All @@ -253,18 +270,18 @@ async def map_scoreset(urn: str, store_path: Path | None = None) -> JSONResponse

# drop layers whose mapped and computed reference sequences are both None, as well as any layer whose key is None
for target_gene in reference_sequences:
for layer in list(reference_sequences[target_gene].keys()):
for layer in list(reference_sequences[target_gene].layers.keys()):
if (
reference_sequences[target_gene][layer][
reference_sequences[target_gene].layers[layer][
"mapped_reference_sequence"
]
is None
and reference_sequences[target_gene][layer][
and reference_sequences[target_gene].layers[layer][
"computed_reference_sequence"
]
is None
) or layer is None:
del reference_sequences[target_gene][layer]
del reference_sequences[target_gene].layers[layer]

except Exception as e:
return JSONResponse(
Expand Down
Loading