Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
8cf12dd
Added in UCS method, method to handle variant with no gene, expanded …
NImeson Aug 5, 2024
8d26f48
working on tests
NImeson Aug 7, 2024
722bc55
added method in to create BRCA1+2 records as standard when we don't k…
NImeson Aug 9, 2024
fface18
NImeson Oct 31, 2024
898a5fd
expanded regex, added dic for malformed variants
NImeson Dec 3, 2024
e472872
expanded regex, added dic for malformed variants
NImeson Dec 3, 2024
d4b02e4
Merge branch 'develop' of https://github.com/NHSDigital/data_manageme…
NImeson Dec 4, 2024
78631a2
expanded ucs method to handle variants, added malformed variant dict
NImeson Dec 16, 2024
2784515
Merge branch 'develop' of https://github.com/NHSDigital/data_manageme…
NImeson Dec 16, 2024
05f18ba
Refactored UCS method into existing methods
NImeson Dec 16, 2024
121e9cd
refactored UCS method into existing methods
NImeson Dec 17, 2024
05f17a3
added Br2 representation of BRCA2
NImeson Dec 20, 2024
e0c29f5
additional changes to import variant outliers
NImeson Jan 6, 2025
3ab209e
added new UCS no gene code
NImeson Jan 10, 2025
af5ef63
Re-jigged the way the importer is handling the missing samples
NImeson Jan 15, 2025
f516bc2
edit of cDNA regex
NImeson Jan 15, 2025
be2a513
expanded exon regex slightly to account for variation in representation
NImeson Jan 16, 2025
4b023ae
create new blank tests
NImeson Jan 17, 2025
530a90c
Updating tests
NImeson Jan 20, 2025
1d4aa39
Fixing some rubocop issues
NImeson Jan 20, 2025
2ca787d
edited comment
NImeson Jan 22, 2025
7d951a8
added in teststatus 10 for single variants
NImeson Jan 22, 2025
19d97a6
Added additional tests
NImeson Jan 23, 2025
56d78c9
tests now all running to completion, awaiting QC check result
NImeson Jan 24, 2025
8fce60c
edited record_basic_full_screen_ucs method to account for multiple genes
NImeson Jan 27, 2025
48932ea
removing legacy logger statements from tests
NImeson Jan 29, 2025
a8120da
removing legacy logger statements from tests
NImeson Jan 30, 2025
be9e18e
removed process_single_gene method from unknown_status()
NImeson Jan 30, 2025
73b8ec8
removed more logger statements from test suite
NImeson Jan 30, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
120 changes: 101 additions & 19 deletions lib/import/brca/providers/st_george_old/st_george_handler_old.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
require 'possibly'
require 'pry'

module Import
module Brca
Expand All @@ -11,7 +10,10 @@ class StGeorgeHandlerOld < Import::Germline::ProviderHandler
PASS_THROUGH_FIELDS = %w[age sex consultantcode collecteddate
receiveddate authoriseddate servicereportidentifier
providercode receiveddate sampletype].freeze
CDNA_REGEX = /c\.(?<cdna>[0-9]+[^\s)]+)|c\.\[(?<cdna>.*?)\]/i.freeze

CDNA_REGEX = /c\.(?<cdna>[0-9]+[^\s)]+)|
c\.\[(?<cdna>.*?)\]|
c\.\*?\s?(?<cdna>[0-9]+[^\s)]+)/ix.freeze

PROTEIN_REGEX = /p\.(?<impact>[a-z]+[0-9]+[a-z]+)|
p\.(?<sqrbo>\[)?(?<rndbo>\()?(?<impact>[a-z]+[0-9]+[a-z]+)
Expand All @@ -22,7 +24,8 @@ class StGeorgeHandlerOld < Import::Germline::ProviderHandler
'BRCA 1' => 'BRCA1',
'BR2' => 'BRCA2',
'B2' => 'BRCA2',
'BRCA 2' => 'BRCA2' }.freeze
'BRCA 2' => 'BRCA2',
'Br2' => 'BRCA2' }.freeze

BRCA_GENES_REGEX = /(?<brca>BRCA1|
BRCA2|
Expand All @@ -39,20 +42,21 @@ class StGeorgeHandlerOld < Import::Germline::ProviderHandler
SMARCB1|
LZTR1)/xi.freeze

EXON_VARIANT_REGEX = /(?<variant>del|dup|ins).+ex(?<on>on)?(?<s>s)?\s
(?<exons>[0-9]+(?<dgs>-[0-9]+)?)|
ex(?<on>on)?(?<s>s)?\s(?<exons>[0-9]+(?<dgs>-[0-9]+)?)\s
(?<variant>del|dup|ins)|
(?<variant>del|dup|ins)\sexon(?<s>s)?\s
(?<exons>[0-9]+(?<dgs>\sto\s[0-9]+))|
(?<variant>del|dup|ins)(?<s>\s)?(?<exons>[0-9]+(?<dgs>-[0-9]+)?)|
ex(?<on>on)?(?<s>s)?\s(?<exons>[0-9]+(?<dgs>\sto\s[0-9]+)?)\s
(?<variant>del|dup|ins)/ix.freeze
EXON_VARIANT_REGEX = /(?<variant>del|dup|ins).+ex(?<on>on)?(?<s>s)?\s(?<exons>[0-9]+(?<dgs>-[0-9]+)?)|
ex(?<on>on)?(?<s>s)?\s(?<exons>[0-9]+(?<dgs>-[0-9]+)?)\s(?<variant>del|dup|ins)|
(?<variant>del|dup|ins)\sexon(?<s>s)?\s(?<exons>[0-9]+(?<dgs>\sto\s[0-9]+))|
(?<variant>del|dup|ins)(?<s>\s)?(?<exons>[0-9]+(?<dgs>-[0-9]+)?)|
ex(?<on>on)?(?<s>s)?\s?(?<exons>[0-9]+(?<dgs>\sto\s[0-9]+)?)\s(?<variant>del|dup|ins)|
(?<variant>dup|del|ins)\s?(x|ex|exon)\s?(?<exons>[0-9]+(-|_)[0-9]+)|
(?<variant>ivs.*-ivs.*del|dup|ins)(?<exon>~?[0-9]+)|
(?<variant>dup|del|ins)\s?(ex|x)\s?(?<exons>[0-9]+)|
(x|ex|exon)\s?(?<exons>[0-9]+(-|_)[0-9]+)\s(?<variant>dup|del|ins)/ix.freeze

DEPRECATED_BRCA_NAMES_REGEX = /B1|BR1|BRCA\s1|B2|BR2|BRCA\s2/i.freeze

DELIMETER_REGEX = /[&\n+,;]|and|IFD/i.freeze


def process_fields(record)
# records using new importer should only have SRIs starting with D
return unless record.raw_fields['servicereportidentifier'].start_with?('D')
Expand Down Expand Up @@ -148,6 +152,10 @@ def process_fullscreen_records(genotype, record, positive_genes, genotypes)
else
single_variant_full_screen(genotype, genotypes, positive_genes, record)
end
elsif ucs_variant?(record)
record_basic_full_screen_ucs(genotype, genotypes, positive_genes, record)
else
unknown_status(genotype, genotypes, positive_genes, record)
end
genotypes
end
Expand Down Expand Up @@ -189,6 +197,10 @@ def process_targeted_records(positive_genes, genotype, record, genotypes)
process_failed_targeted(genotype, record, genotypes)
elsif positive_cdna?(record) || positive_exonvariant?(record)
process_positive_targeted(record, positive_genes, genotype, genotypes)
elsif ucs_variant?(record)
record_basic_targeted_ucs(genotype, genotypes, record)
else
unknown_status(genotype, genotypes, positive_genes, record)
end
genotypes
end
Expand Down Expand Up @@ -216,6 +228,49 @@ def process_positive_targeted(record, positive_genes, genotype, genotypes)
end
end

# Handles a full-screen record that mentions 'UCS' without a specific variant;
# such records are known to need teststatus 10 on each named gene.
#
# * exactly one gene  -> delegated to process_single_ucs_gene
# * more than one     -> one genotype per gene, each with status 10
# * no gene at all    -> delegated to create_empty_brca_tests
#
# @param genotype [#dup, #add_gene, #add_status] template genotype for the record
# @param genotypes [Array] accumulator the resulting genotypes are appended to
# @param positive_genes [Array<String>] genes named in the record
# @param _record unused; kept so the signature lines up with the sibling handlers
def record_basic_full_screen_ucs(genotype, genotypes, positive_genes, _record)
  case positive_genes.size
  when 0
    create_empty_brca_tests(genotype, genotypes)
  when 1
    process_single_ucs_gene(positive_genes, genotype, genotypes)
  else
    positive_genes.each do |gene|
      # Work on a copy per gene: reusing the one template object would leave
      # every appended entry pointing at the same genotype (last gene only).
      genotype_dup = genotype.dup
      genotype_dup.add_gene(gene)
      genotype_dup.add_status(10)
      genotypes.append(genotype_dup)
    end
  end
end

# Full-screen UCS record naming a single gene: the named gene gets status 10
# and every BRCA1/BRCA2 gene that was NOT named gets its own status-1 genotype.
#
# The un-named genes are emitted one genotype each. Joining them into a single
# string would produce the non-existent gene name "BRCA1BRCA2" whenever the
# positive gene is something other than BRCA1 or BRCA2.
#
# @param positive_genes [Array<String>] the gene(s) named in the record
# @param genotype [#dup, #add_gene, #add_status] template genotype; it is
#   mutated and appended last as the positive (status 10) entry
# @param genotypes [Array] accumulator the resulting genotypes are appended to
# @return [Array] the genotypes accumulator
def process_single_ucs_gene(positive_genes, genotype, genotypes)
  (%w[BRCA1 BRCA2] - positive_genes).each do |negative_gene|
    genotype_dup = genotype.dup
    genotype_dup.add_gene(negative_gene)
    genotype_dup.add_status(1)
    genotypes.append(genotype_dup)
  end

  genotype.add_gene(positive_genes.join)
  genotype.add_status(10)
  genotypes.append(genotype)
end

# Targeted record flagged as UCS: let process_single_gene work on the genotype
# for this record, then mark it with status 10 and add it to the results.
#
# @param genotype [#add_status] genotype being built for the record
# @param genotypes [Array] accumulator the genotype is appended to
# @param record the record being processed (passed through to process_single_gene)
# @return [Array] the genotypes accumulator
def record_basic_targeted_ucs(genotype, genotypes, record)
  process_single_gene(genotype, record)
  genotypes.append(genotype.tap { |ucs_genotype| ucs_genotype.add_status(10) })
end

# Fallback for records whose genotype field says nothing definitive, although
# a gene may still have been identified.
#
# Ashkenazi, Polish and full-screen records are handed to
# create_empty_brca_tests; anything else yields a single status-4 genotype
# carrying whatever gene text was found.
#
# NOTE(review): positive_genes.join concatenates several genes into one string
# (and gives '' for an empty list) - confirm that is what add_gene expects here.
#
# @param genotype [#add_gene, #add_status] genotype being built for the record
# @param genotypes [Array] accumulator the resulting genotypes are appended to
# @param positive_genes [Array<String>, nil] genes named in the record, if any
# @param record the record being processed
def unknown_status(genotype, genotypes, positive_genes, record)
  panel_or_screen = ashkenazi?(record) || polish?(record) || full_screen?(record)
  return create_empty_brca_tests(genotype, genotypes) if panel_or_screen

  genotype.add_gene(positive_genes.join) unless positive_genes.nil?
  genotype.add_status(4)
  genotypes.append(genotype)
end

# Ordering here is important so duplicate branches are required
# rubocop:disable Lint/DuplicateBranch
def process_single_gene(genotype, record)
Expand Down Expand Up @@ -270,13 +325,39 @@ def process_multiple_positive_variants(positive_genes, genotype, record, genotyp
variants = process_multi_genes_rec(record, positive_genes)
elsif positive_genes.flatten.uniq.size == 1
variants = process_uniq_gene_rec(record, positive_genes)
elsif positive_genes.empty?
process_multi_variants_no_gene(record, genotype, genotypes)
end

add_variants_multiple_results(variants, genotype, genotypes) unless variants.nil?

genotypes
end

# True when the record's raw genotype text contains 'ucs' (case-insensitive).
#
# A missing (nil) genotype is treated as "no UCS mention" rather than raising,
# in line with the nil guard in process_multi_variants_no_gene.
#
# @param record [#raw_fields] record whose 'genotype' raw field is inspected
# @return [Boolean]
def ucs_variant?(record)
  record.raw_fields['genotype'].to_s.match?(/ucs/i)
end

# Record lists variants but names no gene: emit one genotype per cDNA change
# matched by CDNA_REGEX in the raw genotype text. Each copy gets the change as
# its gene location and status 10 when the record is UCS, otherwise status 2.
# Full-screen records additionally go through create_empty_brca_tests.
#
# @param record [#raw_fields] record being processed
# @param genotype [#dup] template genotype copied for every variant
# @param genotypes [Array] accumulator the resulting genotypes are appended to
# @return [nil] when the raw genotype field is nil (nothing is appended)
def process_multi_variants_no_gene(record, genotype, genotypes)
  genotype_text = record.raw_fields['genotype']
  return if genotype_text.nil?

  # scan returns one capture array per match; flatten + compact leaves only
  # the captured cDNA strings, so no further nil check is needed below.
  genotype_text.scan(CDNA_REGEX).flatten.compact.each do |cdna_change|
    variant_genotype = genotype.dup
    variant_genotype.add_gene_location(cdna_change)
    variant_genotype.add_status(ucs_variant?(record) ? 10 : 2)
    genotypes.append(variant_genotype)
  end
  create_empty_brca_tests(genotype, genotypes) if full_screen?(record)
end

# Appends two placeholder genotypes - one for BRCA1, one for BRCA2 - each a
# copy of the given genotype with status 4. The genotype passed in is not
# itself modified or appended.
#
# @param genotype [#dup] template genotype copied for each gene
# @param genotypes [Array] accumulator the copies are appended to
def create_empty_brca_tests(genotype, genotypes)
  %w[BRCA1 BRCA2].each do |brca_gene|
    placeholder = genotype.dup.tap do |blank_test|
      blank_test.add_gene(brca_gene)
      blank_test.add_status(4)
    end
    genotypes.append(placeholder)
  end
end

def process_multi_genes_rec(record, positive_genes)
if record.raw_fields['genotype'].scan(DELIMETER_REGEX).size > 1
variants = process_single_variant(record, positive_genes)
Expand Down Expand Up @@ -360,7 +441,7 @@ def process_exonic_variant(genotype, record)

genotype.add_exon_location($LAST_MATCH_INFO[:exons])
genotype.add_variant_type($LAST_MATCH_INFO[:variant])
genotype.add_status(2)
ucs_variant?(record) ? genotype.add_status(10) : genotype.add_status(2)
@logger.debug "SUCCESSFUL exon variant parse for: #{record.raw_fields['genotype']}"
# end
end
Expand All @@ -369,7 +450,7 @@ def process_cdna_variant(genotype, record)
return unless record.raw_fields['genotype'].scan(CDNA_REGEX).size.positive?

genotype.add_gene_location($LAST_MATCH_INFO[:cdna])
genotype.add_status(2)
ucs_variant?(record) ? genotype.add_status(10) : genotype.add_status(2)
@logger.debug "SUCCESSFUL cdna change parse for: #{$LAST_MATCH_INFO[:cdna]}"
# end
end
Expand All @@ -382,7 +463,7 @@ def process_normal_record(genotype, record)
# True when the record reads as a normal result: either the raw genotype text
# contains one of the normal phrases below (case-insensitive), or it is
# exactly 'N', or the molecular testing type mentions 'unaffected'.
#
# A single pattern is tested; the earlier, narrower pattern was a strict
# subset of it, so checking both was redundant. Nil fields are treated as
# empty text instead of raising.
#
# @param record [#raw_fields] record whose 'genotype' and
#   'moleculartestingtype' raw fields are inspected
# @return [Boolean]
def normal?(record)
  variant = record.raw_fields['genotype'].to_s
  moltesttype = record.raw_fields['moleculartestingtype'].to_s
  variant.match?(%r{NO PATHOGENIC|Normal|N/N|NOT DETECTED|FALSE POSITIVE| N$}i) ||
    variant == 'N' || moltesttype.match?(/unaffected/i)
end

Expand Down Expand Up @@ -434,11 +515,12 @@ def void_genetictestscope?(record)
return if record.raw_fields['moleculartestingtype'].nil?

record.raw_fields['moleculartestingtype'].empty? ||
record.raw_fields['moleculartestingtype'] == 'Store'
record.raw_fields['moleculartestingtype'] == 'Store' ||
record.raw_fields['moleculartestingtype'] == 'Reclassification of previous result'
end
end
# rubocop:enable Metrics/ClassLength
end
end
end
end
end
Loading
Loading