 import org.apache.commons.lang3.math.NumberUtils;
 import org.apache.commons.math3.stat.descriptive.rank.Median;
 import org.apache.logging.log4j.Logger;
-import org.apache.logging.log4j.LogManager;
 import org.biojava3.core.sequence.DNASequence;
 import org.biojava3.core.sequence.compound.AmbiguityDNACompoundSet;
 import org.biojava3.core.sequence.compound.NucleotideCompound;
 import org.labkey.api.util.PageFlowUtil;
 import org.labkey.api.writer.PrintWriters;
 import org.labkey.sequenceanalysis.SequenceAnalysisModule;
+import org.labkey.sequenceanalysis.SequenceAnalysisSchema;
 import org.labkey.sequenceanalysis.run.util.DepthOfCoverageWrapper;
 import org.labkey.sequenceanalysis.run.variant.SNPEffStep;
 import org.labkey.sequenceanalysis.run.variant.SnpEffWrapper;
+import org.labkey.sequenceanalysis.util.ReferenceLibraryHelperImpl;
 
 import java.io.File;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
+import java.util.Date;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.LinkedHashMap;
@@ -137,7 +139,7 @@ public Provider()
                 {{
 
                 }}, false),
-                ToolParameterDescriptor.create("runPangolin", "Run Pangolin", "If selected, Pangolin will be used to score the consensus against common SARS-CoV-2 lineages.", "checkbox", new JSONObject()
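+                // NOTE: the stored parameter name stays "runPangolin" (only the label/description change), presumably so previously saved jobs keep working.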
+                ToolParameterDescriptor.create("runPangolin", "Run Pangolin and NextClade", "If selected, Pangolin and NextClade will be used to score the consensus against common SARS-CoV-2 lineages.", "checkbox", new JSONObject()
                 {{
 
                 }}, false),
@@ -146,8 +148,20 @@ public Provider()
                     put("minValue", 0.0);
                     put("maxValue", 1.0);
                     put("decimalPrecision", 2);
-                }}, 0.1)
-        ), PageFlowUtil.set("sequenceanalysis/field/GenomeFileSelectorField.js"), "http://csb5.github.io/lofreq/");
+                }}, 0.1),
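+                // The three parameters below are new: they control optional import of the called variants into the DB.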
+                ToolParameterDescriptor.create("dbImport", "Import SNVs", "If selected, the LoFreq and pindel variants will be imported into the DB.", "checkbox", new JSONObject()
+                {{
+
+                }}, false),
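+                // AF is LoFreq's allele-frequency INFO field; variants below this fraction are skipped on import (see importNtSnps()).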
+                ToolParameterDescriptor.create("dbImportThreshold", "Variant Import AF Threshold", "If DB import is selected, variants above this AF threshold will be imported.", "ldk-numberfield", new JSONObject()
+                {{
+
+                }}, 0.1),
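+                // Depth is read from the GATK_DP INFO annotation on each site, not LoFreq's own DP (see importNtSnps()).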
+                ToolParameterDescriptor.create("dbImportDepthThreshold", "Variant Import Depth Threshold", "If DB import is selected, variants in a site with coverage greater than or equal to this value will be imported.", "ldk-integerfield", new JSONObject()
+                {{
+
+                }}, 20)
+        ), PageFlowUtil.set("sequenceanalysis/field/GenomeFileSelectorField.js"), "http://csb5.github.io/lofreq/");
     }
 
 
@@ -407,8 +421,8 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc
         int totalIndelGTThreshold = 0;
         int totalConsensusInPBS = 0;
 
-        File loFreqConsensusVcf = new File(outputDir, FileUtil.getBaseName(inputBam) + ".lofreq.consensus.vcf.gz");
-        File loFreqAllVcf = new File(outputDir, FileUtil.getBaseName(inputBam) + ".lofreq.all.vcf.gz");
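+        // File naming now lives in getConsensusVcf()/getAllVcf(), so performAnalysisPerSampleLocal() can locate the same outputs later.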
+        File loFreqConsensusVcf = getConsensusVcf(outputDir, inputBam);
+        File loFreqAllVcf = getAllVcf(outputDir, inputBam);
         Double strandBiasRecoveryAF = getProvider().getParameterByName("strandBiasRecoveryAF").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Double.class, 1.0);
         SAMSequenceDictionary dict = SAMSequenceDictionaryExtractor.extractDictionary(referenceGenome.getSequenceDictionary().toPath());
         VariantContextWriterBuilder writerBuilder1 = new VariantContextWriterBuilder().setOutputFile(loFreqConsensusVcf).setReferenceDictionary(dict);
@@ -591,7 +605,7 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc
         int minDepth = getProvider().getParameterByName("minDepth").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Integer.class, 0);
         int minInsertSize = getProvider().getParameterByName("minInsertSize").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Integer.class, 0);
 
-        boolean runPangolin = getProvider().getParameterByName("runPangolin").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Boolean.class, false);
+        boolean runPangolinAndNextClade = getProvider().getParameterByName("runPangolin").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Boolean.class, false);
         boolean runPindel = getProvider().getParameterByName("runPindel").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Boolean.class, false);
 
         if (runPindel)
@@ -600,10 +614,13 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc
         }
 
         String[] pangolinData = null;
-        if (runPangolin)
+        if (runPangolinAndNextClade)
         {
             PangolinHandler.updatePangolinRefs(getPipelineCtx().getLogger());
             pangolinData = PangolinHandler.runPangolin(consensusFastaLoFreq, getPipelineCtx().getLogger(), output);
+
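+            // Also run NextClade against the same consensus FASTA and register the JSON report as a sequence output; the local step imports it.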
+            File json = NextCladeHandler.runNextClade(consensusFastaLoFreq, getPipelineCtx().getLogger(), output, outputDir);
+            output.addSequenceOutput(json, "Nextclade: " + rs.getName(), "NextClade JSON", rs.getReadsetId(), null, referenceGenome.getGenomeId(), null);
         }
 
         //write metrics:
@@ -640,6 +657,11 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc
         return output;
     }
 
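+    // Path convention for the LoFreq all-sites VCF, shared with performAnalysisPerSampleLocal() below.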
+    private File getAllVcf(File outputDir, File inputBam)
+    {
+        return new File(outputDir, FileUtil.getBaseName(inputBam) + ".lofreq.all.vcf.gz");
+    }
+
     private Set<String> runBcftools(File inputBam, ReferenceGenome referenceGenome, File mask, int minCoverage) throws PipelineJobException
     {
         Set<String> variantsBcftools = new HashSet<>();
@@ -684,9 +706,19 @@ private Set<String> runBcftools(File inputBam, ReferenceGenome referenceGenome,
         return variantsBcftools;
     }
 
+    private File getConsensusVcf(File outputDir, File inputBam)
+    {
+        return new File(outputDir, FileUtil.getBaseName(inputBam) + ".lofreq.consensus.vcf.gz");
+    }
+
+    private File getConsensusFasta(File loFreqConsensusVcf)
+    {
+        return new File(loFreqConsensusVcf.getParentFile(), SequenceAnalysisService.get().getUnzippedBaseName(loFreqConsensusVcf.getName()) + ".fasta");
+    }
+
     private File generateConsensus(File loFreqConsensusVcf, File fasta, File maskBed) throws PipelineJobException
     {
-        File ret = new File(loFreqConsensusVcf.getParentFile(), SequenceAnalysisService.get().getUnzippedBaseName(loFreqConsensusVcf.getName()) + ".fasta");
+        File ret = getConsensusFasta(loFreqConsensusVcf);
         List<String> args = new ArrayList<>();
 
         args.add(SequencePipelineService.get().getExeForPackage("BCFTOOLS", "bcftools").getPath());
@@ -769,8 +801,6 @@ private File getMetricsFile(File outDir)
     @Override
     public Output performAnalysisPerSampleLocal(AnalysisModel model, File inputBam, File referenceFasta, File outDir) throws PipelineJobException
     {
-        Map<String, String> valueMap = new HashMap<>();
-
         File metrics = getMetricsFile(outDir);
         if (metrics.exists())
         {
@@ -819,8 +849,6 @@ public Output performAnalysisPerSampleLocal(AnalysisModel model, File inputBam,
 
             Table.insert(getPipelineCtx().getJob().getUser(), ti, r);
             total++;
-
-            valueMap.put(line[1], value);
         }
 
         getPipelineCtx().getJob().getLogger().info("total metrics: " + total);
@@ -835,9 +863,121 @@ public Output performAnalysisPerSampleLocal(AnalysisModel model, File inputBam,
             throw new PipelineJobException("Unable to find metrics file: " + metrics.getPath());
         }
 
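+        // New: once metrics are saved, optionally import the LoFreq NT variants and the NextClade results into the DB.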
+        boolean dbImport = getProvider().getParameterByName("dbImport").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Boolean.class, false);
+        if (dbImport)
+        {
+            importNtSnps(model, inputBam, outDir);
+        }
+        else
+        {
+            getPipelineCtx().getLogger().info("NT SNP DB Import not selected");
+        }
+
+        boolean runPangolinAndNextClade = getProvider().getParameterByName("runPangolin").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Boolean.class, false);
+        if (runPangolinAndNextClade)
+        {
+            //Find the NextClade json:
+            File jsonFile = NextCladeHandler.getJsonFile(outDir, getConsensusFasta(getConsensusVcf(outDir, inputBam)));
+            if (!jsonFile.exists())
+            {
+                throw new PipelineJobException("Unable to find NextClade JSON record: " + jsonFile.getPath());
+            }
+
+            File vcf = getAllVcf(outDir, inputBam);
+            if (!vcf.exists())
+            {
+                throw new PipelineJobException("Unable to find LoFreq VCF: " + vcf.getPath());
+            }
+
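+            // The LoFreq VCF is passed along, presumably so AA-level NextClade calls can be tied back to the underlying NT variants.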
+            NextCladeHandler.processAndImportNextCladeAa(getPipelineCtx().getJob(), jsonFile, model.getRowId(), model.getLibraryId(), model.getAlignmentFile(), model.getReadset(), vcf, dbImport);
+        }
+        else
+        {
+            getPipelineCtx().getLogger().info("NextClade was not run");
+        }
+
         return null;
     }
 
+    private void importNtSnps(AnalysisModel model, File inputBam, File outDir) throws PipelineJobException
+    {
+        getPipelineCtx().getLogger().info("Importing variants into DB");
+
+        File vcf = getAllVcf(outDir, inputBam);
+        if (!vcf.exists())
+        {
+            throw new PipelineJobException("Unable to find file: " + vcf.getPath());
+        }
+
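+        // The helper resolves contig names from the VCF back to the NT sequence rowids used in the ref_nt_id column.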
+        ReferenceGenome genome = SequenceAnalysisService.get().getReferenceGenome(model.getLibraryId(), getPipelineCtx().getJob().getUser());
+        ReferenceLibraryHelperImpl helper = new ReferenceLibraryHelperImpl(genome.getWorkingFastaFile(), getPipelineCtx().getLogger());
+
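+        // Drop rows from any prior import for this analysis first, so a re-run does not duplicate records.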
+        ViralSnpUtil.deleteExistingValues(getPipelineCtx().getJob(), model.getAnalysisId(), SequenceAnalysisSchema.TABLE_NT_SNP_BY_POS, null);
+        Double afThreshold = getProvider().getParameterByName("dbImportThreshold").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Double.class, 0.0);
+        Integer depthThreshold = getProvider().getParameterByName("dbImportDepthThreshold").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Integer.class, 0);
+
+        TableInfo ti = SequenceAnalysisSchema.getTable(SequenceAnalysisSchema.TABLE_NT_SNP_BY_POS);
+
+        try (VCFFileReader reader = new VCFFileReader(vcf); CloseableIterator<VariantContext> it = reader.iterator())
+        {
+            while (it.hasNext())
+            {
+                VariantContext vc = it.next();
+
+                if (vc.isFiltered())
+                {
+                    continue;
+                }
+
+                double pct = vc.getAttributeAsDouble("AF", 0.0);
+                if (pct < afThreshold)
+                {
+                    continue;
+                }
+
+                int depth = vc.getAttributeAsInt("GATK_DP", 0);
+                if (depth < depthThreshold)
+                {
+                    continue;
+                }
+
+                if (vc.getAlternateAlleles().size() != 1)
+                {
+                    throw new PipelineJobException("Expected a single ALT allele");
+                }
+
+                Map<String, Object> ntRow = new HashMap<>();
+                ntRow.put("analysis_id", model.getAnalysisId());
+
+                Integer refId = helper.resolveSequenceId(vc.getContig());
+                if (refId == null)
+                {
+                    getPipelineCtx().getLogger().error("unknown reference id: [" + vc.getContig() + "]");
+                }
+
+                ntRow.put("ref_nt_id", refId);
+                ntRow.put("ref_nt_position", vc.getStart()); //use 1-based
+                ntRow.put("ref_nt_insert_index", 0);
+                ntRow.put("ref_nt", vc.getReference().getBaseString());
+                ntRow.put("q_nt", vc.getAlternateAllele(0).getBaseString());
+
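+                // DP4 = high-quality (ref-fwd, ref-rev, alt-fwd, alt-rev) read counts; the last two sum to the ALT read count.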
+                List<Integer> depths = vc.getAttributeAsIntList("DP4", 0);
+                int alleleDepth = depths.get(2) + depths.get(3);
+                ntRow.put("readcount", alleleDepth);
+                ntRow.put("depth", depth);
+                ntRow.put("adj_depth", vc.getAttribute("DP"));
+                ntRow.put("pct", vc.getAttribute("AF"));
+                ntRow.put("container", getPipelineCtx().getJob().getContainer().getEntityId());
+                ntRow.put("createdby", getPipelineCtx().getJob().getUser().getUserId());
+                ntRow.put("modifiedby", getPipelineCtx().getJob().getUser().getUserId());
+                ntRow.put("created", new Date());
+                ntRow.put("modified", new Date());
+
+                Table.insert(getPipelineCtx().getJob().getUser(), ti, ntRow);
+            }
+        }
+    }
+
     private String getHashKey(VariantContext vc)
     {
         return vc.getContig() + "<>" + vc.getStart() + vc.getAlternateAlleles().stream().map(Allele::getBaseString).collect(Collectors.joining(";"));