55import htsjdk .samtools .util .CloseableIterator ;
66import htsjdk .samtools .util .IOUtil ;
77import htsjdk .samtools .util .Interval ;
8+ import htsjdk .variant .variantcontext .Allele ;
89import htsjdk .variant .variantcontext .VariantContext ;
910import htsjdk .variant .vcf .VCFFileReader ;
1011import org .apache .log4j .Logger ;
3738import java .util .ArrayList ;
3839import java .util .Arrays ;
3940import java .util .Collections ;
41+ import java .util .HashMap ;
42+ import java .util .HashSet ;
4043import java .util .List ;
44+ import java .util .Map ;
45+ import java .util .Set ;
46+ import java .util .stream .Collectors ;
4147
4248public class LofreqAnalysis extends AbstractCommandPipelineStep <LofreqAnalysis .LofreqWrapper > implements AnalysisStep
4349{
@@ -118,6 +124,7 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc
118124 int totalVariants = 0 ;
119125 int totalGT1 = 0 ;
120126 int totalGT50 = 0 ;
127+ Map <String , Double > alleleToAF = new HashMap <>();
121128 int totalIndelGT1 = 0 ;
122129 try (VCFFileReader reader = new VCFFileReader (outputVcfSnpEff );CloseableIterator <VariantContext > it = reader .iterator ())
123130 {
@@ -137,6 +144,10 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc
137144 if (vc .hasAttribute ("AF" ) && vc .getAttributeAsDouble ("AF" , 0.0 ) > 0.5 )
138145 {
139146 totalGT50 ++;
147+ String key = getHashKey (vc );
148+ Double af = alleleToAF .getOrDefault (key , 0.0 );
149+ af = Math .max (af , vc .getAttributeAsDouble ("AF" , 0.0 ));
150+ alleleToAF .put (key , af );
140151 }
141152 }
142153 }
@@ -228,10 +239,51 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc
228239 getPipelineCtx ().getLogger ().info ("Total intervals of these gaps: " + gapIntervals );
229240
230241 consensusWrapper .execute (Arrays .asList ("/bin/bash" , script .getPath (), inputBam .getPath (), referenceGenome .getWorkingFastaFile ().getPath (), mask .getPath ()));
242+ File calls = new File (inputBam .getParentFile (), FileUtil .getBaseName (inputBam ) + ".calls.vcf.gz" );
243+
244+ Set <VariantContext > variantsBcftoolsOnly = new HashSet <>();
245+ try (VCFFileReader reader = new VCFFileReader (calls );CloseableIterator <VariantContext > it = reader .iterator ())
246+ {
247+ while (it .hasNext ())
248+ {
249+ VariantContext vc = it .next ();
250+ String key = getHashKey (vc );
251+ if (alleleToAF .containsKey (key ))
252+ {
253+ //Variant shared
254+ alleleToAF .remove (key );
255+ }
256+ else
257+ {
258+ variantsBcftoolsOnly .add (vc );
259+ }
260+ }
261+ }
231262
232263 String description = String .format ("Total Variants: %s\n Total GT 1 PCT: %s\n Total GT 50 PCT: %s\n Total Indel GT 1 PCT: %s" , totalVariants , totalGT1 , totalGT50 , totalIndelGT1 );
264+
265+ if (!variantsBcftoolsOnly .isEmpty ())
266+ {
267+ getPipelineCtx ().getLogger ().error ("The following variants were in bcftools, but not GT50% in lofreq: " );
268+ variantsBcftoolsOnly .forEach (vc -> getPipelineCtx ().getLogger ().error (getHashKey (vc )));
269+
270+ description += "\n " + "WARNING: " + variantsBcftoolsOnly .size () + " variants detected in bcftools and not lofreq" ;
271+ }
272+
273+ if (!alleleToAF .isEmpty ())
274+ {
275+ getPipelineCtx ().getLogger ().error ("The following variants were GT50% in lofreq, but not in bcftools: " );
276+ alleleToAF .keySet ().forEach (vc -> getPipelineCtx ().getLogger ().error (vc ));
277+
278+ description += "\n " + "WARNING: " + alleleToAF .size () + " variants detected in lofreq and not bcftools" ;
279+ }
280+
281+
233282 output .addSequenceOutput (outputVcfSnpEff , "LoFreq: " + rs .getName (), CATEGORY , rs .getReadsetId (), null , referenceGenome .getGenomeId (), description );
234- output .addSequenceOutput (coverageOut , "Depth of Coverage: " + rs .getName (), "Depth of Coverage" , rs .getReadsetId (), null , referenceGenome .getGenomeId (), description );
283+ output .addSequenceOutput (coverageOut , "Depth of Coverage: " + rs .getName (), "Depth of Coverage" , rs .getReadsetId (), null , referenceGenome .getGenomeId (), null );
284+
285+ File consensusFasta = new File (inputBam .getParentFile (), FileUtil .getBaseName (inputBam .getName ()) + ".consensus.fasta" );
286+ output .addSequenceOutput (consensusFasta , "Consensus: " + rs .getName (), "Viral Consensus Sequence" , rs .getReadsetId (), null , referenceGenome .getGenomeId (), description );
235287
236288 return output ;
237289 }
@@ -242,6 +294,11 @@ public Output performAnalysisPerSampleLocal(AnalysisModel model, File inputBam,
242294 return null ;
243295 }
244296
297+ private String getHashKey (VariantContext vc )
298+ {
299+ return vc .getContig () + "<>" + vc .getStart () + vc .getAlternateAlleles ().stream ().map (Allele ::getBaseString ).collect (Collectors .joining (";" ));
300+ }
301+
245302 public static class LofreqWrapper extends AbstractCommandWrapper
246303 {
247304 public LofreqWrapper (Logger log )
0 commit comments