@@ -180,7 +180,7 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
180180
181181 ctx .getLogger ().info ("Pass 1: Building whitelist of sites/alleles" );
182182 Set <String > uniqueIndels = new HashSet <>();
183- Map <String , SiteAndAlleles > siteToAlleleNoIndel = new HashMap <>();
183+ Map <String , SiteAndAlleles > siteToAllele = new HashMap <>();
184184 List <Pair <String , Integer >> whitelistSites = new ArrayList <>();
185185
186186 Set <Integer > genomeIds = new HashSet <>();
@@ -236,17 +236,17 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
236236 }
237237
238238 double af = vc .getAttributeAsDouble ("AF" , 0.0 );
239- if (af >= minAfThreshold && ! vc .isIndel ())
239+ if ((! vc . isIndel () && af >= minAfThreshold ) || ( vc .isIndel () && af >= minIndelAfThreshold ))
240240 {
241241 String key = getCacheKey (vc .getContig (), vc .getStart ());
242- SiteAndAlleles site = siteToAlleleNoIndel .containsKey (key ) ? siteToAlleleNoIndel .get (key ) : new SiteAndAlleles (vc .getContig (), vc .getStart (), vc .getReference ());
243- if (!siteToAlleleNoIndel .containsKey (key ))
242+ SiteAndAlleles site = siteToAllele .containsKey (key ) ? siteToAllele .get (key ) : new SiteAndAlleles (vc .getContig (), vc .getStart (), vc .getReference ());
243+ if (!siteToAllele .containsKey (key ))
244244 {
245245 whitelistSites .add (Pair .of (vc .getContig (), vc .getStart ()));
246246 }
247247
248248 site .addSite (vc , ctx .getLogger ());
249- siteToAlleleNoIndel .put (key , site );
249+ siteToAllele .put (key , site );
250250 }
251251
252252 if (af >= minIndelAfThreshold && vc .isIndel ())
@@ -361,7 +361,7 @@ else if ("I".equals(line[0]))
361361 Map <String , Integer > contigToOffset = getContigToOffset (dict );
362362
363363 //Write whitelist as VCF, then run SNPEff:
364- runSnpEff (ctx , siteToAlleleNoIndel , whitelistSites , uniqueIndels , genome , basename );
364+ runSnpEff (ctx , siteToAllele , whitelistSites , uniqueIndels , genome , basename );
365365
366366 ctx .getLogger ().info ("Pass 2: Building merged table" );
367367
@@ -394,7 +394,7 @@ else if ("I".equals(line[0]))
394394 line .add (String .valueOf (so .getReadset ()));
395395
396396 String key = getCacheKey (site .getLeft (), site .getRight ());
397- SiteAndAlleles siteDef = siteToAlleleNoIndel .get (key );
397+ SiteAndAlleles siteDef = siteToAllele .get (key );
398398 if (siteDef ._alternates .isEmpty ())
399399 {
400400 continue ;
@@ -444,7 +444,7 @@ else if ("I".equals(line[0]))
444444 while (it .hasNext ())
445445 {
446446 VariantContext vc = it .next ();
447- if (vc .isIndel () || vc . isFiltered ())
447+ if (vc .isFiltered ())
448448 {
449449 continue ;
450450 }
@@ -726,18 +726,19 @@ private VCFFileReader getReader(File f)
726726 return readerMap .get (f );
727727 }
728728
729- private void runSnpEff (JobContext ctx , Map <String , SiteAndAlleles > siteToAlleleNoIndel , List <Pair <String , Integer >> whitelistSites , Set <String > uniqueIndels , ReferenceGenome genome , String basename ) throws PipelineJobException
729+ private void runSnpEff (JobContext ctx , Map <String , SiteAndAlleles > siteToAllele , List <Pair <String , Integer >> whitelistSites , Set <String > uniqueIndels , ReferenceGenome genome , String basename ) throws PipelineJobException
730730 {
731731 File vcfOut = new File (ctx .getOutputDir (), "whitelistSites.vcf.gz" );
732732 VariantContextWriterBuilder vcb = new VariantContextWriterBuilder ();
733733 vcb .setOutputFile (vcfOut );
734734
735735 //This set shouldnt be huge, so try in memory:
736736 List <VariantContext > snpEffSites = new ArrayList <>();
737+ Set <String > indelsAdded = new HashSet <>();
737738 for (Pair <String , Integer > site : whitelistSites )
738739 {
739740 String key = getCacheKey (site .getLeft (), site .getRight ());
740- Processor .SiteAndAlleles siteDef = siteToAlleleNoIndel .get (key );
741+ Processor .SiteAndAlleles siteDef = siteToAllele .get (key );
741742
742743 for (String a : siteDef ._alternates )
743744 {
@@ -753,12 +754,24 @@ private void runSnpEff(JobContext ctx, Map<String, SiteAndAlleles> siteToAlleleN
753754 List <Allele > alleles = Arrays .asList (siteDef ._ref , Allele .create (a ));
754755 b .alleles (alleles );
755756 b .computeEndFromAlleles (alleles , siteDef ._start );
756- snpEffSites .add (b .make ());
757+
758+ VariantContext vc = b .make ();
759+ if (vc .isIndel ())
760+ {
761+ indelsAdded .add (vc .getContig () + "<>" + vc .getStart () + "<>" + vc .getReference ().getBaseString () + "<>" + vc .getAlternateAllele (0 ).getBaseString ());
762+ }
763+
764+ snpEffSites .add (vc );
757765 }
758766 }
759767
760768 for (String allele : uniqueIndels )
761769 {
770+ if (indelsAdded .contains (allele ))
771+ {
772+ continue ;
773+ }
774+
762775 String [] tokens = allele .split ("<>" );
763776
764777 VariantContextBuilder b = new VariantContextBuilder ();
0 commit comments