Skip to content

Commit afd39f6

Browse files
committed
Restore indels to merged lofreq table
1 parent d2c0657 commit afd39f6

File tree

1 file changed

+24
-11
lines changed

1 file changed

+24
-11
lines changed

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/MergeLoFreqVcfHandler.java

Lines changed: 24 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,7 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
180180

181181
ctx.getLogger().info("Pass 1: Building whitelist of sites/alleles");
182182
Set<String> uniqueIndels = new HashSet<>();
183-
Map<String, SiteAndAlleles> siteToAlleleNoIndel = new HashMap<>();
183+
Map<String, SiteAndAlleles> siteToAllele = new HashMap<>();
184184
List<Pair<String, Integer>> whitelistSites = new ArrayList<>();
185185

186186
Set<Integer> genomeIds = new HashSet<>();
@@ -236,17 +236,17 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
236236
}
237237

238238
double af = vc.getAttributeAsDouble("AF", 0.0);
239-
if (af >= minAfThreshold && !vc.isIndel())
239+
if ((!vc.isIndel() && af >= minAfThreshold) || (vc.isIndel() && af >= minIndelAfThreshold))
240240
{
241241
String key = getCacheKey(vc.getContig(), vc.getStart());
242-
SiteAndAlleles site = siteToAlleleNoIndel.containsKey(key) ? siteToAlleleNoIndel.get(key) : new SiteAndAlleles(vc.getContig(), vc.getStart(), vc.getReference());
243-
if (!siteToAlleleNoIndel.containsKey(key))
242+
SiteAndAlleles site = siteToAllele.containsKey(key) ? siteToAllele.get(key) : new SiteAndAlleles(vc.getContig(), vc.getStart(), vc.getReference());
243+
if (!siteToAllele.containsKey(key))
244244
{
245245
whitelistSites.add(Pair.of(vc.getContig(), vc.getStart()));
246246
}
247247

248248
site.addSite(vc, ctx.getLogger());
249-
siteToAlleleNoIndel.put(key, site);
249+
siteToAllele.put(key, site);
250250
}
251251

252252
if (af >= minIndelAfThreshold && vc.isIndel())
@@ -361,7 +361,7 @@ else if ("I".equals(line[0]))
361361
Map<String, Integer> contigToOffset = getContigToOffset(dict);
362362

363363
//Write whitelist as VCF, then run SNPEff:
364-
runSnpEff(ctx, siteToAlleleNoIndel, whitelistSites, uniqueIndels, genome, basename);
364+
runSnpEff(ctx, siteToAllele, whitelistSites, uniqueIndels, genome, basename);
365365

366366
ctx.getLogger().info("Pass 2: Building merged table");
367367

@@ -394,7 +394,7 @@ else if ("I".equals(line[0]))
394394
line.add(String.valueOf(so.getReadset()));
395395

396396
String key = getCacheKey(site.getLeft(), site.getRight());
397-
SiteAndAlleles siteDef = siteToAlleleNoIndel.get(key);
397+
SiteAndAlleles siteDef = siteToAllele.get(key);
398398
if (siteDef._alternates.isEmpty())
399399
{
400400
continue;
@@ -444,7 +444,7 @@ else if ("I".equals(line[0]))
444444
while (it.hasNext())
445445
{
446446
VariantContext vc = it.next();
447-
if (vc.isIndel() || vc.isFiltered())
447+
if (vc.isFiltered())
448448
{
449449
continue;
450450
}
@@ -726,18 +726,19 @@ private VCFFileReader getReader(File f)
726726
return readerMap.get(f);
727727
}
728728

729-
private void runSnpEff(JobContext ctx, Map<String, SiteAndAlleles> siteToAlleleNoIndel, List<Pair<String, Integer>> whitelistSites, Set<String> uniqueIndels, ReferenceGenome genome, String basename) throws PipelineJobException
729+
private void runSnpEff(JobContext ctx, Map<String, SiteAndAlleles> siteToAllele, List<Pair<String, Integer>> whitelistSites, Set<String> uniqueIndels, ReferenceGenome genome, String basename) throws PipelineJobException
730730
{
731731
File vcfOut = new File(ctx.getOutputDir(), "whitelistSites.vcf.gz");
732732
VariantContextWriterBuilder vcb = new VariantContextWriterBuilder();
733733
vcb.setOutputFile(vcfOut);
734734

735735
//This set shouldnt be huge, so try in memory:
736736
List<VariantContext> snpEffSites = new ArrayList<>();
737+
Set<String> indelsAdded = new HashSet<>();
737738
for (Pair<String, Integer> site : whitelistSites)
738739
{
739740
String key = getCacheKey(site.getLeft(), site.getRight());
740-
Processor.SiteAndAlleles siteDef = siteToAlleleNoIndel.get(key);
741+
Processor.SiteAndAlleles siteDef = siteToAllele.get(key);
741742

742743
for (String a : siteDef._alternates)
743744
{
@@ -753,12 +754,24 @@ private void runSnpEff(JobContext ctx, Map<String, SiteAndAlleles> siteToAlleleN
753754
List<Allele> alleles = Arrays.asList(siteDef._ref, Allele.create(a));
754755
b.alleles(alleles);
755756
b.computeEndFromAlleles(alleles, siteDef._start);
756-
snpEffSites.add(b.make());
757+
758+
VariantContext vc = b.make();
759+
if (vc.isIndel())
760+
{
761+
indelsAdded.add(vc.getContig() + "<>" + vc.getStart() + "<>" + vc.getReference().getBaseString() + "<>" + vc.getAlternateAllele(0).getBaseString());
762+
}
763+
764+
snpEffSites.add(vc);
757765
}
758766
}
759767

760768
for (String allele : uniqueIndels)
761769
{
770+
if (indelsAdded.contains(allele))
771+
{
772+
continue;
773+
}
774+
762775
String[] tokens = allele.split("<>");
763776

764777
VariantContextBuilder b = new VariantContextBuilder();

0 commit comments

Comments
 (0)