Skip to content

Commit 745b927

Browse files
committed
Track AF and depth when merging LoFreq VCFs
1 parent 7946672 commit 745b927

File tree

1 file changed

+62
-25
lines changed

1 file changed

+62
-25
lines changed

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/MergeLoFreqVcfHandler.java

Lines changed: 62 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -122,8 +122,13 @@ public SiteAndAlleles(String contig, int start, Allele ref)
122122

123123
public Allele getRenamedAllele(VariantContext vc, Allele toCheck)
124124
{
125-
Allele ret = _encounteredAlleles.get(getEncounteredKey(vc.getStart(), vc.getReference())).get(toCheck);
125+
Map<Allele, Allele> map = _encounteredAlleles.get(getEncounteredKey(vc.getStart(), vc.getReference()));
126+
if (map == null)
127+
{
128+
throw new IllegalArgumentException("Ref not found: " + vc.toStringWithoutGenotypes());
129+
}
126130

131+
Allele ret = map.get(toCheck);
127132
return ret == null ? toCheck : ret;
128133
}
129134

@@ -137,14 +142,11 @@ public void addSite(VariantContext vc, Logger log)
137142
Map<Allele, Allele> alleles = _encounteredAlleles.getOrDefault(getEncounteredKey(vc.getStart(), vc.getReference()), new HashMap<>());
138143
vc.getAlternateAlleles().forEach(a -> {
139144
Allele translated = extractAlleleForPosition(vc, a, log);
140-
if (translated != null)
141-
{
142-
if (!alleles.containsKey(a))
143-
alleles.put(a, translated);
145+
if (!alleles.containsKey(a))
146+
alleles.put(a, translated);
144147

145-
if (!_alternates.contains(translated))
146-
_alternates.add(translated);
147-
}
148+
if (translated != null && !_alternates.contains(translated))
149+
_alternates.add(translated);
148150
});
149151
_encounteredAlleles.put(getEncounteredKey(vc.getStart(), vc.getReference()), alleles);
150152
}
@@ -291,7 +293,7 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
291293
int idx = 0;
292294
try (CSVWriter writer = new CSVWriter(IOUtil.openFileForBufferedUtf8Writing(output), '\t', CSVWriter.NO_QUOTE_CHARACTER))
293295
{
294-
writer.writeNext(new String[]{"OutputFileId", "ReadsetId", "Contig", "Start", "End", "Ref", "AltAlleles", "OrigRef", "OrigAlts", "Depth", "RefAF", "AltAFs"});
296+
writer.writeNext(new String[]{"OutputFileId", "ReadsetId", "Contig", "Start", "End", "Ref", "AltAlleles", "OrigRef", "OrigAlts", "Depth", "RefAF", "AltAFs", "NonRefCount", "AltCounts"});
295297

296298
for (Pair<String, Integer> site : whitelistSites)
297299
{
@@ -331,6 +333,8 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
331333
line.add(String.valueOf(depth));
332334
line.add("ND");
333335
line.add("ND");
336+
line.add("ND");
337+
line.add("ND");
334338
}
335339
else
336340
{
@@ -339,16 +343,21 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
339343
line.add(String.valueOf(depth));
340344
line.add("1");
341345
line.add(";0".repeat(siteDef._alternates.size()).substring(1));
346+
347+
line.add("0");
348+
line.add(";0".repeat(siteDef._alternates.size()).substring(1));
342349
}
343350

344351
writer.writeNext(line.toArray(new String[]{}));
345352
idx++;
346353
}
347354
else
348355
{
349-
Integer depth = null;
356+
Integer siteDepth = null;
350357
Double totalAltAf = 0.0;
358+
Integer totalAltDepth = 0;
351359
Map<Allele, Double> alleleToAf = new HashMap<>();
360+
Map<Allele, Integer> alleleToDp = new HashMap<>();
352361
Set<Allele> refs = new HashSet<>();
353362

354363
while (it.hasNext())
@@ -360,50 +369,71 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
360369
throw new PipelineJobException("Unexpected variant start. site: " + siteDef._start + " / vc: " + vc.getStart() + " / " + so.getFile().getPath());
361370
}
362371

372+
if (vc.getAlternateAlleles().size() > 1)
373+
{
374+
throw new PipelineJobException("Expected LoFreq VCFs to have only one alternate allele per line. line " + key + " in file: " + so.getFile().getPath());
375+
}
376+
363377
if (vc.getAttribute("GATK_DP") == null)
364378
{
365379
throw new PipelineJobException("Expected GATK_DP annotation on line " + key + " in file: " + so.getFile().getPath());
366380
}
367381

382+
if (vc.getAttribute("DP") == null)
383+
{
384+
throw new PipelineJobException("Expected DP annotation on line " + key + " in file: " + so.getFile().getPath());
385+
}
386+
368387
if (vc.getAttribute("AF") == null)
369388
{
370389
throw new PipelineJobException("Expected AF annotation on line " + key + " in file: " + so.getFile().getPath());
371390
}
372391

373-
depth = vc.getAttributeAsInt("GATK_DP", 0);
374-
if (depth < minDepth)
392+
siteDepth = vc.getAttributeAsInt("GATK_DP", 0);
393+
int aDepth = vc.getAttributeAsInt("DP", 0);
394+
if (siteDepth < minDepth)
375395
{
376396
vc.getAlternateAlleles().forEach(a -> {
377397
Allele translatedAllele = siteDef.getRenamedAllele(vc, a);
378398
if (translatedAllele != null)
379399
{
380400
double val = alleleToAf.getOrDefault(a, NO_DATA_VAL);
381401
alleleToAf.put(a, val);
402+
403+
int val1 = alleleToDp.getOrDefault(a, (int)NO_DATA_VAL);
404+
alleleToDp.put(a, val1);
382405
}
383406
});
384407
}
385408
else
386409
{
387-
List<Double> afs = vc.getAttributeAsDoubleList("AF", 0);
388-
if (afs.size() != vc.getAlternateAlleles().size())
389-
{
390-
throw new PipelineJobException("Expected AF annotation on line " + key + " in file: " + so.getFile().getPath() + " to be of length: " + vc.getAlternateAlleles().size());
391-
}
410+
Double af = vc.getAttributeAsDouble("AF", 0.0);
392411

393-
totalAltAf += afs.stream().reduce(0.0, Double::sum);
394-
vc.getAlternateAlleles().forEach(a -> {
395-
int aIdx = vc.getAlternateAlleles().indexOf(a);
412+
Allele a = vc.getAlternateAlleles().get(0);
413+
a = siteDef.getRenamedAllele(vc, a);
414+
if (a != null)
415+
{
416+
totalAltAf += af;
417+
totalAltDepth += aDepth;
396418

397-
a = siteDef.getRenamedAllele(vc, a);
398419
double val = alleleToAf.getOrDefault(a, 0.0);
399420
if (val == NO_DATA_VAL)
400421
{
401422
val = 0;
402423
}
403424

404-
val = val + afs.get(aIdx);
425+
val = val + af;
405426
alleleToAf.put(a, val);
406-
});
427+
428+
int val1 = alleleToDp.getOrDefault(a, 0);
429+
if (val1 == NO_DATA_VAL)
430+
{
431+
val1 = 0;
432+
}
433+
434+
val1 = val1 + aDepth;
435+
alleleToDp.put(a, val1);
436+
}
407437
}
408438
}
409439

@@ -433,22 +463,29 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
433463

434464
alleleSets.add(StringUtils.join(siteDef._encounteredAlleles.get(r).keySet(), ","));
435465
});
436-
toWrite.add(alleleSets.stream().collect(Collectors.joining(";")));
466+
toWrite.add(StringUtils.join(alleleSets, ";"));
437467
}
438468
}
439469

440-
toWrite.add(String.valueOf(depth));
470+
toWrite.add(String.valueOf(siteDepth));
441471
toWrite.add(String.valueOf(1 - totalAltAf));
442472

443473
//Add AFs in order:
444474
List<Object> toAdd = new ArrayList<>();
475+
List<Object> toAddDp = new ArrayList<>();
445476
for (Allele a : siteDef._alternates)
446477
{
447478
double af = alleleToAf.getOrDefault(a, 0.0);
448479
toAdd.add(af == NO_DATA_VAL ? "ND" : af);
480+
481+
int dp = alleleToDp.getOrDefault(a, 0);
482+
toAddDp.add(dp == NO_DATA_VAL ? "ND" : dp);
449483
}
450484
toWrite.add(toAdd.stream().map(String::valueOf).collect(Collectors.joining(";")));
451485

486+
toWrite.add(String.valueOf(totalAltDepth));
487+
toWrite.add(toAddDp.stream().map(String::valueOf).collect(Collectors.joining(";")));
488+
452489
writer.writeNext(toWrite.toArray(new String[]{}));
453490
idx++;
454491
}

0 commit comments

Comments
 (0)