@@ -122,8 +122,13 @@ public SiteAndAlleles(String contig, int start, Allele ref)
 
     public Allele getRenamedAllele(VariantContext vc, Allele toCheck)
     {
-        Allele ret = _encounteredAlleles.get(getEncounteredKey(vc.getStart(), vc.getReference())).get(toCheck);
+        Map<Allele, Allele> map = _encounteredAlleles.get(getEncounteredKey(vc.getStart(), vc.getReference()));
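+        // Fail fast if addSite() never registered this start/reference combination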
+        if (map == null)
+        {
+            throw new IllegalArgumentException("Ref not found: " + vc.toStringWithoutGenotypes());
+        }
 
+        Allele ret = map.get(toCheck);
         return ret == null ? toCheck : ret;
     }
 
@@ -137,14 +142,11 @@ public void addSite(VariantContext vc, Logger log)
         Map<Allele, Allele> alleles = _encounteredAlleles.getOrDefault(getEncounteredKey(vc.getStart(), vc.getReference()), new HashMap<>());
         vc.getAlternateAlleles().forEach(a -> {
             Allele translated = extractAlleleForPosition(vc, a, log);
-            if (translated != null)
-            {
-                if (!alleles.containsKey(a))
-                    alleles.put(a, translated);
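+            // A null translation is now stored as well; getRenamedAllele() falls back to the original allele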
+            if (!alleles.containsKey(a))
+                alleles.put(a, translated);
 
-                if (!_alternates.contains(translated))
-                    _alternates.add(translated);
-            }
+            if (translated != null && !_alternates.contains(translated))
+                _alternates.add(translated);
         });
         _encounteredAlleles.put(getEncounteredKey(vc.getStart(), vc.getReference()), alleles);
     }
@@ -291,7 +293,7 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
         int idx = 0;
         try (CSVWriter writer = new CSVWriter(IOUtil.openFileForBufferedUtf8Writing(output), '\t', CSVWriter.NO_QUOTE_CHARACTER))
         {
-            writer.writeNext(new String[]{"OutputFileId", "ReadsetId", "Contig", "Start", "End", "Ref", "AltAlleles", "OrigRef", "OrigAlts", "Depth", "RefAF", "AltAFs"});
+            writer.writeNext(new String[]{"OutputFileId", "ReadsetId", "Contig", "Start", "End", "Ref", "AltAlleles", "OrigRef", "OrigAlts", "Depth", "RefAF", "AltAFs", "NonRefCount", "AltCounts"});
 
             for (Pair<String, Integer> site : whitelistSites)
             {
@@ -331,6 +333,8 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
                         line.add(String.valueOf(depth));
                         line.add("ND");
                         line.add("ND");
+                        line.add("ND");
+                        line.add("ND");
                     }
                     else
                     {
@@ -339,16 +343,21 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
                         line.add(String.valueOf(depth));
                         line.add("1");
                         line.add(";0".repeat(siteDef._alternates.size()).substring(1));
+
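+                        // NonRefCount and AltCounts default to zero when the site passes the depth filter but has no ALT calls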
+                        line.add("0");
+                        line.add(";0".repeat(siteDef._alternates.size()).substring(1));
                     }
 
                     writer.writeNext(line.toArray(new String[]{}));
                     idx++;
                 }
                 else
                 {
-                    Integer depth = null;
+                    Integer siteDepth = null;
                     Double totalAltAf = 0.0;
+                    Integer totalAltDepth = 0;
                     Map<Allele, Double> alleleToAf = new HashMap<>();
+                    Map<Allele, Integer> alleleToDp = new HashMap<>();
                     Set<Allele> refs = new HashSet<>();
 
                     while (it.hasNext())
@@ -360,50 +369,71 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
                             throw new PipelineJobException("Unexpected variant start. site: " + siteDef._start + " / vc: " + vc.getStart() + " / " + so.getFile().getPath());
                         }
 
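+                        // LoFreq emits a single ALT allele per VCF line, so a multi-allelic record indicates malformed input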
+                        if (vc.getAlternateAlleles().size() > 1)
+                        {
+                            throw new PipelineJobException("Expected LoFreq VCFs to have only one alternate allele per line. line " + key + " in file: " + so.getFile().getPath());
+                        }
+
                         if (vc.getAttribute("GATK_DP") == null)
                         {
                             throw new PipelineJobException("Expected GATK_DP annotation on line " + key + " in file: " + so.getFile().getPath());
                         }
 
+                        if (vc.getAttribute("DP") == null)
+                        {
+                            throw new PipelineJobException("Expected DP annotation on line " + key + " in file: " + so.getFile().getPath());
+                        }
+
                         if (vc.getAttribute("AF") == null)
                         {
                             throw new PipelineJobException("Expected AF annotation on line " + key + " in file: " + so.getFile().getPath());
                         }
 
-                        depth = vc.getAttributeAsInt("GATK_DP", 0);
-                        if (depth < minDepth)
+                        siteDepth = vc.getAttributeAsInt("GATK_DP", 0);
+                        int aDepth = vc.getAttributeAsInt("DP", 0);
+                        if (siteDepth < minDepth)
                         {
                             vc.getAlternateAlleles().forEach(a -> {
                                 Allele translatedAllele = siteDef.getRenamedAllele(vc, a);
                                 if (translatedAllele != null)
                                 {
                                     double val = alleleToAf.getOrDefault(a, NO_DATA_VAL);
                                     alleleToAf.put(a, val);
+
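+                                    // Reuse NO_DATA_VAL (cast to int) as a sentinel so low-depth sites report "ND" rather than 0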
+                                    int val1 = alleleToDp.getOrDefault(a, (int)NO_DATA_VAL);
+                                    alleleToDp.put(a, val1);
                                 }
                             });
                         }
                         else
                         {
-                            List<Double> afs = vc.getAttributeAsDoubleList("AF", 0);
-                            if (afs.size() != vc.getAlternateAlleles().size())
-                            {
-                                throw new PipelineJobException("Expected AF annotation on line " + key + " in file: " + so.getFile().getPath() + " to be of length: " + vc.getAlternateAlleles().size());
-                            }
+                            Double af = vc.getAttributeAsDouble("AF", 0.0);
 
-                            totalAltAf += afs.stream().reduce(0.0, Double::sum);
-                            vc.getAlternateAlleles().forEach(a -> {
-                                int aIdx = vc.getAlternateAlleles().indexOf(a);
+                            Allele a = vc.getAlternateAlleles().get(0);
+                            a = siteDef.getRenamedAllele(vc, a);
+                            if (a != null)
+                            {
+                                totalAltAf += af;
+                                totalAltDepth += aDepth;
 
-                                a = siteDef.getRenamedAllele(vc, a);
                                 double val = alleleToAf.getOrDefault(a, 0.0);
                                 if (val == NO_DATA_VAL)
                                 {
                                     val = 0;
                                 }
 
-                                val = val + afs.get(aIdx);
+                                val = val + af;
                                 alleleToAf.put(a, val);
-                            });
+
+                                int val1 = alleleToDp.getOrDefault(a, 0);
+                                if (val1 == NO_DATA_VAL)
+                                {
+                                    val1 = 0;
+                                }
+
+                                val1 = val1 + aDepth;
+                                alleleToDp.put(a, val1);
+                            }
                         }
                     }
 
@@ -433,22 +463,29 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
 
                                 alleleSets.add(StringUtils.join(siteDef._encounteredAlleles.get(r).keySet(), ","));
                             });
-                            toWrite.add(alleleSets.stream().collect(Collectors.joining(";")));
+                            toWrite.add(StringUtils.join(alleleSets, ";"));
                         }
                     }
 
-                    toWrite.add(String.valueOf(depth));
+                    toWrite.add(String.valueOf(siteDepth));
                     toWrite.add(String.valueOf(1 - totalAltAf));
 
                     //Add AFs in order:
                     List<Object> toAdd = new ArrayList<>();
+                    List<Object> toAddDp = new ArrayList<>();
                     for (Allele a : siteDef._alternates)
                     {
                         double af = alleleToAf.getOrDefault(a, 0.0);
                         toAdd.add(af == NO_DATA_VAL ? "ND" : af);
+
+                        int dp = alleleToDp.getOrDefault(a, 0);
+                        toAddDp.add(dp == NO_DATA_VAL ? "ND" : dp);
                     }
                     toWrite.add(toAdd.stream().map(String::valueOf).collect(Collectors.joining(";")));
 
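+                    // New NonRefCount and AltCounts columns, mirroring the RefAF/AltAFs output above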
+                    toWrite.add(String.valueOf(totalAltDepth));
+                    toWrite.add(toAddDp.stream().map(String::valueOf).collect(Collectors.joining(";")));
+
                     writer.writeNext(toWrite.toArray(new String[]{}));
                     idx++;
                 }