@@ -466,9 +466,10 @@ else if ("I".equals(line[0]))
466466
467467 File output = new File (ctx .getOutputDir (), basename + "txt.gz" );
468468 int idx = 0 ;
469+ int totalAdjusted = 0 ;
469470 try (CSVWriter writer = new CSVWriter (new BufferedWriter (new OutputStreamWriter (new GZIPOutputStream (new FileOutputStream (output )), StringUtilsLabKey .DEFAULT_CHARSET )), '\t' , CSVWriter .NO_QUOTE_CHARACTER ))
470471 {
471- writer .writeNext (new String []{"ReadsetName" , "OutputFileId" , "ReadsetId" , "Contig" , "Start" , "End" , "Ref" , "AltAlleles" , "GatkDepth" , "LoFreqDepth" , "RefAF" , "AltAFs" , "NonRefCount" , "AltCounts" });
472+ writer .writeNext (new String []{"ReadsetName" , "OutputFileId" , "ReadsetId" , "Contig" , "Start" , "End" , "Ref" , "AltAlleles" , "GatkDepth" , "LoFreqDepth" , "RefAF" , "AltAFs" , "NonRefCount" , "AltCounts" , "OrigAltAFs" });
472473
473474 int siteIdx = 0 ;
474475 for (Pair <String , Integer > site : whitelistSites )
@@ -513,19 +514,22 @@ else if ("I".equals(line[0]))
513514 {
514515 line .add (String .valueOf (depth ));
515516 line .add ("ND" );
516- line .add ("ND" );
517- line .add ("ND" );
518- line .add ("ND" );
517+ line .add ("ND" ); //RefAF
518+ line .add ("ND" ); //AltAFs
519+ line .add ("ND" ); //Non-Ref Count
520+ line .add ("ND" ); //AltCounts
521+ line .add ("ND" ); //Orig AFs
519522 }
520523 else
521524 {
522525 line .add (String .valueOf (depth ));
523526 line .add ("ND" );
524- line .add ("1" );
525- line .add (";0" .repeat (siteDef ._alternates .size ()).substring (1 ));
527+ line .add ("1" ); //RefAF
528+ line .add (";0" .repeat (siteDef ._alternates .size ()).substring (1 )); //AltAFs
526529
527- line .add ("0" );
528- line .add (";0" .repeat (siteDef ._alternates .size ()).substring (1 ));
530+ line .add ("0" ); //NonRefCount
531+ line .add (";0" .repeat (siteDef ._alternates .size ()).substring (1 )); //AltCounts
532+ line .add (";0" .repeat (siteDef ._alternates .size ()).substring (1 )); //Orig AFs
529533 }
530534
531535 writer .writeNext (line .toArray (new String []{}));
@@ -620,11 +624,13 @@ else if ("I".equals(line[0]))
620624 //NOTE: because an upstream deletion could be merged with this site (and their depths / AFs computed slightly differently), recalculate here:
621625
622626 //First establish true ALT depth, which could include upstream deletions:
627+ Map <String , Double > alleleToOrigAf = new HashMap <>();
623628 int totalAltDepth = 0 ;
624629 double totalAltAf = 0.0 ;
625630 for (String a : siteDef ._alternates )
626631 {
627632 double af = alleleToAf .getOrDefault (a , 0.0 );
633+ alleleToOrigAf .put (a , af );
628634 int dp = alleleToDp .getOrDefault (a , 0 );
629635
630636 if (dp == (int )NO_DATA_VAL )
@@ -638,7 +644,12 @@ else if ("I".equals(line[0]))
638644 double diff = Math .abs (adjAF - af );
639645 if (diff > 0.01 )
640646 {
641- ctx .getLogger ().warn ("Significant AF adjustment: readset: " + line .get (0 ) + " / site: " + line .get (4 ) + " / allele: " + a + " / AF: " + af + " / New AF: " + adjAF + " / diff: " + diff + " / gatk depth: " + gatkDepth + " / lofreq depth: " + lofreqDepth );
647+ totalAdjusted ++;
648+
649+ if (diff > 0.05 )
650+ {
651+ ctx .getLogger ().warn ("Significant AF adjustment: readset: " + line .get (0 ) + " / site: " + line .get (4 ) + " / allele: " + a + " / AF: " + af + " / New AF: " + adjAF + " / diff: " + diff + " / gatk depth: " + gatkDepth + " / lofreq depth: " + lofreqDepth );
652+ }
642653 }
643654
644655 totalAltAf += adjAF ;
@@ -655,18 +666,23 @@ else if ("I".equals(line[0]))
655666 //Add AFs in order:
656667 List <Object > toAdd = new ArrayList <>();
657668 List <Object > toAddDp = new ArrayList <>();
669+ List <Object > toAddAfOrig = new ArrayList <>();
658670 for (String a : siteDef ._alternates )
659671 {
660672 double af = alleleToAf .getOrDefault (a , 0.0 );
661673 toAdd .add (af == NO_DATA_VAL ? "ND" : af );
662674
675+ double afOrig = alleleToOrigAf .getOrDefault (a , 0.0 );
676+ toAddAfOrig .add (afOrig == NO_DATA_VAL ? "ND" : afOrig );
677+
663678 int dp = alleleToDp .getOrDefault (a , 0 );
664679 toAddDp .add (dp == NO_DATA_VAL ? "ND" : dp );
665680 }
666681 toWrite .add (toAdd .stream ().map (String ::valueOf ).collect (Collectors .joining (";" )));
667682
668683 toWrite .add (String .valueOf (totalAltDepth ));
669684 toWrite .add (toAddDp .stream ().map (String ::valueOf ).collect (Collectors .joining (";" )));
685+ toWrite .add (toAddAfOrig .stream ().map (String ::valueOf ).collect (Collectors .joining (";" )));
670686
671687 writer .writeNext (toWrite .toArray (new String []{}));
672688 idx ++;
@@ -697,6 +713,7 @@ else if ("I".equals(line[0]))
697713 }
698714 }
699715
716+ ctx .getLogger ().info ("total sites with AF adjusted >0.01%: " + totalAdjusted );
700717 ctx .getFileManager ().addSequenceOutput (output , "Merged LoFreq Variants: " + inputFiles .size () + " VCFs" , "Merged LoFreq Variant Table" , null , null , genome .getGenomeId (), null );
701718 }
702719
0 commit comments