88import htsjdk .samtools .reference .IndexedFastaSequenceFile ;
99import htsjdk .samtools .reference .ReferenceSequence ;
1010import htsjdk .variant .utils .SAMSequenceDictionaryExtractor ;
11+ import org .apache .commons .lang .StringUtils ;
1112import org .json .JSONObject ;
1213import org .labkey .api .pipeline .PipelineJobException ;
1314import org .labkey .api .reader .Readers ;
@@ -308,7 +309,7 @@ private static void parsePindelOutput(PipelineContext ctx, CSVWriter writer, Fil
308309
309310 //NOTE: this is the indel region itself, no flanking. so for a deletion with REF/ALT of ATTC / A--C, it reports TT. for an insertion of ATT / AGTT, it reports G
310311 String pindelAllele = tokens [2 ].split (" " )[2 ];
311- pindelAllele = pindelAllele .replaceAll ("\" " , "" );
312+ pindelAllele = StringUtils . trimToNull ( pindelAllele .replaceAll ("\" " , "" ) );
312313
313314 File dict = new File (fasta .getPath ().replace ("fasta" , "dict" ));
314315 if (!dict .exists ())
@@ -353,6 +354,10 @@ private static void parsePindelOutput(PipelineContext ctx, CSVWriter writer, Fil
353354
354355 eventCoverage = eventCoverage / (baseAfterEnd - basePriorToStart - 1 );
355356 }
357+ else if ("I" .equals (type ))
358+ {
359+ eventCoverage = (double )depth ;
360+ }
356361
357362 double meanCoverage = (leadingCoverage + trailingCoverage ) / 2.0 ;
358363 double pct = (double )support / meanCoverage ;
@@ -368,24 +373,31 @@ private static void parsePindelOutput(PipelineContext ctx, CSVWriter writer, Fil
368373 String ref = "" ;
369374 if ("I" .equals (type ))
370375 {
376+ if (pindelAllele == null )
377+ {
378+ throw new IllegalArgumentException ("Unexpected empty allele for insertion: " + basePriorToStart );
379+ }
380+
371381 ref = sequence .getBaseString ().substring (basePriorToStart -1 , basePriorToStart );
372382 alt = ref + pindelAllele ;
373383 }
374384 else if ("D" .equals (type ))
375385 {
376386 ref = sequence .getBaseString ().substring (basePriorToStart -1 , trueEnd );
377387 alt = sequence .getBaseString ().substring (basePriorToStart -1 , basePriorToStart );
378-
379- // Pindel reports the region over the deletion. so add the leading base to match the reference (VCF-style)
380- // Our sequence based on coordinates should match what pindel reported
381- String predictedPindelAllele = alt + pindelAllele ;
382- if (!predictedPindelAllele .equals (ref ))
388+ if (pindelAllele != null )
383389 {
384- throw new IllegalArgumentException ("Unexpected pindel allele: " + ref + " / " + predictedPindelAllele + " / " + pindelAllele );
390+ type = "S" ;
391+ alt = alt + pindelAllele ;
392+
393+ if (alt .length () != ref .length ())
394+ {
395+ throw new IllegalArgumentException ("Unexpected pindel allele at " + basePriorToStart + ": " + ref + " / " + alt + " / " + pindelAllele );
396+ }
385397 }
386398 }
387399
388- writer .writeNext (new String []{type , contig , String .valueOf (basePriorToStart ), String .valueOf (trueEnd ), String .valueOf (depth ), String .valueOf (support ), String .valueOf (pct ), String .valueOf (meanCoverage ), String .valueOf (leadingCoverage ), String .valueOf (trailingCoverage ), (eventCoverage == null ? "" : String .valueOf (eventCoverage )), ref , alt , pindelAllele });
400+ writer .writeNext (new String []{type , contig , String .valueOf (basePriorToStart ), String .valueOf (trueEnd ), String .valueOf (depth ), String .valueOf (support ), String .valueOf (pct ), String .valueOf (meanCoverage ), String .valueOf (leadingCoverage ), String .valueOf (trailingCoverage ), (eventCoverage == null ? "" : String .valueOf (eventCoverage )), ref , alt , ( pindelAllele == null ? "" : pindelAllele ) });
389401 totalPassing ++;
390402 }
391403 else
0 commit comments