2929import org .labkey .api .sequenceanalysis .pipeline .ReferenceGenome ;
3030import org .labkey .api .sequenceanalysis .pipeline .SequenceAnalysisJobSupport ;
3131import org .labkey .api .sequenceanalysis .pipeline .SequenceOutputHandler ;
32+ import org .labkey .api .sequenceanalysis .pipeline .SequencePipelineService ;
3233import org .labkey .api .sequenceanalysis .pipeline .ToolParameterDescriptor ;
3334import org .labkey .api .sequenceanalysis .run .SimpleScriptWrapper ;
3435import org .labkey .api .util .PageFlowUtil ;
6465public class MergeLoFreqVcfHandler extends AbstractParameterizedOutputHandler <SequenceOutputHandler .SequenceOutputProcessor >
6566{
6667 private static final String MIN_AF = "minAfThreshold" ;
68+ private static final String MIN_AF_INDEL = "minIndelAfThreshold" ;
6769 private static final String MIN_COVERAGE = "minCoverage" ;
6870
6971 public MergeLoFreqVcfHandler ()
@@ -77,6 +79,11 @@ public MergeLoFreqVcfHandler()
7779 put ("maxValue" , 1 );
7880 put ("decimalPrecision" , 2 );
7981 }}, 0.10 ),
82+ ToolParameterDescriptor .create (MIN_AF , "Min AF to Include (Indels)" , "An indel will be included in the indel output if there is a passing variant above this AF in at least one sample." , "ldk-numberfield" , new JSONObject (){{
83+ put ("minValue" , 0 );
84+ put ("maxValue" , 1 );
85+ put ("decimalPrecision" , 2 );
86+ }}, 0.05 ),
8087 ToolParameterDescriptor .create (MIN_COVERAGE , "Min Coverage To Include" , "A site will be reported as ND, unless coverage is above this threshold" , "ldk-integerfield" , new JSONObject (){{
8188 put ("minValue" , 0 );
8289 }}, 25 ),
@@ -221,6 +228,7 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
221228 basename = basename + (basename .endsWith ("." ) ? "" : "." );
222229
223230 final double minAfThreshold = ctx .getParams ().optDouble (MIN_AF , 0.0 );
231+ final double minIndelAfThreshold = ctx .getParams ().optDouble (MIN_AF_INDEL , 0.05 );
224232 final int minDepth = ctx .getParams ().optInt (MIN_COVERAGE , 0 );
225233
226234 Set <String > errors = new HashSet <>();
@@ -279,16 +287,6 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
279287 double af = vc .getAttributeAsDouble ("AF" , 0.0 );
280288 if (af >= minAfThreshold )
281289 {
282- if (vc .isIndel ())
283- {
284- uniqueIndels .add (vc .getContig () + "<>" + vc .getStart () + "<>" + vc .getReference ().getBaseString () + "<>" + vc .getAlternateAlleles ().get (0 ).getBaseString ());
285- List <Integer > depths = vc .getAttributeAsIntList ("DP4" , 0 );
286- int alleleDepth = depths .get (2 ) + depths .get (3 );
287-
288- int length = vc .getLengthOnReference () - 1 ; //NOTE: indels are left-padded including one ref base
289- writer .writeNext (new String []{ctx .getSequenceSupport ().getCachedReadset (so .getReadset ()).getName (), String .valueOf (so .getRowid ()), String .valueOf (so .getReadset ()), "LoFreq" , vc .getContig (), String .valueOf (vc .getStart ()), String .valueOf (vc .getEnd ()), String .valueOf (length ), vc .getReference ().getBaseString (), vc .getAlternateAlleles ().get (0 ).getBaseString (), vc .getAttributeAsString ("GATK_DP" , "ND" ), vc .getAttributeAsString ("DP" , "ND" ), String .valueOf (alleleDepth ), vc .getAttributeAsString ("AF" , "ND" )});
290- }
291-
292290 for (int i = 0 ; i < vc .getLengthOnReference (); i ++)
293291 {
294292 int effectiveStart = vc .getStart () + i ;
@@ -302,6 +300,16 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
302300 siteToAllele .put (key , site );
303301 }
304302 }
303+
304+ if (af > minIndelAfThreshold && vc .isIndel ())
305+ {
306+ uniqueIndels .add (vc .getContig () + "<>" + vc .getStart () + "<>" + vc .getReference ().getBaseString () + "<>" + vc .getAlternateAlleles ().get (0 ).getBaseString ());
307+ List <Integer > depths = vc .getAttributeAsIntList ("DP4" , 0 );
308+ int alleleDepth = depths .get (2 ) + depths .get (3 );
309+
310+ int length = vc .getLengthOnReference () - 1 ; //NOTE: indels are left-padded including one ref base
311+ writer .writeNext (new String []{ctx .getSequenceSupport ().getCachedReadset (so .getReadset ()).getName (), String .valueOf (so .getRowid ()), String .valueOf (so .getReadset ()), "LoFreq" , vc .getContig (), String .valueOf (vc .getStart ()), String .valueOf (vc .getEnd ()), String .valueOf (length ), vc .getReference ().getBaseString (), vc .getAlternateAlleles ().get (0 ).getBaseString (), vc .getAttributeAsString ("GATK_DP" , "ND" ), vc .getAttributeAsString ("DP" , "ND" ), String .valueOf (alleleDepth ), vc .getAttributeAsString ("AF" , "ND" )});
312+ }
305313 }
306314 }
307315
@@ -711,7 +719,7 @@ private void sortTsvFile(JobContext ctx, File input) throws PipelineJobException
711719 writer .println ("set -e" );
712720 writer .println ("{" );
713721 writer .println ("zcat " + input .getPath () +" | head -n 1;" );
714- writer .println ("zcat " + input .getPath () +" | tail +2 | sort -k5,5 -k6,6n -k7,7n;" );
722+ writer .println ("zcat " + input .getPath () +" | tail -n +2 | sort -k5,5 -k6,6n -k7,7n;" );
715723 writer .println ("} | gzip -c > " + output .getPath () + "\n " );
716724 }
717725 catch (IOException e )
@@ -723,6 +731,13 @@ private void sortTsvFile(JobContext ctx, File input) throws PipelineJobException
723731 wrapper .setWorkingDir (script .getParentFile ());
724732 wrapper .execute (Arrays .asList ("/bin/bash" , script .getPath ()));
725733
734+ long lineCountIn = SequencePipelineService .get ().getLineCount (input );
735+ long lineCountOut = SequencePipelineService .get ().getLineCount (output );
736+ if (lineCountIn != lineCountOut )
737+ {
738+ throw new PipelineJobException ("Error sorting file" );
739+ }
740+
726741 input .delete ();
727742 script .delete ();
728743
0 commit comments