import java.io.IOException;
import java.io.InputStream;
import java.io.PrintWriter;
+ import java.text.DecimalFormat;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.Arrays;
@@ -97,7 +98,11 @@ public Provider()
                }}, 25),
                ToolParameterDescriptor.createExpDataParam("primerBedFile", "Primer Sites (BED File)", "This is a BED file specifying the primer binding sites, which will be used to flag variants. Strandedness is ignored.", "ldk-expdatafield", new JSONObject()
                {{
-                     put("allowBlank", false);
+                     put("allowBlank", true);
+                 }}, null),
+                 ToolParameterDescriptor.createExpDataParam("ampliconBedFile", "Amplicons (BED File)", "This is a BED file specifying the amplicons used for amplification (or any named regions of interest). If provided, depth will be summarized across them.", "ldk-expdatafield", new JSONObject()
+                 {{
+                     put("allowBlank", true);
                }}, null),
                ToolParameterDescriptor.create("minFractionForConsensus", "Min AF For Consensus", "Any LoFreq variant greater than this threshold will be used as the consensus base.", "ldk-numberfield", new JSONObject(){{
                    put("minValue", 0.5);
@@ -133,21 +138,147 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc

        DepthOfCoverageWrapper wrapper = new DepthOfCoverageWrapper(getPipelineCtx().getLogger());
        List<String> extraArgs = new ArrayList<>();
-         extraArgs.add("--includeDeletions");
- 
-         //GATK4 only:
-         //extraArgs.add("--include-deletions");
-         //extraArgs.add("--omit-per-sample-statistics");
-         //extraArgs.add("--omit-interval-statistics");
-         //extraArgs.add("--min-base-quality");
-         //extraArgs.add("15");
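+         //GATK4 DepthOfCoverage argument names: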
+         extraArgs.add("--include-deletions");
+         extraArgs.add("--omit-per-sample-statistics");
+         extraArgs.add("--omit-interval-statistics");

        wrapper.run(Collections.singletonList(inputBam), coverageOut.getPath(), referenceGenome.getWorkingFastaFile(), extraArgs, true);
        if (!coverageOut.exists())
        {
            throw new PipelineJobException("Unable to find file: " + coverageOut.getPath());
        }

+         //Create a BED file with all regions of coverage below MIN_COVERAGE:
+         int minCoverage = getProvider().getParameterByName("minCoverage").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Integer.class);
+         int positionsSkipped = 0;
+         int gapIntervals = 0;
+ 
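+         //Adjacent low-coverage positions are merged into intervals and written with 0-based starts (BED convention)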
+         File mask = new File(outputDir, "mask.bed");
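+         //Also cache the depth of every position, keyed as "contig:position" (the locus format reported by DepthOfCoverage), for reuse below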
+         Map<String, Integer> gatkDepth = new HashMap<>();
+         try (CSVReader reader = new CSVReader(Readers.getReader(coverageOut), '\t'); CSVWriter writer = new CSVWriter(IOUtil.openFileForBufferedUtf8Writing(mask), '\t', CSVWriter.NO_QUOTE_CHARACTER))
+         {
+             String[] line;
+ 
+             Interval intervalOfCurrentGap = null;
+ 
+             int i = 0;
+             while ((line = reader.readNext()) != null)
+             {
+                 i++;
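+                 //The first line of the DepthOfCoverage output is a header; skip it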
+                 if (i == 1)
+                 {
+                     continue;
+                 }
+ 
+                 String[] tokens = line[0].split(":");
+                 int depth = Integer.parseInt(line[1]);
+                 gatkDepth.put(line[0], depth);
+ 
+                 if (depth < minCoverage)
+                 {
+                     positionsSkipped++;
+ 
+                     if (intervalOfCurrentGap != null)
+                     {
+                         if (intervalOfCurrentGap.getContig().equals(tokens[0]))
+                         {
+                             //extend
+                             intervalOfCurrentGap = new Interval(intervalOfCurrentGap.getContig(), intervalOfCurrentGap.getStart(), Integer.parseInt(tokens[1]));
+                         }
+                         else
+                         {
+                             //switched contigs, write and make new:
+                             writer.writeNext(new String[]{intervalOfCurrentGap.getContig(), String.valueOf(intervalOfCurrentGap.getStart() - 1), String.valueOf(intervalOfCurrentGap.getEnd())});
+                             gapIntervals++;
+                             intervalOfCurrentGap = new Interval(tokens[0], Integer.parseInt(tokens[1]), Integer.parseInt(tokens[1]));
+                         }
+                     }
+                     else
+                     {
+                         //No existing gap, just start one:
+                         intervalOfCurrentGap = new Interval(tokens[0], Integer.parseInt(tokens[1]), Integer.parseInt(tokens[1]));
+                     }
+                 }
+                 else
+                 {
+                     //We just exited a gap, so write it:
+                     if (intervalOfCurrentGap != null)
+                     {
+                         writer.writeNext(new String[]{intervalOfCurrentGap.getContig(), String.valueOf(intervalOfCurrentGap.getStart() - 1), String.valueOf(intervalOfCurrentGap.getEnd())});
+                         gapIntervals++;
+                     }
+ 
+                     intervalOfCurrentGap = null;
+                 }
+             }
+ 
+             //Ensure we count final gap
+             if (intervalOfCurrentGap != null)
+             {
+                 writer.writeNext(new String[]{intervalOfCurrentGap.getContig(), String.valueOf(intervalOfCurrentGap.getStart() - 1), String.valueOf(intervalOfCurrentGap.getEnd())});
+                 gapIntervals++;
+             }
+         }
+         catch (IOException e)
+         {
+             throw new PipelineJobException(e);
+         }
+ 
+         //Optional: amplicons/depth:
+         NumberFormat decimal = DecimalFormat.getNumberInstance();
+         decimal.setMaximumFractionDigits(2);
+         Integer ampliconBedId = getProvider().getParameterByName("ampliconBedFile").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Integer.class);
+         if (ampliconBedId != null)
+         {
+             File ampliconDepths = new File(outputDir, "ampliconDepths.txt");
+             try (CSVWriter writer = new CSVWriter(IOUtil.openFileForBufferedUtf8Writing(ampliconDepths), '\t', CSVWriter.NO_QUOTE_CHARACTER))
+             {
+                 writer.writeNext(new String[]{"AmpliconName", "Length", "TotalDepth", "AvgDepth"});
+                 File primerFile = getPipelineCtx().getSequenceSupport().getCachedData(ampliconBedId);
+                 try (CSVReader reader = new CSVReader(Readers.getReader(primerFile), '\t'))
+                 {
+                     String[] line;
+                     while ((line = reader.readNext()) != null)
+                     {
+                         if (line[0].startsWith("#"))
+                         {
+                             continue;
+                         }
+ 
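+                         //BED is 0-based/half-open; shift the start to 1-based so positions match the DepthOfCoverage keys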
+                         String contig = line[0];
+                         int start = Integer.parseInt(line[1]) + 1;
+                         int end = Integer.parseInt(line[2]);
+                         String name = line[3];
+ 
+                         int totalDepth = 0;
+                         for (int i = start; i <= end; i++)
+                         {
+                             String key = contig + ":" + i;
+                             if (!gatkDepth.containsKey(key))
+                             {
+                                 getPipelineCtx().getLogger().error("Missing depth: " + key);
+                                 continue;
+                             }
+ 
+                             totalDepth += gatkDepth.get(key);
+                         }
+ 
+                         int length = (end - start + 1);
+                         Double avg = (double) totalDepth / length;
+                         writer.writeNext(new String[]{name, String.valueOf(length), String.valueOf(totalDepth), decimal.format(avg)});
+                     }
+                 }
+             }
+             catch (IOException e)
+             {
+                 throw new PipelineJobException(e);
+             }
+         }
+         else
+         {
+             getPipelineCtx().getLogger().info("No amplicon BED provided, skipping");
+         }
+ 
        //SnpEff:
        Integer geneFileId = getProvider().getParameterByName(SNPEffStep.GENE_PARAM).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Integer.class);
        File snpEffBaseDir = SNPEffStep.checkOrCreateIndex(getPipelineCtx(), referenceGenome, geneFileId);
@@ -192,6 +323,10 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc
                throw new PipelineJobException(e);
            }
        }
+         else
+         {
+             getPipelineCtx().getLogger().info("No primer BED provided, skipping");
+         }

        int totalVariants = 0;
        int totalGT1 = 0;
@@ -215,6 +350,7 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc
            //Add INFO annotations
            header.addMetaDataLine(new VCFInfoHeaderLine("IN_CONSENSUS", 1, VCFHeaderLineType.Flag, "A flag to indicate whether this variant appears in the consensus"));
            header.addMetaDataLine(new VCFInfoHeaderLine("WITHIN_PBS", 1, VCFHeaderLineType.Flag, "A flag to indicate whether this variant is located in primer binding sites"));
+             header.addMetaDataLine(new VCFInfoHeaderLine("GATK_DP", 1, VCFHeaderLineType.Integer, "The depth of coverage provided by GATK DepthOfCoverage"));

            header.setSequenceDictionary(dict);
            writer1.writeHeader(header);
@@ -240,6 +376,9 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc
                    totalGT50++;
                }

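+                 //Annotate each variant with the DepthOfCoverage depth at its start position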
+                 int gDepth = gatkDepth.get(vc.getContig() + ":" + vc.getStart());
+                 vcb.attribute("GATK_DP", gDepth);
+ 
                boolean withinPrimer = false;
                if (!primerIntervals.isEmpty())
                {
@@ -258,7 +397,7 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc
                    }
                }

-                 if (vc.hasAttribute("AF") && vc.getAttributeAsDouble("AF", 0.0) > minFractionForConsensus)
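+                 //Only accept a variant into the consensus if the site has adequate depth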
+                 if (gDepth >= minCoverage && vc.hasAttribute("AF") && vc.getAttributeAsDouble("AF", 0.0) > minFractionForConsensus)
                {
                    totalGTThreshold++;

@@ -291,80 +430,6 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc
            }
        }

-         //Create a BED file with all regions of coverage below MIN_COVERAGE:
-         int minCoverage = getProvider().getParameterByName("minCoverage").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Integer.class);
-         int positionsSkipped = 0;
-         int gapIntervals = 0;
- 
-         File mask = new File(outputDir, "mask.bed");
-         try (CSVReader reader = new CSVReader(Readers.getReader(coverageOut), '\t'); CSVWriter writer = new CSVWriter(IOUtil.openFileForBufferedUtf8Writing(mask), '\t', CSVWriter.NO_QUOTE_CHARACTER))
-         {
-             String[] line;
- 
-             Interval intervalOfCurrentGap = null;
- 
-             int i = 0;
-             while ((line = reader.readNext()) != null)
-             {
-                 i++;
-                 if (i == 1)
-                 {
-                     continue;
-                 }
- 
-                 String[] tokens = line[0].split(":");
-                 int depth = Integer.parseInt(line[1]);
- 
-                 if (depth < minCoverage)
-                 {
-                     positionsSkipped++;
- 
-                     if (intervalOfCurrentGap != null)
-                     {
-                         if (intervalOfCurrentGap.getContig().equals(tokens[0]))
-                         {
-                             //extend
-                             intervalOfCurrentGap = new Interval(intervalOfCurrentGap.getContig(), intervalOfCurrentGap.getStart(), Integer.parseInt(tokens[1]));
-                         }
-                         else
-                         {
-                             //switched contigs, write and make new:
-                             writer.writeNext(new String[]{intervalOfCurrentGap.getContig(), String.valueOf(intervalOfCurrentGap.getStart() - 1), String.valueOf(intervalOfCurrentGap.getEnd())});
-                             gapIntervals++;
-                             intervalOfCurrentGap = new Interval(tokens[0], Integer.parseInt(tokens[1]), Integer.parseInt(tokens[1]));
-                         }
-                     }
-                     else
-                     {
-                         //Not existing gap, just start one:
-                         intervalOfCurrentGap = new Interval(tokens[0], Integer.parseInt(tokens[1]), Integer.parseInt(tokens[1]));
-                     }
-                 }
-                 else
-                 {
-                     //We just existed a gap, so write:
-                     if (intervalOfCurrentGap != null)
-                     {
-                         writer.writeNext(new String[]{intervalOfCurrentGap.getContig(), String.valueOf(intervalOfCurrentGap.getStart() - 1), String.valueOf(intervalOfCurrentGap.getEnd())});
-                         gapIntervals++;
-                     }
- 
-                     intervalOfCurrentGap = null;
-                 }
-             }
- 
-             //Ensure we count final gap
-             if (intervalOfCurrentGap != null)
-             {
-                 writer.writeNext(new String[]{intervalOfCurrentGap.getContig(), String.valueOf(intervalOfCurrentGap.getStart() - 1), String.valueOf(intervalOfCurrentGap.getEnd())});
-                 gapIntervals++;
-             }
-         }
-         catch (IOException e)
-         {
-             throw new PipelineJobException(e);
-         }
- 
        NumberFormat fmt = NumberFormat.getPercentInstance();
        fmt.setMaximumFractionDigits(2);
