Skip to content

Commit ef66ee8

Browse files
committed
Report depth by amplicon
1 parent e0866a8 commit ef66ee8

File tree

1 file changed

+149
-84
lines changed

1 file changed

+149
-84
lines changed

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/LofreqAnalysis.java

Lines changed: 149 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@
5959
import java.io.IOException;
6060
import java.io.InputStream;
6161
import java.io.PrintWriter;
62+
import java.text.DecimalFormat;
6263
import java.text.NumberFormat;
6364
import java.util.ArrayList;
6465
import java.util.Arrays;
@@ -97,7 +98,11 @@ public Provider()
9798
}}, 25),
9899
ToolParameterDescriptor.createExpDataParam("primerBedFile", "Primer Sites (BED File)", "This is a BED file specifying the primer binding sites, which will be used to flag variants. Strandedness is ignored.", "ldk-expdatafield", new JSONObject()
99100
{{
100-
put("allowBlank", false);
101+
put("allowBlank", true);
102+
}}, null),
103+
ToolParameterDescriptor.createExpDataParam("ampliconBedFile", "Amplicons (BED File)", "This is a BED file specifying the amplicons used for amplification (or any named regions of interest). If provided, depth will be summarized across them.", "ldk-expdatafield", new JSONObject()
104+
{{
105+
put("allowBlank", true);
101106
}}, null),
102107
ToolParameterDescriptor.create("minFractionForConsensus", "Min AF For Consensus", "Any LoFreq variant greater than this threshold will be used as the consensus base.", "ldk-numberfield", new JSONObject(){{
103108
put("minValue", 0.5);
@@ -133,21 +138,147 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc
133138

134139
DepthOfCoverageWrapper wrapper = new DepthOfCoverageWrapper(getPipelineCtx().getLogger());
135140
List<String> extraArgs = new ArrayList<>();
136-
extraArgs.add("--includeDeletions");
137-
138-
//GATK4 only:
139-
//extraArgs.add("--include-deletions");
140-
//extraArgs.add("--omit-per-sample-statistics");
141-
//extraArgs.add("--omit-interval-statistics");
142-
//extraArgs.add("--min-base-quality");
143-
//extraArgs.add("15");
141+
extraArgs.add("--include-deletions");
142+
extraArgs.add("--omit-per-sample-statistics");
143+
extraArgs.add("--omit-interval-statistics");
144144

145145
wrapper.run(Collections.singletonList(inputBam), coverageOut.getPath(), referenceGenome.getWorkingFastaFile(), extraArgs, true);
146146
if (!coverageOut.exists())
147147
{
148148
throw new PipelineJobException("Unable to find file: " + coverageOut.getPath());
149149
}
150150

151+
//Create a BED file with all regions of coverage below MIN_COVERAGE:
152+
int minCoverage = getProvider().getParameterByName("minCoverage").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Integer.class);
153+
int positionsSkipped = 0;
154+
int gapIntervals = 0;
155+
156+
File mask = new File(outputDir, "mask.bed");
157+
Map<String, Integer> gatkDepth = new HashMap<>();
158+
try (CSVReader reader = new CSVReader(Readers.getReader(coverageOut), '\t');CSVWriter writer = new CSVWriter(IOUtil.openFileForBufferedUtf8Writing(mask), '\t', CSVWriter.NO_QUOTE_CHARACTER))
159+
{
160+
String[] line;
161+
162+
Interval intervalOfCurrentGap = null;
163+
164+
int i = 0;
165+
while ((line = reader.readNext()) != null)
166+
{
167+
i++;
168+
if (i == 1)
169+
{
170+
continue;
171+
}
172+
173+
String[] tokens = line[0].split(":");
174+
int depth = Integer.parseInt(line[1]);
175+
gatkDepth.put(line[0], depth);
176+
177+
if (depth < minCoverage)
178+
{
179+
positionsSkipped++;
180+
181+
if (intervalOfCurrentGap != null)
182+
{
183+
if (intervalOfCurrentGap.getContig().equals(tokens[0]))
184+
{
185+
//extend
186+
intervalOfCurrentGap = new Interval(intervalOfCurrentGap.getContig(), intervalOfCurrentGap.getStart(), Integer.parseInt(tokens[1]));
187+
}
188+
else
189+
{
190+
//switched contigs, write and make new:
191+
writer.writeNext(new String[]{intervalOfCurrentGap.getContig(), String.valueOf(intervalOfCurrentGap.getStart()-1), String.valueOf(intervalOfCurrentGap.getEnd())});
192+
gapIntervals++;
193+
intervalOfCurrentGap = new Interval(tokens[0], Integer.parseInt(tokens[1]), Integer.parseInt(tokens[1]));
194+
}
195+
}
196+
else
197+
{
198+
//No existing gap, just start one:
199+
intervalOfCurrentGap = new Interval(tokens[0], Integer.parseInt(tokens[1]), Integer.parseInt(tokens[1]));
200+
}
201+
}
202+
else
203+
{
204+
//We just exited a gap, so write:
205+
if (intervalOfCurrentGap != null)
206+
{
207+
writer.writeNext(new String[]{intervalOfCurrentGap.getContig(), String.valueOf(intervalOfCurrentGap.getStart()-1), String.valueOf(intervalOfCurrentGap.getEnd())});
208+
gapIntervals++;
209+
}
210+
211+
intervalOfCurrentGap = null;
212+
}
213+
}
214+
215+
//Ensure we count final gap
216+
if (intervalOfCurrentGap != null)
217+
{
218+
writer.writeNext(new String[]{intervalOfCurrentGap.getContig(), String.valueOf(intervalOfCurrentGap.getStart()-1), String.valueOf(intervalOfCurrentGap.getEnd())});
219+
gapIntervals++;
220+
}
221+
}
222+
catch (IOException e)
223+
{
224+
throw new PipelineJobException(e);
225+
}
226+
227+
//Optional: amplicons/depth:
228+
NumberFormat decimal = DecimalFormat.getNumberInstance();
229+
decimal.setMaximumFractionDigits(2);
230+
Integer ampliconBedId = getProvider().getParameterByName("ampliconBedFile").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Integer.class);
231+
if (ampliconBedId != null)
232+
{
233+
File ampliconDepths = new File(outputDir, "ampliconDepths.txt");
234+
try (CSVWriter writer = new CSVWriter(IOUtil.openFileForBufferedUtf8Writing(ampliconDepths), '\t', CSVWriter.NO_QUOTE_CHARACTER))
235+
{
236+
writer.writeNext(new String[]{"AmpliconName", "Length", "TotalDepth", "AvgDepth"});
237+
File primerFile = getPipelineCtx().getSequenceSupport().getCachedData(ampliconBedId);
238+
try (CSVReader reader = new CSVReader(Readers.getReader(primerFile), '\t'))
239+
{
240+
String[] line;
241+
while ((line = reader.readNext()) != null)
242+
{
243+
if (line[0].startsWith("#"))
244+
{
245+
continue;
246+
}
247+
248+
String contig = line[0];
249+
int start = Integer.parseInt(line[1]) + 1;
250+
int end = Integer.parseInt(line[2]);
251+
String name = line[3];
252+
253+
int totalDepth = 0;
254+
for (int i = start;i<=end;i++)
255+
{
256+
String key = contig + ":" + i;
257+
if (!gatkDepth.containsKey(key))
258+
{
259+
getPipelineCtx().getLogger().error("Missing depth: " + key);
260+
continue;
261+
}
262+
263+
totalDepth += gatkDepth.get(key);
264+
}
265+
266+
int length = (end - start + 1);
267+
Double avg = (double)totalDepth / length;
268+
writer.writeNext(new String[]{name, String.valueOf(length), String.valueOf(totalDepth), decimal.format(avg)});
269+
}
270+
}
271+
}
272+
catch (IOException e)
273+
{
274+
throw new PipelineJobException(e);
275+
}
276+
}
277+
else
278+
{
279+
getPipelineCtx().getLogger().info("No amplicon BED provided, skipping");
280+
}
281+
151282
//SnpEff:
152283
Integer geneFileId = getProvider().getParameterByName(SNPEffStep.GENE_PARAM).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Integer.class);
153284
File snpEffBaseDir = SNPEffStep.checkOrCreateIndex(getPipelineCtx(), referenceGenome, geneFileId);
@@ -192,6 +323,10 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc
192323
throw new PipelineJobException(e);
193324
}
194325
}
326+
else
327+
{
328+
getPipelineCtx().getLogger().info("No primer BED provided, skipping");
329+
}
195330

196331
int totalVariants = 0;
197332
int totalGT1 = 0;
@@ -215,6 +350,7 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc
215350
//Add INFO annotations
216351
header.addMetaDataLine(new VCFInfoHeaderLine("IN_CONSENSUS", 1, VCFHeaderLineType.Flag, "A flag to indicate whether this variant appears in the consensus"));
217352
header.addMetaDataLine(new VCFInfoHeaderLine("WITHIN_PBS", 1, VCFHeaderLineType.Flag, "A flag to indicate whether this variant is located in primer binding sites"));
353+
header.addMetaDataLine(new VCFInfoHeaderLine("GATK_DP", 1, VCFHeaderLineType.Integer, "The depth of coverage provided by GATK DepthOfCoverage"));
218354

219355
header.setSequenceDictionary(dict);
220356
writer1.writeHeader(header);
@@ -240,6 +376,9 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc
240376
totalGT50++;
241377
}
242378

379+
int gDepth = gatkDepth.get(vc.getContig() + ":" + vc.getStart());
380+
vcb.attribute("GATK_DP", gDepth);
381+
243382
boolean withinPrimer = false;
244383
if (!primerIntervals.isEmpty())
245384
{
@@ -258,7 +397,7 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc
258397
}
259398
}
260399

261-
if (vc.hasAttribute("AF") && vc.getAttributeAsDouble("AF", 0.0) > minFractionForConsensus)
400+
if (gDepth >= minCoverage && vc.hasAttribute("AF") && vc.getAttributeAsDouble("AF", 0.0) > minFractionForConsensus)
262401
{
263402
totalGTThreshold++;
264403

@@ -291,80 +430,6 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc
291430
}
292431
}
293432

294-
//Create a BED file with all regions of coverage below MIN_COVERAGE:
295-
int minCoverage = getProvider().getParameterByName("minCoverage").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Integer.class);
296-
int positionsSkipped = 0;
297-
int gapIntervals = 0;
298-
299-
File mask = new File(outputDir, "mask.bed");
300-
try (CSVReader reader = new CSVReader(Readers.getReader(coverageOut), '\t');CSVWriter writer = new CSVWriter(IOUtil.openFileForBufferedUtf8Writing(mask), '\t', CSVWriter.NO_QUOTE_CHARACTER))
301-
{
302-
String[] line;
303-
304-
Interval intervalOfCurrentGap = null;
305-
306-
int i = 0;
307-
while ((line = reader.readNext()) != null)
308-
{
309-
i++;
310-
if (i == 1)
311-
{
312-
continue;
313-
}
314-
315-
String[] tokens = line[0].split(":");
316-
int depth = Integer.parseInt(line[1]);
317-
318-
if (depth < minCoverage)
319-
{
320-
positionsSkipped++;
321-
322-
if (intervalOfCurrentGap != null)
323-
{
324-
if (intervalOfCurrentGap.getContig().equals(tokens[0]))
325-
{
326-
//extend
327-
intervalOfCurrentGap = new Interval(intervalOfCurrentGap.getContig(), intervalOfCurrentGap.getStart(), Integer.parseInt(tokens[1]));
328-
}
329-
else
330-
{
331-
//switched contigs, write and make new:
332-
writer.writeNext(new String[]{intervalOfCurrentGap.getContig(), String.valueOf(intervalOfCurrentGap.getStart()-1), String.valueOf(intervalOfCurrentGap.getEnd())});
333-
gapIntervals++;
334-
intervalOfCurrentGap = new Interval(tokens[0], Integer.parseInt(tokens[1]), Integer.parseInt(tokens[1]));
335-
}
336-
}
337-
else
338-
{
339-
//Not existing gap, just start one:
340-
intervalOfCurrentGap = new Interval(tokens[0], Integer.parseInt(tokens[1]), Integer.parseInt(tokens[1]));
341-
}
342-
}
343-
else
344-
{
345-
//We just existed a gap, so write:
346-
if (intervalOfCurrentGap != null)
347-
{
348-
writer.writeNext(new String[]{intervalOfCurrentGap.getContig(), String.valueOf(intervalOfCurrentGap.getStart()-1), String.valueOf(intervalOfCurrentGap.getEnd())});
349-
gapIntervals++;
350-
}
351-
352-
intervalOfCurrentGap = null;
353-
}
354-
}
355-
356-
//Ensure we count final gap
357-
if (intervalOfCurrentGap != null)
358-
{
359-
writer.writeNext(new String[]{intervalOfCurrentGap.getContig(), String.valueOf(intervalOfCurrentGap.getStart()-1), String.valueOf(intervalOfCurrentGap.getEnd())});
360-
gapIntervals++;
361-
}
362-
}
363-
catch (IOException e)
364-
{
365-
throw new PipelineJobException(e);
366-
}
367-
368433
NumberFormat fmt = NumberFormat.getPercentInstance();
369434
fmt.setMaximumFractionDigits(2);
370435

0 commit comments

Comments
 (0)