Skip to content

Commit 3296c0c

Browse files
committed
Add option to count variants using bcftools index, which is much faster than parsing the file itself
1 parent 6589748 commit 3296c0c

File tree

4 files changed

+49
-8
lines changed

4 files changed

+49
-8
lines changed
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
package org.labkey.api.sequenceanalysis.pipeline;
2+
3+
import org.apache.logging.log4j.Logger;
4+
import org.jetbrains.annotations.Nullable;
5+
import org.labkey.api.sequenceanalysis.run.AbstractCommandWrapper;
6+
7+
import java.io.File;
8+
9+
/**
10+
* User: bimber
11+
* Date: 12/15/12
12+
* Time: 9:11 PM
13+
*/
14+
public class BcftoolsRunner extends AbstractCommandWrapper
15+
{
16+
public BcftoolsRunner(@Nullable Logger logger)
17+
{
18+
super(logger);
19+
}
20+
21+
public File getSamtoolsPath()
22+
{
23+
return SequencePipelineService.get().getExeForPackage("SAMTOOLSPATH", "samtools");
24+
}
25+
}

SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisServiceImpl.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -433,7 +433,7 @@ private void appendParents(Set<String> distinctSubjects, PedigreeRecord p, List<
433433
@Override
434434
public String getVCFLineCount(File vcf, Logger log, boolean passOnly) throws PipelineJobException
435435
{
436-
return ProcessVariantsHandler.getVCFLineCount(vcf, log, passOnly);
436+
return passOnly ? ProcessVariantsHandler.getVCFLineCount(vcf, log, passOnly, false) : ProcessVariantsHandler.getVCFLineCount(vcf, log, passOnly, true);
437437
}
438438

439439
@Override

SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/LiftoverHandler.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -321,20 +321,20 @@ public void liftOverVcf(JobContext ctx, ReferenceGenome targetGenome, ReferenceG
321321
Long mapped = null;
322322
if (output.exists())
323323
{
324-
String mappedStr = ProcessVariantsHandler.getVCFLineCount(output, job.getLogger(), false);
324+
String mappedStr = ProcessVariantsHandler.getVCFLineCount(output, job.getLogger(), false, true);
325325
mapped = StringUtils.trimToNull(mappedStr) == null ? 0L : Long.parseLong(mappedStr);
326326
job.getLogger().info("total variants mapped: " + mappedStr);
327-
job.getLogger().info("passing variants mapped: " + ProcessVariantsHandler.getVCFLineCount(output, job.getLogger(), true));
327+
job.getLogger().info("passing variants mapped: " + ProcessVariantsHandler.getVCFLineCount(output, job.getLogger(), true, false));
328328
SequenceAnalysisService.get().ensureVcfIndex(output, job.getLogger());
329329
}
330330

331331
Long unmapped = 0L;
332332
if (unmappedOutput != null && unmappedOutput.exists())
333333
{
334-
String unmappedStr = ProcessVariantsHandler.getVCFLineCount(unmappedOutput, job.getLogger(), false);
334+
String unmappedStr = ProcessVariantsHandler.getVCFLineCount(unmappedOutput, job.getLogger(), false, true);
335335
unmapped = StringUtils.trimToNull(unmappedStr) == null ? 0L : Long.parseLong(unmappedStr);
336336
job.getLogger().info("total unmapped variants: " + unmappedStr);
337-
job.getLogger().info("passing unmapped variants: " + ProcessVariantsHandler.getVCFLineCount(unmappedOutput, job.getLogger(), true));
337+
job.getLogger().info("passing unmapped variants: " + ProcessVariantsHandler.getVCFLineCount(unmappedOutput, job.getLogger(), true, false));
338338
SequenceAnalysisService.get().ensureVcfIndex(unmappedOutput, job.getLogger());
339339
}
340340

SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/ProcessVariantsHandler.java

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import org.labkey.api.sequenceanalysis.SequenceAnalysisService;
2828
import org.labkey.api.sequenceanalysis.SequenceOutputFile;
2929
import org.labkey.api.sequenceanalysis.pipeline.AbstractResumer;
30+
import org.labkey.api.sequenceanalysis.pipeline.BcftoolsRunner;
3031
import org.labkey.api.sequenceanalysis.pipeline.PipelineStepCtx;
3132
import org.labkey.api.sequenceanalysis.pipeline.PipelineStepProvider;
3233
import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome;
@@ -430,8 +431,8 @@ public static File processVCF(File input, Integer libraryId, JobContext ctx, Res
430431
{
431432
currentVCF = output.getVCF();
432433

433-
ctx.getJob().getLogger().info("total variants: " + getVCFLineCount(currentVCF, ctx.getJob().getLogger(), false));
434-
ctx.getJob().getLogger().info("passing variants: " + getVCFLineCount(currentVCF, ctx.getJob().getLogger(), true));
434+
ctx.getJob().getLogger().info("total variants: " + getVCFLineCount(currentVCF, ctx.getJob().getLogger(), false, true));
435+
ctx.getJob().getLogger().info("passing variants: " + getVCFLineCount(currentVCF, ctx.getJob().getLogger(), true, false));
435436
ctx.getJob().getLogger().debug("index exists: " + (new File(currentVCF.getPath() + ".tbi")).exists());
436437

437438
try
@@ -468,8 +469,23 @@ public static File processVCF(File input, Integer libraryId, JobContext ctx, Res
468469
return null;
469470
}
470471

471-
public static String getVCFLineCount(File vcf, Logger log, boolean passOnly) throws PipelineJobException
472+
private static String countUsingBcfTools(File vcf, Logger log) throws PipelineJobException
472473
{
474+
return new BcftoolsRunner(log).executeWithOutput(Arrays.asList("index", "-n", vcf.getPath()));
475+
}
476+
477+
public static String getVCFLineCount(File vcf, Logger log, boolean passOnly, boolean useBcfTools) throws PipelineJobException
478+
{
479+
if (useBcfTools)
480+
{
481+
if (passOnly)
482+
{
483+
throw new PipelineJobException("bcftools VCF count cannot be used with passOnly");
484+
}
485+
486+
return countUsingBcfTools(vcf, log);
487+
}
488+
473489
String cat = vcf.getName().endsWith(".gz") ? "zcat" : "cat";
474490
SimpleScriptWrapper wrapper = new SimpleScriptWrapper(null);
475491

0 commit comments

Comments
 (0)