Skip to content

Commit 0ed18cf

Browse files
committed
Add BcftoolsSetGtStep
1 parent 3999b68 commit 0ed18cf

File tree

2 files changed

+117
-0
lines changed

2 files changed

+117
-0
lines changed

SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -302,6 +302,7 @@ public static void registerPipelineSteps()
302302
SequencePipelineService.get().registerPipelineStep(new MendelianViolationReportStep.Provider());
303303
SequencePipelineService.get().registerPipelineStep(new SummarizeGenotypeQualityStep.Provider());
304304
SequencePipelineService.get().registerPipelineStep(new BcftoolsFillTagsStep.Provider());
305+
SequencePipelineService.get().registerPipelineStep(new BcftoolsSetGtStep.Provider());
305306
SequencePipelineService.get().registerPipelineStep(new SVAnnotateStep.Provider());
306307

307308
//handlers
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
package org.labkey.sequenceanalysis.run.analysis;
2+
3+
import htsjdk.samtools.util.Interval;
4+
import org.apache.commons.lang3.StringUtils;
5+
import org.jetbrains.annotations.Nullable;
6+
import org.labkey.api.pipeline.PipelineJobException;
7+
import org.labkey.api.sequenceanalysis.SequenceAnalysisService;
8+
import org.labkey.api.sequenceanalysis.pipeline.AbstractVariantProcessingStepProvider;
9+
import org.labkey.api.sequenceanalysis.pipeline.BcftoolsRunner;
10+
import org.labkey.api.sequenceanalysis.pipeline.PipelineContext;
11+
import org.labkey.api.sequenceanalysis.pipeline.PipelineStepProvider;
12+
import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome;
13+
import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService;
14+
import org.labkey.api.sequenceanalysis.pipeline.VariantProcessingStep;
15+
import org.labkey.api.sequenceanalysis.pipeline.VariantProcessingStepOutputImpl;
16+
import org.labkey.api.sequenceanalysis.run.AbstractCommandPipelineStep;
17+
import org.labkey.sequenceanalysis.pipeline.SequenceTaskHelper;
18+
19+
import java.io.File;
20+
import java.io.IOException;
21+
import java.util.ArrayList;
22+
import java.util.Arrays;
23+
import java.util.List;
24+
import java.util.stream.Collectors;
25+
26+
public class BcftoolsSetGtStep extends AbstractCommandPipelineStep<BcftoolsRunner> implements VariantProcessingStep
27+
{
28+
public BcftoolsSetGtStep(PipelineStepProvider<?> provider, PipelineContext ctx)
29+
{
30+
super(provider, ctx, new BcftoolsRunner(ctx.getLogger()));
31+
}
32+
33+
public static class Provider extends AbstractVariantProcessingStepProvider<BcftoolsSetGtStep> implements SupportsScatterGather
34+
{
35+
public Provider()
36+
{
37+
super("BcftoolsSetGtStep", "Bcftools Set GT", "bcftools", "Can be used to convert single dot ('.') genotypes to './.' for compatibility with more tools.", Arrays.asList(
38+
39+
), null, null);
40+
}
41+
42+
@Override
43+
public BcftoolsSetGtStep create(PipelineContext ctx)
44+
{
45+
return new BcftoolsSetGtStep(this, ctx);
46+
}
47+
}
48+
49+
@Override
50+
public Output processVariants(File inputVCF, File outputDirectory, ReferenceGenome genome, @Nullable List<Interval> intervals) throws PipelineJobException
51+
{
52+
VariantProcessingStepOutputImpl output = new VariantProcessingStepOutputImpl();
53+
54+
List<String> options = new ArrayList<>();
55+
options.add(BcftoolsRunner.getBcfToolsPath().getPath());
56+
options.add("+setGT");
57+
58+
options.add(inputVCF.getPath());
59+
60+
if (intervals != null)
61+
{
62+
options.add("--regions");
63+
options.add(intervals.stream().map(interval -> interval.getContig() + ":" + interval.getStart() + "-" + interval.getEnd()).collect(Collectors.joining(",")));
64+
}
65+
66+
options.add("-O");
67+
options.add("z9");
68+
69+
Integer threads = SequencePipelineService.get().getMaxThreads(getPipelineCtx().getLogger());
70+
if (threads != null)
71+
{
72+
options.add("--threads");
73+
options.add(threads.toString());
74+
}
75+
76+
File outputVcf = new File(outputDirectory, SequenceTaskHelper.getUnzippedBaseName(inputVCF) + ".ft.vcf.gz");
77+
options.add("-o");
78+
options.add(outputVcf.getPath());
79+
80+
options.add("--");
81+
82+
options.add("-t");
83+
options.add(".");
84+
85+
options.add("-n");
86+
options.add("./.");
87+
88+
BcftoolsRunner wrapper = getWrapper();
89+
90+
String bcfPluginDir = StringUtils.trimToNull(System.getenv("BCFTOOLS_PLUGINS"));
91+
if (bcfPluginDir != null)
92+
{
93+
getPipelineCtx().getLogger().debug("Setting BCFTOOLS_PLUGINS environment variable: " + bcfPluginDir);
94+
wrapper.addToEnvironment("BCFTOOLS_PLUGINS", bcfPluginDir);
95+
}
96+
97+
wrapper.execute(options);
98+
if (!outputVcf.exists())
99+
{
100+
throw new PipelineJobException("output not found: " + outputVcf);
101+
}
102+
103+
try
104+
{
105+
SequenceAnalysisService.get().ensureVcfIndex(outputVcf, getWrapper().getLogger());
106+
}
107+
catch (IOException e)
108+
{
109+
throw new PipelineJobException(e);
110+
}
111+
112+
output.setVcf(outputVcf);
113+
114+
return output;
115+
}
116+
}

0 commit comments

Comments
 (0)