
Commit b2dc955

Add support for DeepVariant
1 parent 5cdc941 commit b2dc955

3 files changed: +262 / -21 lines

SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java

Lines changed: 2 additions & 19 deletions
@@ -78,25 +78,7 @@
 import org.labkey.sequenceanalysis.run.alignment.Pbmm2Wrapper;
 import org.labkey.sequenceanalysis.run.alignment.StarWrapper;
 import org.labkey.sequenceanalysis.run.alignment.VulcanWrapper;
-import org.labkey.sequenceanalysis.run.analysis.BamIterator;
-import org.labkey.sequenceanalysis.run.analysis.BcftoolsFillTagsStep;
-import org.labkey.sequenceanalysis.run.analysis.ExportOverlappingReadsAnalysis;
-import org.labkey.sequenceanalysis.run.analysis.GenrichStep;
-import org.labkey.sequenceanalysis.run.analysis.HaplotypeCallerAnalysis;
-import org.labkey.sequenceanalysis.run.analysis.ImmunoGenotypingAnalysis;
-import org.labkey.sequenceanalysis.run.analysis.LofreqAnalysis;
-import org.labkey.sequenceanalysis.run.analysis.MergeLoFreqVcfHandler;
-import org.labkey.sequenceanalysis.run.analysis.NextCladeHandler;
-import org.labkey.sequenceanalysis.run.analysis.PARalyzerAnalysis;
-import org.labkey.sequenceanalysis.run.analysis.PangolinHandler;
-import org.labkey.sequenceanalysis.run.analysis.PbsvAnalysis;
-import org.labkey.sequenceanalysis.run.analysis.PbsvJointCallingHandler;
-import org.labkey.sequenceanalysis.run.analysis.PindelAnalysis;
-import org.labkey.sequenceanalysis.run.analysis.SequenceBasedTypingAnalysis;
-import org.labkey.sequenceanalysis.run.analysis.SnpCountAnalysis;
-import org.labkey.sequenceanalysis.run.analysis.SubreadAnalysis;
-import org.labkey.sequenceanalysis.run.analysis.UnmappedReadExportHandler;
-import org.labkey.sequenceanalysis.run.analysis.ViralAnalysis;
+import org.labkey.sequenceanalysis.run.analysis.*;
 import org.labkey.sequenceanalysis.run.assembly.TrinityRunner;
 import org.labkey.sequenceanalysis.run.bampostprocessing.AddOrReplaceReadGroupsStep;
 import org.labkey.sequenceanalysis.run.bampostprocessing.BaseQualityScoreRecalibrator;
@@ -281,6 +263,7 @@ public static void registerPipelineSteps()
         SequencePipelineService.get().registerPipelineStep(new ImmunoGenotypingAnalysis.Provider());
         SequencePipelineService.get().registerPipelineStep(new ViralAnalysis.Provider());
         SequencePipelineService.get().registerPipelineStep(new HaplotypeCallerAnalysis.Provider());
+        SequencePipelineService.get().registerPipelineStep(new DeepVariantAnalysis.Provider());
         SequencePipelineService.get().registerPipelineStep(new SnpCountAnalysis.Provider());
         SequencePipelineService.get().registerPipelineStep(new ExportOverlappingReadsAnalysis.Provider());
         SequencePipelineService.get().registerPipelineStep(new SubreadAnalysis.Provider());

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java

Lines changed: 258 additions & 0 deletions
@@ -0,0 +1,258 @@
+package org.labkey.sequenceanalysis.run.analysis;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.logging.log4j.Logger;
+import org.json.JSONObject;
+import org.labkey.api.pipeline.PipelineJobException;
+import org.labkey.api.sequenceanalysis.model.AnalysisModel;
+import org.labkey.api.sequenceanalysis.model.Readset;
+import org.labkey.api.sequenceanalysis.pipeline.AbstractAnalysisStepProvider;
+import org.labkey.api.sequenceanalysis.pipeline.AnalysisOutputImpl;
+import org.labkey.api.sequenceanalysis.pipeline.AnalysisStep;
+import org.labkey.api.sequenceanalysis.pipeline.CommandLineParam;
+import org.labkey.api.sequenceanalysis.pipeline.PipelineContext;
+import org.labkey.api.sequenceanalysis.pipeline.PipelineOutputTracker;
+import org.labkey.api.sequenceanalysis.pipeline.PipelineStepProvider;
+import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome;
+import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport;
+import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService;
+import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor;
+import org.labkey.api.sequenceanalysis.run.AbstractCommandPipelineStep;
+import org.labkey.api.sequenceanalysis.run.AbstractCommandWrapper;
+import org.labkey.api.util.FileUtil;
+import org.labkey.api.writer.PrintWriters;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * User: bimber
+ * Date: 7/3/2014
+ * Time: 11:29 AM
+ */
+public class DeepVariantAnalysis extends AbstractCommandPipelineStep<DeepVariantAnalysis.DeepVariantWrapper> implements AnalysisStep
+{
+    public DeepVariantAnalysis(PipelineStepProvider<?> provider, PipelineContext ctx)
+    {
+        super(provider, ctx, new DeepVariantAnalysis.DeepVariantWrapper(ctx.getLogger()));
+    }
+
+    public static class Provider extends AbstractAnalysisStepProvider<DeepVariantAnalysis>
+    {
+        public Provider()
+        {
+            super("DeepVariantAnalysis", "DeepVariant", "DeepVariant", "This will run DeepVariant on the selected data to generate a gVCF.", getToolDescriptors(), null, null);
+        }
+
+        @Override
+        public DeepVariantAnalysis create(PipelineContext ctx)
+        {
+            return new DeepVariantAnalysis(this, ctx);
+        }
+    }
+
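+    // UI parameters for this step: the DeepVariant model type (AUTO defers the choice to init(), which infers it
+    // from the readset), plus an optional --haploid_contigs value passed through to run_deepvariant.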
+    public static List<ToolParameterDescriptor> getToolDescriptors()
+    {
+        return Arrays.asList(
+                ToolParameterDescriptor.create("modelType", "Model Type", "", "ldk-simplecombo", new JSONObject(){{
+                    put("storeValues", "AUTO;WGS;WES;PACBIO;ONT_R104;HYBRID_PACBIO_ILLUMINA");
+                    put("multiSelect", false);
+                    put("allowBlank", false);
+                }}, "AUTO"),
+                ToolParameterDescriptor.createCommandLineParam(CommandLineParam.createSwitch("--haploid_contigs"), "haploidContigs", "Haploid Contigs", "", "textfield", new JSONObject(){{
+
+                }}, "X,Y")
+        );
+    }
+
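+    // When "AUTO" is selected, infer the model type from the single cached readset (platform and application)
+    // and cache it on the job so the remote analysis step can retrieve it.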
+    @Override
+    public void init(SequenceAnalysisJobSupport support) throws PipelineJobException
+    {
+        // TODO: handle auto-detection
+        String modelType = getProvider().getParameterByName("modelType").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), String.class);
+        if (modelType == null)
+        {
+            throw new PipelineJobException("Missing model type");
+        }
+
+        if ("AUTO".equals(modelType))
+        {
+            getPipelineCtx().getLogger().info("Inferring model type by readset type:");
+            if (support.getCachedReadsets().size() != 1)
+            {
+                throw new PipelineJobException("Expected a single cached readset, found: " + support.getCachedReadsets().size());
+            }
+
+            Readset rs = support.getCachedReadsets().get(0);
+            if ("ILLUMINA".equals(rs.getPlatform()))
+            {
+                switch (rs.getApplication())
+                {
+                    case "Whole Genome: Deep Coverage":
+                        modelType = "WGS";
+                        break;
+                    case "Whole Genome: Light Coverage":
+                        modelType = "WGS";
+                        break;
+                    case "Whole Exome":
+                        modelType = "WES";
+                        break;
+                    default:
+                        throw new IllegalArgumentException("Unknown application: " + rs.getApplication());
+                }
+            }
+            else if ("PACBIO".equals(rs.getPlatform()))
+            {
+                modelType = "PACBIO";
+            }
+
+            if ("AUTO".equals(modelType))
+            {
+                throw new PipelineJobException("Unable to infer modelType for: " + rs.getName());
+            }
+
+            support.cacheObject("modelType", modelType);
+        }
+    }
+
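+    // Runs DeepVariant on the input BAM against the working reference FASTA, producing a bgzipped gVCF that is
+    // registered as a sequence output (plus its index, if one exists).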
+    @Override
+    public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, ReferenceGenome referenceGenome, File outputDir) throws PipelineJobException
+    {
+        AnalysisOutputImpl output = new AnalysisOutputImpl();
+        output.addInput(inputBam, "Input BAM File");
+
+        File outputFile = new File(outputDir, FileUtil.getBaseName(inputBam) + ".g.vcf.gz");
+        File idxFile = new File(outputDir, FileUtil.getBaseName(inputBam) + ".g.vcf.gz.idx");
+
+        String inferredModelType = getPipelineCtx().getSequenceSupport().getCachedObject("modelType", String.class);
+        String modelType = inferredModelType == null ? getProvider().getParameterByName("modelType").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), String.class) : inferredModelType;
+        if (modelType == null)
+        {
+            throw new PipelineJobException("Missing model type");
+        }
+
+        getWrapper().setOutputDir(outputDir);
+        getWrapper().setWorkingDir(outputDir);
+        getWrapper().execute(inputBam, referenceGenome.getWorkingFastaFile(), outputFile, output, modelType, getClientCommandArgs());
+
+        output.addOutput(outputFile, "gVCF File");
+        output.addSequenceOutput(outputFile, outputFile.getName(), "DeepVariant gVCF File", rs.getReadsetId(), null, referenceGenome.getGenomeId(), null);
+        if (idxFile.exists())
+        {
+            output.addOutput(idxFile, "VCF Index");
+        }
+
+        return output;
+    }
+
+    @Override
+    public Output performAnalysisPerSampleLocal(AnalysisModel model, File inputBam, File referenceFasta, File outDir) throws PipelineJobException
+    {
+        return null;
+    }
+
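+    // Thin wrapper around the google/deepvariant docker image: it stages inputs into the working directory,
+    // then drives run_deepvariant through a pair of generated shell scripts.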
+    public static class DeepVariantWrapper extends AbstractCommandWrapper
+    {
+        public DeepVariantWrapper(Logger logger)
+        {
+            super(logger);
+        }
+
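+        // Copies an input into the working directory (if not already there) so it is visible under the bind mount;
+        // the local copy is tracked as an intermediate file so it can be cleaned up afterwards.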
+        private File ensureLocalCopy(File input, File workingDirectory, PipelineOutputTracker output) throws PipelineJobException
+        {
+            try
+            {
+                if (workingDirectory.equals(input.getParentFile()))
+                {
+                    return input;
+                }
+
+                File local = new File(workingDirectory, input.getName());
+                if (!local.exists())
+                {
+                    getLogger().debug("Copying file locally: " + input.getPath());
+                    FileUtils.copyFile(input, local);
+                }
+
+                output.addIntermediateFile(local);
+
+                return local;
+            }
+            catch (IOException e)
+            {
+                throw new PipelineJobException(e);
+            }
+        }
+
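+        // Stages the BAM, reference FASTA and their indexes into the working directory, builds the run_deepvariant
+        // command, and runs it via docker using two generated shell scripts (docker.sh on the host, dockerRun.sh
+        // inside the container).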
+        public void execute(File inputBam, File refFasta, File outputGvcf, PipelineOutputTracker tracker, String modelType, List<String> extraArgs) throws PipelineJobException
+        {
+            File workDir = outputGvcf.getParentFile();
+
+            File inputBamLocal = ensureLocalCopy(inputBam, workDir, tracker);
+            ensureLocalCopy(new File(inputBam.getPath() + ".bai"), workDir, tracker);
+
+            File refFastaLocal = ensureLocalCopy(refFasta, workDir, tracker);
+            ensureLocalCopy(new File(refFastaLocal.getPath() + ".fai"), workDir, tracker);
+            ensureLocalCopy(new File(FileUtil.getBaseName(refFasta.getPath()) + ".dict"), workDir, tracker);
+
+            File localBashScript = new File(workDir, "docker.sh");
+            File dockerBashScript = new File(workDir, "dockerRun.sh");
+            tracker.addIntermediateFile(localBashScript);
+            tracker.addIntermediateFile(dockerBashScript);
+
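+            // Build the run_deepvariant command executed inside the container; all paths are relative to the
+            // /work bind mount. binVersion is the tag appended to the google/deepvariant image name below.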
+            String binVersion = "";
+            List<String> bashArgs = new ArrayList<>(Arrays.asList("/opt/deepvariant/bin/run_deepvariant"));
+            bashArgs.add("--model_type=" + modelType);  // the model type selected or inferred upstream
+            bashArgs.add("--ref=/work/" + refFastaLocal.getName());
+            bashArgs.add("--reads=/work/" + inputBamLocal.getName());
+            bashArgs.add("--output_gvcf=/work/" + outputGvcf.getName());
+            Integer maxThreads = SequencePipelineService.get().getMaxThreads(getLogger());
+            if (maxThreads != null)
+            {
+                bashArgs.add("--num_shards=" + maxThreads);
+            }
+
+            if (extraArgs != null)
+            {
+                bashArgs.addAll(extraArgs);
+            }
+
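+            // docker.sh runs on the host: it pulls the image and launches the container with the working directory
+            // mounted at /work; dockerRun.sh is then invoked inside that container to run DeepVariant itself.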
+            try (PrintWriter writer = PrintWriters.getPrintWriter(localBashScript); PrintWriter dockerWriter = PrintWriters.getPrintWriter(dockerBashScript))
+            {
+                writer.println("#!/bin/bash");
+                writer.println("set -x");
+                writer.println("WD=`pwd`");
+                writer.println("HOME=`echo ~/`");
+                writer.println("DOCKER='" + SequencePipelineService.get().getDockerCommand() + "'");
+                writer.println("sudo $DOCKER pull google/deepvariant:" + binVersion);
+                writer.println("sudo $DOCKER run --rm=true \\");
+                writer.println("\t-v \"${WD}:/work\" \\");
+                writer.println("\t-v \"${HOME}:/homeDir\" \\");
+                writer.println("\t-u $UID \\");
+                writer.println("\t-e TMPDIR=/work/tmpDir \\");
+                writer.println("\t-e USERID=$UID \\");
+                writer.println("\t-w /work \\");
+                writer.println("\tgoogle/deepvariant:" + binVersion + " \\");
+                writer.println("\t/work/" + dockerBashScript.getName());
+                writer.println("EXIT_CODE=$?");
+                writer.println("echo 'Docker run exit code: '$EXIT_CODE");
+                writer.println("exit $EXIT_CODE");
+
+                dockerWriter.println("#!/bin/bash");
+                dockerWriter.println("set -x");
+                dockerWriter.println(StringUtils.join(bashArgs, " "));
+                dockerWriter.println("EXIT_CODE=$?");
+                dockerWriter.println("echo 'Exit code: '$EXIT_CODE");
+                dockerWriter.println("exit $EXIT_CODE");
+            }
+            catch (IOException e)
+            {
+                throw new PipelineJobException(e);
+            }
+
+            // Run the host-side script, which pulls the image and launches the container
+            execute(Arrays.asList("/bin/bash", localBashScript.getPath()));
+        }
+    }
+}

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/HaplotypeCallerAnalysis.java

Lines changed: 2 additions & 2 deletions
@@ -28,7 +28,7 @@
  */
 public class HaplotypeCallerAnalysis extends AbstractCommandPipelineStep<HaplotypeCallerWrapper> implements AnalysisStep
 {
-    public HaplotypeCallerAnalysis(PipelineStepProvider provider, PipelineContext ctx)
+    public HaplotypeCallerAnalysis(PipelineStepProvider<?> provider, PipelineContext ctx)
     {
         super(provider, ctx, new HaplotypeCallerWrapper(ctx.getLogger()));
     }
@@ -51,7 +51,7 @@ public static List<ToolParameterDescriptor> getToolDescriptors()
     {
         return Arrays.asList(
                 ToolParameterDescriptor.createCommandLineParam(CommandLineParam.createSwitch("--dont-use-soft-clipped-bases"), "dontUseSoftClippedBases", "Don't Use Soft Clipped Bases", "If specified, we will not analyze soft clipped bases in the reads", "checkbox", null, false),
-                ToolParameterDescriptor.createCommandLineParam(CommandLineParam.createSwitch("max-alternate-alleles"), "maxAlternateAlleles", "Max Alternate Alleles", "Passed to --max-alternate-alleles", "ldk-integerfield", new JSONObject(){{
+                ToolParameterDescriptor.createCommandLineParam(CommandLineParam.createSwitch("--max-alternate-alleles"), "maxAlternateAlleles", "Max Alternate Alleles", "Passed to --max-alternate-alleles", "ldk-integerfield", new JSONObject(){{
                     put("minValue", 0);
                 }}, 6)
         );
