Skip to content

Commit 881d11d

Browse files
committed
Switch ParagraphStep to use docker
1 parent 82ecaa8 commit 881d11d

File tree

2 files changed

+123
-9
lines changed

2 files changed

+123
-9
lines changed
Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
package org.labkey.api.sequenceanalysis.run;
2+
3+
import org.apache.commons.io.FileUtils;
4+
import org.apache.commons.lang3.StringUtils;
5+
import org.apache.logging.log4j.Logger;
6+
import org.labkey.api.pipeline.PipelineJobException;
7+
import org.labkey.api.sequenceanalysis.pipeline.PipelineOutputTracker;
8+
import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService;
9+
import org.labkey.api.writer.PrintWriters;
10+
11+
import java.io.File;
12+
import java.io.IOException;
13+
import java.io.PrintWriter;
14+
import java.util.List;
15+
16+
public class DockerWrapper extends AbstractCommandWrapper
17+
{
18+
private final String _containerName;
19+
private File _tmpDir = null;
20+
21+
public DockerWrapper(String containerName, Logger log)
22+
{
23+
super(log);
24+
_containerName = containerName;
25+
}
26+
27+
public void setTmpDir(File tmpDir)
28+
{
29+
_tmpDir = tmpDir;
30+
}
31+
32+
public void execute(List<String> containerArgs, File workDir, PipelineOutputTracker tracker) throws PipelineJobException
33+
{
34+
File localBashScript = new File(workDir, "docker.sh");
35+
File dockerBashScript = new File(workDir, "dockerRun.sh");
36+
tracker.addIntermediateFile(localBashScript);
37+
tracker.addIntermediateFile(dockerBashScript);
38+
39+
setWorkingDir(workDir);
40+
try (PrintWriter writer = PrintWriters.getPrintWriter(localBashScript); PrintWriter dockerWriter = PrintWriters.getPrintWriter(dockerBashScript))
41+
{
42+
writer.println("#!/bin/bash");
43+
writer.println("set -x");
44+
writer.println("WD=`pwd`");
45+
writer.println("HOME=`echo ~/`");
46+
writer.println("DOCKER='" + SequencePipelineService.get().getDockerCommand() + "'");
47+
writer.println("sudo $DOCKER pull " + _containerName);
48+
writer.println("sudo $DOCKER run --rm=true \\");
49+
writer.println("\t-v \"${WD}:/work\" \\");
50+
writer.println("\t-v \"${HOME}:/homeDir\" \\");
51+
if (_tmpDir != null)
52+
{
53+
writer.println("\t-v \"" + _tmpDir.getPath() + ":/tmp\" \\");
54+
}
55+
writer.println("\t--entrypoint /bin/bash \\");
56+
writer.println("\t-w /work \\");
57+
Integer maxRam = SequencePipelineService.get().getMaxRam();
58+
if (maxRam != null)
59+
{
60+
writer.println("\t-e SEQUENCEANALYSIS_MAX_RAM=" + maxRam + " \\");
61+
writer.println("\t--memory='" + maxRam + "g' \\");
62+
}
63+
writer.println("\t" + _containerName + " \\");
64+
writer.println("\t/work/" + dockerBashScript.getName());
65+
writer.println("EXIT_CODE=$?");
66+
writer.println("echo 'Docker run exit code: '$EXIT_CODE");
67+
writer.println("exit $EXIT_CODE");
68+
69+
dockerWriter.println("#!/bin/bash");
70+
dockerWriter.println("set -x");
71+
dockerWriter.println(StringUtils.join(containerArgs, " "));
72+
dockerWriter.println("EXIT_CODE=$?");
73+
dockerWriter.println("echo 'Exit code: '$?");
74+
dockerWriter.println("exit $EXIT_CODE");
75+
}
76+
catch (IOException e)
77+
{
78+
throw new PipelineJobException(e);
79+
}
80+
}
81+
82+
public File ensureLocalCopy(File input, File workingDirectory, PipelineOutputTracker output) throws PipelineJobException
83+
{
84+
try
85+
{
86+
if (workingDirectory.equals(input.getParentFile()))
87+
{
88+
return input;
89+
}
90+
91+
File local = new File(workingDirectory, input.getName());
92+
if (!local.exists())
93+
{
94+
getLogger().debug("Copying file locally: " + input.getPath());
95+
FileUtils.copyFile(input, local);
96+
}
97+
98+
output.addIntermediateFile(local);
99+
100+
return local;
101+
}
102+
catch (IOException e)
103+
{
104+
throw new PipelineJobException(e);
105+
}
106+
}
107+
}

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler;
1717
import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService;
1818
import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor;
19-
import org.labkey.api.sequenceanalysis.run.AbstractCommandWrapper;
19+
import org.labkey.api.sequenceanalysis.run.DockerWrapper;
2020
import org.labkey.api.sequenceanalysis.run.SimpleScriptWrapper;
2121
import org.labkey.api.util.FileUtil;
2222
import org.labkey.api.writer.PrintWriters;
@@ -153,25 +153,32 @@ else if (header.getReadGroups().size() > 1)
153153
}
154154
ctx.getFileManager().addIntermediateFile(coverageFile);
155155

156+
DockerWrapper dockerWrapper = new DockerWrapper("ghcr.io/bimberlabinternal/paragraph:latest", ctx.getLogger());
156157
List<String> paragraphArgs = new ArrayList<>();
157-
paragraphArgs.add(AbstractCommandWrapper.resolveFileInPath("multigrmpy.py", null, true).getPath());
158+
paragraphArgs.add("/opt/paragraph/bin/multigrmpy.py");
159+
158160
paragraphArgs.add("--verbose");
159161

160162
File paragraphOut = new File(ctx.getWorkingDirectory(), FileUtil.getBaseName(so.getFile()) + ".paragraph.txt");
161163
paragraphArgs.add("-o");
162-
paragraphArgs.add(paragraphOut.getPath());
164+
paragraphArgs.add("/work/" + paragraphOut.getName());
163165

164166
paragraphArgs.add("-i");
165-
paragraphArgs.add(svVcf.getPath());
167+
dockerWrapper.ensureLocalCopy(svVcf, ctx.getWorkingDirectory(), ctx.getFileManager());
168+
paragraphArgs.add("/work/" + svVcf.getName());
166169

167170
paragraphArgs.add("-m");
168-
paragraphArgs.add(coverageFile.getPath());
171+
paragraphArgs.add("/work/" + coverageFile.getName());
169172

170173
paragraphArgs.add("-r");
171-
paragraphArgs.add(ctx.getSequenceSupport().getCachedGenome(so.getLibrary_id()).getWorkingFastaFile().getPath());
174+
File genomeFasta = ctx.getSequenceSupport().getCachedGenome(so.getLibrary_id()).getWorkingFastaFile();
175+
dockerWrapper.ensureLocalCopy(genomeFasta, ctx.getWorkingDirectory(), ctx.getFileManager());
176+
dockerWrapper.ensureLocalCopy(new File(genomeFasta.getPath() + ".fai"), ctx.getWorkingDirectory(), ctx.getFileManager());
177+
paragraphArgs.add("/work/" + genomeFasta.getName());
172178

173179
paragraphArgs.add("--scratch-dir");
174-
paragraphArgs.add(SequencePipelineService.get().getJavaTempDir());
180+
paragraphArgs.add("/tmp");
181+
dockerWrapper.setTmpDir(new File(SequencePipelineService.get().getJavaTempDir()));
175182

176183
if (threads != null)
177184
{
@@ -180,9 +187,9 @@ else if (header.getReadGroups().size() > 1)
180187
}
181188

182189
paragraphArgs.add("--logfile");
183-
paragraphArgs.add(new File(ctx.getWorkingDirectory(), "paragraph.log").getPath());
190+
paragraphArgs.add(new File("/work/paragraph.log").getPath());
184191

185-
new SimpleScriptWrapper(ctx.getLogger()).execute(paragraphArgs);
192+
dockerWrapper.execute(paragraphArgs, ctx.getWorkingDirectory());
186193

187194
File genotypes = new File(ctx.getWorkingDirectory(), "genotypes.vcf.gz");
188195
if (!genotypes.exists())

0 commit comments

Comments
 (0)