Skip to content

Commit aabe6ff

Browse files
authored
Merge pull request #285 from BimberLab/24.3_fb_merge
Merge discvr-23.11 to discvr-24.3
2 parents 4cf4f57 + 5a3b5e3 commit aabe6ff

File tree

3 files changed

+139
-17
lines changed

3 files changed

+139
-17
lines changed
Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
package org.labkey.api.sequenceanalysis.run;
2+
3+
import org.apache.commons.io.FileUtils;
4+
import org.apache.commons.lang3.StringUtils;
5+
import org.apache.logging.log4j.Logger;
6+
import org.labkey.api.pipeline.PipelineJobException;
7+
import org.labkey.api.sequenceanalysis.pipeline.PipelineOutputTracker;
8+
import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService;
9+
import org.labkey.api.writer.PrintWriters;
10+
11+
import java.io.File;
12+
import java.io.IOException;
13+
import java.io.PrintWriter;
14+
import java.util.Arrays;
15+
import java.util.List;
16+
17+
public class DockerWrapper extends AbstractCommandWrapper
18+
{
19+
private final String _containerName;
20+
private File _tmpDir = null;
21+
22+
public DockerWrapper(String containerName, Logger log)
23+
{
24+
super(log);
25+
_containerName = containerName;
26+
}
27+
28+
public void setTmpDir(File tmpDir)
29+
{
30+
_tmpDir = tmpDir;
31+
}
32+
33+
public void executeWithDocker(List<String> containerArgs, File workDir, PipelineOutputTracker tracker) throws PipelineJobException
34+
{
35+
File localBashScript = new File(workDir, "docker.sh");
36+
File dockerBashScript = new File(workDir, "dockerRun.sh");
37+
tracker.addIntermediateFile(localBashScript);
38+
tracker.addIntermediateFile(dockerBashScript);
39+
40+
setWorkingDir(workDir);
41+
try (PrintWriter writer = PrintWriters.getPrintWriter(localBashScript); PrintWriter dockerWriter = PrintWriters.getPrintWriter(dockerBashScript))
42+
{
43+
writer.println("#!/bin/bash");
44+
writer.println("set -x");
45+
writer.println("WD=`pwd`");
46+
writer.println("HOME=`echo ~/`");
47+
writer.println("DOCKER='" + SequencePipelineService.get().getDockerCommand() + "'");
48+
writer.println("sudo $DOCKER pull " + _containerName);
49+
writer.println("sudo $DOCKER run --rm=true \\");
50+
writer.println("\t-v \"${WD}:/work\" \\");
51+
writer.println("\t-v \"${HOME}:/homeDir\" \\");
52+
if (_tmpDir != null)
53+
{
54+
writer.println("\t-v \"" + _tmpDir.getPath() + ":/tmp\" \\");
55+
}
56+
writer.println("\t--entrypoint /bin/bash \\");
57+
writer.println("\t-w /work \\");
58+
Integer maxRam = SequencePipelineService.get().getMaxRam();
59+
if (maxRam != null)
60+
{
61+
writer.println("\t-e SEQUENCEANALYSIS_MAX_RAM=" + maxRam + " \\");
62+
writer.println("\t--memory='" + maxRam + "g' \\");
63+
}
64+
writer.println("\t" + _containerName + " \\");
65+
writer.println("\t/work/" + dockerBashScript.getName());
66+
writer.println("EXIT_CODE=$?");
67+
writer.println("echo 'Docker run exit code: '$EXIT_CODE");
68+
writer.println("exit $EXIT_CODE");
69+
70+
dockerWriter.println("#!/bin/bash");
71+
dockerWriter.println("set -x");
72+
dockerWriter.println(StringUtils.join(containerArgs, " "));
73+
dockerWriter.println("EXIT_CODE=$?");
74+
dockerWriter.println("echo 'Exit code: '$?");
75+
dockerWriter.println("exit $EXIT_CODE");
76+
}
77+
catch (IOException e)
78+
{
79+
throw new PipelineJobException(e);
80+
}
81+
82+
execute(Arrays.asList("/bin/bash", localBashScript.getPath()));
83+
}
84+
85+
public File ensureLocalCopy(File input, File workingDirectory, PipelineOutputTracker output) throws PipelineJobException
86+
{
87+
try
88+
{
89+
if (workingDirectory.equals(input.getParentFile()))
90+
{
91+
return input;
92+
}
93+
94+
File local = new File(workingDirectory, input.getName());
95+
if (!local.exists())
96+
{
97+
getLogger().debug("Copying file locally: " + input.getPath());
98+
FileUtils.copyFile(input, local);
99+
}
100+
101+
output.addIntermediateFile(local);
102+
103+
return local;
104+
}
105+
catch (IOException e)
106+
{
107+
throw new PipelineJobException(e);
108+
}
109+
}
110+
}

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java

Lines changed: 28 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler;
1717
import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService;
1818
import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor;
19-
import org.labkey.api.sequenceanalysis.run.AbstractCommandWrapper;
19+
import org.labkey.api.sequenceanalysis.run.DockerWrapper;
2020
import org.labkey.api.sequenceanalysis.run.SimpleScriptWrapper;
2121
import org.labkey.api.util.FileUtil;
2222
import org.labkey.api.writer.PrintWriters;
@@ -127,6 +127,7 @@ else if (!svVcf.exists())
127127
// id path depth read length
128128
// TNPRC-IB18 ../IB18.cram 29.77 150
129129
File coverageFile = new File(ctx.getWorkingDirectory(), "coverage.txt");
130+
String rgId = null;
130131
try (PrintWriter writer = PrintWriters.getPrintWriter(coverageFile); SamReader reader = SamReaderFactory.makeDefault().open(so.getFile()))
131132
{
132133
SAMFileHeader header = reader.getFileHeader();
@@ -139,52 +140,58 @@ else if (header.getReadGroups().size() > 1)
139140
throw new PipelineJobException("More than one read group found in BAM");
140141
}
141142

142-
String rgId = header.getReadGroups().get(0).getSample();
143+
rgId = header.getReadGroups().get(0).getSample();
143144

144145
JSONObject json = new JSONObject(FileUtils.readFileToString(coverageJson, Charset.defaultCharset()));
145146
writer.println("id\tpath\tdepth\tread length");
146147
double depth = json.getJSONObject("autosome").getDouble("depth");
147148
double readLength = json.getInt("read_length");
148-
writer.println(rgId + "\t" + so.getFile().getPath() + "\t" + depth + "\t" + readLength);
149+
writer.println(rgId + "\t" + "/work/" + so.getFile().getName() + "\t" + depth + "\t" + readLength);
149150
}
150151
catch (IOException e)
151152
{
152153
throw new PipelineJobException(e);
153154
}
154155
ctx.getFileManager().addIntermediateFile(coverageFile);
155156

157+
DockerWrapper dockerWrapper = new DockerWrapper("ghcr.io/bimberlabinternal/paragraph:latest", ctx.getLogger());
156158
List<String> paragraphArgs = new ArrayList<>();
157-
paragraphArgs.add(AbstractCommandWrapper.resolveFileInPath("multigrmpy.py", null, true).getPath());
158-
paragraphArgs.add("--verbose");
159+
paragraphArgs.add("/opt/paragraph/bin/multigrmpy.py");
159160

160-
File paragraphOut = new File(ctx.getWorkingDirectory(), FileUtil.getBaseName(so.getFile()) + ".paragraph.txt");
161+
dockerWrapper.ensureLocalCopy(so.getFile(), ctx.getWorkingDirectory(), ctx.getFileManager());
162+
dockerWrapper.ensureLocalCopy(SequenceAnalysisService.get().getExpectedBamOrCramIndex(so.getFile()), ctx.getWorkingDirectory(), ctx.getFileManager());
163+
164+
File paragraphOutDir = new File(ctx.getWorkingDirectory(), FileUtil.getBaseName(so.getFile()));
161165
paragraphArgs.add("-o");
162-
paragraphArgs.add(paragraphOut.getPath());
166+
paragraphArgs.add("/work/" + paragraphOutDir.getName());
163167

164168
paragraphArgs.add("-i");
165-
paragraphArgs.add(svVcf.getPath());
169+
dockerWrapper.ensureLocalCopy(svVcf, ctx.getWorkingDirectory(), ctx.getFileManager());
170+
dockerWrapper.ensureLocalCopy(new File(svVcf.getPath() + ".tbi"), ctx.getWorkingDirectory(), ctx.getFileManager());
171+
paragraphArgs.add("/work/" + svVcf.getName());
166172

167173
paragraphArgs.add("-m");
168-
paragraphArgs.add(coverageFile.getPath());
174+
paragraphArgs.add("/work/" + coverageFile.getName());
169175

170176
paragraphArgs.add("-r");
171-
paragraphArgs.add(ctx.getSequenceSupport().getCachedGenome(so.getLibrary_id()).getWorkingFastaFile().getPath());
177+
File genomeFasta = ctx.getSequenceSupport().getCachedGenome(so.getLibrary_id()).getWorkingFastaFile();
178+
dockerWrapper.ensureLocalCopy(genomeFasta, ctx.getWorkingDirectory(), ctx.getFileManager());
179+
dockerWrapper.ensureLocalCopy(new File(genomeFasta.getPath() + ".fai"), ctx.getWorkingDirectory(), ctx.getFileManager());
180+
paragraphArgs.add("/work/" + genomeFasta.getName());
172181

173182
paragraphArgs.add("--scratch-dir");
174-
paragraphArgs.add(SequencePipelineService.get().getJavaTempDir());
183+
paragraphArgs.add("/tmp");
184+
dockerWrapper.setTmpDir(new File(SequencePipelineService.get().getJavaTempDir()));
175185

176186
if (threads != null)
177187
{
178188
paragraphArgs.add("--threads");
179189
paragraphArgs.add(threads.toString());
180190
}
181191

182-
paragraphArgs.add("--logfile");
183-
paragraphArgs.add(new File(ctx.getWorkingDirectory(), "paragraph.log").getPath());
184-
185-
new SimpleScriptWrapper(ctx.getLogger()).execute(paragraphArgs);
192+
dockerWrapper.executeWithDocker(paragraphArgs, ctx.getWorkingDirectory(), ctx.getFileManager());
186193

187-
File genotypes = new File(ctx.getWorkingDirectory(), "genotypes.vcf.gz");
194+
File genotypes = new File(paragraphOutDir, "genotypes.vcf.gz");
188195
if (!genotypes.exists())
189196
{
190197
throw new PipelineJobException("Missing file: " + genotypes.getPath());
@@ -200,6 +207,11 @@ else if (header.getReadGroups().size() > 1)
200207
}
201208

202209
ctx.getFileManager().addSequenceOutput(genotypes, "paraGRAPH Genotypes: " + so.getName(), "paraGRAPH Genoypes", so.getReadset(), null, so.getLibrary_id(), "Input VCF: " + svVcf.getName() + " (" + svVcfId + ")");
210+
211+
ctx.getFileManager().addIntermediateFile(new File(paragraphOutDir, "variants.json.gz"));
212+
ctx.getFileManager().addIntermediateFile(new File(paragraphOutDir, "variants.vcf.gz"));
213+
ctx.getFileManager().addIntermediateFile(new File(paragraphOutDir, "genotypes.json.gz"));
214+
ctx.getFileManager().addIntermediateFile(new File(paragraphOutDir, "grmpy.log"));
203215
}
204216
}
205217
}

jbrowse/src/org/labkey/jbrowse/JBrowseLuceneSearch.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,7 @@ public JSONObject doSearch(User u, String searchString, final int pageSize, fina
194194
pointsConfigMap.put(field, doublePointsConfig);
195195
}
196196
case Integer -> {
197-
numericQueryParserFields.put(field, SortField.Type.INT);
197+
numericQueryParserFields.put(field, SortField.Type.LONG);
198198
pointsConfigMap.put(field, intPointsConfig);
199199
}
200200
}

0 commit comments

Comments
 (0)