Skip to content

Commit fce1f1f

Browse files
committed
Initial support for paraGRAPH
1 parent 043aa41 commit fce1f1f

File tree

2 files changed

+174
-0
lines changed

2 files changed

+174
-0
lines changed

SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@
7777
import org.labkey.sequenceanalysis.run.alignment.BowtieWrapper;
7878
import org.labkey.sequenceanalysis.run.alignment.GSnapWrapper;
7979
import org.labkey.sequenceanalysis.run.alignment.MosaikWrapper;
80+
import org.labkey.sequenceanalysis.run.alignment.ParagraphStep;
8081
import org.labkey.sequenceanalysis.run.alignment.Pbmm2Wrapper;
8182
import org.labkey.sequenceanalysis.run.alignment.StarWrapper;
8283
import org.labkey.sequenceanalysis.run.alignment.VulcanWrapper;
@@ -336,6 +337,7 @@ public static void registerPipelineSteps()
336337
SequenceAnalysisService.get().registerFileHandler(new PbsvJointCallingHandler());
337338
SequenceAnalysisService.get().registerFileHandler(new DeepVariantHandler());
338339
SequenceAnalysisService.get().registerFileHandler(new GLNexusHandler());
340+
SequenceAnalysisService.get().registerFileHandler(new ParagraphStep());
339341

340342
SequenceAnalysisService.get().registerReadsetHandler(new MultiQCHandler());
341343
SequenceAnalysisService.get().registerReadsetHandler(new RestoreSraDataHandler());
Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,172 @@
1+
package org.labkey.sequenceanalysis.run.alignment;
2+
3+
import org.json.JSONObject;
4+
import org.labkey.api.module.ModuleLoader;
5+
import org.labkey.api.pipeline.PipelineJob;
6+
import org.labkey.api.pipeline.PipelineJobException;
7+
import org.labkey.api.pipeline.RecordedAction;
8+
import org.labkey.api.sequenceanalysis.SequenceAnalysisService;
9+
import org.labkey.api.sequenceanalysis.SequenceOutputFile;
10+
import org.labkey.api.sequenceanalysis.pipeline.AbstractParameterizedOutputHandler;
11+
import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport;
12+
import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler;
13+
import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService;
14+
import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor;
15+
import org.labkey.api.sequenceanalysis.run.SimpleScriptWrapper;
16+
import org.labkey.api.util.FileUtil;
17+
import org.labkey.sequenceanalysis.SequenceAnalysisModule;
18+
import org.labkey.sequenceanalysis.run.variant.DepthOfCoverageHandler;
19+
import org.labkey.sequenceanalysis.util.SequenceUtil;
20+
21+
import java.io.File;
22+
import java.io.IOException;
23+
import java.util.ArrayList;
24+
import java.util.Arrays;
25+
import java.util.List;
26+
27+
public class ParagraphStep extends AbstractParameterizedOutputHandler<SequenceOutputHandler.SequenceOutputProcessor>
28+
{
29+
public ParagraphStep()
30+
{
31+
super(ModuleLoader.getInstance().getModule(SequenceAnalysisModule.class), "Paragraph SV Genotyping", "This will run paraGRAPH on one or more BAM files to genotype SVs", null, Arrays.asList(
32+
ToolParameterDescriptor.createExpDataParam("svVCF", "Input VCF", "This is the DataId of the VCF containing the SVs to genotype", "ldk-expdatafield", new JSONObject()
33+
{{
34+
put("allowBlank", false);
35+
}}, null)
36+
));
37+
}
38+
39+
@Override
40+
public boolean canProcess(SequenceOutputFile o)
41+
{
42+
return o.getFile() != null && o.getFile().exists() && SequenceUtil.FILETYPE.bamOrCram.getFileType().isType(o.getFile());
43+
}
44+
45+
@Override
46+
public boolean doRunRemote()
47+
{
48+
return true;
49+
}
50+
51+
@Override
52+
public boolean doRunLocal()
53+
{
54+
return false;
55+
}
56+
57+
@Override
58+
public SequenceOutputProcessor getProcessor()
59+
{
60+
return new DepthOfCoverageHandler.Processor();
61+
}
62+
63+
public static class Processor implements SequenceOutputProcessor
64+
{
65+
@Override
66+
public void processFilesOnWebserver(PipelineJob job, SequenceAnalysisJobSupport support, List<SequenceOutputFile> inputFiles, JSONObject params, File outputDir, List<RecordedAction> actions, List<SequenceOutputFile> outputsToCreate) throws UnsupportedOperationException, PipelineJobException
67+
{
68+
69+
}
70+
71+
@Override
72+
public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext ctx) throws UnsupportedOperationException, PipelineJobException
73+
{
74+
File inputVCF = ctx.getSequenceSupport().getCachedData(ctx.getParams().getInt("svVCF"));
75+
if (!inputVCF.exists())
76+
{
77+
throw new PipelineJobException("Unable to find file: " + inputVCF.getPath());
78+
}
79+
80+
for (SequenceOutputFile so : inputFiles)
81+
{
82+
List<String> depthArgs = new ArrayList<>();
83+
depthArgs.add("idxdepth");
84+
depthArgs.add("-d");
85+
depthArgs.add(so.getFile().getPath());
86+
87+
File coverageFile = new File(ctx.getWorkingDirectory(), "coverage.txt");
88+
depthArgs.add("-o");
89+
depthArgs.add(coverageFile.getPath());
90+
91+
depthArgs.add("-r");
92+
depthArgs.add(ctx.getSequenceSupport().getCachedGenome(so.getLibrary_id()).getWorkingFastaFile().getPath());
93+
94+
new SimpleScriptWrapper(ctx.getLogger()).execute(depthArgs);
95+
96+
if (!coverageFile.exists())
97+
{
98+
throw new PipelineJobException("Missing file: " + coverageFile.getPath());
99+
}
100+
101+
// Should produce a simple text file:
102+
// id path depth read length
103+
// TNPRC-IB18 ../IB18.cram 29.77 150
104+
105+
List<String> paragraphArgs = new ArrayList<>();
106+
paragraphArgs.add("multigrmpy.py");
107+
paragraphArgs.add("--verbose");
108+
109+
File paragraphOut = new File(ctx.getWorkingDirectory(), FileUtil.getBaseName(so.getFile()) + ".paragraph.txt");
110+
paragraphArgs.add("-o");
111+
paragraphArgs.add(paragraphOut.getPath());
112+
113+
int svVcfId = ctx.getParams().optInt("svVCF");
114+
if (svVcfId == 0)
115+
{
116+
throw new PipelineJobException("Missing svVCF ID");
117+
}
118+
119+
File svVcf = ctx.getSequenceSupport().getCachedData(svVcfId);
120+
if (svVcf == null)
121+
{
122+
throw new PipelineJobException("File not found for ID: " + svVcfId);
123+
}
124+
else if (!svVcf.exists())
125+
{
126+
throw new PipelineJobException("Missing file: " + svVcf.getPath());
127+
}
128+
129+
paragraphArgs.add("-i");
130+
paragraphArgs.add(svVcf.getPath());
131+
132+
paragraphArgs.add("-m");
133+
paragraphArgs.add(coverageFile.getPath());
134+
135+
paragraphArgs.add("-r");
136+
paragraphArgs.add(ctx.getSequenceSupport().getCachedGenome(so.getLibrary_id()).getWorkingFastaFile().getPath());
137+
138+
paragraphArgs.add("--scratch-dir");
139+
paragraphArgs.add(SequencePipelineService.get().getJavaTempDir());
140+
141+
Integer threads = SequencePipelineService.get().getMaxThreads(ctx.getLogger());
142+
if (threads != null)
143+
{
144+
paragraphArgs.add("--threads");
145+
paragraphArgs.add(threads.toString());
146+
}
147+
148+
paragraphArgs.add("--logfile");
149+
paragraphArgs.add(new File(ctx.getWorkingDirectory(), "paragraph.log").getPath());
150+
151+
new SimpleScriptWrapper(ctx.getLogger()).execute(paragraphArgs);
152+
153+
File genotypes = new File(ctx.getWorkingDirectory(), "genotypes.vcf.gz");
154+
if (!genotypes.exists())
155+
{
156+
throw new PipelineJobException("Missing file: " + genotypes.getPath());
157+
}
158+
159+
try
160+
{
161+
SequenceAnalysisService.get().ensureVcfIndex(genotypes, ctx.getLogger());
162+
}
163+
catch (IOException e)
164+
{
165+
throw new PipelineJobException(e);
166+
}
167+
168+
ctx.getFileManager().addSequenceOutput(genotypes, "paraGRAPH Genotypes: " + so.getName(), "paraGRAPH Genoypes", so.getReadset(), null, so.getLibrary_id(), "Input VCF: " + svVcf.getName() + " (" + svVcfId + ")");
169+
}
170+
}
171+
}
172+
}

0 commit comments

Comments
 (0)