Skip to content

Commit 7d11ce9

Browse files
committed
Support sawfish
1 parent 7382511 commit 7d11ce9

File tree

4 files changed

+291
-0
lines changed

4 files changed

+291
-0
lines changed

SequenceAnalysis/pipeline_code/extra_tools_install.sh

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -319,3 +319,17 @@ then
319319
else
320320
echo "Already installed"
321321
fi
322+
323+
# Install the PacBio sawfish structural-variant caller from its pre-built
# release tarball, unless it is already present (or a reinstall is forced).
if [[ ! -e ${LKTOOLS_DIR}/sawfish || ! -z $FORCE_REINSTALL ]];
then
    echo "Cleaning up previous installs"
    rm -Rf "$LKTOOLS_DIR"/sawfish*

    SAWFISH_VERSION=v2.0.0
    SAWFISH_DIST=sawfish-${SAWFISH_VERSION}-x86_64-unknown-linux-gnu

    # Remove leftovers from an earlier, partial run in the working directory;
    # otherwise wget would save the download under a ".1" suffix and tar would
    # unpack the stale archive.
    rm -Rf "${SAWFISH_DIST}" "${SAWFISH_DIST}.tar.gz"

    wget "https://github.com/PacificBiosciences/sawfish/releases/download/${SAWFISH_VERSION}/${SAWFISH_DIST}.tar.gz"
    tar -xzf "${SAWFISH_DIST}.tar.gz"
    rm -f "${SAWFISH_DIST}.tar.gz"

    mv "${SAWFISH_DIST}" "$LKTOOLS_DIR/"
    # The link target must use the full name of the directory moved above;
    # linking to a shortened "sawfish-<version>" path leaves a dangling symlink,
    # which also defeats the "-e" check and forces a reinstall on every run.
    ln -s "$LKTOOLS_DIR/${SAWFISH_DIST}/bin/sawfish" "$LKTOOLS_DIR/"
else
    echo "Already installed"
fi

SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,8 @@
123123
import org.labkey.sequenceanalysis.run.analysis.PbsvAnalysis;
124124
import org.labkey.sequenceanalysis.run.analysis.PbsvJointCallingHandler;
125125
import org.labkey.sequenceanalysis.run.analysis.PindelAnalysis;
126+
import org.labkey.sequenceanalysis.run.analysis.SawfishAnalysis;
127+
import org.labkey.sequenceanalysis.run.analysis.SawfishJointCallingHandler;
126128
import org.labkey.sequenceanalysis.run.analysis.SequenceBasedTypingAnalysis;
127129
import org.labkey.sequenceanalysis.run.analysis.SnpCountAnalysis;
128130
import org.labkey.sequenceanalysis.run.analysis.SubreadAnalysis;
@@ -342,6 +344,7 @@ public static void registerPipelineSteps()
342344
SequencePipelineService.get().registerPipelineStep(new PindelAnalysis.Provider());
343345
SequencePipelineService.get().registerPipelineStep(new PbsvAnalysis.Provider());
344346
SequencePipelineService.get().registerPipelineStep(new GenrichStep.Provider());
347+
SequencePipelineService.get().registerPipelineStep(new SawfishAnalysis.Provider());
345348

346349
SequencePipelineService.get().registerPipelineStep(new PARalyzerAnalysis.Provider());
347350
SequencePipelineService.get().registerPipelineStep(new RnaSeQCStep.Provider());
@@ -400,6 +403,7 @@ public static void registerPipelineSteps()
400403
SequenceAnalysisService.get().registerFileHandler(new NextCladeHandler());
401404
SequenceAnalysisService.get().registerFileHandler(new ConvertToCramHandler());
402405
SequenceAnalysisService.get().registerFileHandler(new PbsvJointCallingHandler());
406+
SequenceAnalysisService.get().registerFileHandler(new SawfishJointCallingHandler());
403407
SequenceAnalysisService.get().registerFileHandler(new DeepVariantHandler());
404408
SequenceAnalysisService.get().registerFileHandler(new GLNexusHandler());
405409
SequenceAnalysisService.get().registerFileHandler(new ParagraphStep());
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
package org.labkey.sequenceanalysis.run.analysis;
2+
3+
import org.labkey.api.pipeline.PipelineJobException;
4+
import org.labkey.api.sequenceanalysis.model.AnalysisModel;
5+
import org.labkey.api.sequenceanalysis.model.Readset;
6+
import org.labkey.api.sequenceanalysis.pipeline.AbstractAnalysisStepProvider;
7+
import org.labkey.api.sequenceanalysis.pipeline.AbstractPipelineStep;
8+
import org.labkey.api.sequenceanalysis.pipeline.AnalysisOutputImpl;
9+
import org.labkey.api.sequenceanalysis.pipeline.AnalysisStep;
10+
import org.labkey.api.sequenceanalysis.pipeline.PipelineContext;
11+
import org.labkey.api.sequenceanalysis.pipeline.PipelineStepProvider;
12+
import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome;
13+
import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService;
14+
import org.labkey.api.sequenceanalysis.run.SimpleScriptWrapper;
15+
16+
import java.io.File;
17+
import java.util.ArrayList;
18+
import java.util.List;
19+
20+
public class SawfishAnalysis extends AbstractPipelineStep implements AnalysisStep
21+
{
22+
public SawfishAnalysis(PipelineStepProvider<?> provider, PipelineContext ctx)
23+
{
24+
super(provider, ctx);
25+
}
26+
27+
public static class Provider extends AbstractAnalysisStepProvider<SawfishAnalysis>
28+
{
29+
public Provider()
30+
{
31+
super("sawfish", "Sawfish Analysis", null, "This will run sawfish SV dicvoery and calling on the selected BAMs", List.of(), null, null);
32+
}
33+
34+
35+
@Override
36+
public SawfishAnalysis create(PipelineContext ctx)
37+
{
38+
return new SawfishAnalysis(this, ctx);
39+
}
40+
}
41+
42+
@Override
43+
public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, ReferenceGenome referenceGenome, File outputDir) throws PipelineJobException
44+
{
45+
AnalysisOutputImpl output = new AnalysisOutputImpl();
46+
47+
List<String> args = new ArrayList<>();
48+
args.add(getExe().getPath());
49+
args.add("discover");
50+
51+
args.add("--bam");
52+
args.add(inputBam.getPath());
53+
54+
args.add("--ref");
55+
args.add(referenceGenome.getWorkingFastaFile().getPath());
56+
57+
File svOutDir = new File(outputDir, "sawfish");
58+
args.add("--output-dir");
59+
args.add(svOutDir.getPath());
60+
61+
Integer maxThreads = SequencePipelineService.get().getMaxThreads(getPipelineCtx().getLogger());
62+
if (maxThreads != null)
63+
{
64+
args.add("-threads");
65+
args.add(String.valueOf(maxThreads));
66+
}
67+
68+
new SimpleScriptWrapper(getPipelineCtx().getLogger()).execute(args);
69+
70+
File vcf = new File(svOutDir, "genotyped.sv.vcf.gz");
71+
if (!vcf.exists())
72+
{
73+
throw new PipelineJobException("Unable to find file: " + vcf.getPath());
74+
}
75+
76+
output.addSequenceOutput(vcf, rs.getName() + ": sawfish", "Sawfish SV Discovery", rs.getReadsetId(), null, referenceGenome.getGenomeId(), null);
77+
return output;
78+
}
79+
80+
@Override
81+
public Output performAnalysisPerSampleLocal(AnalysisModel model, File inputBam, File referenceFasta, File outDir) throws PipelineJobException
82+
{
83+
return null;
84+
}
85+
86+
private File getExe()
87+
{
88+
return SequencePipelineService.get().getExeForPackage("SAWFISHPATH", "sawfish");
89+
}
90+
}
Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,183 @@
1+
package org.labkey.sequenceanalysis.run.analysis;
2+
3+
import org.apache.commons.io.FileUtils;
4+
import org.json.JSONObject;
5+
import org.labkey.api.module.ModuleLoader;
6+
import org.labkey.api.pipeline.PipelineJob;
7+
import org.labkey.api.pipeline.PipelineJobException;
8+
import org.labkey.api.pipeline.RecordedAction;
9+
import org.labkey.api.sequenceanalysis.SequenceAnalysisService;
10+
import org.labkey.api.sequenceanalysis.SequenceOutputFile;
11+
import org.labkey.api.sequenceanalysis.pipeline.AbstractParameterizedOutputHandler;
12+
import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome;
13+
import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport;
14+
import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler;
15+
import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService;
16+
import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor;
17+
import org.labkey.api.sequenceanalysis.run.SimpleScriptWrapper;
18+
import org.labkey.sequenceanalysis.SequenceAnalysisModule;
19+
import org.labkey.sequenceanalysis.util.SequenceUtil;
20+
21+
import java.io.File;
22+
import java.io.IOException;
23+
import java.util.ArrayList;
24+
import java.util.Arrays;
25+
import java.util.LinkedHashSet;
26+
import java.util.List;
27+
import java.util.stream.Collectors;
28+
29+
public class SawfishJointCallingHandler extends AbstractParameterizedOutputHandler<SequenceOutputHandler.SequenceOutputProcessor>
30+
{
31+
private static final String OUTPUT_CATEGORY = "Sawfish VCF";
32+
33+
public SawfishJointCallingHandler()
34+
{
35+
super(ModuleLoader.getInstance().getModule(SequenceAnalysisModule.NAME), "Sawfish Joint-Call", "Runs sawfish joint-call, which jointly calls SVs from PacBio CCS data", new LinkedHashSet<>(List.of("sequenceanalysis/panel/VariantScatterGatherPanel.js")), Arrays.asList(
36+
ToolParameterDescriptor.create("fileName", "VCF Filename", "The name of the resulting file.", "textfield", new JSONObject(){{
37+
put("allowBlank", false);
38+
put("doNotIncludeInTemplates", true);
39+
}}, null)
40+
));
41+
}
42+
43+
@Override
44+
public boolean canProcess(SequenceOutputFile o)
45+
{
46+
return o.getFile() != null && SequenceUtil.FILETYPE.vcf.getFileType().isType(o.getFile());
47+
}
48+
49+
@Override
50+
public boolean doRunRemote()
51+
{
52+
return true;
53+
}
54+
55+
@Override
56+
public boolean doRunLocal()
57+
{
58+
return false;
59+
}
60+
61+
@Override
62+
public SequenceOutputProcessor getProcessor()
63+
{
64+
return new Processor();
65+
}
66+
67+
public static class Processor implements SequenceOutputProcessor
68+
{
69+
@Override
70+
public void processFilesOnWebserver(PipelineJob job, SequenceAnalysisJobSupport support, List<SequenceOutputFile> inputFiles, JSONObject params, File outputDir, List<RecordedAction> actions, List<SequenceOutputFile> outputsToCreate) throws UnsupportedOperationException, PipelineJobException
71+
{
72+
73+
}
74+
75+
@Override
76+
public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext ctx) throws UnsupportedOperationException, PipelineJobException
77+
{
78+
List<File> filesToProcess = inputFiles.stream().map(SequenceOutputFile::getFile).collect(Collectors.toList());
79+
80+
ReferenceGenome genome = ctx.getSequenceSupport().getCachedGenomes().iterator().next();
81+
String outputBaseName = ctx.getParams().getString("fileName");
82+
if (!outputBaseName.toLowerCase().endsWith(".gz"))
83+
{
84+
outputBaseName = outputBaseName.replaceAll(".gz$", "");
85+
}
86+
87+
if (!outputBaseName.toLowerCase().endsWith(".vcf"))
88+
{
89+
outputBaseName = outputBaseName.replaceAll(".vcf$", "");
90+
}
91+
92+
File expectedFinalOutput = new File(ctx.getOutputDir(), outputBaseName + ".vcf.gz");
93+
File expectedFinalOutputIdx = new File(expectedFinalOutput.getPath() + ".tbi");
94+
boolean jobCompleted = expectedFinalOutputIdx.exists(); // this would occur if the job died during the cleanup phase
95+
96+
File ouputVcf = runSawfishCall(ctx, filesToProcess, genome, outputBaseName);
97+
98+
SequenceOutputFile so = new SequenceOutputFile();
99+
so.setName("Sawfish call: " + outputBaseName);
100+
so.setFile(ouputVcf);
101+
so.setCategory(OUTPUT_CATEGORY);
102+
so.setLibrary_id(genome.getGenomeId());
103+
104+
ctx.addSequenceOutput(so);
105+
}
106+
107+
private File runSawfishCall(JobContext ctx, List<File> inputs, ReferenceGenome genome, String outputBaseName) throws PipelineJobException
108+
{
109+
if (inputs.isEmpty())
110+
{
111+
throw new PipelineJobException("No inputs provided");
112+
}
113+
114+
List<String> args = new ArrayList<>();
115+
args.add(getExe().getPath());
116+
args.add("joint-call");
117+
118+
Integer maxThreads = SequencePipelineService.get().getMaxThreads(ctx.getLogger());
119+
if (maxThreads != null)
120+
{
121+
args.add("--threads");
122+
args.add(String.valueOf(maxThreads));
123+
}
124+
125+
args.add("--ref");
126+
args.add(genome.getWorkingFastaFile().getPath());
127+
128+
for (File sample : inputs)
129+
{
130+
args.add("--sample");
131+
args.add(sample.getParentFile().getPath());
132+
}
133+
134+
File outDir = new File(ctx.getOutputDir(), "sawfish");
135+
args.add("--output-dir");
136+
args.add(outDir.getPath());
137+
138+
new SimpleScriptWrapper(ctx.getLogger()).execute(args);
139+
140+
File vcfOut = new File(outDir, "genotyped.sv.vcf.gz");
141+
if (!vcfOut.exists())
142+
{
143+
throw new PipelineJobException("Unable to find file: " + vcfOut.getPath());
144+
}
145+
146+
File vcfOutFinal = new File(ctx.getOutputDir(), outputBaseName + ".vcf.gz");
147+
148+
try
149+
{
150+
if (vcfOutFinal.exists())
151+
{
152+
vcfOutFinal.delete();
153+
FileUtils.moveFile(vcfOut, vcfOutFinal);
154+
155+
File targetIndex = new File(vcfOutFinal.getPath() + ".tbi");
156+
if (targetIndex.exists())
157+
{
158+
targetIndex.delete();
159+
}
160+
161+
File origIndex = new File(vcfOut.getPath() + ".tbi");
162+
if (origIndex.exists())
163+
{
164+
FileUtils.moveFile(origIndex, targetIndex);
165+
}
166+
167+
SequenceAnalysisService.get().ensureVcfIndex(vcfOutFinal, ctx.getLogger(), true);
168+
}
169+
}
170+
catch (IOException e)
171+
{
172+
throw new PipelineJobException(e);
173+
}
174+
175+
return vcfOutFinal;
176+
}
177+
178+
private File getExe()
179+
{
180+
return SequencePipelineService.get().getExeForPackage("PBSVPATH", "pbsv");
181+
}
182+
}
183+
}

0 commit comments

Comments
 (0)