Skip to content

Commit e713541

Browse files
committed
Initial support of bbmap
1 parent b2e66bb commit e713541

File tree

3 files changed

+306
-0
lines changed

3 files changed

+306
-0
lines changed

SequenceAnalysis/pipeline_code/extra_tools_install.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -315,6 +315,7 @@ then
315315
tar -xf BBMap_39.25.tar.gz
316316

317317
mv bbmap $LKTOOLS_DIR/
318+
ln -s $LKTOOLS_DIR/bbmap/bbmap.sh $LKTOOLS_DIR/bbmap.sh
318319
else
319320
echo "Already installed"
320321
fi

SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@
9191
import org.labkey.sequenceanalysis.query.SequenceAnalysisUserSchema;
9292
import org.labkey.sequenceanalysis.query.SequenceTriggerHelper;
9393
import org.labkey.sequenceanalysis.run.RestoreSraDataHandler;
94+
import org.labkey.sequenceanalysis.run.alignment.BBMapWrapper;
9495
import org.labkey.sequenceanalysis.run.alignment.BWAMem2Wrapper;
9596
import org.labkey.sequenceanalysis.run.alignment.BWAMemWrapper;
9697
import org.labkey.sequenceanalysis.run.alignment.BWASWWrapper;
@@ -309,6 +310,7 @@ public static void registerPipelineSteps()
309310
SequencePipelineService.get().registerPipelineStep(new StarWrapper.Provider());
310311
SequencePipelineService.get().registerPipelineStep(new Pbmm2Wrapper.Provider());
311312
SequencePipelineService.get().registerPipelineStep(new VulcanWrapper.Provider());
313+
SequencePipelineService.get().registerPipelineStep(new BBMapWrapper.Provider());
312314

313315
//de novo assembly
314316
SequencePipelineService.get().registerPipelineStep(new TrinityRunner.Provider());
Lines changed: 303 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,303 @@
1+
package org.labkey.sequenceanalysis.run.alignment;
2+
3+
import org.apache.commons.io.FileUtils;
4+
import org.apache.commons.lang3.StringUtils;
5+
import org.apache.logging.log4j.Logger;
6+
import org.jetbrains.annotations.Nullable;
7+
import org.json.JSONObject;
8+
import org.labkey.api.pipeline.PipelineJobException;
9+
import org.labkey.api.sequenceanalysis.model.Readset;
10+
import org.labkey.api.sequenceanalysis.pipeline.AbstractAlignmentStepProvider;
11+
import org.labkey.api.sequenceanalysis.pipeline.AlignerIndexUtil;
12+
import org.labkey.api.sequenceanalysis.pipeline.AlignmentOutputImpl;
13+
import org.labkey.api.sequenceanalysis.pipeline.AlignmentStep;
14+
import org.labkey.api.sequenceanalysis.pipeline.AlignmentStepProvider;
15+
import org.labkey.api.sequenceanalysis.pipeline.IndexOutputImpl;
16+
import org.labkey.api.sequenceanalysis.pipeline.PipelineContext;
17+
import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome;
18+
import org.labkey.api.sequenceanalysis.pipeline.SamtoolsRunner;
19+
import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService;
20+
import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor;
21+
import org.labkey.api.sequenceanalysis.run.AbstractAlignmentPipelineStep;
22+
import org.labkey.api.sequenceanalysis.run.AbstractCommandWrapper;
23+
24+
import java.io.File;
25+
import java.io.IOException;
26+
import java.util.ArrayList;
27+
import java.util.Arrays;
28+
import java.util.List;
29+
30+
/**
31+
* User: bimber
32+
* Date: 12/14/12
33+
* Time: 7:40 AM
34+
*/
35+
public class BBMapWrapper extends AbstractCommandWrapper
36+
{
37+
public BBMapWrapper(@Nullable Logger logger)
38+
{
39+
super(logger);
40+
}
41+
42+
public static class BBMapAlignmentStep extends AbstractAlignmentPipelineStep<BBMapWrapper> implements AlignmentStep
43+
{
44+
public BBMapAlignmentStep(AlignmentStepProvider<?> provider, PipelineContext ctx)
45+
{
46+
super(provider, ctx, new BBMapWrapper(ctx.getLogger()));
47+
}
48+
49+
@Override
50+
public boolean supportsGzipFastqs()
51+
{
52+
return true;
53+
}
54+
55+
@Override
56+
public IndexOutput createIndex(ReferenceGenome referenceGenome, File outputDir) throws PipelineJobException
57+
{
58+
IndexOutputImpl output = new IndexOutputImpl(referenceGenome);
59+
60+
File indexDir = new File(outputDir, getProvider().getName());
61+
boolean hasCachedIndex = AlignerIndexUtil.hasCachedIndex(this.getPipelineCtx(), getIndexCachedDirName(getPipelineCtx().getJob()), referenceGenome);
62+
if (!hasCachedIndex)
63+
{
64+
getWrapper().buildIndex(referenceGenome.getWorkingFastaFile(), indexDir);
65+
}
66+
67+
AlignerIndexUtil.saveCachedIndex(hasCachedIndex, getPipelineCtx(), indexDir, getProvider().getName(), referenceGenome);
68+
69+
return output;
70+
}
71+
72+
@Override
73+
public AlignmentOutput performAlignment(Readset rs, List<File> inputFastqs1, @Nullable List<File> inputFastqs2, File outputDirectory, ReferenceGenome referenceGenome, String basename, String readGroupId, @Nullable String platformUnit) throws PipelineJobException
74+
{
75+
File inputFastq1 = assertSingleFile(inputFastqs1);
76+
File inputFastq2 = assertSingleFile(inputFastqs2);
77+
78+
AlignmentOutputImpl output = new AlignmentOutputImpl();
79+
AlignerIndexUtil.copyIndexIfExists(this.getPipelineCtx(), output, getProvider().getName(), getProvider().getName(), referenceGenome, true);
80+
File localIdx = new File(getPipelineCtx().getWorkingDirectory(), "Shared/" + getProvider().getName());
81+
if (!localIdx.exists())
82+
{
83+
throw new PipelineJobException("Index not copied: " + localIdx);
84+
}
85+
output.addIntermediateFile(new File(getPipelineCtx().getWorkingDirectory(), "Shared"));
86+
87+
// NOTE: bbmap only supports the location ./ref for the index:
88+
localIdx = new File(localIdx, "ref");
89+
if (!localIdx.exists())
90+
{
91+
throw new PipelineJobException("ref dir not found: " + localIdx);
92+
}
93+
94+
File refDir = new File(getPipelineCtx().getWorkingDirectory(), "ref");
95+
try
96+
{
97+
FileUtils.moveDirectory(localIdx, refDir);
98+
}
99+
catch (IOException e)
100+
{
101+
throw new PipelineJobException(e);
102+
}
103+
104+
getWrapper().setOutputDir(outputDirectory);
105+
106+
List<String> params = new ArrayList<>();
107+
108+
String ambig = StringUtils.trimToNull(getProvider().getParameterByName("ambig").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), String.class));
109+
if (ambig != null)
110+
{
111+
params.add("ambig=" + ambig);
112+
if ("all".equals(ambig))
113+
{
114+
params.add("xmtag=t");
115+
}
116+
}
117+
118+
for (String paramName : Arrays.asList("local", "semiperfectmode"))
119+
{
120+
if (getProvider().getParameterByName(paramName).hasValueInJson(getPipelineCtx().getJob(), getProvider(), getStepIdx()))
121+
{
122+
boolean val = getProvider().getParameterByName(paramName).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Boolean.class, false);
123+
params.add(paramName + "=" + (val ? "t" : "f"));
124+
}
125+
}
126+
127+
if (getProvider().getParameterByName("midin").hasValueInJson(getPipelineCtx().getJob(), getProvider(), getStepIdx()))
128+
{
129+
Double val = getProvider().getParameterByName("midin").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Double.class);
130+
params.add("midin=" + val);
131+
}
132+
133+
File bam = getWrapper().doAlignment(inputFastq1, inputFastq2, outputDirectory, basename, params);
134+
if (!bam.exists())
135+
{
136+
throw new PipelineJobException("BAM not created, expected: " + bam.getPath());
137+
}
138+
139+
output.setBAM(bam);
140+
output.addCommandsExecuted(getWrapper().getCommandsExecuted());
141+
142+
return output;
143+
}
144+
145+
@Override
146+
public boolean doAddReadGroups()
147+
{
148+
return true;
149+
}
150+
151+
@Override
152+
public boolean doSortIndexBam()
153+
{
154+
return true;
155+
}
156+
157+
@Override
158+
public boolean alwaysCopyIndexToWorkingDir()
159+
{
160+
return false;
161+
}
162+
}
163+
164+
public static class Provider extends AbstractAlignmentStepProvider<AlignmentStep>
165+
{
166+
public Provider()
167+
{
168+
super("BBMap", "BBMap is suitable for longer reads and has the option to retain multiple hits per read. The only downside is that it can be slower. When this pipeline was first written, this aligner was preferred for sequence-based genotyping and similar applications which require retaining multiple hits.", Arrays.asList(
169+
ToolParameterDescriptor.create("ambiguous", "Ambiguous Handing", "Set behavior on ambiguously-mapped reads (with multiple top-scoring mapping locations)", "ldk-simplecombo", new JSONObject()
170+
{{
171+
put("storeValues", "all;best;toss;random");
172+
put("delimiter", ";");
173+
put("multiSelect", false);
174+
}}, "all"),
175+
ToolParameterDescriptor.create("local", "Local Alignment", "Set to true to use local, rather than global, alignments. This will soft-clip ugly ends of poor alignments", "checkbox", new JSONObject()
176+
{{
177+
put("checked", true);
178+
}}, true),
179+
ToolParameterDescriptor.create("semiperfectmode", "Semi-perfectmode", "Allow only perfect and semiperfect (perfect except for N's in the reference) mappings", "checkbox", new JSONObject()
180+
{{
181+
put("checked", true);
182+
}}, true),
183+
ToolParameterDescriptor.create("midin", "Minimum Identity", "Approximate minimum alignment identity to look for. Higher is faster and less sensitive", "ldk-numberfield", new JSONObject()
184+
{{
185+
put("minValue", 0);
186+
put("maxValue", 1);
187+
put("decimalPrecision", 2);
188+
}}, 0.95)
189+
), null, "https://prost.readthedocs.io/en/latest/bbmap.html", true, true);
190+
}
191+
192+
@Override
193+
public BBMapAlignmentStep create(PipelineContext context)
194+
{
195+
return new BBMapAlignmentStep(this, context);
196+
}
197+
}
198+
199+
protected File getExe()
200+
{
201+
return SequencePipelineService.get().getExeForPackage("BBMAPPATH", "bbmap.sh");
202+
}
203+
204+
public File doAlignment(File inputFastq1, @Nullable File inputFastq2, File outputDirectory, String basename, List<String> options) throws PipelineJobException
205+
{
206+
List<String> args = new ArrayList<>();
207+
args.add(getExe().getPath());
208+
args.add("-in=" + inputFastq1.getPath());
209+
if (inputFastq2 != null)
210+
{
211+
args.add("-in2=" + inputFastq2.getPath());
212+
}
213+
214+
args.add("-eoom");
215+
216+
args.add("mdtag=t");
217+
args.add("nhtag=t");
218+
args.add("amtag=t");
219+
args.add("nmtag=t");
220+
args.add("printunmappedcount=t");
221+
args.add("overwrite=t");
222+
223+
// Maximum number of total alignments to print per read. Only relevant when secondary=t.
224+
args.add("maxsites=-1");
225+
226+
// Only print secondary alignments for ambiguously-mapped reads.
227+
args.add("secondary=t");
228+
args.add("ssao=t");
229+
230+
// CONSIDER: mappedonly=f If true, treats 'out' like 'outm'
231+
// CONSIDER: outu=<file> Write only unmapped reads to this file. Does not include unmapped paired reads with a mapped mate.
232+
File outputSam = new File(outputDirectory, basename + ".bbmap.sam");
233+
if (outputSam.exists())
234+
{
235+
outputSam.delete();
236+
}
237+
238+
args.add("outm=" + outputSam.getPath());
239+
240+
Integer maxRam = SequencePipelineService.get().getMaxRam();
241+
if (maxRam != null)
242+
{
243+
args.add("-Xmx=" + maxRam);
244+
}
245+
246+
Integer maxThreads = SequencePipelineService.get().getMaxThreads(getLogger());
247+
args.add(maxThreads == null ? "threads=1" : "threads=" + maxThreads);
248+
249+
args.addAll(options);
250+
251+
setWorkingDir(outputDirectory);
252+
execute(args);
253+
254+
if (!outputSam.exists())
255+
{
256+
throw new PipelineJobException("File not found: " + outputSam.getPath());
257+
}
258+
259+
File outputBam = new File(outputDirectory, basename + ".bbmap.bam");
260+
if (outputBam.exists())
261+
{
262+
outputBam.delete();
263+
}
264+
265+
SamtoolsRunner samtoolsRunner = new SamtoolsRunner(getLogger());
266+
List<String> stArgs = new ArrayList<>();
267+
stArgs.add(samtoolsRunner.getSamtoolsPath().getPath());
268+
stArgs.add("view");
269+
stArgs.add("-o");
270+
stArgs.add(outputBam.getPath());
271+
stArgs.add(outputSam.getPath());
272+
samtoolsRunner.execute(stArgs);
273+
274+
if (!outputBam.exists())
275+
{
276+
throw new PipelineJobException("File not found: " + outputBam.getPath());
277+
}
278+
279+
outputSam.delete();
280+
281+
return outputBam;
282+
}
283+
284+
public File buildIndex(File inputFasta, File outDir) throws PipelineJobException
285+
{
286+
List<String> args = new ArrayList<>();
287+
args.add(getExe().getPath());
288+
args.add("k=7");
289+
args.add("path=" + outDir.getPath());
290+
args.add("ref=" + inputFasta.getPath());
291+
292+
setWorkingDir(outDir);
293+
execute(args);
294+
295+
File output = new File(outDir, "ref");
296+
if (!output.exists())
297+
{
298+
throw new PipelineJobException("Unable to find file: " + output);
299+
}
300+
301+
return output;
302+
}
303+
}

0 commit comments

Comments
 (0)