|
| 1 | +package org.labkey.sequenceanalysis.run.alignment; |
| 2 | + |
| 3 | +import org.apache.commons.io.FileUtils; |
| 4 | +import org.apache.commons.lang3.StringUtils; |
| 5 | +import org.apache.logging.log4j.Logger; |
| 6 | +import org.jetbrains.annotations.Nullable; |
| 7 | +import org.json.JSONObject; |
| 8 | +import org.labkey.api.pipeline.PipelineJobException; |
| 9 | +import org.labkey.api.sequenceanalysis.model.Readset; |
| 10 | +import org.labkey.api.sequenceanalysis.pipeline.AbstractAlignmentStepProvider; |
| 11 | +import org.labkey.api.sequenceanalysis.pipeline.AlignerIndexUtil; |
| 12 | +import org.labkey.api.sequenceanalysis.pipeline.AlignmentOutputImpl; |
| 13 | +import org.labkey.api.sequenceanalysis.pipeline.AlignmentStep; |
| 14 | +import org.labkey.api.sequenceanalysis.pipeline.AlignmentStepProvider; |
| 15 | +import org.labkey.api.sequenceanalysis.pipeline.IndexOutputImpl; |
| 16 | +import org.labkey.api.sequenceanalysis.pipeline.PipelineContext; |
| 17 | +import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome; |
| 18 | +import org.labkey.api.sequenceanalysis.pipeline.SamtoolsRunner; |
| 19 | +import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService; |
| 20 | +import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor; |
| 21 | +import org.labkey.api.sequenceanalysis.run.AbstractAlignmentPipelineStep; |
| 22 | +import org.labkey.api.sequenceanalysis.run.AbstractCommandWrapper; |
| 23 | + |
| 24 | +import java.io.File; |
| 25 | +import java.io.IOException; |
| 26 | +import java.util.ArrayList; |
| 27 | +import java.util.Arrays; |
| 28 | +import java.util.List; |
| 29 | + |
| 30 | +/** |
| 31 | + * User: bimber |
| 32 | + * Date: 12/14/12 |
| 33 | + * Time: 7:40 AM |
| 34 | + */ |
| 35 | +public class BBMapWrapper extends AbstractCommandWrapper |
| 36 | +{ |
| 37 | + public BBMapWrapper(@Nullable Logger logger) |
| 38 | + { |
| 39 | + super(logger); |
| 40 | + } |
| 41 | + |
| 42 | + public static class BBMapAlignmentStep extends AbstractAlignmentPipelineStep<BBMapWrapper> implements AlignmentStep |
| 43 | + { |
| 44 | + public BBMapAlignmentStep(AlignmentStepProvider<?> provider, PipelineContext ctx) |
| 45 | + { |
| 46 | + super(provider, ctx, new BBMapWrapper(ctx.getLogger())); |
| 47 | + } |
| 48 | + |
| 49 | + @Override |
| 50 | + public boolean supportsGzipFastqs() |
| 51 | + { |
| 52 | + return true; |
| 53 | + } |
| 54 | + |
| 55 | + @Override |
| 56 | + public IndexOutput createIndex(ReferenceGenome referenceGenome, File outputDir) throws PipelineJobException |
| 57 | + { |
| 58 | + IndexOutputImpl output = new IndexOutputImpl(referenceGenome); |
| 59 | + |
| 60 | + File indexDir = new File(outputDir, getProvider().getName()); |
| 61 | + boolean hasCachedIndex = AlignerIndexUtil.hasCachedIndex(this.getPipelineCtx(), getIndexCachedDirName(getPipelineCtx().getJob()), referenceGenome); |
| 62 | + if (!hasCachedIndex) |
| 63 | + { |
| 64 | + getWrapper().buildIndex(referenceGenome.getWorkingFastaFile(), indexDir); |
| 65 | + } |
| 66 | + |
| 67 | + AlignerIndexUtil.saveCachedIndex(hasCachedIndex, getPipelineCtx(), indexDir, getProvider().getName(), referenceGenome); |
| 68 | + |
| 69 | + return output; |
| 70 | + } |
| 71 | + |
| 72 | + @Override |
| 73 | + public AlignmentOutput performAlignment(Readset rs, List<File> inputFastqs1, @Nullable List<File> inputFastqs2, File outputDirectory, ReferenceGenome referenceGenome, String basename, String readGroupId, @Nullable String platformUnit) throws PipelineJobException |
| 74 | + { |
| 75 | + File inputFastq1 = assertSingleFile(inputFastqs1); |
| 76 | + File inputFastq2 = assertSingleFile(inputFastqs2); |
| 77 | + |
| 78 | + AlignmentOutputImpl output = new AlignmentOutputImpl(); |
| 79 | + AlignerIndexUtil.copyIndexIfExists(this.getPipelineCtx(), output, getProvider().getName(), getProvider().getName(), referenceGenome, true); |
| 80 | + File localIdx = new File(getPipelineCtx().getWorkingDirectory(), "Shared/" + getProvider().getName()); |
| 81 | + if (!localIdx.exists()) |
| 82 | + { |
| 83 | + throw new PipelineJobException("Index not copied: " + localIdx); |
| 84 | + } |
| 85 | + output.addIntermediateFile(new File(getPipelineCtx().getWorkingDirectory(), "Shared")); |
| 86 | + |
| 87 | + // NOTE: bbmap only supports the location ./ref for the index: |
| 88 | + localIdx = new File(localIdx, "ref"); |
| 89 | + if (!localIdx.exists()) |
| 90 | + { |
| 91 | + throw new PipelineJobException("ref dir not found: " + localIdx); |
| 92 | + } |
| 93 | + |
| 94 | + File refDir = new File(getPipelineCtx().getWorkingDirectory(), "ref"); |
| 95 | + try |
| 96 | + { |
| 97 | + FileUtils.moveDirectory(localIdx, refDir); |
| 98 | + } |
| 99 | + catch (IOException e) |
| 100 | + { |
| 101 | + throw new PipelineJobException(e); |
| 102 | + } |
| 103 | + |
| 104 | + getWrapper().setOutputDir(outputDirectory); |
| 105 | + |
| 106 | + List<String> params = new ArrayList<>(); |
| 107 | + |
| 108 | + String ambig = StringUtils.trimToNull(getProvider().getParameterByName("ambig").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), String.class)); |
| 109 | + if (ambig != null) |
| 110 | + { |
| 111 | + params.add("ambig=" + ambig); |
| 112 | + if ("all".equals(ambig)) |
| 113 | + { |
| 114 | + params.add("xmtag=t"); |
| 115 | + } |
| 116 | + } |
| 117 | + |
| 118 | + for (String paramName : Arrays.asList("local", "semiperfectmode")) |
| 119 | + { |
| 120 | + if (getProvider().getParameterByName(paramName).hasValueInJson(getPipelineCtx().getJob(), getProvider(), getStepIdx())) |
| 121 | + { |
| 122 | + boolean val = getProvider().getParameterByName(paramName).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Boolean.class, false); |
| 123 | + params.add(paramName + "=" + (val ? "t" : "f")); |
| 124 | + } |
| 125 | + } |
| 126 | + |
| 127 | + if (getProvider().getParameterByName("midin").hasValueInJson(getPipelineCtx().getJob(), getProvider(), getStepIdx())) |
| 128 | + { |
| 129 | + Double val = getProvider().getParameterByName("midin").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Double.class); |
| 130 | + params.add("midin=" + val); |
| 131 | + } |
| 132 | + |
| 133 | + File bam = getWrapper().doAlignment(inputFastq1, inputFastq2, outputDirectory, basename, params); |
| 134 | + if (!bam.exists()) |
| 135 | + { |
| 136 | + throw new PipelineJobException("BAM not created, expected: " + bam.getPath()); |
| 137 | + } |
| 138 | + |
| 139 | + output.setBAM(bam); |
| 140 | + output.addCommandsExecuted(getWrapper().getCommandsExecuted()); |
| 141 | + |
| 142 | + return output; |
| 143 | + } |
| 144 | + |
| 145 | + @Override |
| 146 | + public boolean doAddReadGroups() |
| 147 | + { |
| 148 | + return true; |
| 149 | + } |
| 150 | + |
| 151 | + @Override |
| 152 | + public boolean doSortIndexBam() |
| 153 | + { |
| 154 | + return true; |
| 155 | + } |
| 156 | + |
| 157 | + @Override |
| 158 | + public boolean alwaysCopyIndexToWorkingDir() |
| 159 | + { |
| 160 | + return false; |
| 161 | + } |
| 162 | + } |
| 163 | + |
| 164 | + public static class Provider extends AbstractAlignmentStepProvider<AlignmentStep> |
| 165 | + { |
| 166 | + public Provider() |
| 167 | + { |
| 168 | + super("BBMap", "BBMap is suitable for longer reads and has the option to retain multiple hits per read. The only downside is that it can be slower. When this pipeline was first written, this aligner was preferred for sequence-based genotyping and similar applications which require retaining multiple hits.", Arrays.asList( |
| 169 | + ToolParameterDescriptor.create("ambiguous", "Ambiguous Handing", "Set behavior on ambiguously-mapped reads (with multiple top-scoring mapping locations)", "ldk-simplecombo", new JSONObject() |
| 170 | + {{ |
| 171 | + put("storeValues", "all;best;toss;random"); |
| 172 | + put("delimiter", ";"); |
| 173 | + put("multiSelect", false); |
| 174 | + }}, "all"), |
| 175 | + ToolParameterDescriptor.create("local", "Local Alignment", "Set to true to use local, rather than global, alignments. This will soft-clip ugly ends of poor alignments", "checkbox", new JSONObject() |
| 176 | + {{ |
| 177 | + put("checked", true); |
| 178 | + }}, true), |
| 179 | + ToolParameterDescriptor.create("semiperfectmode", "Semi-perfectmode", "Allow only perfect and semiperfect (perfect except for N's in the reference) mappings", "checkbox", new JSONObject() |
| 180 | + {{ |
| 181 | + put("checked", true); |
| 182 | + }}, true), |
| 183 | + ToolParameterDescriptor.create("midin", "Minimum Identity", "Approximate minimum alignment identity to look for. Higher is faster and less sensitive", "ldk-numberfield", new JSONObject() |
| 184 | + {{ |
| 185 | + put("minValue", 0); |
| 186 | + put("maxValue", 1); |
| 187 | + put("decimalPrecision", 2); |
| 188 | + }}, 0.95) |
| 189 | + ), null, "https://prost.readthedocs.io/en/latest/bbmap.html", true, true); |
| 190 | + } |
| 191 | + |
| 192 | + @Override |
| 193 | + public BBMapAlignmentStep create(PipelineContext context) |
| 194 | + { |
| 195 | + return new BBMapAlignmentStep(this, context); |
| 196 | + } |
| 197 | + } |
| 198 | + |
| 199 | + protected File getExe() |
| 200 | + { |
| 201 | + return SequencePipelineService.get().getExeForPackage("BBMAPPATH", "bbmap.sh"); |
| 202 | + } |
| 203 | + |
| 204 | + public File doAlignment(File inputFastq1, @Nullable File inputFastq2, File outputDirectory, String basename, List<String> options) throws PipelineJobException |
| 205 | + { |
| 206 | + List<String> args = new ArrayList<>(); |
| 207 | + args.add(getExe().getPath()); |
| 208 | + args.add("-in=" + inputFastq1.getPath()); |
| 209 | + if (inputFastq2 != null) |
| 210 | + { |
| 211 | + args.add("-in2=" + inputFastq2.getPath()); |
| 212 | + } |
| 213 | + |
| 214 | + args.add("-eoom"); |
| 215 | + |
| 216 | + args.add("mdtag=t"); |
| 217 | + args.add("nhtag=t"); |
| 218 | + args.add("amtag=t"); |
| 219 | + args.add("nmtag=t"); |
| 220 | + args.add("printunmappedcount=t"); |
| 221 | + args.add("overwrite=t"); |
| 222 | + |
| 223 | + // Maximum number of total alignments to print per read. Only relevant when secondary=t. |
| 224 | + args.add("maxsites=-1"); |
| 225 | + |
| 226 | + // Only print secondary alignments for ambiguously-mapped reads. |
| 227 | + args.add("secondary=t"); |
| 228 | + args.add("ssao=t"); |
| 229 | + |
| 230 | + // CONSIDER: mappedonly=f If true, treats 'out' like 'outm' |
| 231 | + // CONSIDER: outu=<file> Write only unmapped reads to this file. Does not include unmapped paired reads with a mapped mate. |
| 232 | + File outputSam = new File(outputDirectory, basename + ".bbmap.sam"); |
| 233 | + if (outputSam.exists()) |
| 234 | + { |
| 235 | + outputSam.delete(); |
| 236 | + } |
| 237 | + |
| 238 | + args.add("outm=" + outputSam.getPath()); |
| 239 | + |
| 240 | + Integer maxRam = SequencePipelineService.get().getMaxRam(); |
| 241 | + if (maxRam != null) |
| 242 | + { |
| 243 | + args.add("-Xmx=" + maxRam); |
| 244 | + } |
| 245 | + |
| 246 | + Integer maxThreads = SequencePipelineService.get().getMaxThreads(getLogger()); |
| 247 | + args.add(maxThreads == null ? "threads=1" : "threads=" + maxThreads); |
| 248 | + |
| 249 | + args.addAll(options); |
| 250 | + |
| 251 | + setWorkingDir(outputDirectory); |
| 252 | + execute(args); |
| 253 | + |
| 254 | + if (!outputSam.exists()) |
| 255 | + { |
| 256 | + throw new PipelineJobException("File not found: " + outputSam.getPath()); |
| 257 | + } |
| 258 | + |
| 259 | + File outputBam = new File(outputDirectory, basename + ".bbmap.bam"); |
| 260 | + if (outputBam.exists()) |
| 261 | + { |
| 262 | + outputBam.delete(); |
| 263 | + } |
| 264 | + |
| 265 | + SamtoolsRunner samtoolsRunner = new SamtoolsRunner(getLogger()); |
| 266 | + List<String> stArgs = new ArrayList<>(); |
| 267 | + stArgs.add(samtoolsRunner.getSamtoolsPath().getPath()); |
| 268 | + stArgs.add("view"); |
| 269 | + stArgs.add("-o"); |
| 270 | + stArgs.add(outputBam.getPath()); |
| 271 | + stArgs.add(outputSam.getPath()); |
| 272 | + samtoolsRunner.execute(stArgs); |
| 273 | + |
| 274 | + if (!outputBam.exists()) |
| 275 | + { |
| 276 | + throw new PipelineJobException("File not found: " + outputBam.getPath()); |
| 277 | + } |
| 278 | + |
| 279 | + outputSam.delete(); |
| 280 | + |
| 281 | + return outputBam; |
| 282 | + } |
| 283 | + |
| 284 | + public File buildIndex(File inputFasta, File outDir) throws PipelineJobException |
| 285 | + { |
| 286 | + List<String> args = new ArrayList<>(); |
| 287 | + args.add(getExe().getPath()); |
| 288 | + args.add("k=7"); |
| 289 | + args.add("path=" + outDir.getPath()); |
| 290 | + args.add("ref=" + inputFasta.getPath()); |
| 291 | + |
| 292 | + setWorkingDir(outDir); |
| 293 | + execute(args); |
| 294 | + |
| 295 | + File output = new File(outDir, "ref"); |
| 296 | + if (!output.exists()) |
| 297 | + { |
| 298 | + throw new PipelineJobException("Unable to find file: " + output); |
| 299 | + } |
| 300 | + |
| 301 | + return output; |
| 302 | + } |
| 303 | +} |
0 commit comments