Skip to content

Commit 58c0b67

Browse files
committed
Add handler to convert existing BAMs to CRAM
1 parent 4661fa2 commit 58c0b67

File tree

3 files changed

+218
-0
lines changed

3 files changed

+218
-0
lines changed
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
package org.labkey.api.sequenceanalysis.pipeline;
2+
3+
import org.apache.logging.log4j.Logger;
4+
import org.jetbrains.annotations.Nullable;
5+
import org.labkey.api.pipeline.PipelineJobException;
6+
7+
import java.io.File;
8+
import java.util.ArrayList;
9+
import java.util.List;
10+
11+
/**
12+
* Created by bimber on 11/4/2016.
13+
*/
14+
public class SamtoolsCramConverter extends SamtoolsRunner
15+
{
16+
public SamtoolsCramConverter(Logger log)
17+
{
18+
super(log);
19+
}
20+
21+
public File convert(File inputBam, File outputCram, File gzippedFasta, boolean doIndex, @Nullable Integer threads) throws PipelineJobException
22+
{
23+
getLogger().info("Converting SAM/BAM to CRAM: " + inputBam.getPath());
24+
25+
List<String> params = new ArrayList<>();
26+
params.add(getSamtoolsPath().getPath());
27+
params.add("view");
28+
29+
params.add("-C");
30+
31+
params.add("-o");
32+
params.add(outputCram.getPath());
33+
34+
params.add("-T");
35+
params.add(gzippedFasta.getPath());
36+
37+
if (threads != null)
38+
{
39+
params.add("--threads");
40+
params.add(String.valueOf(threads));
41+
}
42+
43+
params.add(inputBam.getPath());
44+
45+
execute(params);
46+
47+
if (doIndex)
48+
{
49+
doIndex(outputCram, threads);
50+
}
51+
52+
return outputCram;
53+
}
54+
55+
private void doIndex(File input, @Nullable Integer threads) throws PipelineJobException
56+
{
57+
List<String> params = new ArrayList<>();
58+
params.add(getSamtoolsPath().getPath());
59+
params.add("index");
60+
61+
if (threads != null)
62+
{
63+
params.add("--threads");
64+
params.add(String.valueOf(threads));
65+
}
66+
67+
params.add(input.getPath());
68+
execute(params);
69+
70+
File idx = new File(input.getPath() + ".crai");
71+
if (!idx.exists())
72+
{
73+
throw new PipelineJobException("Unable to find CRAM index: " + idx.getPath());
74+
}
75+
}
76+
}

SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@
6767
import org.labkey.sequenceanalysis.pipeline.AlignmentImportJob;
6868
import org.labkey.sequenceanalysis.pipeline.CacheGenomePipelineJob;
6969
import org.labkey.sequenceanalysis.pipeline.CacheGenomeTrigger;
70+
import org.labkey.sequenceanalysis.pipeline.ConvertToCramHandler;
7071
import org.labkey.sequenceanalysis.pipeline.IlluminaImportJob;
7172
import org.labkey.sequenceanalysis.pipeline.ImportFastaSequencesPipelineJob;
7273
import org.labkey.sequenceanalysis.pipeline.ImportGenomeTrackPipelineJob;
@@ -361,6 +362,7 @@ public static void registerPipelineSteps()
361362
SequenceAnalysisService.get().registerFileHandler(new MergeLoFreqVcfHandler());
362363
SequenceAnalysisService.get().registerFileHandler(new PangolinHandler());
363364
SequenceAnalysisService.get().registerFileHandler(new NextCladeHandler());
365+
SequenceAnalysisService.get().registerFileHandler(new ConvertToCramHandler());
364366

365367
SequenceAnalysisService.get().registerReadsetHandler(new MultiQCHandler());
366368
SequenceAnalysisService.get().registerReadsetHandler(new RestoreSraDataHandler());
Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
package org.labkey.sequenceanalysis.pipeline;
2+
3+
import org.json.JSONObject;
4+
import org.labkey.api.exp.api.ExpData;
5+
import org.labkey.api.module.ModuleLoader;
6+
import org.labkey.api.pipeline.PipelineJob;
7+
import org.labkey.api.pipeline.PipelineJobException;
8+
import org.labkey.api.pipeline.RecordedAction;
9+
import org.labkey.api.sequenceanalysis.SequenceOutputFile;
10+
import org.labkey.api.sequenceanalysis.pipeline.AbstractParameterizedOutputHandler;
11+
import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome;
12+
import org.labkey.api.sequenceanalysis.pipeline.SamtoolsCramConverter;
13+
import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport;
14+
import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler;
15+
import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService;
16+
import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor;
17+
import org.labkey.api.util.FileUtil;
18+
import org.labkey.sequenceanalysis.SequenceAnalysisModule;
19+
import org.labkey.sequenceanalysis.util.SequenceUtil;
20+
21+
import java.io.File;
22+
import java.util.Arrays;
23+
import java.util.List;
24+
25+
public class ConvertToCramHandler extends AbstractParameterizedOutputHandler<SequenceOutputHandler.SequenceOutputProcessor>
26+
{
27+
public ConvertToCramHandler()
28+
{
29+
super(ModuleLoader.getInstance().getModule(SequenceAnalysisModule.class), "Convert To Cram", "This will convert a BAM file to CRAM, replacing the original", null, Arrays.asList(
30+
ToolParameterDescriptor.create("replaceOriginal", "Replace Original File", "If selected, the input BAM will be deleted and the database record will be switched to use this filepath.", "checkbox", new JSONObject(){{
31+
put("checked", true);
32+
}}, true))
33+
);
34+
}
35+
36+
@Override
37+
public boolean canProcess(SequenceOutputFile o)
38+
{
39+
return o.getFile() != null && o.getFile().exists() && SequenceUtil.FILETYPE.bam.getFileType().isType(o.getFile());
40+
}
41+
42+
@Override
43+
public boolean useWorkbooks()
44+
{
45+
return true;
46+
}
47+
48+
@Override
49+
public boolean doSplitJobs()
50+
{
51+
return true;
52+
}
53+
54+
@Override
55+
public boolean requiresGenome()
56+
{
57+
return true;
58+
}
59+
60+
@Override
61+
public boolean doRunRemote()
62+
{
63+
return true;
64+
}
65+
66+
@Override
67+
public boolean doRunLocal()
68+
{
69+
return false;
70+
}
71+
72+
@Override
73+
public SequenceOutputProcessor getProcessor()
74+
{
75+
return new Processor();
76+
}
77+
78+
public class Processor implements SequenceOutputProcessor
79+
{
80+
@Override
81+
public void processFilesOnWebserver(PipelineJob job, SequenceAnalysisJobSupport support, List<SequenceOutputFile> inputFiles, JSONObject params, File outputDir, List<RecordedAction> actions, List<SequenceOutputFile> outputsToCreate) throws UnsupportedOperationException, PipelineJobException
82+
{
83+
84+
}
85+
86+
@Override
87+
public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext ctx) throws UnsupportedOperationException, PipelineJobException
88+
{
89+
boolean replaceOriginal = ctx.getParams().optBoolean("replaceOriginal", false);
90+
ctx.getLogger().info("Replace input BAM: " + replaceOriginal);
91+
92+
Integer threads = SequencePipelineService.get().getMaxThreads(ctx.getLogger());
93+
for (SequenceOutputFile so : inputFiles)
94+
{
95+
ReferenceGenome genome = ctx.getSequenceSupport().getCachedGenome(so.getLibrary_id());
96+
File cram = new File(so.getFile().getParentFile(), FileUtil.getBaseName(so.getFile()) + ".cram");
97+
new SamtoolsCramConverter(ctx.getLogger()).convert(so.getFile(), cram, genome.getWorkingFastaFileGzipped(), true, threads);
98+
checkCramAndIndex(so);
99+
100+
if (replaceOriginal)
101+
{
102+
ctx.getLogger().info("Deleting original BAM: " + so.getFile().getPath());
103+
new File(so.getFile().getPath() + ".bai").delete();
104+
so.getFile().delete();
105+
}
106+
}
107+
}
108+
109+
private void checkCramAndIndex(SequenceOutputFile so) throws PipelineJobException
110+
{
111+
File cram = new File(so.getFile().getParentFile(), FileUtil.getBaseName(so.getFile()) + ".cram");
112+
if (!cram.exists())
113+
{
114+
throw new PipelineJobException("Unable to find file: " + cram.getPath());
115+
}
116+
117+
File cramIdx = new File(cram.getPath() + ".crai");
118+
if (!cramIdx.exists())
119+
{
120+
throw new PipelineJobException("Unable to find file: " + cramIdx.getPath());
121+
}
122+
}
123+
124+
@Override
125+
public void complete(PipelineJob job, List<SequenceOutputFile> inputs, List<SequenceOutputFile> outputsCreated, SequenceAnalysisJobSupport support) throws PipelineJobException
126+
{
127+
128+
for (SequenceOutputFile so : inputs)
129+
{
130+
File cram = new File(so.getFile().getParentFile(), FileUtil.getBaseName(so.getFile()) + ".cram");
131+
checkCramAndIndex(so);
132+
133+
job.getLogger().info("Updating ExpData record with new filepath: " + cram.getPath());
134+
ExpData d = so.getExpData();
135+
d.setDataFileURI(cram.toURI());
136+
d.save(job.getUser());
137+
}
138+
}
139+
}
140+
}

0 commit comments

Comments
 (0)