Skip to content

Commit 2fb3d8c

Browse files
committed
Better resume for ConvertToCramHandler
1 parent 39cc937 commit 2fb3d8c

File tree

2 files changed

+86
-44
lines changed

2 files changed

+86
-44
lines changed

SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/ConvertToCramHandler.java

Lines changed: 77 additions & 43 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
package org.labkey.sequenceanalysis.pipeline;
22

3+
import org.apache.commons.io.FileUtils;
34
import org.json.JSONObject;
45
import org.labkey.api.collections.CaseInsensitiveHashMap;
56
import org.labkey.api.data.Container;
@@ -27,6 +28,7 @@
2728
import org.labkey.sequenceanalysis.util.SequenceUtil;
2829

2930
import java.io.File;
31+
import java.io.IOException;
3032
import java.sql.SQLException;
3133
import java.util.ArrayList;
3234
import java.util.Arrays;
@@ -39,16 +41,16 @@ public class ConvertToCramHandler extends AbstractParameterizedOutputHandler<Seq
3941
public ConvertToCramHandler()
4042
{
4143
super(ModuleLoader.getInstance().getModule(SequenceAnalysisModule.class), "Convert To Cram", "This will convert a BAM file to CRAM, replacing the original", null, Arrays.asList(
42-
ToolParameterDescriptor.create("replaceOriginal", "Replace Original File", "If selected, the input BAM will be deleted and the database record will be switched to use this filepath.", "checkbox", new JSONObject(){{
43-
put("checked", true);
44-
}}, true),
45-
ToolParameterDescriptor.create("doCramArchivalMode", "CRAM Archival Mode", "If selected, the CRAM will undergo additional compression to save space. This is lossy and may not be compatible with all downstream tools. See samtools view --output-fmt-option archive", "checkbox", new JSONObject(){{
46-
put("checked", false);
47-
}}, false),
48-
ToolParameterDescriptor.create("useOutputFileContainer", "Submit to Source File Workbook", "If checked, each job will be submitted to the same workbook as the input file, as opposed to submitting all jobs to the same workbook. This is primarily useful if submitting a large batch of files to process separately. This only applies if 'Run Separately' is selected.", "checkbox", new JSONObject(){{
49-
put("checked", true);
50-
}}, true)
51-
)
44+
ToolParameterDescriptor.create("replaceOriginal", "Replace Original File", "If selected, the input BAM will be deleted and the database record will be switched to use this filepath.", "checkbox", new JSONObject(){{
45+
put("checked", true);
46+
}}, true),
47+
ToolParameterDescriptor.create("doCramArchivalMode", "CRAM Archival Mode", "If selected, the CRAM will undergo additional compression to save space. This is lossy and may not be compatible with all downstream tools. See samtools view --output-fmt-option archive", "checkbox", new JSONObject(){{
48+
put("checked", false);
49+
}}, false),
50+
ToolParameterDescriptor.create("useOutputFileContainer", "Submit to Source File Workbook", "If checked, each job will be submitted to the same workbook as the input file, as opposed to submitting all jobs to the same workbook. This is primarily useful if submitting a large batch of files to process separately. This only applies if 'Run Separately' is selected.", "checkbox", new JSONObject(){{
51+
put("checked", true);
52+
}}, true)
53+
)
5254
);
5355
}
5456

@@ -94,7 +96,7 @@ public SequenceOutputProcessor getProcessor()
9496
return new Processor();
9597
}
9698

97-
public class Processor implements SequenceOutputProcessor
99+
public static class Processor implements SequenceOutputProcessor
98100
{
99101
@Override
100102
public void processFilesOnWebserver(PipelineJob job, SequenceAnalysisJobSupport support, List<SequenceOutputFile> inputFiles, JSONObject params, File outputDir, List<RecordedAction> actions, List<SequenceOutputFile> outputsToCreate) throws UnsupportedOperationException, PipelineJobException
@@ -113,7 +115,7 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
113115
for (SequenceOutputFile so : inputFiles)
114116
{
115117
ReferenceGenome genome = ctx.getSequenceSupport().getCachedGenome(so.getLibrary_id());
116-
File outputFile = new File(ctx.getWorkingDirectory(), FileUtil.getBaseName(so.getFile()) + ".cram");
118+
File outputFile = new File(ctx.getWorkingDirectory(), FileUtil.getBaseName(so.getFile()) + ".cram");
117119
if (!so.getFile().exists())
118120
{
119121
File inputAsCram = new File(so.getFile().getParentFile(), FileUtil.getBaseName(so.getFile()) + ".cram");
@@ -137,16 +139,44 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
137139
if (replaceOriginal)
138140
{
139141
ctx.getLogger().info("Deleting original BAM/CRAM: {}", so.getFile().getPath());
140-
if (so.getFile().exists())
142+
if (SequenceUtil.FILETYPE.bam.getFileType().isType(so.getFile()))
141143
{
142-
SequenceAnalysisService.get().getExpectedBamOrCramIndex(so.getFile()).delete();
143-
so.getFile().delete();
144+
if (so.getFile().exists())
145+
{
146+
SequenceAnalysisService.get().getExpectedBamOrCramIndex(so.getFile()).delete();
147+
so.getFile().delete();
148+
}
149+
else
150+
{
151+
ctx.getLogger().debug("Input BAM not found, possibly deleted in earlier job iteration?");
152+
}
144153
}
145-
else
154+
else if (SequenceUtil.FILETYPE.cram.getFileType().isType(so.getFile()))
146155
{
147-
ctx.getLogger().debug("Input BAM not found, possibly deleted in earlier job iteration?");
156+
try
157+
{
158+
if (!so.getFile().exists())
159+
{
160+
throw new PipelineJobException("Unable to find input CRAM/BAM: " + so.getFile().getPath());
161+
}
162+
163+
SequenceAnalysisService.get().getExpectedBamOrCramIndex(so.getFile()).delete();
164+
so.getFile().delete();
165+
166+
FileUtils.moveFile(outputFile, so.getFile());
167+
FileUtils.moveFile(new File(outputFile.getPath() + ".crai"), new File(so.getFile() + ".crai"));
168+
}
169+
catch (IOException e)
170+
{
171+
throw new PipelineJobException(e);
172+
}
148173
}
149174
}
175+
else
176+
{
177+
String description = (so.getDescription() == null ? "" : so.getDescription() + "\n") + "CRAM Archival Mode";
178+
ctx.getFileManager().addSequenceOutput(outputFile, so.getName(), so.getCategory(), so.getReadset(), null, so.getLibrary_id(), description);
179+
}
150180
}
151181
}
152182

@@ -175,41 +205,45 @@ public void complete(JobContext ctx, List<SequenceOutputFile> inputs, List<Seque
175205
return(row);
176206
}).collect(Collectors.toList());
177207

208+
boolean replaceOriginal = ctx.getParams().optBoolean("replaceOriginal", false);
178209
boolean doCramArchivalMode = ctx.getParams().optBoolean("doCramArchivalMode", false);
179210
for (SequenceOutputFile so : inputs)
180211
{
181212
File cram = new File(so.getFile().getParentFile(), FileUtil.getBaseName(so.getFile()) + ".cram");
182213
checkCramAndIndex(so);
183214

184-
ctx.getJob().getLogger().info("Updating ExpData record with new filepath: " + cram.getPath());
185-
ExpData d = so.getExpData();
186-
d.setDataFileURI(cram.toURI());
187-
d.setName(cram.getName());
188-
d.save(ctx.getJob().getUser());
189-
190-
Map<String, Object> row = new CaseInsensitiveHashMap<>();
191-
row.put("rowid", so.getRowid());
192-
row.put("container", so.getContainer());
193-
boolean doUpdate = false;
194-
String description = so.getDescription();
195-
if (so.getName().contains(".bam"))
215+
if (replaceOriginal)
196216
{
197-
row.put("name", so.getName().replaceAll("\\.bam", "\\.cram"));
198-
description = (description == null ? "" : description + "\n") + "Converted from BAM to CRAM";
199-
row.put("description", description);
200-
doUpdate = true;
201-
}
217+
ctx.getJob().getLogger().info("Updating ExpData record with new filepath: " + cram.getPath());
218+
ExpData d = so.getExpData();
219+
d.setDataFileURI(cram.toURI());
220+
d.setName(cram.getName());
221+
d.save(ctx.getJob().getUser());
202222

203-
if (doCramArchivalMode)
204-
{
205-
description = (description == null ? "" : description + "\n") + "CRAM Archival Mode";
206-
row.put("description", description);
207-
doUpdate = true;
208-
}
223+
Map<String, Object> row = new CaseInsensitiveHashMap<>();
224+
row.put("rowid", so.getRowid());
225+
row.put("container", so.getContainer());
226+
boolean doUpdate = false;
227+
String description = so.getDescription();
228+
if (so.getName().contains(".bam"))
229+
{
230+
row.put("name", so.getName().replaceAll("\\.bam", "\\.cram"));
231+
description = (description == null ? "" : description + "\n") + "Converted from BAM to CRAM";
232+
row.put("description", description);
233+
doUpdate = true;
234+
}
209235

210-
if (doUpdate)
211-
{
212-
toUpdate.add(row);
236+
if (doCramArchivalMode)
237+
{
238+
description = (description == null ? "" : description + "\n") + "CRAM Archival Mode";
239+
row.put("description", description);
240+
doUpdate = true;
241+
}
242+
243+
if (doUpdate)
244+
{
245+
toUpdate.add(row);
246+
}
213247
}
214248
}
215249

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/BamIterator.java

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -149,7 +149,15 @@ public void iterateReads(String refName, int start, int stop) throws IOException
149149
if (r.getAlignmentEnd() < start || r.getAlignmentStart() > stop)
150150
continue;
151151

152-
processAlignment(r, indexedRef);
152+
try
153+
{
154+
processAlignment(r, indexedRef);
155+
}
156+
catch (Exception e)
157+
{
158+
_logger.error("Unable to parse alignment: " + r.toString() + " / " + r.getCigarString());
159+
throw e;
160+
}
153161

154162
if (i % 10000 == 0)
155163
{

0 commit comments

Comments
 (0)