Skip to content

Commit fb5685a

Browse files
authored
Remove most direct references to BAI BAM indexes in favor of BAM/CRAM helper (#265)
1 parent 1d01804 commit fb5685a

34 files changed

+129
-84
lines changed

SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/SequenceAnalysisService.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,10 @@ static public void setInstance(SequenceAnalysisService instance)
7676

7777
abstract public File ensureVcfIndex(File vcf, Logger log, boolean forceRecreate) throws IOException;
7878

79+
/**
 * Ensures an index is present for the supplied BAM or CRAM file, building one when it is missing
 * or when a rebuild is requested.
 *
 * @param bamOrCram the BAM or CRAM file to index
 * @param log logger handed to the underlying indexing tool
 * @param forceRecreate when true, an existing index is discarded and rebuilt instead of reused
 * @return the index file; NOTE(review): confirm against the implementation what is returned when an
 *         existing index is reused, and null-check the result until that is settled
 * @throws PipelineJobException declared for failures raised by the indexing tools
 */
abstract public File ensureBamOrCramIdx(File bamOrCram, Logger log, boolean forceRecreate) throws PipelineJobException;
80+
81+
/**
 * Returns the location at which the index for the given BAM or CRAM file is expected.
 * The returned file is not guaranteed to exist; callers check {@code exists()} before using it.
 *
 * @param bamOrCram the BAM or CRAM file whose index location is wanted
 * @return the expected index file
 */
abstract public File getExpectedBamOrCramIndex(File bamOrCram);
82+
7983
abstract public File bgzipFile(File input, Logger log) throws PipelineJobException;
8084

8185
abstract public void ensureFastaIndex(File fasta, Logger log) throws PipelineJobException;

SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/SamtoolsCramConverter.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ public File convert(File inputBam, File outputCram, File gzippedFasta, boolean d
5757
return outputCram;
5858
}
5959

60-
private void doIndex(File input, @Nullable Integer threads) throws PipelineJobException
60+
public File doIndex(File input, @Nullable Integer threads) throws PipelineJobException
6161
{
6262
List<String> params = new ArrayList<>();
6363
params.add(getSamtoolsPath().getPath());
@@ -77,6 +77,8 @@ private void doIndex(File input, @Nullable Integer threads) throws PipelineJobEx
7777
{
7878
throw new PipelineJobException("Unable to find CRAM index: " + idx.getPath());
7979
}
80+
81+
return idx;
8082
}
8183

8284
public static File getExpectedCramIndex(File input)

SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisServiceImpl.java

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
import org.labkey.api.sequenceanalysis.SequenceDataProvider;
3737
import org.labkey.api.sequenceanalysis.model.Readset;
3838
import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome;
39+
import org.labkey.api.sequenceanalysis.pipeline.SamtoolsCramConverter;
3940
import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler;
4041
import org.labkey.api.util.FileType;
4142
import org.labkey.api.util.PageFlowUtil;
@@ -45,6 +46,7 @@
4546
import org.labkey.sequenceanalysis.pipeline.ReferenceGenomeImpl;
4647
import org.labkey.sequenceanalysis.pipeline.ReferenceLibraryPipelineJob;
4748
import org.labkey.sequenceanalysis.pipeline.SequenceTaskHelper;
49+
import org.labkey.sequenceanalysis.run.util.BuildBamIndexWrapper;
4850
import org.labkey.sequenceanalysis.run.util.FastaIndexer;
4951
import org.labkey.sequenceanalysis.run.util.GxfSorter;
5052
import org.labkey.sequenceanalysis.run.util.IndexFeatureFileWrapper;
@@ -294,6 +296,40 @@ public File ensureVcfIndex(File vcf, Logger log, boolean forceRecreate) throws I
294296
}
295297
}
296298

299+
@Override
300+
public File getExpectedBamOrCramIndex(File bamOrCram)
301+
{
302+
return SequenceUtil.getExpectedIndex(bamOrCram);
303+
}
304+
305+
@Override
306+
public File ensureBamOrCramIdx(File bamOrCram, Logger log, boolean forceRecreate) throws PipelineJobException
307+
{
308+
File idx = SequenceUtil.getExpectedIndex(bamOrCram);
309+
if (idx.exists())
310+
{
311+
if (forceRecreate)
312+
{
313+
idx.delete();
314+
}
315+
else
316+
{
317+
return null;
318+
}
319+
}
320+
321+
if (SequenceUtil.FILETYPE.bam.getFileType().isType(bamOrCram))
322+
{
323+
idx = new BuildBamIndexWrapper(log).executeCommand(bamOrCram);
324+
}
325+
else if (SequenceUtil.FILETYPE.cram.getFileType().isType(bamOrCram))
326+
{
327+
idx = new SamtoolsCramConverter(log).doIndex(bamOrCram, null);
328+
}
329+
330+
return idx;
331+
}
332+
297333
@Override
298334
public File bgzipFile(File input, Logger log) throws PipelineJobException
299335
{

SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/AlignmentMetricsHandler.java

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -16,21 +16,20 @@
1616
import org.labkey.api.pipeline.PipelineJobException;
1717
import org.labkey.api.pipeline.RecordedAction;
1818
import org.labkey.api.pipeline.file.FileAnalysisJobSupport;
19+
import org.labkey.api.sequenceanalysis.SequenceAnalysisService;
1920
import org.labkey.api.sequenceanalysis.SequenceOutputFile;
2021
import org.labkey.api.sequenceanalysis.pipeline.AbstractParameterizedOutputHandler;
2122
import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport;
2223
import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler;
2324
import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor;
2425
import org.labkey.api.util.FileType;
2526
import org.labkey.sequenceanalysis.SequenceAnalysisModule;
26-
import org.labkey.sequenceanalysis.run.util.BuildBamIndexWrapper;
2727

2828
import java.io.BufferedWriter;
2929
import java.io.File;
3030
import java.io.FileWriter;
3131
import java.io.IOException;
3232
import java.util.ArrayList;
33-
import java.util.Arrays;
3433
import java.util.Date;
3534
import java.util.List;
3635

@@ -103,11 +102,7 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
103102
{
104103
action.addInput(bam, "BAM File");
105104

106-
File bai = new File(bam.getPath() + ".bai");
107-
if (!bai.exists())
108-
{
109-
new BuildBamIndexWrapper(job.getLogger()).executeCommand(bam);
110-
}
105+
SequenceAnalysisService.get().ensureBamOrCramIdx(bam, ctx.getLogger(), false);
111106

112107
SamReaderFactory fact = SamReaderFactory.makeDefault();
113108
try (SamReader reader = fact.open(bam))

SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/PicardAlignmentMetricsHandler.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -269,7 +269,7 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
269269
File metricsFile = wrapper.getMetricsFile(o.getFile());
270270
File tempBam = new File(ctx.getOutputDir(), FileUtil.getBaseName(o.getFile()) + ".markDuplicates.bam");
271271
ctx.getFileManager().addIntermediateFile(tempBam);
272-
ctx.getFileManager().addIntermediateFile(new File(tempBam.getPath() + ".bai"));
272+
ctx.getFileManager().addIntermediateFile(SequenceUtil.getExpectedIndex(tempBam));
273273

274274
if (tempBam.exists())
275275
{

SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/AlignmentNormalizationTask.java

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import org.labkey.api.pipeline.RecordedAction;
1010
import org.labkey.api.pipeline.RecordedActionSet;
1111
import org.labkey.api.pipeline.WorkDirectoryTask;
12+
import org.labkey.api.sequenceanalysis.SequenceAnalysisService;
1213
import org.labkey.api.sequenceanalysis.model.Readset;
1314
import org.labkey.api.sequenceanalysis.pipeline.AbstractSequenceTaskFactory;
1415
import org.labkey.api.sequenceanalysis.pipeline.BamProcessingStep;
@@ -222,11 +223,11 @@ else if (step.expectToCreateNewBam())
222223
{
223224
getJob().getLogger().debug("moving original BAM: " + originalFile.getPath());
224225
FileUtils.moveFile(originalFile, finalDestination);
225-
File idxOrig = new File(originalFile.getPath() + ".bai");
226+
File idxOrig = SequenceAnalysisService.get().getExpectedBamOrCramIndex(originalFile);
226227
if (idxOrig.exists())
227228
{
228229
getJob().getLogger().debug("moving BAM index: " + idxOrig.getPath());
229-
FileUtils.moveFile(idxOrig, new File(finalDestination.getPath() + ".bai"));
230+
FileUtils.moveFile(idxOrig, SequenceAnalysisService.get().getExpectedBamOrCramIndex(finalDestination));
230231
}
231232
}
232233
}
@@ -242,7 +243,7 @@ else if (step.expectToCreateNewBam())
242243

243244
//then index
244245
getJob().setStatus(PipelineJob.TaskStatus.running, "INDEXING BAM");
245-
File finalIndexFile = new File(finalDestination.getPath() + ".bai");
246+
File finalIndexFile = SequenceAnalysisService.get().getExpectedBamOrCramIndex(finalDestination);
246247
if (!finalIndexFile.exists())
247248
{
248249
new BuildBamIndexWrapper(getJob().getLogger()).executeCommand(finalDestination);
@@ -295,7 +296,7 @@ else if (step.expectToCreateNewBam())
295296
getJob().getLogger().info("deleting original BAM: " + originalFile.getPath());
296297
originalFile.delete();
297298

298-
File indexFile = new File(originalFile.getPath() + ".bai");
299+
File indexFile = SequenceAnalysisService.get().getExpectedBamOrCramIndex(originalFile);
299300
if (indexFile.exists())
300301
{
301302
getJob().getLogger().info("BAM index exists, deleting");

SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/ConvertToCramHandler.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import org.labkey.api.query.InvalidKeyException;
1313
import org.labkey.api.query.QueryService;
1414
import org.labkey.api.query.QueryUpdateServiceException;
15+
import org.labkey.api.sequenceanalysis.SequenceAnalysisService;
1516
import org.labkey.api.sequenceanalysis.SequenceOutputFile;
1617
import org.labkey.api.sequenceanalysis.pipeline.AbstractParameterizedOutputHandler;
1718
import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome;
@@ -133,7 +134,7 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
133134
ctx.getLogger().info("Deleting original BAM: " + so.getFile().getPath());
134135
if (so.getFile().exists())
135136
{
136-
new File(so.getFile().getPath() + ".bai").delete();
137+
SequenceAnalysisService.get().getExpectedBamOrCramIndex(so.getFile()).delete();
137138
so.getFile().delete();
138139
}
139140
else

SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
import org.labkey.api.pipeline.WorkDirectory;
4343
import org.labkey.api.pipeline.WorkDirectoryTask;
4444
import org.labkey.api.pipeline.file.FileAnalysisJobSupport;
45+
import org.labkey.api.sequenceanalysis.SequenceAnalysisService;
4546
import org.labkey.api.sequenceanalysis.SequenceOutputFile;
4647
import org.labkey.api.sequenceanalysis.model.ReadData;
4748
import org.labkey.api.sequenceanalysis.model.Readset;
@@ -713,7 +714,7 @@ private void alignSet(Readset rs, String basename, Map<ReadData, Pair<File, File
713714
getJob().getLogger().info("***Starting BAM Post processing");
714715
getJob().setStatus(PipelineJob.TaskStatus.running, "BAM POST-PROCESSING");
715716
getHelper().getFileManager().addIntermediateFile(bam);
716-
File idx = new File(bam.getPath() + ".bai");
717+
File idx = SequenceAnalysisService.get().getExpectedBamOrCramIndex(bam);
717718
if (idx.exists())
718719
{
719720
getHelper().getFileManager().addIntermediateFile(idx);
@@ -723,7 +724,7 @@ private void alignSet(Readset rs, String basename, Map<ReadData, Pair<File, File
723724
{
724725
getJob().getLogger().info("performing step: " + stepCtx.getProvider().getLabel());
725726
getJob().setStatus(PipelineJob.TaskStatus.running, "RUNNING: " + stepCtx.getProvider().getLabel().toUpperCase());
726-
getJob().getLogger().debug("BAM index exists: " + (new File(bam.getPath() + ".bai")).exists());
727+
getJob().getLogger().debug("BAM index exists: " + (SequenceAnalysisService.get().getExpectedBamOrCramIndex(bam)).exists());
727728

728729
RecordedAction action = new RecordedAction(stepCtx.getProvider().getLabel());
729730
Date start = new Date();
@@ -741,7 +742,7 @@ private void alignSet(Readset rs, String basename, Map<ReadData, Pair<File, File
741742
if (!bam.equals(output.getBAM()))
742743
{
743744
getHelper().getFileManager().addIntermediateFile(bam);
744-
getHelper().getFileManager().addIntermediateFile(new File(bam.getPath() + ".bai"));
745+
getHelper().getFileManager().addIntermediateFile(SequenceAnalysisService.get().getExpectedBamOrCramIndex(bam));
745746
}
746747

747748
bam = output.getBAM();
@@ -759,7 +760,7 @@ else if (step.expectToCreateNewBam())
759760
getJob().getLogger().info("no BAM created by step, using output from previous step");
760761
}
761762

762-
getJob().getLogger().debug("index exists: " + (new File(bam.getPath() + ".bai")).exists());
763+
getJob().getLogger().debug("index exists: " + (SequenceAnalysisService.get().getExpectedBamOrCramIndex(bam)).exists());
763764

764765
Date end = new Date();
765766
action.setEndTime(end);
@@ -854,8 +855,8 @@ else if (step.expectToCreateNewBam())
854855
}
855856
FileUtils.moveFile(bam, renamedBam);
856857

857-
File bamIdxOrig = new File(bam.getPath() + ".bai");
858-
File finalBamIdx = new File(renamedBam.getPath() + ".bai");
858+
File bamIdxOrig = SequenceAnalysisService.get().getExpectedBamOrCramIndex(bam);
859+
File finalBamIdx = SequenceAnalysisService.get().getExpectedBamOrCramIndex(renamedBam);
859860
if (finalBamIdx.exists())
860861
{
861862
getJob().getLogger().warn("unexpected file, deleting: " + finalBamIdx.getPath());
@@ -900,7 +901,7 @@ else if (step.expectToCreateNewBam())
900901
Date start = new Date();
901902
indexAction.setStartTime(start);
902903
getHelper().getFileManager().addInput(indexAction, "Input BAM", bam);
903-
File originalIndex = new File(bam.getPath() + ".bai");
904+
File originalIndex = SequenceAnalysisService.get().getExpectedBamOrCramIndex(bam);
904905
if (originalIndex.exists())
905906
{
906907
getJob().getLogger().debug("deleting existing BAM index: " + originalIndex.getName());
@@ -910,7 +911,7 @@ else if (step.expectToCreateNewBam())
910911
renamedBam = new File(bam.getParentFile(), basename + ".bam");
911912
getHelper().getFileManager().addInput(indexAction, FINAL_BAM_ROLE, renamedBam);
912913

913-
File bai = new File(renamedBam.getPath() + ".bai");
914+
File bai = SequenceAnalysisService.get().getExpectedBamOrCramIndex(renamedBam);
914915
if (bai.exists())
915916
{
916917
getJob().getLogger().debug("deleting existing BAM index: " + bai.getName());
@@ -950,7 +951,7 @@ else if (step.expectToCreateNewBam())
950951
RecordedAction metricsAction = null;
951952
boolean supportsMetrics = alignmentStep.supportsMetrics();
952953
SAMFileHeader.SortOrder so = SequencePipelineService.get().getBamSortOrder(renamedBam);
953-
File index = new File(renamedBam.getPath() + ".bai");
954+
File index = SequenceAnalysisService.get().getExpectedBamOrCramIndex(renamedBam);
954955
if (!supportsMetrics)
955956
{
956957
getPipelineJob().getLogger().debug("this aligner does not support collection of alignment metrics");
@@ -1123,14 +1124,14 @@ else if (step.expectToCreateNewBam())
11231124
}
11241125

11251126
final File finalBam = renamedBam;
1126-
final File finalBamIdx = new File(renamedBam.getPath() + ".bai");
1127+
final File finalBamIdx = SequenceAnalysisService.get().getExpectedBamOrCramIndex(renamedBam);
11271128
_resumer.getRecordedActions().forEach(r -> {
11281129
r.updateForMovedFile(finalBam, cramFile);
11291130
r.updateForMovedFile(finalBamIdx, cramFileIdx);
11301131
});
11311132

11321133
getHelper().getFileManager().addIntermediateFile(renamedBam);
1133-
getHelper().getFileManager().addIntermediateFile(new File(renamedBam.getPath() + ".bai"));
1134+
getHelper().getFileManager().addIntermediateFile(SequenceAnalysisService.get().getExpectedBamOrCramIndex(renamedBam));
11341135
}
11351136
}
11361137

@@ -1279,7 +1280,7 @@ private File doAlignThenMerge(ReferenceGenome referenceGenome, Readset rs, Map<R
12791280
bams.add(o);
12801281
getHelper().getFileManager().addInput(mergeAction, "Input BAM", o);
12811282
getHelper().getFileManager().addIntermediateFile(o);
1282-
getHelper().getFileManager().addIntermediateFile(new File(o.getPath() + ".bai"));
1283+
getHelper().getFileManager().addIntermediateFile(SequenceAnalysisService.get().getExpectedBamOrCramIndex(o));
12831284
}
12841285

12851286
bam = new File(alignOutputs.get(0).getParent(), FileUtil.getBaseName(alignOutputs.get(0).getName()) + ".merged.bam");
@@ -1394,7 +1395,7 @@ public File doAlignmentForSet(List<Pair<File, File>> inputFiles, ReferenceGenome
13941395
{
13951396
getJob().setStatus(PipelineJob.TaskStatus.running, "MERGING UNALIGNED READS INTO BAM" + msgSuffix);
13961397
getJob().getLogger().info("merging unaligned reads into BAM");
1397-
File idx = new File(alignmentOutput.getBAM().getPath() + ".bai");
1398+
File idx = SequenceAnalysisService.get().getExpectedBamOrCramIndex(alignmentOutput.getBAM());
13981399
if (idx.exists())
13991400
{
14001401
getJob().getLogger().debug("deleting index: " + idx.getPath());

SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAnalysisTask.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
import org.labkey.api.pipeline.WorkDirectoryTask;
3737
import org.labkey.api.pipeline.file.FileAnalysisJobSupport;
3838
import org.labkey.api.query.FieldKey;
39+
import org.labkey.api.sequenceanalysis.SequenceAnalysisService;
3940
import org.labkey.api.sequenceanalysis.SequenceOutputFile;
4041
import org.labkey.api.sequenceanalysis.model.AnalysisModel;
4142
import org.labkey.api.sequenceanalysis.model.Readset;
@@ -333,7 +334,7 @@ else if (d.getFile().exists())
333334
{
334335
getJob().getLogger().info("BAM will be discarded: " + bam.getName());
335336
bam.delete();
336-
File idx = new File(bam.getPath() + ".bai");
337+
File idx = SequenceAnalysisService.get().getExpectedBamOrCramIndex(bam);
337338
if (idx.exists())
338339
{
339340
idx.delete();

SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/TaskFileManagerImpl.java

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import org.labkey.api.pipeline.WorkDirectory;
2222
import org.labkey.api.pipeline.file.FileAnalysisJobSupport;
2323
import org.labkey.api.reader.Readers;
24+
import org.labkey.api.sequenceanalysis.SequenceAnalysisService;
2425
import org.labkey.api.sequenceanalysis.SequenceOutputFile;
2526
import org.labkey.api.sequenceanalysis.pipeline.AbstractResumer;
2627
import org.labkey.api.sequenceanalysis.pipeline.PipelineStepOutput;
@@ -784,7 +785,7 @@ public void deleteIntermediateFiles(@NotNull Collection<File> filesToRetain) thr
784785

785786
if (SequenceUtil.FILETYPE.bam.getFileType().isType(f))
786787
{
787-
File idx = new File(f.getPath() + ".bai");
788+
File idx = SequenceAnalysisService.get().getExpectedBamOrCramIndex(f);
788789
if (idx.exists())
789790
{
790791
_job.getLogger().debug("Also deleting index: " + idx.getPath());
@@ -911,6 +912,13 @@ public void cleanup(Collection<RecordedAction> actions, @Nullable AbstractResume
911912
//then sort out which files were specified as named outputs later
912913
for (File input : _wd.getDir().listFiles())
913914
{
915+
if (input.getName().matches("^core.[0-9]+$") || input.getName().endsWith(".hprof"))
916+
{
917+
_job.getLogger().debug("Deleting core/hprof file: " + input.getPath());
918+
input.delete();
919+
continue;
920+
}
921+
914922
copyFile(input, actions, resumer);
915923
}
916924
}

0 commit comments

Comments
 (0)