Skip to content

Commit c9ed9fe

Browse files
committed
More rigorous cleanup of BAMs
1 parent 4ec5fba commit c9ed9fe

File tree

5 files changed

+58
-35
lines changed

5 files changed

+58
-35
lines changed

SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/DefaultPipelineStepOutput.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,26 +101,31 @@ public List<SequenceOutput> getSequenceOutputs()
101101
return Collections.unmodifiableList(_sequenceOutputs);
102102
}
103103

104+
/**
 * Records a sequence output for this step by wrapping the arguments in a new
 * {@link SequenceOutput} and appending it to {@code _sequenceOutputs}.
 * No validation is done here; in particular the file is not checked for existence.
 *
 * @param file        the output file, passed through to the SequenceOutput constructor
 * @param label       label for the output, passed through unchanged
 * @param category    category for the output, passed through unchanged
 * @param readsetId   optional readset id (may be null)
 * @param analysisId  optional analysis id (may be null)
 * @param genomeId    optional genome id (may be null)
 * @param description optional description (may be null)
 */
@Override
104105
public void addSequenceOutput(File file, String label, String category, @Nullable Integer readsetId, @Nullable Integer analysisId, @Nullable Integer genomeId, @Nullable String description)
105106
{
106107
_sequenceOutputs.add(new SequenceOutput(file, label, category, readsetId, analysisId, genomeId, description));
107108
}
108109

110+
/**
 * Registers an input file for this step by appending a (file, role) pair to {@code _inputs}.
 * Neither argument is validated or de-duplicated here.
 *
 * @param input the input file
 * @param role  the role string stored alongside the file
 */
@Override
109111
public void addInput(File input, String role)
110112
{
111113
_inputs.add(Pair.of(input, role));
112114
}
113115

116+
/**
 * Registers an output file for this step by appending a (file, role) pair to {@code _outputs}.
 * Neither argument is validated or de-duplicated here.
 *
 * @param output the output file
 * @param role   the role string stored alongside the file
 */
@Override
114117
public void addOutput(File output, String role)
115118
{
116119
_outputs.add(Pair.of(output, role));
117120
}
118121

122+
/**
 * Convenience overload: registers an intermediate file without a role by delegating to
 * {@link #addIntermediateFile(File, String)} with a {@code null} role.
 *
 * @param file the intermediate file to register
 */
@Override
119123
public void addIntermediateFile(File file)
120124
{
121125
// null role: the two-argument overload branches on (role != null)
addIntermediateFile(file, null);
122126
}
123127

128+
@Override
124129
public void addIntermediateFile(File file, String role)
125130
{
126131
if (role != null)
@@ -136,6 +141,11 @@ public void addPicardMetricsFile(Readset rs, File metricFile, File inputFile)
136141

137142
/**
 * Registers a Picard metrics file for the given readset by appending a
 * {@link PipelineStepOutput.PicardMetricsOutput} (file, type, readset row id) to
 * {@code _picardMetricsFiles}.
 *
 * @param rs         readset the metrics belong to; only {@code rs.getRowId()} is read
 * @param metricFile the metrics file; must already exist on disk
 * @param type       the metrics type recorded with the file
 * @throws IllegalArgumentException if {@code metricFile} does not exist
 */
public void addPicardMetricsFile(Readset rs, File metricFile, PicardMetricsOutput.TYPE type)
138143
{
144+
// Fail fast on a missing file instead of recording a path that cannot be read later.
// NOTE(review): metricFile and rs are dereferenced without null checks; a null argument will raise NullPointerException.
if (!metricFile.exists())
145+
{
146+
throw new IllegalArgumentException("File does not exist: " + metricFile.getPath());
147+
}
148+
139149
_picardMetricsFiles.add(new PipelineStepOutput.PicardMetricsOutput(metricFile, type, rs.getRowId()));
140150
}
141151

SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/TaskFileManagerImpl.java

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424
import org.labkey.api.sequenceanalysis.pipeline.PipelineStepOutput;
2525
import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport;
2626
import org.labkey.api.sequenceanalysis.pipeline.TaskFileManager;
27-
import org.labkey.api.sequenceanalysis.run.AbstractCommandWrapper;
2827
import org.labkey.api.util.Compress;
2928
import org.labkey.api.util.DebugInfoDumper;
3029
import org.labkey.api.util.FileType;
@@ -33,6 +32,7 @@
3332
import org.labkey.api.writer.PrintWriters;
3433
import org.labkey.sequenceanalysis.SequenceAnalysisManager;
3534
import org.labkey.sequenceanalysis.SequenceAnalysisSchema;
35+
import org.labkey.sequenceanalysis.util.SequenceUtil;
3636

3737
import java.io.BufferedReader;
3838
import java.io.BufferedWriter;
@@ -734,6 +734,34 @@ public void deleteIntermediateFiles() throws PipelineJobException
734734
else
735735
{
736736
f.delete();
737+
738+
if (SequenceUtil.FILETYPE.bam.getFileType().isType(f))
739+
{
740+
File idx = new File(f.getPath() + ".bai");
741+
if (idx.exists())
742+
{
743+
_job.getLogger().debug("Also deleting index: " + idx.getPath());
744+
idx.delete();
745+
}
746+
}
747+
else if (SequenceUtil.FILETYPE.vcf.getFileType().isType(f))
748+
{
749+
File idx = new File(f.getPath() + ".tbi");
750+
if (idx.exists())
751+
{
752+
_job.getLogger().debug("Also deleting index: " + idx.getPath());
753+
idx.delete();
754+
}
755+
}
756+
else if (SequenceUtil.FILETYPE.bed.getFileType().isType(f))
757+
{
758+
File idx = new File(f.getPath() + ".idx");
759+
if (idx.exists())
760+
{
761+
_job.getLogger().debug("Also deleting index: " + idx.getPath());
762+
idx.delete();
763+
}
764+
}
737765
}
738766

739767
if (f.exists())

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/MergeLoFreqVcfHandler.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -535,9 +535,9 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
535535
}
536536
}
537537

538-
if (idx % 100000 == 0)
538+
if (idx % 250000 == 0)
539539
{
540-
ctx.getLogger().info("Total sites written: " + idx);
540+
ctx.getLogger().info("Total lines written: " + idx);
541541
}
542542
}
543543
}

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/bampostprocessing/MarkDuplicatesStep.java

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -55,29 +55,33 @@ public Output processBam(Readset rs, File inputBam, ReferenceGenome referenceGen
5555
File outputBam = new File(outputDirectory, FileUtil.getBaseName(inputBam) + ".markduplicates.bam");
5656
output.addIntermediateFile(outputBam);
5757

58-
File sortedBam = new File(outputDirectory, FileUtil.getBaseName(inputBam) + ".sorted.bam");
59-
boolean sortedPreexisting = sortedBam.exists();
60-
6158
output.setBAM(getWrapper().executeCommand(inputBam, outputBam, getClientCommandArgs("=")));
59+
addStepOutputs(getWrapper(), rs, inputBam, outputDirectory, output);
6260

61+
return output;
62+
}
6363

64-
if (sortedBam.exists() && !sortedPreexisting)
64+
/**
 * Registers the side files of a mark-duplicates run on {@code output}.
 * <p>
 * Two things are handled:
 * <ol>
 *   <li>If {@code <outputDirectory>/<basename of inputBam>.sorted.bam} exists and is not the input BAM itself,
 *       it and the path {@code <sorted bam>.bai} are added as intermediate files.</li>
 *   <li>The metrics file is looked up via {@code wrapper.getMetricsFile()}, first for the sorted BAM and then
 *       for the input BAM. The first one that exists is recorded as a Picard metrics file of type
 *       {@code bam} and as an output with role {@code "MarkDuplicateMetrics"}. If neither exists, nothing is recorded.</li>
 * </ol>
 * The method is public and static, so it can be called without a MarkDuplicatesStep instance.
 *
 * @param wrapper         wrapper used for logging and for resolving the metrics file path
 * @param rs              readset passed through to {@code addPicardMetricsFile}
 * @param inputBam        the BAM that was given to the wrapper
 * @param outputDirectory directory in which the sorted BAM is looked for
 * @param output          step output that receives the intermediate files, metrics and outputs
 */
public static void addStepOutputs(MarkDuplicatesWrapper wrapper, Readset rs, File inputBam, File outputDirectory, BamProcessingOutputImpl output)
65+
{
66+
//Note: presumably the wrapper writes <basename>.sorted.bam into outputDirectory when it has to sort the input first (TODO confirm against MarkDuplicatesWrapper). It is skipped when the input BAM is itself that file.
67+
File sortedBam = new File(outputDirectory, FileUtil.getBaseName(inputBam) + ".sorted.bam");
68+
if (sortedBam.exists() && !inputBam.equals(sortedBam))
6569
{
70+
wrapper.getLogger().debug("Adding sorted BAM as intermediate file: " + sortedBam.getPath());
6671
output.addIntermediateFile(sortedBam);
72+
// NOTE(review): the index path is registered without checking that the .bai exists; confirm downstream cleanup tolerates a missing file.
output.addIntermediateFile(new File(sortedBam.getPath() + ".bai"));
6773
}
6874

6975
//NOTE: depending on whether the BAM is sorted by the wrapper, the metrics file name will differ
70-
if (getWrapper().getMetricsFile(sortedBam).exists())
76+
if (wrapper.getMetricsFile(sortedBam).exists())
7177
{
72-
output.addPicardMetricsFile(rs, getWrapper().getMetricsFile(sortedBam), PipelineStepOutput.PicardMetricsOutput.TYPE.bam);
73-
output.addOutput(getWrapper().getMetricsFile(sortedBam), "MarkDuplicateMetrics");
78+
output.addPicardMetricsFile(rs, wrapper.getMetricsFile(sortedBam), PipelineStepOutput.PicardMetricsOutput.TYPE.bam);
79+
output.addOutput(wrapper.getMetricsFile(sortedBam), "MarkDuplicateMetrics");
7480
}
75-
else if (getWrapper().getMetricsFile(inputBam).exists())
81+
else if (wrapper.getMetricsFile(inputBam).exists())
7682
{
77-
output.addPicardMetricsFile(rs, getWrapper().getMetricsFile(inputBam), PipelineStepOutput.PicardMetricsOutput.TYPE.bam);
78-
output.addOutput(getWrapper().getMetricsFile(inputBam), "MarkDuplicateMetrics");
83+
output.addPicardMetricsFile(rs, wrapper.getMetricsFile(inputBam), PipelineStepOutput.PicardMetricsOutput.TYPE.bam);
84+
output.addOutput(wrapper.getMetricsFile(inputBam), "MarkDuplicateMetrics");
7985
}
80-
81-
return output;
8286
}
8387
}

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/bampostprocessing/MarkDuplicatesWithMateCigarStep.java

Lines changed: 1 addition & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -56,27 +56,8 @@ public Output processBam(Readset rs, File inputBam, ReferenceGenome referenceGen
5656
File outputBam = new File(outputDirectory, FileUtil.getBaseName(inputBam) + ".markduplicateswithmatecigar.bam");
5757
output.addIntermediateFile(outputBam);
5858

59-
File sortedBam = new File(outputDirectory, FileUtil.getBaseName(inputBam) + ".sorted.bam");
60-
boolean sortedPreexisting = sortedBam.exists();
61-
6259
output.setBAM(getWrapper().executeCommand(inputBam, outputBam, getClientCommandArgs("=")));
63-
64-
if (sortedBam.exists() && !sortedPreexisting)
65-
{
66-
output.addIntermediateFile(sortedBam);
67-
}
68-
69-
//NOTE: depending on whether the BAM is sorted by the wrapper, the metrics file name will differ
70-
if (getWrapper().getMetricsFile(sortedBam).exists())
71-
{
72-
output.addPicardMetricsFile(rs, getWrapper().getMetricsFile(sortedBam), PipelineStepOutput.PicardMetricsOutput.TYPE.bam);
73-
output.addOutput(getWrapper().getMetricsFile(sortedBam), "MarkDuplicateMetrics");
74-
}
75-
else if (getWrapper().getMetricsFile(inputBam).exists())
76-
{
77-
output.addPicardMetricsFile(rs, getWrapper().getMetricsFile(inputBam), PipelineStepOutput.PicardMetricsOutput.TYPE.bam);
78-
output.addOutput(getWrapper().getMetricsFile(inputBam), "MarkDuplicateMetrics");
79-
}
60+
MarkDuplicatesStep.addStepOutputs(getWrapper(), rs, inputBam, outputDirectory, output);
8061

8162
return output;
8263
}

0 commit comments

Comments
 (0)