Skip to content

Commit 91076c9

Browse files
committed
Always use txt.gz file for SBT
1 parent: 2adfa0e · commit: 91076c9

File tree

2 files changed

+11
-46
lines changed

2 files changed

+11
-46
lines changed

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/SequenceBasedTypingAlignmentAggregator.java

Lines changed: 6 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -17,24 +17,19 @@
1717

1818
import au.com.bytecode.opencsv.CSVReader;
1919
import au.com.bytecode.opencsv.CSVWriter;
20-
import htsjdk.samtools.SAMFormatException;
2120
import htsjdk.samtools.SAMRecord;
22-
import htsjdk.samtools.SAMRecordIterator;
23-
import htsjdk.samtools.SamReader;
24-
import htsjdk.samtools.SamReaderFactory;
25-
import htsjdk.samtools.ValidationStringency;
2621
import htsjdk.samtools.fastq.FastqReader;
2722
import htsjdk.samtools.fastq.FastqRecord;
2823
import htsjdk.samtools.fastq.FastqWriter;
2924
import htsjdk.samtools.fastq.FastqWriterFactory;
3025
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
3126
import htsjdk.samtools.reference.ReferenceSequence;
27+
import htsjdk.samtools.util.IOUtil;
3228
import htsjdk.samtools.util.Interval;
3329
import htsjdk.samtools.util.IntervalList;
3430
import org.apache.commons.io.FileUtils;
3531
import org.apache.commons.lang3.StringUtils;
3632
import org.apache.logging.log4j.Logger;
37-
import org.apache.logging.log4j.LogManager;
3833
import org.labkey.api.data.Container;
3934
import org.labkey.api.data.DbScope;
4035
import org.labkey.api.data.SimpleFilter;
@@ -46,29 +41,19 @@
4641
import org.labkey.api.reader.Readers;
4742
import org.labkey.api.security.User;
4843
import org.labkey.api.sequenceanalysis.model.AnalysisModel;
49-
import org.labkey.api.util.FileType;
5044
import org.labkey.api.util.Pair;
51-
import org.labkey.api.util.StringUtilsLabKey;
5245
import org.labkey.api.writer.PrintWriters;
5346
import org.labkey.sequenceanalysis.SequenceAnalysisSchema;
54-
import org.labkey.sequenceanalysis.api.picard.CigarPositionIterable;
5547
import org.labkey.sequenceanalysis.run.alignment.FastqCollapser;
5648
import org.labkey.sequenceanalysis.run.util.FlashWrapper;
5749
import org.labkey.sequenceanalysis.run.util.NTSnp;
5850
import org.labkey.sequenceanalysis.util.ReferenceLibraryHelperImpl;
5951
import org.labkey.sequenceanalysis.util.SequenceUtil;
6052

6153
import java.io.BufferedReader;
62-
import java.io.BufferedWriter;
6354
import java.io.File;
64-
import java.io.FileInputStream;
65-
import java.io.FileOutputStream;
6655
import java.io.IOException;
67-
import java.io.InputStreamReader;
68-
import java.io.OutputStream;
69-
import java.io.OutputStreamWriter;
7056
import java.io.PrintWriter;
71-
import java.nio.charset.StandardCharsets;
7257
import java.util.ArrayList;
7358
import java.util.Arrays;
7459
import java.util.Collection;
@@ -81,7 +66,6 @@
8166
import java.util.Map;
8267
import java.util.Set;
8368
import java.util.TreeSet;
84-
import java.util.zip.GZIPOutputStream;
8569

8670
/**
8771
* User: bimber
@@ -303,22 +287,9 @@ public String getKey(SAMRecord record)
303287
;
304288
}
305289

306-
public OutputStream getLogOutputStream(File outputLog) throws IOException
307-
{
308-
FileType gz = new FileType(".gz");
309-
if (gz.isType(outputLog))
310-
{
311-
return new GZIPOutputStream(new FileOutputStream(outputLog));
312-
}
313-
else
314-
{
315-
return new FileOutputStream(outputLog);
316-
}
317-
}
318-
319290
public Map<String, HitSet> getAlignmentSummary(File outputLog) throws IOException, PipelineJobException
320291
{
321-
try (CSVWriter writer = outputLog == null ? null : new CSVWriter(new BufferedWriter(new OutputStreamWriter(getLogOutputStream(outputLog), StandardCharsets.UTF_8)), '\t', CSVWriter.NO_QUOTE_CHARACTER))
292+
try (CSVWriter writer = outputLog == null ? null : new CSVWriter(IOUtil.openFileForBufferedUtf8Writing(outputLog), '\t', CSVWriter.NO_QUOTE_CHARACTER))
322293
{
323294
//these are stage-1 filters, filtering on the read-pair level
324295
Map<String, HitSet> totals = doFilterStage1(writer);
@@ -899,7 +870,7 @@ private Map<String, HitSet> doFilterStage4(CSVWriter writer, Map<String, HitSet>
899870
return stage4Totals;
900871
}
901872

902-
private class HitSet
873+
private static class HitSet
903874
{
904875
public Set<String> readNames = new HashSet<>();
905876
public Set<String> refNames = new TreeSet<>();
@@ -1047,7 +1018,7 @@ public void writeOutput(User u, Container c, AnalysisModel model)
10471018

10481019
public static void processSBTSummary(User u, Container c, AnalysisModel model, File output, File refFasta, Logger log) throws PipelineJobException
10491020
{
1050-
try (CSVReader reader = new CSVReader(new BufferedReader(new InputStreamReader(new FileInputStream(output), StandardCharsets.UTF_8)), '\t', CSVWriter.DEFAULT_QUOTE_CHARACTER))
1021+
try (CSVReader reader = new CSVReader(IOUtil.openFileForBufferedUtf8Reading(output), '\t', CSVWriter.DEFAULT_QUOTE_CHARACTER))
10511022
{
10521023
try (DbScope.Transaction transaction = ExperimentService.get().ensureTransaction())
10531024
{
@@ -1117,7 +1088,7 @@ public static void processSBTSummary(User u, Container c, AnalysisModel model, F
11171088

11181089
public void writeTable(File output) throws PipelineJobException
11191090
{
1120-
try (CSVWriter writer = new CSVWriter(PrintWriters.getPrintWriter(output), '\t'))
1091+
try (CSVWriter writer = new CSVWriter(IOUtil.openFileForBufferedUtf8Writing(output), '\t'))
11211092
{
11221093
Map<String, HitSet> map = writeSummary();
11231094

@@ -1326,7 +1297,7 @@ else if (f.getName().contains("_2"))
13261297

13271298
//rename reads to make it easier to combine later
13281299
File renamed = new File(outDir, basename + ".collapsed.tmp.fasta");
1329-
try (BufferedReader reader = Readers.getReader(collapsed);PrintWriter writer = new PrintWriter(new BufferedWriter(new OutputStreamWriter(new FileOutputStream(renamed), StringUtilsLabKey.DEFAULT_CHARSET))))
1300+
try (BufferedReader reader = Readers.getReader(collapsed);PrintWriter writer = new PrintWriter(IOUtil.openFileForBufferedUtf8Writing(renamed)))
13301301
{
13311302
String line;
13321303
while ((line = reader.readLine()) != null)

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/SequenceBasedTypingAnalysis.java

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -204,19 +204,13 @@ public void exec(ResultSet rs) throws SQLException
204204
@Override
205205
public Output performAnalysisPerSampleLocal(AnalysisModel model, File inputBam, File referenceFasta, File outDir) throws PipelineJobException
206206
{
207-
File expectedTxt = getSBTSummaryFile(outDir, inputBam, false);
207+
File expectedTxt = getSBTSummaryFile(outDir, inputBam);
208208
if (expectedTxt.exists())
209209
{
210210
getPipelineCtx().getLogger().info("Processing SBT output: " + expectedTxt.getPath());
211211

212212
SequenceBasedTypingAlignmentAggregator.processSBTSummary(getPipelineCtx().getJob().getUser(), getPipelineCtx().getJob().getContainer(), model, expectedTxt, referenceFasta, getPipelineCtx().getLogger());
213213

214-
File compressed = Compress.compressGzip(expectedTxt);
215-
if (compressed.exists() && expectedTxt.exists())
216-
{
217-
expectedTxt.delete();
218-
}
219-
220214
// Perform second pass to collapse groups:
221215
new AlignmentGroupCompare(model.getAnalysisId(), getPipelineCtx().getJob().getContainer(), getPipelineCtx().getJob().getUser()).collapseGroups(getPipelineCtx().getLogger(), getPipelineCtx().getJob().getUser());
222216
}
@@ -310,10 +304,10 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc
310304
getPipelineCtx().getLogger().info("Inspection complete");
311305

312306
//write output as TSV
313-
agg.writeTable(getSBTSummaryFile(outputDir, inputBam, false));
307+
agg.writeTable(getSBTSummaryFile(outputDir, inputBam));
314308

315309
// This will be gzipped later:
316-
output.addSequenceOutput(getSBTSummaryFile(outputDir, inputBam, true), "SBT Results: " + inputBam.getName(), "SBT Results", rs.getReadsetId(), null, referenceGenome.getGenomeId(), null);
310+
output.addSequenceOutput(getSBTSummaryFile(outputDir, inputBam), "SBT Results: " + inputBam.getName(), "SBT Results", rs.getReadsetId(), null, referenceGenome.getGenomeId(), null);
317311

318312
//optionally output FASTQ of unmapped reads
319313
Double exportThreshold = getProvider().getParameterByName(EXPORT_UNMAPPED).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Double.class);
@@ -376,9 +370,9 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc
376370
}
377371
}
378372

379-
protected File getSBTSummaryFile(File outputDir, File bam, boolean doGzip)
373+
protected File getSBTSummaryFile(File outputDir, File bam)
380374
{
381-
return new File(outputDir, FileUtil.getBaseName(bam) + ".sbt_hits.txt" + (doGzip ? ".gz": ""));
375+
return new File(outputDir, FileUtil.getBaseName(bam) + ".sbt_hits.txt.gz");
382376
}
383377

384378
public static class AlignmentGroupCompare

0 commit comments

Comments (0)