Skip to content

Commit 26399c4

Browse files
committed
Write gzipped output
1 parent 705e0a5 commit 26399c4

File tree

1 file changed

+9
-5
lines changed

1 file changed

+9
-5
lines changed

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/MergeLoFreqVcfHandler.java

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
import htsjdk.samtools.SAMSequenceDictionary;
55
import htsjdk.samtools.SAMSequenceRecord;
66
import htsjdk.samtools.util.CloseableIterator;
7-
import htsjdk.samtools.util.IOUtil;
87
import htsjdk.variant.utils.SAMSequenceDictionaryExtractor;
98
import htsjdk.variant.variantcontext.Allele;
109
import htsjdk.variant.variantcontext.VariantContext;
@@ -26,8 +25,12 @@
2625
import org.labkey.sequenceanalysis.SequenceAnalysisModule;
2726
import org.labkey.sequenceanalysis.util.SequenceUtil;
2827

28+
import java.io.BufferedWriter;
2929
import java.io.File;
30+
import java.io.FileOutputStream;
3031
import java.io.IOException;
32+
import java.io.OutputStreamWriter;
33+
import java.nio.charset.Charset;
3134
import java.nio.file.Files;
3235
import java.util.ArrayList;
3336
import java.util.Arrays;
@@ -40,6 +43,7 @@
4043
import java.util.Set;
4144
import java.util.stream.Collectors;
4245
import java.util.stream.Stream;
46+
import java.util.zip.GZIPOutputStream;
4347

4448
public class MergeLoFreqVcfHandler extends AbstractParameterizedOutputHandler<SequenceOutputHandler.SequenceOutputProcessor>
4549
{
@@ -206,7 +210,7 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
206210
for (SequenceOutputFile so : inputFiles)
207211
{
208212
//This will error if the coverage file is not found. Perform check now to fail fast
209-
getDepthFile();
213+
getDepthFile(so.getFile());
210214

211215
if (so.getLibrary_id() == null)
212216
{
@@ -232,7 +236,7 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
232236
//Also perform sanity check of VCF early
233237
if (vc.getAttribute("GATK_DP") == null)
234238
{
235-
throw new PipelineJobException("Expected GATK_DP annotation on line " + key + " in file: " + so.getFile().getPath());
239+
throw new PipelineJobException("Expected GATK_DP annotation on line " + getCacheKey(vc.getContig(), vc.getStart()) + " in file: " + so.getFile().getPath());
236240
}
237241

238242
double af = vc.getAttributeAsDouble("AF", 0.0);
@@ -307,9 +311,9 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
307311

308312
ctx.getLogger().info("Pass 3: Building merged table");
309313

310-
File output = new File(ctx.getOutputDir(), basename + "txt");
314+
File output = new File(ctx.getOutputDir(), basename + "txt.gz");
311315
int idx = 0;
312-
try (CSVWriter writer = new CSVWriter(IOUtil.openFileForBufferedUtf8Writing(output), '\t', CSVWriter.NO_QUOTE_CHARACTER))
316+
try (CSVWriter writer = new CSVWriter(new BufferedWriter(new OutputStreamWriter(new GZIPOutputStream(new FileOutputStream(output)), Charset.forName("UTF-8"))), '\t', CSVWriter.NO_QUOTE_CHARACTER))
313317
{
314318
writer.writeNext(new String[]{"ReadsetName", "OutputFileId", "ReadsetId", "Contig", "Start", "End", "Ref", "AltAlleles", "GatkDepth", "LoFreqDepth", "RefAF", "AltAFs", "NonRefCount", "AltCounts"});
315319

0 commit comments

Comments
 (0)