44import htsjdk .samtools .SAMSequenceDictionary ;
55import htsjdk .samtools .SAMSequenceRecord ;
66import htsjdk .samtools .util .CloseableIterator ;
7- import htsjdk .samtools .util .IOUtil ;
87import htsjdk .variant .utils .SAMSequenceDictionaryExtractor ;
98import htsjdk .variant .variantcontext .Allele ;
109import htsjdk .variant .variantcontext .VariantContext ;
2625import org .labkey .sequenceanalysis .SequenceAnalysisModule ;
2726import org .labkey .sequenceanalysis .util .SequenceUtil ;
2827
28+ import java .io .BufferedWriter ;
2929import java .io .File ;
30+ import java .io .FileOutputStream ;
3031import java .io .IOException ;
32+ import java .io .OutputStreamWriter ;
33+ import java .nio .charset .Charset ;
3134import java .nio .file .Files ;
3235import java .util .ArrayList ;
3336import java .util .Arrays ;
4043import java .util .Set ;
4144import java .util .stream .Collectors ;
4245import java .util .stream .Stream ;
46+ import java .util .zip .GZIPOutputStream ;
4347
4448public class MergeLoFreqVcfHandler extends AbstractParameterizedOutputHandler <SequenceOutputHandler .SequenceOutputProcessor >
4549{
@@ -206,7 +210,7 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
206210 for (SequenceOutputFile so : inputFiles )
207211 {
208212 //This will error if the coverage file is not found. Perform check now to fail fast
209- getDepthFile ();
213+ getDepthFile (so . getFile () );
210214
211215 if (so .getLibrary_id () == null )
212216 {
@@ -232,7 +236,7 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
232236 //Also perform sanity check of VCF early
233237 if (vc .getAttribute ("GATK_DP" ) == null )
234238 {
235- throw new PipelineJobException ("Expected GATK_DP annotation on line " + key + " in file: " + so .getFile ().getPath ());
239+ throw new PipelineJobException ("Expected GATK_DP annotation on line " + getCacheKey ( vc . getContig (), vc . getStart ()) + " in file: " + so .getFile ().getPath ());
236240 }
237241
238242 double af = vc .getAttributeAsDouble ("AF" , 0.0 );
@@ -307,9 +311,9 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
307311
308312 ctx .getLogger ().info ("Pass 3: Building merged table" );
309313
310- File output = new File (ctx .getOutputDir (), basename + "txt" );
314+ File output = new File (ctx .getOutputDir (), basename + "txt.gz " );
311315 int idx = 0 ;
312- try (CSVWriter writer = new CSVWriter (IOUtil . openFileForBufferedUtf8Writing ( output ), '\t' , CSVWriter .NO_QUOTE_CHARACTER ))
316+ try (CSVWriter writer = new CSVWriter (new BufferedWriter ( new OutputStreamWriter ( new GZIPOutputStream ( new FileOutputStream ( output )), Charset . forName ( "UTF-8" )) ), '\t' , CSVWriter .NO_QUOTE_CHARACTER ))
313317 {
314318 writer .writeNext (new String []{"ReadsetName" , "OutputFileId" , "ReadsetId" , "Contig" , "Start" , "End" , "Ref" , "AltAlleles" , "GatkDepth" , "LoFreqDepth" , "RefAF" , "AltAFs" , "NonRefCount" , "AltCounts" });
315319
0 commit comments