@@ -250,8 +250,15 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
250250 Collections .sort (whitelistSites );
251251
252252 ctx .getLogger ().info ("Pass 2: establish alleles per site" );
253+ int fileNo = 0 ;
253254 for (SequenceOutputFile so : inputFiles )
254255 {
256+ fileNo ++;
257+ if (fileNo % 100 == 0 )
258+ {
259+ ctx .getLogger ().info ("Processed " + fileNo + " files" );
260+ }
261+
255262 try (VCFFileReader reader = new VCFFileReader (so .getFile ()))
256263 {
257264 for (Pair <String , Integer > site : whitelistSites )
@@ -289,7 +296,7 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
289296 SAMSequenceDictionary dict = SAMSequenceDictionaryExtractor .extractDictionary (genome .getSequenceDictionary ().toPath ());
290297 Map <String , Integer > contigToOffset = getContigToOffset (dict );
291298
292- ctx .getLogger ().info ("Building merged table" );
299+ ctx .getLogger ().info ("Pass 3: Building merged table" );
293300
294301 File output = new File (ctx .getOutputDir (), basename + "txt" );
295302 int idx = 0 ;
@@ -328,7 +335,7 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
328335 if (!it .hasNext ())
329336 {
330337 //No variant was called, so this is either considered all WT, or no-call
331- int depth = getReadDepth (so .getFile (), contigToOffset , site .getLeft (), site .getRight ());
338+ int depth = getReadDepth (so .getFile (), contigToOffset , site .getLeft (), site .getRight (), ctx );
332339 if (depth < minDepth )
333340 {
334341 line .add (String .valueOf (depth ));
@@ -539,17 +546,17 @@ private Map<String, Integer> getContigToOffset(SAMSequenceDictionary dict)
539546 return ret ;
540547 }
541548
542- private int getReadDepth (File vcf , Map <String , Integer > contigToOffset , String contig , int position1 ) throws PipelineJobException
549+ private int getReadDepth (File vcf , Map <String , Integer > contigToOffset , String contig , int position1 , JobContext ctx ) throws PipelineJobException
543550 {
544551 File gatkDepth = new File (vcf .getParentFile (), vcf .getName ().replaceAll (".all.vcf.gz" , ".coverage" ));
545552 if (!gatkDepth .exists ())
546553 {
547554 throw new PipelineJobException ("File not found: " + gatkDepth .getPath ());
548555 }
549556
557+ int lineNo = contigToOffset .get (contig ) + position1 ;
550558 try (Stream <String > lines = Files .lines (gatkDepth .toPath ()))
551559 {
552- int lineNo = contigToOffset .get (contig ) + position1 ;
553560 String [] line = lines .skip (lineNo - 1 ).findFirst ().get ().split ("\t " );
554561
555562 if (!line [0 ].equals (contig + ":" + position1 ))
@@ -561,6 +568,7 @@ private int getReadDepth(File vcf, Map<String, Integer> contigToOffset, String c
561568 }
562569 catch (IOException e )
563570 {
571+ ctx .getLogger ().error ("Error parsing GATK depth: " + vcf .getName () + " / " + gatkDepth .getPath () + " / " + lineNo );
564572 throw new PipelineJobException (e );
565573 }
566574 }
0 commit comments