22
33import htsjdk .samtools .util .Interval ;
44import org .apache .commons .lang3 .StringUtils ;
5- import org .apache .logging .log4j .Logger ;
65import org .jetbrains .annotations .NotNull ;
76import org .labkey .api .pipeline .AbstractTaskFactory ;
87import org .labkey .api .pipeline .AbstractTaskFactorySettings ;
1514import org .labkey .api .sequenceanalysis .SequenceOutputFile ;
1615import org .labkey .api .sequenceanalysis .pipeline .ReferenceGenome ;
1716import org .labkey .api .sequenceanalysis .pipeline .SequenceOutputHandler ;
18- import org .labkey .api .sequenceanalysis .run . AbstractDiscvrSeqWrapper ;
17+ import org .labkey .api .sequenceanalysis .pipeline . VariantProcessingStep ;
1918import org .labkey .api .util .FileType ;
20- import org .labkey .api .writer .PrintWriters ;
2119import org .labkey .sequenceanalysis .run .variant .OutputVariantsStartingInIntervalsStep ;
2220
2321import java .io .File ;
2422import java .io .IOException ;
25- import java .io .PrintWriter ;
2623import java .util .ArrayList ;
2724import java .util .Collections ;
2825import java .util .HashSet ;
@@ -113,6 +110,7 @@ private VariantProcessingJob getPipelineJob()
113110 SequenceTaskHelper .logModuleVersions (getJob ().getLogger ());
114111 RecordedAction action = new RecordedAction (ACTION_NAME );
115112 TaskFileManagerImpl manager = new TaskFileManagerImpl (getPipelineJob (), _wd .getDir (), _wd );
113+ JobContextImpl ctx = new JobContextImpl (getPipelineJob (), getPipelineJob ().getSequenceSupport (), getPipelineJob ().getParameterJson (), _wd .getDir (), new TaskFileManagerImpl (getPipelineJob (), _wd .getDir (), _wd ), _wd );
116114
117115 File finalOut ;
118116 SequenceOutputHandler <SequenceOutputHandler .SequenceOutputProcessor > handler = getPipelineJob ().getHandler ();
@@ -122,7 +120,7 @@ private VariantProcessingJob getPipelineJob()
122120 }
123121 else
124122 {
125- finalOut = runDefaultVariantMerge (manager , action , handler );
123+ finalOut = runDefaultVariantMerge (ctx , manager , action , handler );
126124 }
127125
128126 Map <String , File > scatterOutputs = getPipelineJob ().getScatterJobOutputs ();
@@ -154,7 +152,7 @@ private VariantProcessingJob getPipelineJob()
154152 return new RecordedActionSet (action );
155153 }
156154
157- private File runDefaultVariantMerge (TaskFileManagerImpl manager , RecordedAction action , SequenceOutputHandler <SequenceOutputHandler .SequenceOutputProcessor > handler ) throws PipelineJobException
155+ private File runDefaultVariantMerge (JobContextImpl ctx , TaskFileManagerImpl manager , RecordedAction action , SequenceOutputHandler <SequenceOutputHandler .SequenceOutputProcessor > handler ) throws PipelineJobException
158156 {
159157 Map <String , List <Interval >> jobToIntervalMap = getPipelineJob ().getJobToIntervalMap ();
160158 getJob ().setStatus (PipelineJob .TaskStatus .running , "Combining Per-Contig VCFs: " + jobToIntervalMap .size ());
@@ -209,6 +207,15 @@ private File runDefaultVariantMerge(TaskFileManagerImpl manager, RecordedAction
209207 manager .addIntermediateFile (new File (vcf .getPath () + ".tbi" ));
210208 }
211209
210+ Set <Integer > genomeIds = new HashSet <>();
211+ getPipelineJob ().getFiles ().forEach (x -> genomeIds .add (x .getLibrary_id ()));
212+ if (genomeIds .size () != 1 )
213+ {
214+ throw new PipelineJobException ("Expected a single genome, found: " + StringUtils .join (genomeIds , ", " ));
215+ }
216+
217+ ReferenceGenome genome = getPipelineJob ().getSequenceSupport ().getCachedGenome (genomeIds .iterator ().next ());
218+
212219 String basename = SequenceAnalysisService .get ().getUnzippedBaseName (toConcat .get (0 ).getName ());
213220 File combined = new File (getPipelineJob ().getAnalysisDirectory (), basename + ".vcf.gz" );
214221 File combinedIdx = new File (combined .getPath () + ".tbi" );
@@ -223,18 +230,15 @@ private File runDefaultVariantMerge(TaskFileManagerImpl manager, RecordedAction
223230 throw new PipelineJobException ("Missing one of more VCFs: " + missing .stream ().map (File ::getPath ).collect (Collectors .joining ("," )));
224231 }
225232
226- Set <Integer > genomeIds = new HashSet <>();
227- getPipelineJob ().getFiles ().forEach (x -> genomeIds .add (x .getLibrary_id ()));
228- if (genomeIds .size () != 1 )
229- {
230- throw new PipelineJobException ("Expected a single genome, found: " + StringUtils .join (genomeIds , ", " ));
231- }
232-
233- ReferenceGenome genome = getPipelineJob ().getSequenceSupport ().getCachedGenome (genomeIds .iterator ().next ());
234233 combined = SequenceAnalysisService .get ().combineVcfs (toConcat , combined , genome , getJob ().getLogger (), true , null );
235234 }
236235 manager .addOutput (action , "Merged VCF" , combined );
237236
237+ if (handler instanceof VariantProcessingStep .SupportsScatterGather )
238+ {
239+ ((VariantProcessingStep .SupportsScatterGather ) handler ).performAdditionalMergeTasks (ctx , getPipelineJob (), manager , genome , toConcat );
240+ }
241+
238242 return combined ;
239243 }
240244}
0 commit comments