33import htsjdk .samtools .util .IOUtil ;
44import org .apache .commons .io .FileUtils ;
55import org .apache .commons .lang3 .StringUtils ;
6+ import org .apache .logging .log4j .Logger ;
67import org .json .JSONObject ;
78import org .labkey .api .module .ModuleLoader ;
89import org .labkey .api .pipeline .PipelineJob ;
1617import org .labkey .api .sequenceanalysis .pipeline .SequenceOutputHandler ;
1718import org .labkey .api .sequenceanalysis .pipeline .SequencePipelineService ;
1819import org .labkey .api .sequenceanalysis .pipeline .ToolParameterDescriptor ;
20+ import org .labkey .api .sequenceanalysis .run .AbstractGatk4Wrapper ;
1921import org .labkey .api .sequenceanalysis .run .SimpleScriptWrapper ;
2022import org .labkey .api .util .PageFlowUtil ;
2123import org .labkey .api .writer .PrintWriters ;
@@ -305,18 +307,8 @@ else if (outFiles.length > 1)
305307 throw new PipelineJobException ("Unable to find cellsnp calls VCF" );
306308 }
307309
308- try
309- {
310- SequencePipelineService .get ().sortVcf (cellSnpBaseVcf , null , genome .getSequenceDictionary (), ctx .getLogger ());
311- SequenceAnalysisService .get ().ensureVcfIndex (cellSnpBaseVcf , ctx .getLogger ());
312-
313- SequencePipelineService .get ().sortVcf (cellSnpCellsVcf , null , genome .getSequenceDictionary (), ctx .getLogger ());
314- SequenceAnalysisService .get ().ensureVcfIndex (cellSnpCellsVcf , ctx .getLogger ());
315- }
316- catch (IOException e )
317- {
318- throw new PipelineJobException (e );
319- }
310+ sortAndFixVcf (cellSnpBaseVcf , genome , ctx .getLogger ());
311+ sortAndFixVcf (cellSnpCellsVcf , genome , ctx .getLogger ());
320312
321313 if (storeCellSnpVcf )
322314 {
@@ -336,5 +328,65 @@ else if (outFiles.length > 1)
336328 ctx .addSequenceOutput (so );
337329 }
338330 }
331+
332+ private void sortAndFixVcf (File vcf , ReferenceGenome genome , Logger log ) throws PipelineJobException
333+ {
334+ // NOTE: this is required since cellsnp-lite creates a non-compliant header dictionary
335+ new UpdateVCFSequenceDictionary (log ).execute (vcf , genome .getSequenceDictionary ());
336+
337+ try
338+ {
339+ SequencePipelineService .get ().sortVcf (vcf , null , genome .getSequenceDictionary (), log );
340+ SequenceAnalysisService .get ().ensureVcfIndex (vcf , log );
341+ }
342+ catch (IOException e )
343+ {
344+ throw new PipelineJobException (e );
345+ }
346+ }
347+
348+ public static class UpdateVCFSequenceDictionary extends AbstractGatk4Wrapper
349+ {
350+ public UpdateVCFSequenceDictionary (Logger log )
351+ {
352+ super (log );
353+ }
354+
355+ public void execute (File vcf , File dict ) throws PipelineJobException
356+ {
357+ List <String > args = new ArrayList <>(getBaseArgs ("UpdateVCFSequenceDictionary" ));
358+ args .add ("-V" );
359+ args .add (vcf .getPath ());
360+
361+ args .add ("--source-dictionary" );
362+ args .add (dict .getPath ());
363+
364+ args .add ("--replace" );
365+ args .add ("true" );
366+
367+ File output = new File (vcf .getParentFile (), "tmp.vcf.gz" );
368+ args .add ("-O" );
369+ args .add (output .getPath ());
370+
371+ execute (args );
372+
373+ if (!output .exists ())
374+ {
375+ throw new PipelineJobException ("Unable to find file: " + output .getPath ());
376+ }
377+
378+ vcf .delete ();
379+
380+ try
381+ {
382+ SequenceAnalysisService .get ().ensureVcfIndex (vcf , getLogger (), true );
383+ }
384+ catch (IOException e )
385+ {
386+ throw new PipelineJobException (e );
387+ }
388+
389+ }
390+ }
339391 }
340392}
0 commit comments