1212import org .labkey .api .sequenceanalysis .SequenceAnalysisService ;
1313import org .labkey .api .sequenceanalysis .SequenceOutputFile ;
1414import org .labkey .api .sequenceanalysis .pipeline .AbstractParameterizedOutputHandler ;
15+ import org .labkey .api .sequenceanalysis .pipeline .BcftoolsRunner ;
1516import org .labkey .api .sequenceanalysis .pipeline .ReferenceGenome ;
1617import org .labkey .api .sequenceanalysis .pipeline .SequenceAnalysisJobSupport ;
1718import org .labkey .api .sequenceanalysis .pipeline .SequenceOutputHandler ;
@@ -256,6 +257,49 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
256257 }
257258 }
258259
260+ File cellSnpBaseVcf = new File (cellsnpDir , "cellSNP.base.vcf.gz" );
261+ if (!cellSnpBaseVcf .exists ())
262+ {
263+ throw new PipelineJobException ("Unable to find cellsnp base VCF" );
264+ }
265+
266+
267+ File cellSnpCellsVcf = new File (cellsnpDir , "cellSNP.cells.vcf.gz" );
268+ if (!cellSnpCellsVcf .exists ())
269+ {
270+ throw new PipelineJobException ("Unable to find cellsnp calls VCF" );
271+ }
272+
273+ sortAndFixVcf (cellSnpBaseVcf , genome , ctx .getLogger ());
274+ sortAndFixVcf (cellSnpCellsVcf , genome , ctx .getLogger ());
275+
276+ int vcfFile = ctx .getParams ().optInt (REF_VCF , -1 );
277+ File refVcfSubset = null ;
278+ if (vcfFile > -1 )
279+ {
280+ File vcf = ctx .getSequenceSupport ().getCachedData (vcfFile );
281+ if (vcf == null || !vcf .exists ())
282+ {
283+ throw new PipelineJobException ("Unable to find file with ID: " + vcfFile );
284+ }
285+
286+ refVcfSubset = new File (ctx .getWorkingDirectory (), vcf .getName ());
287+ BcftoolsRunner bcftoolsRunner = new BcftoolsRunner (ctx .getLogger ());
288+ bcftoolsRunner .execute (Arrays .asList (
289+ BcftoolsRunner .getBcfToolsPath ().getAbsolutePath (),
290+ "view" ,
291+ vcf .getPath (),
292+ "-R" ,
293+ cellSnpCellsVcf .getPath (),
294+ "-Oz" ,
295+ "-o" ,
296+ refVcfSubset .getPath ()
297+ ));
298+
299+ ctx .getFileManager ().addIntermediateFile (refVcfSubset );
300+ ctx .getFileManager ().addIntermediateFile (new File (refVcfSubset .getPath () + ".tbi" ));
301+ }
302+
259303 List <String > vireo = new ArrayList <>();
260304 vireo .add ("vireo" );
261305 vireo .add ("-c" );
@@ -277,6 +321,12 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
277321 throw new PipelineJobException ("Must provide nDonors" );
278322 }
279323
324+ if (refVcfSubset != null )
325+ {
326+ vireo .add ("-d" );
327+ vireo .add (refVcfSubset .getPath ());
328+ }
329+
280330 vireo .add ("-N" );
281331 vireo .add (String .valueOf (nDonors ));
282332
@@ -312,25 +362,13 @@ else if (outFiles.length > 1)
312362 so .setName (inputFiles .get (0 ).getName () + ": Vireo Demultiplexing" );
313363 }
314364 so .setCategory ("Vireo Demultiplexing" );
365+ if (vcfFile > -1 )
366+ {
367+ so .setDescription ("Reference VCF ID: " + vcfFile );
368+ }
315369 ctx .addSequenceOutput (so );
316370 }
317371
318- File cellSnpBaseVcf = new File (cellsnpDir , "cellSNP.base.vcf.gz" );
319- if (!cellSnpBaseVcf .exists ())
320- {
321- throw new PipelineJobException ("Unable to find cellsnp base VCF" );
322- }
323-
324-
325- File cellSnpCellsVcf = new File (cellsnpDir , "cellSNP.cells.vcf.gz" );
326- if (!cellSnpCellsVcf .exists ())
327- {
328- throw new PipelineJobException ("Unable to find cellsnp calls VCF" );
329- }
330-
331- sortAndFixVcf (cellSnpBaseVcf , genome , ctx .getLogger ());
332- sortAndFixVcf (cellSnpCellsVcf , genome , ctx .getLogger ());
333-
334372 if (storeCellSnpVcf )
335373 {
336374 SequenceOutputFile so = new SequenceOutputFile ();
0 commit comments