Skip to content

Commit 2325f2b

Browse files
committed
Ensure cellsnp-lite VCFs are sorted and indexed
1 parent f2dd317 commit 2325f2b

File tree

1 file changed: +56 −20 lines changed

1 file changed: +56 −20 lines changed

singlecell/src/org/labkey/singlecell/pipeline/singlecell/VireoHandler.java

Lines changed: 56 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88
import org.labkey.api.pipeline.PipelineJob;
99
import org.labkey.api.pipeline.PipelineJobException;
1010
import org.labkey.api.pipeline.RecordedAction;
11+
import org.labkey.api.sequenceanalysis.SequenceAnalysisService;
1112
import org.labkey.api.sequenceanalysis.SequenceOutputFile;
1213
import org.labkey.api.sequenceanalysis.pipeline.AbstractParameterizedOutputHandler;
1314
import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome;
@@ -36,7 +37,7 @@ public class VireoHandler extends AbstractParameterizedOutputHandler<SequenceOu
3637

3738
public VireoHandler()
3839
{
39-
super(ModuleLoader.getInstance().getModule(SingleCellModule.class), "Run Vireo", "This will run cellsnp-lite and vireo to infer cell-to-sample based on genotype.", new LinkedHashSet<>(PageFlowUtil.set("sequenceanalysis/field/SequenceOutputFileSelectorField.js")), Arrays.asList(
40+
super(ModuleLoader.getInstance().getModule(SingleCellModule.class), "Run CellSnp-Lite/Vireo", "This will run cellsnp-lite and vireo to infer cell-to-sample based on genotype.", new LinkedHashSet<>(PageFlowUtil.set("sequenceanalysis/field/SequenceOutputFileSelectorField.js")), Arrays.asList(
4041
ToolParameterDescriptor.create("nDonors", "# Donors", "The number of donors to demultiplex", "ldk-integerfield", new JSONObject(){{
4142
put("allowBlank", false);
4243
}}, null),
@@ -247,6 +248,7 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
247248
vireo.add(ctx.getWorkingDirectory().getPath());
248249

249250
int nDonors = ctx.getParams().optInt("nDonors", 0);
251+
boolean storeCellSnpVcf = ctx.getParams().optBoolean("storeCellSnpVcf", false);
250252
if (nDonors == 0)
251253
{
252254
throw new PipelineJobException("Must provide nDonors");
@@ -255,39 +257,73 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
255257
vireo.add("-N");
256258
vireo.add(String.valueOf(nDonors));
257259

258-
new SimpleScriptWrapper(ctx.getLogger()).execute(vireo);
260+
if (nDonors == 1)
261+
{
262+
storeCellSnpVcf = true;
263+
ctx.getLogger().info("nDonor was 1, skipping vireo");
264+
}
265+
else
266+
{
267+
new SimpleScriptWrapper(ctx.getLogger()).execute(vireo);
268+
269+
File[] outFiles = ctx.getWorkingDirectory().listFiles(f -> f.getName().endsWith("donor_ids.tsv"));
270+
if (outFiles == null || outFiles.length == 0)
271+
{
272+
throw new PipelineJobException("Missing vireo output file");
273+
}
274+
else if (outFiles.length > 1)
275+
{
276+
throw new PipelineJobException("More than one possible vireo output file found");
277+
}
278+
279+
SequenceOutputFile so = new SequenceOutputFile();
280+
so.setReadset(inputFiles.get(0).getReadset());
281+
so.setLibrary_id(inputFiles.get(0).getLibrary_id());
282+
so.setFile(outFiles[0]);
283+
if (so.getReadset() != null)
284+
{
285+
so.setName(ctx.getSequenceSupport().getCachedReadset(so.getReadset()).getName() + ": Vireo Demultiplexing");
286+
}
287+
else
288+
{
289+
so.setName(inputFiles.get(0).getName() + ": Vireo Demultiplexing");
290+
}
291+
so.setCategory("Vireo Demultiplexing");
292+
ctx.addSequenceOutput(so);
293+
}
259294

260-
File[] outFiles = ctx.getWorkingDirectory().listFiles(f -> f.getName().endsWith("donor_ids.tsv"));
261-
if (outFiles == null || outFiles.length == 0)
295+
File cellSnpBaseVcf = new File(cellsnpDir, "cellSNP.base.vcf.gz");
296+
if (!cellSnpBaseVcf.exists())
262297
{
263-
throw new PipelineJobException("Missing vireo output file");
298+
throw new PipelineJobException("Unable to find cellsnp base VCF");
264299
}
265-
else if (outFiles.length > 1)
300+
301+
302+
File cellSnpCellsVcf = new File(cellsnpDir, "cellSNP.cells.vcf.gz");
303+
if (!cellSnpCellsVcf.exists())
266304
{
267-
throw new PipelineJobException("More than one possible vireo output file found");
305+
throw new PipelineJobException("Unable to find cellsnp calls VCF");
268306
}
269307

270-
SequenceOutputFile so = new SequenceOutputFile();
271-
so.setReadset(inputFiles.get(0).getReadset());
272-
so.setLibrary_id(inputFiles.get(0).getLibrary_id());
273-
so.setFile(outFiles[0]);
274-
if (so.getReadset() != null)
308+
try
275309
{
276-
so.setName(ctx.getSequenceSupport().getCachedReadset(so.getReadset()).getName() + ": Vireo Demultiplexing");
310+
SequencePipelineService.get().sortVcf(cellSnpBaseVcf, null, genome.getSequenceDictionary(), ctx.getLogger());
311+
SequenceAnalysisService.get().ensureVcfIndex(cellSnpBaseVcf, ctx.getLogger());
312+
313+
SequencePipelineService.get().sortVcf(cellSnpCellsVcf, null, genome.getSequenceDictionary(), ctx.getLogger());
314+
SequenceAnalysisService.get().ensureVcfIndex(cellSnpCellsVcf, ctx.getLogger());
277315
}
278-
else
316+
catch (IOException e)
279317
{
280-
so.setName(inputFiles.get(0).getName() + ": Vireo Demultiplexing");
318+
throw new PipelineJobException(e);
281319
}
282-
so.setCategory("Vireo Demultiplexing");
283-
ctx.addSequenceOutput(so);
284320

285-
if (ctx.getParams().optBoolean("storeCellSnpVcf", false))
321+
if (storeCellSnpVcf)
286322
{
287-
so = new SequenceOutputFile();
323+
SequenceOutputFile so = new SequenceOutputFile();
288324
so.setReadset(inputFiles.get(0).getReadset());
289325
so.setLibrary_id(inputFiles.get(0).getLibrary_id());
290-
so.setFile(outFiles[0]);
326+
so.setFile(cellSnpCellsVcf);
291327
if (so.getReadset() != null)
292328
{
293329
so.setName(ctx.getSequenceSupport().getCachedReadset(so.getReadset()).getName() + ": Cellsnp-lite VCF");

0 commit comments

Comments
 (0)