Skip to content

Commit 0b0f1d0

Browse files
committed
Ensure cellsnp-lite VCFs are reheadered, sorted and indexed
1 parent 68bd133 commit 0b0f1d0

File tree

1 file changed

+64
-12
lines changed

1 file changed

+64
-12
lines changed

singlecell/src/org/labkey/singlecell/pipeline/singlecell/VireoHandler.java

Lines changed: 64 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import htsjdk.samtools.util.IOUtil;
44
import org.apache.commons.io.FileUtils;
55
import org.apache.commons.lang3.StringUtils;
6+
import org.apache.logging.log4j.Logger;
67
import org.json.JSONObject;
78
import org.labkey.api.module.ModuleLoader;
89
import org.labkey.api.pipeline.PipelineJob;
@@ -16,6 +17,7 @@
1617
import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler;
1718
import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService;
1819
import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor;
20+
import org.labkey.api.sequenceanalysis.run.AbstractGatk4Wrapper;
1921
import org.labkey.api.sequenceanalysis.run.SimpleScriptWrapper;
2022
import org.labkey.api.util.PageFlowUtil;
2123
import org.labkey.api.writer.PrintWriters;
@@ -305,18 +307,8 @@ else if (outFiles.length > 1)
305307
throw new PipelineJobException("Unable to find cellsnp calls VCF");
306308
}
307309

308-
try
309-
{
310-
SequencePipelineService.get().sortVcf(cellSnpBaseVcf, null, genome.getSequenceDictionary(), ctx.getLogger());
311-
SequenceAnalysisService.get().ensureVcfIndex(cellSnpBaseVcf, ctx.getLogger());
312-
313-
SequencePipelineService.get().sortVcf(cellSnpCellsVcf, null, genome.getSequenceDictionary(), ctx.getLogger());
314-
SequenceAnalysisService.get().ensureVcfIndex(cellSnpCellsVcf, ctx.getLogger());
315-
}
316-
catch (IOException e)
317-
{
318-
throw new PipelineJobException(e);
319-
}
310+
sortAndFixVcf(cellSnpBaseVcf, genome, ctx.getLogger());
311+
sortAndFixVcf(cellSnpCellsVcf, genome, ctx.getLogger());
320312

321313
if (storeCellSnpVcf)
322314
{
@@ -336,5 +328,65 @@ else if (outFiles.length > 1)
336328
ctx.addSequenceOutput(so);
337329
}
338330
}
331+
332+
private void sortAndFixVcf(File vcf, ReferenceGenome genome, Logger log) throws PipelineJobException
333+
{
334+
// NOTE: this is required since cellsnp-lite creates a non-compliant header dictionary
335+
new UpdateVCFSequenceDictionary(log).execute(vcf, genome.getSequenceDictionary());
336+
337+
try
338+
{
339+
SequencePipelineService.get().sortVcf(vcf, null, genome.getSequenceDictionary(), log);
340+
SequenceAnalysisService.get().ensureVcfIndex(vcf, log);
341+
}
342+
catch (IOException e)
343+
{
344+
throw new PipelineJobException(e);
345+
}
346+
}
347+
348+
public static class UpdateVCFSequenceDictionary extends AbstractGatk4Wrapper
349+
{
350+
public UpdateVCFSequenceDictionary(Logger log)
351+
{
352+
super(log);
353+
}
354+
355+
public void execute(File vcf, File dict) throws PipelineJobException
356+
{
357+
List<String> args = new ArrayList<>(getBaseArgs("UpdateVCFSequenceDictionary"));
358+
args.add("-V");
359+
args.add(vcf.getPath());
360+
361+
args.add("--source-dictionary");
362+
args.add(dict.getPath());
363+
364+
args.add("--replace");
365+
args.add("true");
366+
367+
File output = new File(vcf.getParentFile(), "tmp.vcf.gz");
368+
args.add("-O");
369+
args.add(output.getPath());
370+
371+
execute(args);
372+
373+
if (!output.exists())
374+
{
375+
throw new PipelineJobException("Unable to find file: " + output.getPath());
376+
}
377+
378+
vcf.delete();
379+
380+
try
381+
{
382+
SequenceAnalysisService.get().ensureVcfIndex(vcf, getLogger(), true);
383+
}
384+
catch (IOException e)
385+
{
386+
throw new PipelineJobException(e);
387+
}
388+
389+
}
390+
}
339391
}
340392
}

0 commit comments

Comments
 (0)