Skip to content

Commit f41f3d7

Browse files
committed
Correct sample list in VCF after PBSV call
1 parent 0aa431f commit f41f3d7

File tree

1 file changed

+88
-0
lines changed

1 file changed

+88
-0
lines changed

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/PbsvJointCallingHandler.java

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,16 @@
11
package org.labkey.sequenceanalysis.run.analysis;
22

3+
import htsjdk.samtools.util.CloseableIterator;
34
import htsjdk.samtools.util.Interval;
5+
import htsjdk.variant.variantcontext.Allele;
6+
import htsjdk.variant.variantcontext.Genotype;
7+
import htsjdk.variant.variantcontext.GenotypeBuilder;
8+
import htsjdk.variant.variantcontext.VariantContext;
9+
import htsjdk.variant.variantcontext.VariantContextBuilder;
10+
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
11+
import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder;
12+
import htsjdk.variant.vcf.VCFFileReader;
13+
import htsjdk.variant.vcf.VCFHeader;
414
import org.apache.commons.io.FileUtils;
515
import org.apache.commons.lang3.StringUtils;
616
import org.jetbrains.annotations.Nullable;
@@ -198,6 +208,7 @@ private File runPbsvCall(JobContext ctx, List<File> inputs, ReferenceGenome geno
198208
if (doneFile.exists())
199209
{
200210
ctx.getLogger().info("Existing file, found, re-using");
211+
verifyAndAddMissingSamples(ctx, vcfOut, inputs);
201212
return vcfOut;
202213
}
203214

@@ -272,6 +283,8 @@ else if ("1".equals(ret))
272283
throw new PipelineJobException("Unable to find file: " + vcfOut.getPath());
273284
}
274285

286+
verifyAndAddMissingSamples(ctx, vcfOut, inputs);
287+
275288
try
276289
{
277290
FileUtils.touch(doneFile);
@@ -336,4 +349,79 @@ public void doWork(List<SequenceOutputFile> inputFiles, JobContext ctx) throws P
336349
{
337350
ScatterGatherUtils.doCopyGvcfLocally(inputFiles, ctx);
338351
}
352+
353+
public void verifyAndAddMissingSamples(JobContext ctx, File input, List<File> inputFiles) throws PipelineJobException
354+
{
355+
ctx.getLogger().debug("Verifying sample list in output VCF");
356+
357+
List<String> sampleNamesInOrder = new ArrayList<>();
358+
inputFiles.forEach(f -> {
359+
sampleNamesInOrder.add(SequenceAnalysisService.get().getUnzippedBaseName(f.getName()));
360+
});
361+
362+
File output = new File(input.getPath() + ".tmp.vcf");
363+
try (VCFFileReader reader = new VCFFileReader(input))
364+
{
365+
VCFHeader header = reader.getHeader();
366+
List<String> existingSamples = header.getSampleNamesInOrder();
367+
if (existingSamples.equals(sampleNamesInOrder))
368+
{
369+
ctx.getLogger().debug("Samples are identical, no need to update VCF");
370+
return;
371+
}
372+
373+
ctx.getLogger().debug("Will add missing samples. Total pre-existing: " + existingSamples.size() + " of " + sampleNamesInOrder.size());
374+
try (VariantContextWriter writer = new VariantContextWriterBuilder().setOutputFile(output).build();CloseableIterator<VariantContext> it = reader.iterator())
375+
{
376+
header = new VCFHeader(header.getMetaDataInInputOrder(), sampleNamesInOrder);
377+
writer.writeHeader(header);
378+
379+
while (it.hasNext())
380+
{
381+
VariantContext vc = it.next();
382+
VariantContextBuilder vcb = new VariantContextBuilder(vc);
383+
List<Genotype> genotypes = new ArrayList<>();
384+
for (String sample : sampleNamesInOrder)
385+
{
386+
if (vc.hasGenotype(sample))
387+
{
388+
genotypes.add(vc.getGenotype(sample));
389+
}
390+
else
391+
{
392+
genotypes.add(new GenotypeBuilder(sample, Arrays.asList(Allele.NO_CALL, Allele.NO_CALL)).make());
393+
}
394+
}
395+
396+
vcb.genotypes(genotypes);
397+
398+
writer.add(vcb.make());
399+
}
400+
}
401+
}
402+
403+
try
404+
{
405+
// Replace input
406+
input.delete();
407+
FileUtils.moveFile(output, input);
408+
409+
// And index
410+
File idx = new File(input.getPath() + ".idx");
411+
if (idx.exists())
412+
{
413+
idx.delete();
414+
}
415+
416+
File outputIdx = new File(output.getPath() + ".idx");
417+
if (outputIdx.exists())
418+
{
419+
FileUtils.moveFile(outputIdx, idx);
420+
}
421+
}
422+
catch (IOException e)
423+
{
424+
throw new PipelineJobException(e);
425+
}
426+
}
339427
}

0 commit comments

Comments
 (0)