|
1 | 1 | package org.labkey.sequenceanalysis.run.analysis; |
2 | 2 |
|
| 3 | +import htsjdk.samtools.util.CloseableIterator; |
3 | 4 | import htsjdk.samtools.util.Interval; |
| 5 | +import htsjdk.variant.variantcontext.Allele; |
| 6 | +import htsjdk.variant.variantcontext.Genotype; |
| 7 | +import htsjdk.variant.variantcontext.GenotypeBuilder; |
| 8 | +import htsjdk.variant.variantcontext.VariantContext; |
| 9 | +import htsjdk.variant.variantcontext.VariantContextBuilder; |
| 10 | +import htsjdk.variant.variantcontext.writer.VariantContextWriter; |
| 11 | +import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder; |
| 12 | +import htsjdk.variant.vcf.VCFFileReader; |
| 13 | +import htsjdk.variant.vcf.VCFHeader; |
4 | 14 | import org.apache.commons.io.FileUtils; |
5 | 15 | import org.apache.commons.lang3.StringUtils; |
6 | 16 | import org.jetbrains.annotations.Nullable; |
@@ -198,6 +208,7 @@ private File runPbsvCall(JobContext ctx, List<File> inputs, ReferenceGenome geno |
198 | 208 | if (doneFile.exists()) |
199 | 209 | { |
200 | 210 | ctx.getLogger().info("Existing file, found, re-using"); |
| 211 | + verifyAndAddMissingSamples(ctx, vcfOut, inputs); |
201 | 212 | return vcfOut; |
202 | 213 | } |
203 | 214 |
|
@@ -272,6 +283,8 @@ else if ("1".equals(ret)) |
272 | 283 | throw new PipelineJobException("Unable to find file: " + vcfOut.getPath()); |
273 | 284 | } |
274 | 285 |
|
| 286 | + verifyAndAddMissingSamples(ctx, vcfOut, inputs); |
| 287 | + |
275 | 288 | try |
276 | 289 | { |
277 | 290 | FileUtils.touch(doneFile); |
@@ -336,4 +349,79 @@ public void doWork(List<SequenceOutputFile> inputFiles, JobContext ctx) throws P |
336 | 349 | { |
337 | 350 | ScatterGatherUtils.doCopyGvcfLocally(inputFiles, ctx); |
338 | 351 | } |
| 352 | + |
| 353 | + public void verifyAndAddMissingSamples(JobContext ctx, File input, List<File> inputFiles) throws PipelineJobException |
| 354 | + { |
| 355 | + ctx.getLogger().debug("Verifying sample list in output VCF"); |
| 356 | + |
| 357 | + List<String> sampleNamesInOrder = new ArrayList<>(); |
| 358 | + inputFiles.forEach(f -> { |
| 359 | + sampleNamesInOrder.add(SequenceAnalysisService.get().getUnzippedBaseName(f.getName())); |
| 360 | + }); |
| 361 | + |
| 362 | + File output = new File(input.getPath() + ".tmp.vcf"); |
| 363 | + try (VCFFileReader reader = new VCFFileReader(input)) |
| 364 | + { |
| 365 | + VCFHeader header = reader.getHeader(); |
| 366 | + List<String> existingSamples = header.getSampleNamesInOrder(); |
| 367 | + if (existingSamples.equals(sampleNamesInOrder)) |
| 368 | + { |
| 369 | + ctx.getLogger().debug("Samples are identical, no need to update VCF"); |
| 370 | + return; |
| 371 | + } |
| 372 | + |
| 373 | + ctx.getLogger().debug("Will add missing samples. Total pre-existing: " + existingSamples.size() + " of " + sampleNamesInOrder.size()); |
| 374 | + try (VariantContextWriter writer = new VariantContextWriterBuilder().setOutputFile(output).build();CloseableIterator<VariantContext> it = reader.iterator()) |
| 375 | + { |
| 376 | + header = new VCFHeader(header.getMetaDataInInputOrder(), sampleNamesInOrder); |
| 377 | + writer.writeHeader(header); |
| 378 | + |
| 379 | + while (it.hasNext()) |
| 380 | + { |
| 381 | + VariantContext vc = it.next(); |
| 382 | + VariantContextBuilder vcb = new VariantContextBuilder(vc); |
| 383 | + List<Genotype> genotypes = new ArrayList<>(); |
| 384 | + for (String sample : sampleNamesInOrder) |
| 385 | + { |
| 386 | + if (vc.hasGenotype(sample)) |
| 387 | + { |
| 388 | + genotypes.add(vc.getGenotype(sample)); |
| 389 | + } |
| 390 | + else |
| 391 | + { |
| 392 | + genotypes.add(new GenotypeBuilder(sample, Arrays.asList(Allele.NO_CALL, Allele.NO_CALL)).make()); |
| 393 | + } |
| 394 | + } |
| 395 | + |
| 396 | + vcb.genotypes(genotypes); |
| 397 | + |
| 398 | + writer.add(vcb.make()); |
| 399 | + } |
| 400 | + } |
| 401 | + } |
| 402 | + |
| 403 | + try |
| 404 | + { |
| 405 | + // Replace input |
| 406 | + input.delete(); |
| 407 | + FileUtils.moveFile(output, input); |
| 408 | + |
| 409 | + // And index |
| 410 | + File idx = new File(input.getPath() + ".idx"); |
| 411 | + if (idx.exists()) |
| 412 | + { |
| 413 | + idx.delete(); |
| 414 | + } |
| 415 | + |
| 416 | + File outputIdx = new File(output.getPath() + ".idx"); |
| 417 | + if (outputIdx.exists()) |
| 418 | + { |
| 419 | + FileUtils.moveFile(outputIdx, idx); |
| 420 | + } |
| 421 | + } |
| 422 | + catch (IOException e) |
| 423 | + { |
| 424 | + throw new PipelineJobException(e); |
| 425 | + } |
| 426 | + } |
339 | 427 | } |
0 commit comments