|
28 | 28 | import org.labkey.sequenceanalysis.SequenceAnalysisModule; |
29 | 29 | import org.labkey.sequenceanalysis.pipeline.JobContextImpl; |
30 | 30 | import org.labkey.sequenceanalysis.pipeline.ProcessVariantsHandler; |
| 31 | +import org.labkey.sequenceanalysis.run.util.CombineGVCFsWrapper; |
31 | 32 | import org.labkey.sequenceanalysis.run.util.GenomicsDBImportHandler; |
32 | 33 | import org.labkey.sequenceanalysis.run.util.GenotypeGVCFsWrapper; |
33 | 34 |
|
@@ -268,29 +269,62 @@ public void processFilesOnWebserver(PipelineJob job, SequenceAnalysisJobSupport |
268 | 269 |
|
269 | 270 | } |
270 | 271 |
|
| 272 | + private String getBasename(JobContext ctx) |
| 273 | + { |
| 274 | + String basename = ctx.getParams().get("variantCalling.GenotypeGVCFs.fileBaseName") != null ? ctx.getParams().getString("variantCalling.GenotypeGVCFs.fileBaseName") : "CombinedGenotypes"; |
| 275 | + basename = basename.replaceAll(".vcf.gz$", ""); |
| 276 | + basename = basename.replaceAll(".vcf$", ""); |
| 277 | + |
| 278 | + return basename; |
| 279 | + } |
| 280 | + |
271 | 281 | private File runGenotypeGVCFs(PipelineJob job, JobContext ctx, ProcessVariantsHandler.Resumer resumer, List<File> inputFiles, int genomeId) throws PipelineJobException |
272 | 282 | { |
273 | 283 | RecordedAction action = new RecordedAction(getName()); |
274 | 284 | action.setStartTime(new Date()); |
275 | 285 |
|
| 286 | + File outDir = ctx.getOutputDir(); |
| 287 | + String basename = getBasename(ctx); |
| 288 | + |
| 289 | + File outputVcf = new File(outDir, basename + ".vcf.gz"); |
| 290 | + |
276 | 291 | for (File f : inputFiles) |
277 | 292 | { |
278 | 293 | action.addInput(f, "Input Variants"); |
279 | 294 | } |
280 | 295 |
|
| 296 | + boolean doCopyLocal = ctx.getParams().optBoolean("variantCalling.GenotypeGVCFs.doCopyInputs", false); |
| 297 | + |
| 298 | + Set<File> toDelete = new HashSet<>(); |
| 299 | + List<File> filesToProcess = new ArrayList<>(); |
| 300 | + if (doCopyLocal) |
| 301 | + { |
| 302 | + ctx.getLogger().info("making local copies of gVCF/GenomicsDB files prior to genotyping"); |
| 303 | + filesToProcess.addAll(GenotypeGVCFsWrapper.copyVcfsLocally(inputFiles, toDelete, null, ctx.getLogger(), outputVcf.exists())); |
| 304 | + } |
| 305 | + else |
| 306 | + { |
| 307 | + filesToProcess.addAll(inputFiles); |
| 308 | + } |
| 309 | + |
| 310 | + //Allow CombineGVCFs to run on interval(s) |
| 311 | + File inputVcf; |
| 312 | + if (filesToProcess.size() > 1) |
| 313 | + { |
| 314 | + inputVcf = combineInputs(ctx, filesToProcess, genomeId); |
| 315 | + } |
| 316 | + else |
| 317 | + { |
| 318 | + inputVcf = filesToProcess.get(0); |
| 319 | + } |
| 320 | + |
281 | 321 | GenotypeGVCFsWrapper wrapper = new GenotypeGVCFsWrapper(job.getLogger()); |
282 | 322 | ReferenceGenome genome = ctx.getSequenceSupport().getCachedGenome(genomeId); |
283 | 323 | if (genome == null) |
284 | 324 | { |
285 | 325 | throw new PipelineJobException("Unable to find cached genome for Id: " + genomeId); |
286 | 326 | } |
287 | 327 |
|
288 | | - File outDir = ctx.getOutputDir(); |
289 | | - String basename = ctx.getParams().get("variantCalling.GenotypeGVCFs.fileBaseName") != null ? ctx.getParams().getString("variantCalling.GenotypeGVCFs.fileBaseName") : "CombinedGenotypes"; |
290 | | - basename = basename.replaceAll(".vcf.gz$", ""); |
291 | | - basename = basename.replaceAll(".vcf$", ""); |
292 | | - |
293 | | - File outputVcf = new File(outDir, basename + ".vcf.gz"); |
294 | 328 | List<String> toolParams = new ArrayList<>(); |
295 | 329 | if (ctx.getParams().get("variantCalling.GenotypeGVCFs.stand_call_conf") != null) |
296 | 330 | { |
@@ -330,14 +364,62 @@ private File runGenotypeGVCFs(PipelineJob job, JobContext ctx, ProcessVariantsHa |
330 | 364 | }); |
331 | 365 | } |
332 | 366 |
|
333 | | - boolean doCopyInputs = ctx.getParams().optBoolean("variantCalling.GenotypeGVCFs.doCopyInputs", false); |
| 367 | + wrapper.execute(genome.getSourceFastaFile(), outputVcf, toolParams, inputVcf); |
334 | 368 |
|
335 | | - wrapper.execute(genome.getSourceFastaFile(), outputVcf, toolParams, doCopyInputs, inputFiles.toArray(new File[inputFiles.size()])); |
336 | 369 | action.addOutput(outputVcf, "VCF", outputVcf.exists(), true); |
337 | 370 | action.setEndTime(new Date()); |
338 | 371 | resumer.setGenotypeGVCFsComplete(action, outputVcf); |
339 | 372 |
|
| 373 | + if (!toDelete.isEmpty()) |
| 374 | + { |
| 375 | + ctx.getLogger().info("deleting locally copied inputs"); |
| 376 | + for (File f : toDelete) |
| 377 | + { |
| 378 | + if (f.exists()) |
| 379 | + { |
| 380 | + f.delete(); |
| 381 | + } |
| 382 | + } |
| 383 | + } |
| 384 | + |
340 | 385 | return outputVcf; |
341 | 386 | } |
| 387 | + |
| 388 | + private File combineInputs(JobContext ctx, List<File> inputFiles, int genomeId) throws PipelineJobException |
| 389 | + { |
| 390 | + // TODO: this should ultimately be expanded to include smarter merge with GenomicsDB |
| 391 | + // Also consider allowing the input to be a folder with per-contig gVCFs |
| 392 | + |
| 393 | + String basename = getBasename(ctx); |
| 394 | + File combined = new File(ctx.getOutputDir(), basename + ".combined.gvcf.gz"); |
| 395 | + |
| 396 | + File idx = new File(combined.getPath() + ".tbi"); |
| 397 | + if (idx.exists()) |
| 398 | + { |
| 399 | + ctx.getLogger().info("Index exists, resuming combine with existing file: " + combined.getPath()); |
| 400 | + return combined; |
| 401 | + } |
| 402 | + |
| 403 | + ReferenceGenome genome = ctx.getSequenceSupport().getCachedGenome(genomeId); |
| 404 | + if (genome == null) |
| 405 | + { |
| 406 | + throw new PipelineJobException("Unable to find cached genome for Id: " + genomeId); |
| 407 | + } |
| 408 | + |
| 409 | + List<String> toolParams = new ArrayList<>(); |
| 410 | + List<Interval> intervals = ProcessVariantsHandler.getIntervals(ctx); |
| 411 | + if (intervals != null) |
| 412 | + { |
| 413 | + intervals.forEach(interval -> { |
| 414 | + toolParams.add("-L"); |
| 415 | + toolParams.add(interval.getContig() + ":" + interval.getStart() + "-" + interval.getEnd()); |
| 416 | + }); |
| 417 | + } |
| 418 | + |
| 419 | + CombineGVCFsWrapper wrapper = new CombineGVCFsWrapper(ctx.getLogger()); |
| 420 | + wrapper.execute(genome.getWorkingFastaFile(), combined, toolParams, inputFiles.toArray(new File[inputFiles.size()])); |
| 421 | + |
| 422 | + return combined; |
| 423 | + } |
342 | 424 | } |
343 | 425 | } |
0 commit comments