|
44 | 44 | import org.labkey.api.util.PageFlowUtil; |
45 | 45 | import org.labkey.api.writer.PrintWriters; |
46 | 46 |
|
| 47 | +import java.io.BufferedReader; |
47 | 48 | import java.io.File; |
48 | 49 | import java.io.IOException; |
49 | 50 | import java.io.PrintWriter; |
@@ -382,88 +383,123 @@ public AlignmentStep.AlignmentOutput performAlignment(Readset rs, List<File> inp |
382 | 383 | getWrapper().execute(args); |
383 | 384 |
|
384 | 385 | File outdir = new File(outputDirectory, id); |
385 | | - outdir = new File(outdir, "multi"); |
386 | 386 | outdir = new File(outdir, "outs"); |
387 | 387 |
|
388 | | - File abDir = new File(outdir, "vdj_t"); |
389 | | - File gdDir = new File(outdir, "vdj_t_gd"); |
390 | 388 |
|
391 | | - File csvAB = processOutputsForType(rs, referenceGenome, abDir, output, "alpha/beta"); |
392 | | - File csvGD = processOutputsForType(rs, referenceGenome, gdDir, output, "gamma/delta"); |
| 389 | + File csvAB = processOutputsForType(id, rs, referenceGenome, outdir, output, "vdj_t"); |
| 390 | + File csvGD = processOutputsForType(id, rs, referenceGenome, outdir, output, "vdj_t_gd"); |
393 | 391 |
|
394 | | - deleteSymlinks(localFqDir); |
395 | | - |
396 | | - throw new PipelineJobException("This is under development!"); |
397 | | - //return output; |
398 | | - } |
| 392 | + File combinedCSV = processAndMergeCSVs(csvAB, csvGD, getPipelineCtx().getLogger()); |
399 | 393 |
|
400 | | - private File processOutputsForType(Readset rs, ReferenceGenome referenceGenome, File outdir, AlignmentOutputImpl output, String chainType) throws PipelineJobException |
401 | | - { |
402 | | - File bam = new File(outdir, "all_contig.bam"); |
403 | | - if (!bam.exists()) |
| 394 | + //NOTE: this folder has many unnecessary files and symlinks that get corrupted when we rename the main outputs |
| 395 | + File directory = new File(outdir.getParentFile(), "SC_MULTI_CS"); |
| 396 | + if (directory.exists()) |
404 | 397 | { |
405 | | - throw new PipelineJobException("Unable to find file: " + bam.getPath()); |
| 398 | + //NOTE: this will have lots of symlinks, including corrupted ones, which java handles badly |
| 399 | + new SimpleScriptWrapper(getPipelineCtx().getLogger()).execute(Arrays.asList("rm", "-Rf", directory.getPath())); |
| 400 | + } |
| 401 | + else |
| 402 | + { |
| 403 | + getPipelineCtx().getLogger().warn("Unable to find folder: " + directory.getPath()); |
406 | 404 | } |
407 | | - output.setBAM(bam); |
408 | 405 |
|
409 | | - //now do cleanup/rename: |
410 | 406 | try |
411 | 407 | { |
412 | | - String prefix = FileUtil.makeLegalName(rs.getName() + "_"); |
413 | | - File outputHtml = new File(outdir, "web_summary.html"); |
| 408 | + File outputHtml = new File(outdir, "per_sample_outs/" + id + "/web_summary.html"); |
414 | 409 | if (!outputHtml.exists()) |
415 | 410 | { |
416 | 411 | throw new PipelineJobException("Unable to find file: " + outputHtml.getPath()); |
417 | 412 | } |
418 | 413 |
|
419 | | - File outputHtmlRename = new File(outdir, prefix + outputHtml.getName()); |
| 414 | + File outputHtmlRename = new File(outputHtml.getParentFile(), FileUtil.makeLegalName(rs.getName() + "_") + outputHtml.getName()); |
420 | 415 | if (outputHtmlRename.exists()) |
421 | 416 | { |
422 | 417 | outputHtmlRename.delete(); |
423 | 418 | } |
424 | 419 | FileUtils.moveFile(outputHtml, outputHtmlRename); |
425 | 420 |
|
426 | | - output.addSequenceOutput(outputHtmlRename, rs.getName() + " 10x VDJ Summary: " + chainType, "10x Run Summary", rs.getRowId(), null, referenceGenome.getGenomeId(), null); |
| 421 | + output.addSequenceOutput(outputHtmlRename, rs.getName() + " 10x VDJ Summary", "10x Run Summary", rs.getRowId(), null, referenceGenome.getGenomeId(), null); |
| 422 | + } |
| 423 | + catch (IOException e) |
| 424 | + { |
| 425 | + throw new PipelineJobException(e); |
| 426 | + } |
427 | 427 |
|
428 | | - File outputVloupe = new File(outdir, "vloupe.vloupe"); |
429 | | - File csv = new File(outdir, "all_contig_annotations.csv"); |
430 | | - if (!outputVloupe.exists()) |
| 428 | + deleteSymlinks(localFqDir); |
| 429 | + |
| 430 | + return output; |
| 431 | + } |
| 432 | + |
| 433 | + private File processOutputsForType(String sampleId, Readset rs, ReferenceGenome referenceGenome, File outdir, AlignmentOutputImpl output, String subdirName) throws PipelineJobException |
| 434 | + { |
| 435 | + boolean isPrimaryDir = "vdj_t".equals(subdirName); |
| 436 | + String chainType = "vdj_t".equals(subdirName) ? "Alpha/Beta" : "Gamma/Delta"; |
| 437 | + |
| 438 | + File multiDir = new File(outdir, "multi/" + subdirName); |
| 439 | + if (!multiDir.exists()) |
| 440 | + { |
| 441 | + throw new PipelineJobException("Missing folder: " + multiDir.getPath()); |
| 442 | + } |
| 443 | + |
| 444 | + File sampleDir = new File(outdir, "per_sample_outs/" + sampleId + "/" + subdirName); |
| 445 | + if (!sampleDir.exists()) |
| 446 | + { |
| 447 | + throw new PipelineJobException("Missing folder: " + sampleDir.getPath()); |
| 448 | + } |
| 449 | + |
| 450 | + // For simplicity, consolidate all files: |
| 451 | + for (File f : multiDir.listFiles()) |
| 452 | + { |
| 453 | + try |
431 | 454 | { |
432 | | - //NOTE: if there were no A/B hits, the vLoupe isnt created, but all other outputs exist |
433 | | - if (!csv.exists()) |
434 | | - { |
435 | | - throw new PipelineJobException("Unable to find file: " + outputVloupe.getPath()); |
436 | | - } |
| 455 | + FileUtils.moveFile(f, new File(sampleDir, f.getName())); |
437 | 456 | } |
438 | | - else |
| 457 | + catch (IOException e) |
439 | 458 | { |
440 | | - File outputVloupeRename = new File(outdir, prefix + outputVloupe.getName()); |
441 | | - if (outputVloupeRename.exists()) |
442 | | - { |
443 | | - outputVloupeRename.delete(); |
444 | | - } |
445 | | - FileUtils.moveFile(outputVloupe, outputVloupeRename); |
446 | | - output.addSequenceOutput(outputVloupeRename, rs.getName() + " 10x VLoupe", "10x VLoupe", rs.getRowId(), null, referenceGenome.getGenomeId(), null); |
| 459 | + throw new PipelineJobException(e); |
447 | 460 | } |
| 461 | + } |
448 | 462 |
|
449 | | - //NOTE: this folder has many unnecessary files and symlinks that get corrupted when we rename the main outputs |
450 | | - File directory = new File(outdir.getParentFile(), "SC_VDJ_ASSEMBLER_CS"); |
451 | | - if (directory.exists()) |
452 | | - { |
453 | | - //NOTE: this will have lots of symlinks, including corrupted ones, which java handles badly |
454 | | - new SimpleScriptWrapper(getPipelineCtx().getLogger()).execute(Arrays.asList("rm", "-Rf", directory.getPath())); |
455 | | - } |
456 | | - else |
| 463 | + File bam = new File(sampleDir, "all_contig.bam"); |
| 464 | + if (!bam.exists()) |
| 465 | + { |
| 466 | + throw new PipelineJobException("Unable to find file: " + bam.getPath()); |
| 467 | + } |
| 468 | + |
| 469 | + if (isPrimaryDir) |
| 470 | + { |
| 471 | + output.setBAM(bam); |
| 472 | + } |
| 473 | + else |
| 474 | + { |
| 475 | + getPipelineCtx().getLogger().debug("Deleting BAM: " + bam.getPath()); |
| 476 | + bam.delete(); |
| 477 | + new File(bam.getPath() + ".bai").delete(); |
| 478 | + } |
| 479 | + |
| 480 | + File allContigAnnotations = new File(sampleDir, "all_contig_annotations.csv"); |
| 481 | + if (!allContigAnnotations.exists()) |
| 482 | + { |
| 483 | + throw new PipelineJobException("Missing file: " + allContigAnnotations.getPath()); |
| 484 | + } |
| 485 | + |
| 486 | + File outputVloupe = new File(sampleDir, "vloupe.vloupe"); |
| 487 | + File csv = new File(sampleDir, "all_contig_annotations.csv"); |
| 488 | + if (!outputVloupe.exists()) |
| 489 | + { |
| 490 | + //NOTE: if there were no A/B hits, the vLoupe isnt created, but all other outputs exist |
| 491 | + if (!csv.exists()) |
457 | 492 | { |
458 | | - getPipelineCtx().getLogger().warn("Unable to find folder: " + directory.getPath()); |
| 493 | + throw new PipelineJobException("Unable to find file: " + outputVloupe.getPath()); |
459 | 494 | } |
460 | | - |
461 | | - return csv; |
462 | 495 | } |
463 | | - catch (IOException e) |
| 496 | + // NOTE: only tag the vloupe file for a/b: |
| 497 | + else if (isPrimaryDir) |
464 | 498 | { |
465 | | - throw new PipelineJobException(e); |
| 499 | + output.addSequenceOutput(outputVloupe, rs.getName() + " 10x VLoupe", "10x VLoupe", rs.getRowId(), null, referenceGenome.getGenomeId(), null); |
466 | 500 | } |
| 501 | + |
| 502 | + return csv; |
467 | 503 | } |
468 | 504 |
|
469 | 505 | @Override |
@@ -603,6 +639,7 @@ public void addMetrics(AnalysisModel model) throws PipelineJobException |
603 | 639 | getPipelineCtx().getLogger().debug("adding 10x metrics"); |
604 | 640 |
|
605 | 641 | // TODO: improve |
| 642 | + //File outputHtml = new File(outdir, "per_sample_outs/" + id + "/web_summary.html"); |
606 | 643 | File metrics = new File(model.getAlignmentFileObject().getParentFile(), "metrics_summary.csv"); |
607 | 644 | if (metrics.exists()) |
608 | 645 | { |
@@ -741,4 +778,121 @@ protected File getExe() |
741 | 778 | { |
742 | 779 | return SequencePipelineService.get().getExeForPackage("CELLRANGERPATH", "cellranger"); |
743 | 780 | } |
| 781 | + |
| 782 | + private static File processAndMergeCSVs(File abCSV, File gdCSV, Logger log) throws PipelineJobException |
| 783 | + { |
| 784 | + File output = new File(abCSV.getParentFile(), "all_contig_annotations_combined.csv"); |
| 785 | + |
| 786 | + try (PrintWriter writer = PrintWriters.getPrintWriter(output)) |
| 787 | + { |
| 788 | + processCSV(writer, true, abCSV, log, Arrays.asList("TRA", "TRB")); |
| 789 | + processCSV(writer, false, gdCSV, log, Arrays.asList("TRD", "TRG")); |
| 790 | + } |
| 791 | + catch (IOException e) |
| 792 | + { |
| 793 | + throw new PipelineJobException(e); |
| 794 | + } |
| 795 | + |
| 796 | + return output; |
| 797 | + } |
| 798 | + |
| 799 | + private static void processCSV(PrintWriter writer, boolean printHeader, File inputCsv, Logger log, List<String> acceptableChains) throws IOException |
| 800 | + { |
| 801 | + log.info("Processing CSV: " + inputCsv.getPath()); |
| 802 | + try (BufferedReader reader = Readers.getReader(inputCsv)) |
| 803 | + { |
| 804 | + String line; |
| 805 | + int chimericCallsRecovered = 0; |
| 806 | + |
| 807 | + int lineIdx = 0; |
| 808 | + while ((line = reader.readLine()) != null) |
| 809 | + { |
| 810 | + lineIdx++; |
| 811 | + if (lineIdx == 1) |
| 812 | + { |
| 813 | + if (printHeader) |
| 814 | + { |
| 815 | + writer.println(line); |
| 816 | + } |
| 817 | + |
| 818 | + continue; |
| 819 | + } |
| 820 | + |
| 821 | + //Infer correct chain from the V, J and C genes |
| 822 | + String[] tokens = line.split(",", -1); // -1 used to preserve trailing empty strings |
| 823 | + List<String> chains = new ArrayList<>(); |
| 824 | + String vGeneChain = null; |
| 825 | + String jGeneChain = null; |
| 826 | + String cGeneChain = null; |
| 827 | + for (int idx : new Integer[]{6,8,9}) |
| 828 | + { |
| 829 | + String val = StringUtils.trimToNull(tokens[idx]); |
| 830 | + if (val != null) |
| 831 | + { |
| 832 | + val = val.substring(0, 3); |
| 833 | + |
| 834 | + chains.add(val); |
| 835 | + if (idx == 6) |
| 836 | + { |
| 837 | + vGeneChain = val; |
| 838 | + } |
| 839 | + if (idx == 8) |
| 840 | + { |
| 841 | + jGeneChain = val; |
| 842 | + } |
| 843 | + else if (idx == 9) |
| 844 | + { |
| 845 | + cGeneChain = val; |
| 846 | + } |
| 847 | + } |
| 848 | + } |
| 849 | + |
| 850 | + Set<String> uniqueChains = new HashSet<>(chains); |
| 851 | + String originalChain = StringUtils.trimToNull(tokens[5]); |
| 852 | + |
| 853 | + // Recover TRDV/TRAJ/TRAC: |
| 854 | + if (uniqueChains.size() > 1) |
| 855 | + { |
| 856 | + if (cGeneChain != null) |
| 857 | + { |
| 858 | + uniqueChains.clear(); |
| 859 | + uniqueChains.add(cGeneChain); |
| 860 | + chimericCallsRecovered++; |
| 861 | + } |
| 862 | + else if (uniqueChains.size() == 2) |
| 863 | + { |
| 864 | + if ("TRD".equals(vGeneChain) && "TRA".equals(jGeneChain)) |
| 865 | + { |
| 866 | + uniqueChains.clear(); |
| 867 | + chimericCallsRecovered++; |
| 868 | + uniqueChains.add(vGeneChain); |
| 869 | + } |
| 870 | + if ("TRA".equals(vGeneChain) && "TRD".equals(jGeneChain)) |
| 871 | + { |
| 872 | + uniqueChains.clear(); |
| 873 | + chimericCallsRecovered++; |
| 874 | + uniqueChains.add(vGeneChain); |
| 875 | + } |
| 876 | + } |
| 877 | + } |
| 878 | + |
| 879 | + if (uniqueChains.size() == 1) |
| 880 | + { |
| 881 | + String chain = uniqueChains.iterator().next(); |
| 882 | + tokens[5] = chain; |
| 883 | + } |
| 884 | + else |
| 885 | + { |
| 886 | + log.error("Multiple chains detected [" + StringUtils.join(chains, ",")+ "], leaving original call alone: " + originalChain + ". " + tokens[6] + "/" + tokens[8] + "/" + tokens[9]); |
| 887 | + } |
| 888 | + |
| 889 | + if (acceptableChains.contains(tokens[5])) |
| 890 | + { |
| 891 | + writer.println(StringUtils.join(tokens, ",")); |
| 892 | + } |
| 893 | + } |
| 894 | + |
| 895 | + log.info("\tChimeric calls recovered: " + chimericCallsRecovered); |
| 896 | + } |
| 897 | + } |
744 | 898 | } |
0 commit comments