Skip to content

Commit 7020bf9

Browse files
committed
Bugfix to cellranger VDJ
1 parent 9380508 commit 7020bf9

File tree

1 file changed: +42 −58 lines changed

singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java

Lines changed: 42 additions & 58 deletions
Original file line number | Diff line number | Diff line change
@@ -382,10 +382,23 @@ public AlignmentStep.AlignmentOutput performAlignment(Readset rs, List<File> inp
382382
getWrapper().execute(args);
383383

384384
File outdir = new File(outputDirectory, id);
385+
outdir = new File(outdir, "multi");
385386
outdir = new File(outdir, "outs");
386387

388+
File abDir = new File(outdir, "vdj_t");
389+
File gdDir = new File(outdir, "vdj_t_gd");
387390

388-
// TODO: cleanup from here on:
391+
File csvAB = processOutputsForType(rs, referenceGenome, abDir, output, "alpha/beta");
392+
File csvGD = processOutputsForType(rs, referenceGenome, gdDir, output, "gamma/delta");
393+
394+
deleteSymlinks(localFqDir);
395+
396+
throw new PipelineJobException("This is under development!");
397+
//return output;
398+
}
399+
400+
private File processOutputsForType(Readset rs, ReferenceGenome referenceGenome, File outdir, AlignmentOutputImpl output, String chainType) throws PipelineJobException
401+
{
389402
File bam = new File(outdir, "all_contig.bam");
390403
if (!bam.exists())
391404
{
@@ -410,7 +423,7 @@ public AlignmentStep.AlignmentOutput performAlignment(Readset rs, List<File> inp
410423
}
411424
FileUtils.moveFile(outputHtml, outputHtmlRename);
412425

413-
output.addSequenceOutput(outputHtmlRename, rs.getName() + " 10x VDJ Summary", "10x Run Summary", rs.getRowId(), null, referenceGenome.getGenomeId(), null);
426+
output.addSequenceOutput(outputHtmlRename, rs.getName() + " 10x VDJ Summary: " + chainType, "10x Run Summary", rs.getRowId(), null, referenceGenome.getGenomeId(), null);
414427

415428
File outputVloupe = new File(outdir, "vloupe.vloupe");
416429
File csv = new File(outdir, "all_contig_annotations.csv");
@@ -432,27 +445,25 @@ public AlignmentStep.AlignmentOutput performAlignment(Readset rs, List<File> inp
432445
FileUtils.moveFile(outputVloupe, outputVloupeRename);
433446
output.addSequenceOutput(outputVloupeRename, rs.getName() + " 10x VLoupe", "10x VLoupe", rs.getRowId(), null, referenceGenome.getGenomeId(), null);
434447
}
448+
449+
//NOTE: this folder has many unnecessary files and symlinks that get corrupted when we rename the main outputs
450+
File directory = new File(outdir.getParentFile(), "SC_VDJ_ASSEMBLER_CS");
451+
if (directory.exists())
452+
{
453+
//NOTE: this will have lots of symlinks, including corrupted ones, which java handles badly
454+
new SimpleScriptWrapper(getPipelineCtx().getLogger()).execute(Arrays.asList("rm", "-Rf", directory.getPath()));
455+
}
456+
else
457+
{
458+
getPipelineCtx().getLogger().warn("Unable to find folder: " + directory.getPath());
459+
}
460+
461+
return csv;
435462
}
436463
catch (IOException e)
437464
{
438465
throw new PipelineJobException(e);
439466
}
440-
441-
//NOTE: this folder has many unnecessary files and symlinks that get corrupted when we rename the main outputs
442-
File directory = new File(outdir.getParentFile(), "SC_VDJ_ASSEMBLER_CS");
443-
if (directory.exists())
444-
{
445-
//NOTE: this will have lots of symlinks, including corrupted ones, which java handles badly
446-
new SimpleScriptWrapper(getPipelineCtx().getLogger()).execute(Arrays.asList("rm", "-Rf", directory.getPath()));
447-
}
448-
else
449-
{
450-
getPipelineCtx().getLogger().warn("Unable to find folder: " + directory.getPath());
451-
}
452-
453-
deleteSymlinks(localFqDir);
454-
455-
return output;
456467
}
457468

458469
@Override
@@ -479,45 +490,16 @@ public boolean supportsGzipFastqs()
479490
return true;
480491
}
481492

482-
private String getSymlinkFileName(String fileName, boolean doRename, String sampleName, int idx, boolean isReversed)
493+
private String getSymlinkFileName(String sampleName, int idx, boolean isReverseRead)
483494
{
484-
return getSymlinkFileName(fileName, doRename, sampleName, idx, isReversed, null);
495+
return getSymlinkFileName(sampleName, idx, isReverseRead, null);
485496
}
486497

487-
private String getSymlinkFileName(String fileName, boolean doRename, String sampleName, int idx, boolean isReversed, @Nullable String suffix)
498+
private String getSymlinkFileName(String sampleName, int idx, boolean isReverseRead, @Nullable String suffix)
488499
{
489500
//NOTE: cellranger is very picky about file name formatting
490-
if (doRename)
491-
{
492-
sampleName = FileUtil.makeLegalName(sampleName.replaceAll("_", "-")).replaceAll(" ", "-").replaceAll("\\.", "-");
493-
return sampleName + (suffix == null ? "" : suffix) + "_S1_L001_R" + (isReversed ? "2" : "1") + "_" + StringUtils.leftPad(String.valueOf(idx), 3, "0") + ".fastq.gz";
494-
}
495-
else
496-
{
497-
Matcher m = FILE_PATTERN.matcher(fileName);
498-
if (m.matches())
499-
{
500-
if (!StringUtils.isEmpty(m.group(7)))
501-
{
502-
return m.group(1).replaceAll("_", "-") + StringUtils.trimToEmpty(m.group(2)) + "_L" + StringUtils.trimToEmpty(m.group(3)) + "_" + StringUtils.trimToEmpty(m.group(4)) + StringUtils.trimToEmpty(m.group(5)) + StringUtils.trimToEmpty(m.group(6)) + ".fastq.gz";
503-
}
504-
else if (m.group(1).contains("_"))
505-
{
506-
getPipelineCtx().getLogger().info("replacing underscores in file/sample name");
507-
return m.group(1).replaceAll("_", "-") + StringUtils.trimToEmpty(m.group(2)) + "_L" + StringUtils.trimToEmpty(m.group(3)) + "_" + StringUtils.trimToEmpty(m.group(4)) + StringUtils.trimToEmpty(m.group(5)) + StringUtils.trimToEmpty(m.group(6)) + ".fastq.gz";
508-
}
509-
else
510-
{
511-
getPipelineCtx().getLogger().info("no additional characters found");
512-
}
513-
}
514-
else
515-
{
516-
getPipelineCtx().getLogger().warn("filename does not match Illumina formatting: " + fileName);
517-
}
518-
}
519-
520-
return FileUtil.makeLegalName(fileName);
501+
sampleName = FileUtil.makeLegalName(sampleName.replaceAll("_", "-")).replaceAll(" ", "-").replaceAll("\\.", "-");
502+
return sampleName + (suffix == null ? "" : suffix) + "_S1_L001_R" + (isReverseRead ? "2" : "1") + "_" + StringUtils.leftPad(String.valueOf(idx), 3, "0") + ".fastq.gz";
521503
}
522504

523505
public Set<String> prepareFastqSymlinks(Readset rs, File localFqDir) throws PipelineJobException
@@ -535,13 +517,13 @@ public Set<String> prepareFastqSymlinks(Readset rs, File localFqDir) throws Pipe
535517
}
536518

537519
int idx = 0;
538-
boolean doRename = true; //cellranger is too picky - simply rename files all the time
539520
for (ReadData rd : rs.getReadData())
540521
{
541522
idx++;
542523
try
543524
{
544-
File target1 = new File(localFqDir, getSymlinkFileName(rd.getFile1().getName(), doRename, rs.getName(), idx, false));
525+
// a/b:
526+
File target1 = new File(localFqDir, getSymlinkFileName(rs.getName(), idx, false));
545527
getPipelineCtx().getLogger().debug("file: " + rd.getFile1().getPath());
546528
getPipelineCtx().getLogger().debug("target: " + target1.getPath());
547529
if (target1.exists())
@@ -554,7 +536,7 @@ public Set<String> prepareFastqSymlinks(Readset rs, File localFqDir) throws Pipe
554536
ret.add(getSampleName(target1.getName()));
555537

556538
// repeat for g/d:
557-
File target1gd = new File(localFqDir, getSymlinkFileName(rd.getFile1().getName(), doRename, rs.getName(), idx, false, "-GD"));
539+
File target1gd = new File(localFqDir, getSymlinkFileName(rs.getName(), idx, false, "-GD"));
558540
getPipelineCtx().getLogger().debug("file: " + rd.getFile1().getPath());
559541
getPipelineCtx().getLogger().debug("target: " + target1gd.getPath());
560542
if (target1gd.exists())
@@ -565,10 +547,10 @@ public Set<String> prepareFastqSymlinks(Readset rs, File localFqDir) throws Pipe
565547

566548
Files.createSymbolicLink(target1gd.toPath(), rd.getFile1().toPath());
567549

568-
// a/b:
569550
if (rd.getFile2() != null)
570551
{
571-
File target2 = new File(localFqDir, getSymlinkFileName(rd.getFile2().getName(), doRename, rs.getName(), idx, true));
552+
// a/b:
553+
File target2 = new File(localFqDir, getSymlinkFileName(rs.getName(), idx, true));
572554
getPipelineCtx().getLogger().debug("file: " + rd.getFile2().getPath());
573555
getPipelineCtx().getLogger().debug("target: " + target2.getPath());
574556
if (target2.exists())
@@ -579,7 +561,8 @@ public Set<String> prepareFastqSymlinks(Readset rs, File localFqDir) throws Pipe
579561
Files.createSymbolicLink(target2.toPath(), rd.getFile2().toPath());
580562
ret.add(getSampleName(target2.getName()));
581563

582-
File target2gd = new File(localFqDir, getSymlinkFileName(rd.getFile2().getName(), doRename, rs.getName(), idx, true, "-GD"));
564+
// g/d
565+
File target2gd = new File(localFqDir, getSymlinkFileName(rs.getName(), idx, true, "-GD"));
583566
getPipelineCtx().getLogger().debug("file: " + rd.getFile2().getPath());
584567
getPipelineCtx().getLogger().debug("target: " + target2gd.getPath());
585568
if (target2gd.exists())
@@ -619,6 +602,7 @@ public void addMetrics(AnalysisModel model) throws PipelineJobException
619602
{
620603
getPipelineCtx().getLogger().debug("adding 10x metrics");
621604

605+
// TODO: improve
622606
File metrics = new File(model.getAlignmentFileObject().getParentFile(), "metrics_summary.csv");
623607
if (metrics.exists())
624608
{

0 commit comments

Comments (0)