Skip to content

Commit afcd81e

Browse files
committed
Allow more streamlined inclusion of g/d sequences in cellranger vdj
1 parent cb1cecb commit afcd81e

File tree

1 file changed

+50
-8
lines changed

1 file changed

+50
-8
lines changed

singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java

Lines changed: 50 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
import org.labkey.api.util.PageFlowUtil;
4545
import org.labkey.api.writer.PrintWriters;
4646

47+
import java.io.BufferedReader;
4748
import java.io.File;
4849
import java.io.IOException;
4950
import java.io.PrintWriter;
@@ -67,6 +68,7 @@ public CellRangerVDJWrapper(@Nullable Logger logger)
6768
}
6869

6970
public static final String INNER_ENRICHMENT_PRIMERS = "innerEnrichmentPrimers";
71+
public static final String INCLUDE_GD = "includeGD";
7072

7173
public static class VDJProvider extends AbstractAlignmentStepProvider<AlignmentStep>
7274
{
@@ -87,8 +89,11 @@ public VDJProvider()
8789
ToolParameterDescriptor.create(INNER_ENRICHMENT_PRIMERS, "Inner Enrichment Primers", "An option comma-separated list of the inner primers used for TCR enrichment. These will be used for trimming.", "textarea", new JSONObject(){{
8890
put("height", 100);
8991
put("width", 400);
90-
}}, null)
91-
), null, "https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/what-is-cell-ranger", true, false, false, ALIGNMENT_MODE.MERGE_THEN_ALIGN);
92+
}}, null),
93+
ToolParameterDescriptor.create(INCLUDE_GD, "Include G/D", "As a hack to get CellRanger 6 to include g/d (which it does not normally allow), they can be included in the reference, but marked as TRA/TRB. Alignment is performed with these values, and the result is converted in the final text file.", "checkbox", new JSONObject(){{
94+
put("checked", true);
95+
}}, true)
96+
), null, "https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/what-is-cell-ranger", true, false, false, ALIGNMENT_MODE.MERGE_THEN_ALIGN);
9297
}
9398

9499
@Override
@@ -126,6 +131,11 @@ public boolean supportsMetrics()
126131
return false;
127132
}
128133

134+
private boolean doGDParsing()
135+
{
136+
return getProvider().getParameterByName(INCLUDE_GD).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Boolean.class, true);
137+
}
138+
129139
@Override
130140
public void init(SequenceAnalysisJobSupport support) throws PipelineJobException
131141
{
@@ -156,8 +166,21 @@ public void init(SequenceAnalysisJobSupport support) throws PipelineJobException
156166
String seq = nt.getSequence();
157167

158168
//example: >1|TRAV41*01 TRAV41|TRAV41|L-REGION+V-REGION|TR|TRA|None|None
169+
//Note: for g/d recovery, add any gamma segments as TRA and delta as TRB. Also prefix their gene names with TRA/B, but keep the true name (i.e. TRDV1 -> TRBTRDV1)
170+
String prefix = "";
171+
if (doGDParsing() && "TRD".equalsIgnoreCase(locus))
172+
{
173+
prefix = "TRB";
174+
locus = "TRB";
175+
}
176+
else if (doGDParsing() && "TRG".equalsIgnoreCase(locus))
177+
{
178+
prefix = "TRA";
179+
locus = "TRA";
180+
}
181+
159182
StringBuilder header = new StringBuilder();
160-
header.append(">").append(i.get()).append("|").append(nt.getName()).append(" ").append(nt.getLineage()).append("|").append(nt.getLineage()).append("|");
183+
header.append(">").append(i.get()).append("|").append(prefix).append(nt.getName()).append(" ").append(prefix).append(nt.getLineage()).append("|").append(prefix).append(nt.getLineage()).append("|");
161184
//translate into V_Region
162185
String type;
163186
if (nt.getLineage().contains("J"))
@@ -212,7 +235,7 @@ private File getGenomeFasta()
212235
@Override
213236
public String getIndexCachedDirName(PipelineJob job)
214237
{
215-
return getProvider().getName();
238+
return getProvider().getName() + (doGDParsing() ? "-GD" : "");
216239
}
217240

218241
@Override
@@ -244,7 +267,7 @@ public AlignmentStep.IndexOutput createIndex(ReferenceGenome referenceGenome, Fi
244267
output.addInput(getGenomeFasta(), "Input FASTA");
245268

246269
List<String> args = new ArrayList<>();
247-
args.add(getWrapper().getExe().getPath());
270+
args.add(getWrapper().getExe(doGDParsing()).getPath());
248271
args.add("mkvdjref");
249272
args.add("--seqs=" + getGenomeFasta().getPath());
250273
args.add("--genome=" + indexDir.getName());
@@ -273,7 +296,7 @@ public AlignmentStep.AlignmentOutput performAlignment(Readset rs, List<File> inp
273296
AlignmentOutputImpl output = new AlignmentOutputImpl();
274297

275298
List<String> args = new ArrayList<>();
276-
args.add(getWrapper().getExe().getPath());
299+
args.add(getWrapper().getExe(doGDParsing()).getPath());
277300
args.add("vdj");
278301

279302
String idParam = StringUtils.trimToNull(getProvider().getParameterByName("id").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), String.class));
@@ -385,6 +408,25 @@ public AlignmentStep.AlignmentOutput performAlignment(Readset rs, List<File> inp
385408
{
386409
throw new PipelineJobException("Unable to find file: " + outputVloupe.getPath());
387410
}
411+
412+
if (doGDParsing())
413+
{
414+
getPipelineCtx().getLogger().info("Removing g/d prefixes from all_contig_annotations.csv file");
415+
File csv2 = new File(outdir, "all_contig_annotations2.csv");
416+
try (PrintWriter writer = PrintWriters.getPrintWriter(csv2); BufferedReader reader = Readers.getReader(csv))
417+
{
418+
String line;
419+
while ((line = reader.readLine()) != null)
420+
{
421+
line = line.replaceAll("TRATRG", "TRG");
422+
line = line.replaceAll("TRBTRD", "TRD");
423+
writer.println(line);
424+
}
425+
}
426+
427+
csv.delete();
428+
FileUtils.moveFile(csv2, csv);
429+
}
388430
}
389431
else
390432
{
@@ -683,9 +725,9 @@ private String getSampleName(String fn)
683725
}
684726
}
685727

686-
protected File getExe()
728+
protected File getExe(boolean includeGD)
687729
{
688730
//NOTE: cellranger 4 doesnt work w/ custom libraries currently. update to CR4 when fixed
689-
return SequencePipelineService.get().getExeForPackage("CELLRANGERPATH", "cellranger-31");
731+
return SequencePipelineService.get().getExeForPackage("CELLRANGERPATH", includeGD ? "cellranger" : "cellranger-31");
690732
}
691733
}

0 commit comments

Comments
 (0)