Skip to content

Commit cea407a

Browse files
committed
Create separate CITE-seq processor
1 parent 59c3d0d commit cea407a

File tree

3 files changed

+133
-54
lines changed

3 files changed

+133
-54
lines changed

SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java

Lines changed: 2 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -39,25 +39,7 @@
3939
import org.labkey.api.util.PageFlowUtil;
4040
import org.labkey.api.util.SystemMaintenance;
4141
import org.labkey.api.view.WebPartFactory;
42-
import org.labkey.sequenceanalysis.analysis.BamCleanupHandler;
43-
import org.labkey.sequenceanalysis.analysis.BamHaplotypeHandler;
44-
import org.labkey.sequenceanalysis.analysis.CellHashingHandler;
45-
import org.labkey.sequenceanalysis.analysis.CellRangerAggrHandler;
46-
import org.labkey.sequenceanalysis.analysis.CellRangerRawDataHandler;
47-
import org.labkey.sequenceanalysis.analysis.CellRangerReanalysisHandler;
48-
import org.labkey.sequenceanalysis.analysis.CombineStarGeneCountsHandler;
49-
import org.labkey.sequenceanalysis.analysis.CombineSubreadGeneCountsHandler;
50-
import org.labkey.sequenceanalysis.analysis.GenotypeGVCFHandler;
51-
import org.labkey.sequenceanalysis.analysis.HaplotypeCallerHandler;
52-
import org.labkey.sequenceanalysis.analysis.LiftoverHandler;
53-
import org.labkey.sequenceanalysis.analysis.ListVcfSamplesHandler;
54-
import org.labkey.sequenceanalysis.analysis.MultiQCBamHandler;
55-
import org.labkey.sequenceanalysis.analysis.MultiQCHandler;
56-
import org.labkey.sequenceanalysis.analysis.PicardAlignmentMetricsHandler;
57-
import org.labkey.sequenceanalysis.analysis.RecalculateSequenceMetricsHandler;
58-
import org.labkey.sequenceanalysis.analysis.RnaSeqcHandler;
59-
import org.labkey.sequenceanalysis.analysis.SbtGeneCountHandler;
60-
import org.labkey.sequenceanalysis.analysis.UnmappedSequenceBasedGenotypeHandler;
42+
import org.labkey.sequenceanalysis.analysis.*;
6143
import org.labkey.sequenceanalysis.button.AddSraRunButton;
6244
import org.labkey.sequenceanalysis.button.CellHashingButton;
6345
import org.labkey.sequenceanalysis.button.ChangeReadsetStatusButton;
@@ -339,6 +321,7 @@ public static void registerPipelineSteps()
339321

340322
SequenceAnalysisService.get().registerReadsetHandler(new MultiQCHandler());
341323
SequenceAnalysisService.get().registerReadsetHandler(new CellHashingHandler());
324+
SequenceAnalysisService.get().registerReadsetHandler(new CiteSeqHandler());
342325

343326
//ObjectFactory.Registry.register(AnalysisModelImpl.class, new UnderscoreBeanObjectFactory(AnalysisModelImpl.class));
344327
//ObjectFactory.Registry.register(SequenceReadsetImpl.class, new UnderscoreBeanObjectFactory(SequenceReadsetImpl.class));

SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/CellHashingHandler.java

Lines changed: 82 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -68,23 +68,41 @@ public class CellHashingHandler extends AbstractParameterizedOutputHandler<Seque
6868

6969
/**
 * Creates the cell hashing handler with the name "Cell Hashing Calls" and the parameter list
 * returned by {@link #getDefaultParams()}.
 * <p>
 * In this diff view the line following the {@code 71-} marker is the pre-commit body (a direct
 * {@code super(...)} call using the name "CITE-Seq Count"); the line following {@code 71+} is its
 * replacement, which delegates to the three-argument constructor.
 */
public CellHashingHandler()
7070
{
71-
super(ModuleLoader.getInstance().getModule(SequenceAnalysisModule.class), "CITE-Seq Count", "This will run CITE-Seq Count to generate a table of features counts from CITE-Seq or cell hashing libraries", null, getDefaultParams());
71+
this("Cell Hashing Calls", "This will run CITE-Seq Count to generate a table of features counts from CITE-Seq or cell hashing libraries. It will also run R code to generate a table of calls per cell", getDefaultParams());
72+
}
73+
74+
/**
 * Constructor for subclasses that reuse this handler's processing under a different name,
 * description and parameter list (CiteSeqHandler in this commit is one such caller).
 *
 * @param name          handler name passed through to the superclass constructor
 * @param description   handler description passed through to the superclass constructor
 * @param defaultParams tool parameters passed through to the superclass constructor
 */
protected CellHashingHandler(String name, String description, List<ToolParameterDescriptor> defaultParams)
75+
{
76+
// The owning module is always SequenceAnalysisModule; the fourth argument is passed as null
// (NOTE(review): its meaning is defined by the superclass, which is not visible here).
super(ModuleLoader.getInstance().getModule(SequenceAnalysisModule.class), name, description, null, defaultParams);
7277
}
7378

7479
/**
 * Returns the parameter list used by the cell hashing handler itself: the two-argument overload
 * called with edit-distance scanning allowed and {@code DEFAULT_TAG_GROUP} as the default tag group.
 * <p>
 * The line following the {@code 76-} marker is the removed start of the old inline
 * {@code Arrays.asList(...)} body; the line following {@code 81+} is the new delegating body.
 *
 * @return the default tool parameters
 */
public static List<ToolParameterDescriptor> getDefaultParams()
7580
{
76-
return Arrays.asList(
81+
return getDefaultParams(true, DEFAULT_TAG_GROUP);
82+
}
83+
84+
public static List<ToolParameterDescriptor> getDefaultParams(boolean allowScanningEditDistance, String defaultTagGroup)
85+
{
86+
List<ToolParameterDescriptor> ret = new ArrayList<>(Arrays.asList(
7787
ToolParameterDescriptor.create("outputFilePrefix", "Output File Basename", null, "textfield", new JSONObject(){{
7888
put("allowBlank", false);
7989
}}, "cellHashingCalls"),
8090
ToolParameterDescriptor.createCommandLineParam(CommandLineParam.create("-cbf"), "cbf", "Cell Barcode Start", null, "ldk-integerfield", null, 1),
8191
ToolParameterDescriptor.createCommandLineParam(CommandLineParam.create("-cbl"), "cbl", "Cell Barcode End", null, "ldk-integerfield", null, 16),
8292
ToolParameterDescriptor.createCommandLineParam(CommandLineParam.create("-umif"), "umif", "UMI Start", null, "ldk-integerfield", null, 17),
83-
ToolParameterDescriptor.createCommandLineParam(CommandLineParam.create("-umil"), "umil", "UMI End", null, "ldk-integerfield", null, 26),
84-
ToolParameterDescriptor.create("scanEditDistances", "Scan Edit Distances", "If checked, CITE-seq-count will be run using edit distances from 0-3 and the iteration with the highest singlets will be used.", "checkbox", new JSONObject(){{
85-
put("checked", true);
86-
}}, true),
87-
ToolParameterDescriptor.create("editDistance", "Edit Distance", null, "ldk-integerfield", null, 1),
93+
ToolParameterDescriptor.createCommandLineParam(CommandLineParam.create("-umil"), "umil", "UMI End", null, "ldk-integerfield", null, 26)
94+
));
95+
96+
if (allowScanningEditDistance)
97+
{
98+
ret.add(ToolParameterDescriptor.create("scanEditDistances", "Scan Edit Distances", "If checked, CITE-seq-count will be run using edit distances from 0-3 and the iteration with the highest singlets will be used.", "checkbox", new JSONObject()
99+
{{
100+
put("checked", true);
101+
}}, true));
102+
}
103+
104+
ret.addAll(Arrays.asList(
105+
ToolParameterDescriptor.create("editDistance", "Edit Distance", null, "ldk-integerfield", null, 3),
88106
ToolParameterDescriptor.create("excludeFailedcDNA", "Exclude Failed cDNA", "If selected, cDNAs with non-blank status fields will be omitted", "checkbox", null, true),
89107
ToolParameterDescriptor.create("minCountPerCell", "Min Reads/Cell", null, "ldk-integerfield", null, 5),
90108
ToolParameterDescriptor.createCommandLineParam(CommandLineParam.create("-cells"), "cells", "Expected Cells", null, "ldk-integerfield", null, 20000),
@@ -94,11 +112,13 @@ public static List<ToolParameterDescriptor> getDefaultParams()
94112
put("displayField", "group_name");
95113
put("valueField", "group_name");
96114
put("allowBlank", false);
97-
}}, DEFAULT_TAG_GROUP),
115+
}}, defaultTagGroup),
98116
ToolParameterDescriptor.create("useOutputFileContainer", "Submit to Source File Workbook", "If checked, each job will be submitted to the same workbook as the input file, as opposed to submitting all jobs to the same workbook. This is primarily useful if submitting a large batch of files to process separately. This only applies if 'Run Separately' is selected.", "checkbox", new JSONObject(){{
99117
put("checked", false);
100118
}}, false)
101-
);
119+
));
120+
121+
return ret;
102122
}
103123

104124
@Override
@@ -134,11 +154,18 @@ public boolean doSplitJobs()
134154
/**
 * Returns a new {@code Processor} for this handler. After this commit the processor is built
 * with {@code generateHtoCalls = true} (line following {@code 157+}); the line following
 * {@code 137-} is the removed no-argument construction.
 */
@Override
135155
public SequenceReadsetProcessor getProcessor()
136156
{
137-
return new Processor();
157+
return new Processor(true);
138158
}
139159

140-
protected class Processor implements SequenceReadsetProcessor
160+
public class Processor implements SequenceReadsetProcessor
141161
{
162+
private final boolean _generateHtoCalls;
163+
164+
/**
 * @param generateHtoCalls stored in {@code _generateHtoCalls} and forwarded to
 *                         {@code executeCiteSeqCount}. When true, the HTO call table and HTML
 *                         report are generated and registered as sequence outputs; when false,
 *                         only the CITE-Seq count matrix is registered, and a job that sets
 *                         {@code scanEditDistances} is rejected with a PipelineJobException.
 */
public Processor(boolean generateHtoCalls)
165+
{
166+
_generateHtoCalls = generateHtoCalls;
167+
}
168+
142169
@Override
143170
public void init(PipelineJob job, SequenceAnalysisJobSupport support, List<Readset> readsets, JSONObject params, File outputDir, List<RecordedAction> actions, List<SequenceOutputFile> outputsToCreate) throws UnsupportedOperationException, PipelineJobException
144171
{
@@ -203,11 +230,16 @@ public void processFilesRemote(List<Readset> readsets, JobContext ctx) throws Un
203230
Set<Integer> editDistances = new TreeSet<>();
204231
Map<Integer, Map<String, Object>> results = new HashMap<>();
205232

206-
Integer highestSinglet = 0;
233+
int highestSinglet = 0;
207234
Integer bestEditDistance = null;
208235

209236
Integer minCountPerCell = ctx.getParams().optInt("minCountPerCell", 5);
210237
boolean scanEditDistances = ctx.getParams().optBoolean("scanEditDistances", false);
238+
if (!_generateHtoCalls && scanEditDistances)
239+
{
240+
throw new PipelineJobException("Scan edit distances should not be possible to use unless cell hashing is used");
241+
}
242+
211243
if (scanEditDistances)
212244
{
213245
editDistances.add(0);
@@ -223,7 +255,7 @@ public void processFilesRemote(List<Readset> readsets, JobContext ctx) throws Un
223255

224256
for (Integer editDistance : editDistances)
225257
{
226-
Map<String, Object> callMap = executeCiteSeqCount(ctx, action, rs, editDistance, minCountPerCell);
258+
Map<String, Object> callMap = executeCiteSeqCount(ctx, action, rs, editDistance, minCountPerCell, _generateHtoCalls);
227259
results.put(editDistance, callMap);
228260

229261
int singlet = Integer.parseInt(callMap.get("singlet").toString());
@@ -240,9 +272,22 @@ public void processFilesRemote(List<Readset> readsets, JobContext ctx) throws Un
240272
ctx.getLogger().info("Using edit distance: " + bestEditDistance + ", singlet: " + highestSinglet);
241273

242274
Map<String, Object> callMap = results.get(bestEditDistance);
243-
String description = String.format("Edit Distance: %,d\nMin Reads/Cell: %,d\nTotal Singlet: %,d\nDoublet: %,d\nDiscordant: %,d\nSeurat Called: %,d\nNegative: %,d\nUnique HTOs: %s", bestEditDistance, minCountPerCell, callMap.get("singlet"), callMap.get("doublet"), callMap.get("discordant"), callMap.get("seuratSinglet"), callMap.get("negative"), callMap.get("UniqueHtos"));
244-
File htoCalls = (File)callMap.get("htoCalls");
245-
File html = (File)callMap.get("html");
275+
if (_generateHtoCalls)
276+
{
277+
String description = String.format("Edit Distance: %,d\nMin Reads/Cell: %,d\nTotal Singlet: %,d\nDoublet: %,d\nDiscordant: %,d\nSeurat Called: %,d\nNegative: %,d\nUnique HTOs: %s", bestEditDistance, minCountPerCell, callMap.get("singlet"), callMap.get("doublet"), callMap.get("discordant"), callMap.get("seuratSinglet"), callMap.get("negative"), callMap.get("UniqueHtos"));
278+
File htoCalls = (File) callMap.get("htoCalls");
279+
File html = (File) callMap.get("html");
280+
281+
ctx.getFileManager().addSequenceOutput(htoCalls, rs.getName() + ": Cell Hashing Calls","Cell Hashing Calls", rs.getReadsetId(), null, null, description);
282+
ctx.getFileManager().addSequenceOutput(html, rs.getName() + ": Cell Hashing Report","Cell Hashing Report", rs.getReadsetId(), null, null, description);
283+
}
284+
else
285+
{
286+
ctx.getLogger().debug("HTO calls will not be generated");
287+
288+
File citeSeqCount = (File) callMap.get("citeSeqCount");
289+
ctx.getFileManager().addSequenceOutput(citeSeqCount, rs.getName() + ": CITE-Seq Count Matrix","CITE-Seq Count Matrix", rs.getReadsetId(), null, null, null);
290+
}
246291

247292
File origUnknown = getCiteSeqCountUnknownOutput(ctx.getSourceDirectory(), bestEditDistance);
248293
File movedUnknown = getCiteSeqCountUnknownOutput(ctx.getSourceDirectory(), null);
@@ -260,9 +305,6 @@ public void processFilesRemote(List<Readset> readsets, JobContext ctx) throws Un
260305
{
261306
throw new PipelineJobException(e);
262307
}
263-
264-
ctx.getFileManager().addSequenceOutput(htoCalls, rs.getName() + ": Cell Hashing Calls","Cell Hashing Calls", rs.getReadsetId(), null, null, description);
265-
ctx.getFileManager().addSequenceOutput(html, rs.getName() + ": Cell Hashing Report","Cell Hashing Report", rs.getReadsetId(), null, null, description);
266308
}
267309
else
268310
{
@@ -275,7 +317,7 @@ public void processFilesRemote(List<Readset> readsets, JobContext ctx) throws Un
275317
}
276318
}
277319

278-
private Map<String, Object> executeCiteSeqCount(JobContext ctx, RecordedAction action, Readset rs, int editDistance, int minCountPerCell) throws PipelineJobException
320+
private Map<String, Object> executeCiteSeqCount(JobContext ctx, RecordedAction action, Readset rs, int editDistance, int minCountPerCell, boolean generateHtoCalls) throws PipelineJobException
279321
{
280322
CiteSeqCountWrapper wrapper = new CiteSeqCountWrapper(ctx.getLogger());
281323
ReadData rd = rs.getReadData().get(0);
@@ -328,27 +370,32 @@ private Map<String, Object> executeCiteSeqCount(JobContext ctx, RecordedAction a
328370
throw new PipelineJobException(e);
329371
}
330372
ctx.getFileManager().addIntermediateFile(doneFile);
373+
ctx.getFileManager().addOutput(action, "Unknown barcodes", unknownBarcodes);
374+
ctx.getFileManager().addOutput(action, "CITE-seq Raw Counts", outputMatrix);
375+
ctx.getFileManager().addIntermediateFile(unknownBarcodes);
376+
ctx.getFileManager().addIntermediateFile(outputDir);
331377

332-
ctx.getJob().setStatus(PipelineJob.TaskStatus.running, "Generating HTO calls for edit distance: " + editDistance);
333-
File htoCalls = generateFinalCalls(outputMatrix.getParentFile(), ctx.getOutputDir(), outputBasename, ctx.getLogger(), null, true, minCountPerCell, ctx.getSourceDirectory());
334-
File html = new File(htoCalls.getParentFile(), outputBasename + ".html");
378+
Map<String, Object> callMap = new HashMap<>();
379+
callMap.put("citeSeqCount", outputMatrix);
335380

336-
if (!html.exists())
381+
if (generateHtoCalls)
337382
{
338-
throw new PipelineJobException("Unable to find expected HTML file: " + html.getPath());
339-
}
383+
ctx.getJob().setStatus(PipelineJob.TaskStatus.running, "Generating HTO calls for edit distance: " + editDistance);
384+
File htoCalls = generateFinalCalls(outputMatrix.getParentFile(), ctx.getOutputDir(), outputBasename, ctx.getLogger(), null, true, minCountPerCell, ctx.getSourceDirectory());
385+
File html = new File(htoCalls.getParentFile(), outputBasename + ".html");
340386

341-
ctx.getFileManager().addOutput(action, "Unknown barcodes", unknownBarcodes);
342-
ctx.getFileManager().addOutput(action, "CITE-seq Raw Counts", outputMatrix);
343-
ctx.getFileManager().addOutput(action, "Cell Hashing Calls", htoCalls);
344-
ctx.getFileManager().addOutput(action, "Cell Hashing Report", html);
387+
if (!html.exists())
388+
{
389+
throw new PipelineJobException("Unable to find expected HTML file: " + html.getPath());
390+
}
345391

346-
ctx.getFileManager().addIntermediateFile(unknownBarcodes);
347-
ctx.getFileManager().addIntermediateFile(outputDir);
392+
ctx.getFileManager().addOutput(action, "Cell Hashing Calls", htoCalls);
393+
ctx.getFileManager().addOutput(action, "Cell Hashing Report", html);
348394

349-
Map<String, Object> callMap = parseOutputTable(ctx.getLogger(), htoCalls, unknownBarcodes, ctx.getSourceDirectory(), ctx.getWorkingDirectory(), true);
350-
callMap.put("htoCalls", htoCalls);
351-
callMap.put("html", html);
395+
callMap.putAll(parseOutputTable(ctx.getLogger(), htoCalls, unknownBarcodes, ctx.getSourceDirectory(), ctx.getWorkingDirectory(), true));
396+
callMap.put("htoCalls", htoCalls);
397+
callMap.put("html", html);
398+
}
352399

353400
return callMap;
354401
}
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
package org.labkey.sequenceanalysis.analysis;
2+
3+
import org.labkey.api.sequenceanalysis.SequenceOutputFile;
4+
5+
public class CiteSeqHandler extends CellHashingHandler
6+
{
7+
private static final String DEFAULT_TAG_GROUP = "TotalSeq-C";
8+
9+
public CiteSeqHandler()
10+
{
11+
super("CITE-Seq Count", "This will run CITE-Seq Count to generate a table of features counts from CITE-Seq", CellHashingHandler.getDefaultParams(false, DEFAULT_TAG_GROUP));
12+
}
13+
14+
@Override
15+
public boolean canProcess(SequenceOutputFile o)
16+
{
17+
return false;
18+
}
19+
20+
@Override
21+
public boolean doRunRemote()
22+
{
23+
return true;
24+
}
25+
26+
@Override
27+
public boolean doRunLocal()
28+
{
29+
return false;
30+
}
31+
32+
@Override
33+
public boolean requiresSingleGenome()
34+
{
35+
return false;
36+
}
37+
38+
@Override
39+
public boolean doSplitJobs()
40+
{
41+
return true;
42+
}
43+
44+
@Override
45+
public SequenceReadsetProcessor getProcessor()
46+
{
47+
return new Processor(false);
48+
}
49+
}

0 commit comments

Comments
 (0)