
Commit ddc5ffb

- Expand SDA plotting
- Allow TCR cell hashing to use demuxEM
1 parent 6d937c6

4 files changed: 104 additions & 41 deletions

singlecell/api-src/org/labkey/api/singlecell/CellHashingService.java

Lines changed: 1 addition & 1 deletion
@@ -47,7 +47,7 @@ static public void setInstance(CellHashingService instance)
         _instance = instance;
     }

-    abstract public void prepareHashingForVdjIfNeeded(File sourceDir, PipelineJob job, SequenceAnalysisJobSupport support, String filterField, final boolean failIfNoHashingReadset) throws PipelineJobException;
+    abstract public void prepareHashingForVdjIfNeeded(SequenceOutputHandler.JobContext ctx, final boolean failIfNoHashingReadset) throws PipelineJobException;

     abstract public File generateHashingCallsForRawMatrix(Readset parentReadset, PipelineOutputTracker output, SequenceOutputHandler.JobContext ctx, CellHashingParameters parameters, File rawCountMatrixDir) throws PipelineJobException;
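Callers of this abstract method migrate mechanically, since the JobContext already carries the output directory, job, and job support that were previously passed individually, and the filter field is now fixed internally (TCR_FIELD in the implementation below). A minimal caller sketch, assuming the service is obtained through its static get() accessor and that ctx is a handler's SequenceOutputHandler.JobContext (the surrounding handler is hypothetical):

    // Old call (removed by this commit), passing each value separately:
    // CellHashingService.get().prepareHashingForVdjIfNeeded(ctx.getOutputDir(), ctx.getJob(), ctx.getSequenceSupport(), "tcrReadsetId", true);

    // New call: the whole context is passed, and the filter field is internal:
    CellHashingService.get().prepareHashingForVdjIfNeeded(ctx, true);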

singlecell/resources/chunks/RunSDA.R

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,10 @@ sdaFiles <- data.frame(DatasetId = character(), FileName = character())
1919
outputFileId <- ifelse(datasetId %in% names(datasetIdToOutputFileId), yes = datasetIdToOutputFileId[[datasetId]], no = NA)
2020
sdaResults$OutputFileId <- outputFileId
2121

22+
if (!all(is.null(fieldsToPlot))) {
23+
PlotSdaCellScores(seuratObj, sdaResults, fieldNames = fieldNames)
24+
}
25+
2226
fileName <- paste0('sda.', datasetId, '.rds')
2327
saveRDS(sdaResults, file = fileName)
2428

singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java

Lines changed: 89 additions & 38 deletions
@@ -90,10 +90,13 @@ public static CellHashingServiceImpl get()
         return _instance;
     }

+    private static String TCR_FIELD = "tcrReadsetId";
+
     @Override
-    public void prepareHashingForVdjIfNeeded(File sourceDir, PipelineJob job, SequenceAnalysisJobSupport support, String filterField, final boolean failIfNoHashingReadset) throws PipelineJobException
+    public void prepareHashingForVdjIfNeeded(SequenceOutputHandler.JobContext ctx, final boolean failIfNoHashingReadset) throws PipelineJobException
     {
-        prepareHashingAndCiteSeqFilesIfNeeded(sourceDir, job, support, filterField, failIfNoHashingReadset, false, true, true, false, false);
+        boolean useDemEM = ctx.getParams().optString("methods", "").contains(CellHashingService.CALLING_METHOD.demuxem.name());
+        prepareHashingAndCiteSeqFilesIfNeeded(ctx.getOutputDir(), ctx.getJob(), ctx.getSequenceSupport(), TCR_FIELD, failIfNoHashingReadset, false, true, true, false, useDemEM);
     }

     public void prepareHashingAndCiteSeqFilesForFeatureCountsIfNeeded(File sourceDir, PipelineJob job, SequenceAnalysisJobSupport support, String filterField, final boolean failIfNoHashingReadset, final boolean failIfNoCiteSeqReadset) throws PipelineJobException
@@ -133,6 +136,7 @@ public void prepareHashingAndCiteSeqFilesIfNeeded(File sourceDir, PipelineJob jo
         HashMap<String, Integer> gexReadsetToH5Map = new HashMap<>();
         HashMap<Integer, Integer> readsetToHashingMap = new HashMap<>();
         HashMap<Integer, Integer> readsetToCiteSeqMap = new HashMap<>();
+        HashMap<Integer, Integer> readsetToGexMap = new HashMap<>();
         HashMap<Integer, Set<String>> gexToPanels = new HashMap<>();

         List<Readset> cachedReadsets = support.getCachedReadsets();
@@ -227,10 +231,13 @@ public void prepareHashingAndCiteSeqFilesIfNeeded(File sourceDir, PipelineJob jo
                    readsetToCiteSeqMap.put(rs.getReadsetId(), results.getInt(FieldKey.fromString("citeseqReadsetId")));
                }
            }
+
+            readsetToGexMap.put(rs.getReadsetId(), results.getInt(FieldKey.fromString("readsetId")));
        });

        job.getLogger().debug("total readset to hashing pairs: " + readsetToHashingMap.size());
        job.getLogger().debug("total readset to cite-seq pairs: " + readsetToCiteSeqMap.size());
+       job.getLogger().debug("total readset to GEX pairs: " + readsetToGexMap.size());

        if (hasError.get())
        {
@@ -247,42 +254,7 @@ public void prepareHashingAndCiteSeqFilesIfNeeded(File sourceDir, PipelineJob jo

        if (doH5Caching)
        {
-           job.getLogger().debug("Caching H5 Files");
-           TableInfo ti = QueryService.get().getUserSchema(job.getUser(), job.getContainer().isWorkbook() ? job.getContainer().getParent() : job.getContainer(), SingleCellSchema.SEQUENCE_SCHEMA_NAME).getTable("outputfiles");
-           Set<Integer> cachedGenomes = support.getCachedGenomes().stream().map(ReferenceGenome::getGenomeId).collect(Collectors.toSet());
-           for (int readsetId : uniqueGex)
-           {
-               for (int genomeId : cachedGenomes)
-               {
-                   SimpleFilter filter = new SimpleFilter(FieldKey.fromString("readset"), readsetId, CompareType.EQUAL);
-                   filter.addCondition(FieldKey.fromString("library_id"), genomeId, CompareType.EQUAL);
-                   filter.addCondition(FieldKey.fromString("category"), LOUPE_CATEGORY, CompareType.EQUAL);
-
-                   TableSelector ts = new TableSelector(ti, PageFlowUtil.set("dataid"), filter, new org.labkey.api.data.Sort("-rowid"));
-                   if (ts.exists())
-                   {
-                       List<Integer> dataIds = ts.getArrayList(Integer.class);
-                       int dataId = dataIds.get(0);
-                       if (dataIds.size() > 1)
-                       {
-                           job.getLogger().info("More than one loupe file found for readset " + readsetId + " with genome: " + genomeId + ". Using the most recent: " + dataId);
-                       }
-
-                       ExpData d = ExperimentService.get().getExpData(dataId);
-                       if (d == null)
-                       {
-                           throw new PipelineJobException("Unable to find exp data: " + dataId);
-                       }
-
-                       support.cacheExpData(d);
-                       gexReadsetToH5Map.put(readsetId + "-" + genomeId, dataId);
-                   }
-                   else
-                   {
-                       job.getLogger().warn("Unable to find loupe file for readset: " + readsetId + " with genome: " + genomeId);
-                   }
-               }
-           }
+           gexReadsetToH5Map.putAll(cacheH5Files(job, support, uniqueGex, readsetToGexMap));
        }

        // if distinct HTOs is 1, no point in running hashing. note: presence of hashing readsets is a trigger downstream
@@ -443,6 +415,85 @@ else if (distinctHTOs.size() == 1)
        }
    }

+   private Map<String, Integer> cacheH5Files(PipelineJob job, SequenceAnalysisJobSupport support, Collection<Integer> uniqueGex, Map<Integer, Integer> readsetToGexMap) throws PipelineJobException
+   {
+       job.getLogger().debug("Caching H5 Files");
+       Map<String, Integer> gexReadsetToH5Map = new HashMap<>();
+
+       TableInfo ti = QueryService.get().getUserSchema(job.getUser(), job.getContainer().isWorkbook() ? job.getContainer().getParent() : job.getContainer(), SingleCellSchema.SEQUENCE_SCHEMA_NAME).getTable("outputfiles");
+       Set<Integer> cachedGenomes = support.getCachedGenomes().stream().map(ReferenceGenome::getGenomeId).collect(Collectors.toSet());
+
+       for (int readsetId : readsetToGexMap.keySet())
+       {
+           boolean isGEX = uniqueGex.contains(readsetId);
+           int gexReadset = readsetToGexMap.get(readsetId);
+
+           SimpleFilter filter = new SimpleFilter(FieldKey.fromString("readset"), gexReadset, CompareType.EQUAL);
+           filter.addCondition(FieldKey.fromString("category"), LOUPE_CATEGORY, CompareType.EQUAL);
+
+           int gexGenomeId;
+           if (isGEX)
+           {
+               if (cachedGenomes.size() > 1)
+               {
+                   throw new PipelineJobException("demuxEM was selected, but more than one cached genome found, cannot infer correct genome. Found: " + StringUtils.join(cachedGenomes, ", "));
+               }
+
+               gexGenomeId = cachedGenomes.iterator().next();
+               filter.addCondition(FieldKey.fromString("library_id"), gexGenomeId, CompareType.EQUAL);
+           }
+           else
+           {
+               job.getLogger().debug("Readset is not GEX, attempting to infer the loupe file genome");
+               HashSet<Integer> genomeIds = new HashSet<>(new TableSelector(ti, PageFlowUtil.set("library_id"), filter, new org.labkey.api.data.Sort("-rowid")).getArrayList(Integer.class));
+               if (genomeIds.isEmpty())
+               {
+                   throw new PipelineJobException("demuxEM was selected, but no suitable loupe files were found for GEX readset: " + gexReadset);
+               }
+               else if (genomeIds.size() > 1)
+               {
+                   throw new PipelineJobException("demuxEM was selected. Attempting to identify loupe files using GEX readset: " + gexReadset + ", but more than one genome found. Found: " + StringUtils.join(genomeIds, ", "));
+               }
+
+               gexGenomeId = genomeIds.iterator().next();
+               filter.addCondition(FieldKey.fromString("library_id"), gexGenomeId, CompareType.EQUAL);
+           }
+
+           TableSelector ts = new TableSelector(ti, PageFlowUtil.set("dataid"), filter, new org.labkey.api.data.Sort("-rowid"));
+           if (ts.exists())
+           {
+               List<Integer> dataIds = ts.getArrayList(Integer.class);
+               int dataId = dataIds.get(0);
+               if (dataIds.size() > 1)
+               {
+                   job.getLogger().info("More than one loupe file found for GEX readset " + gexReadset + " with genome: " + gexGenomeId + ". Using the most recent: " + dataId);
+               }
+
+               ExpData d = ExperimentService.get().getExpData(dataId);
+               if (d == null)
+               {
+                   throw new PipelineJobException("Unable to find exp data: " + dataId);
+               }
+
+               support.cacheExpData(d);
+
+               if (cachedGenomes.size() > 1)
+               {
+                   throw new PipelineJobException("demuxEM was selected, but more than one cached genome found, cannot infer correct genome. Found: " + StringUtils.join(cachedGenomes, ", "));
+               }
+
+               // NOTE: cache this using the source file's genome ID (which might be the TCR library), rather than the GEX genome
+               gexReadsetToH5Map.put(readsetId + "-" + cachedGenomes.iterator().next(), dataId);
+           }
+           else
+           {
+               job.getLogger().warn("Unable to find loupe file for GEX readset: " + gexReadset + " with genome: " + gexGenomeId);
+           }
+       }
+
+       return gexReadsetToH5Map;
+   }
+
    public File getValidCiteSeqBarcodeFile(File sourceDir, int gexReadsetId)
    {
        return new File(sourceDir, "validADTS." + gexReadsetId + ".csv");
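One subtlety in the new cacheH5Files method: the map key combines the source readset's ID with the cached genome ID (see the NOTE above), not the GEX readset used for the loupe-file lookup, so downstream consumers can resolve the file from the readset they started with (which might be the TCR library). A hypothetical consumer-side lookup mirroring that convention (the variable names are illustrative):

    // Keys follow "<source readset id>-<genome id>", per the put() above:
    Integer h5DataId = gexReadsetToH5Map.get(readsetId + "-" + genomeId);
    if (h5DataId == null)
    {
        throw new PipelineJobException("No cached loupe/H5 file for readset: " + readsetId + " and genome: " + genomeId);
    }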

singlecell/src/org/labkey/singlecell/pipeline/singlecell/RunSDA.java

Lines changed: 10 additions & 2 deletions
@@ -9,6 +9,7 @@
 import org.labkey.api.sequenceanalysis.pipeline.AbstractPipelineStepProvider;
 import org.labkey.api.sequenceanalysis.pipeline.PipelineContext;
 import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler;
+import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor;
 import org.labkey.api.singlecell.pipeline.SeuratToolParameter;
 import org.labkey.api.singlecell.pipeline.SingleCellStep;

@@ -62,8 +63,15 @@ public Provider()
                }}, 10000),
                SeuratToolParameter.create(SEURAT_THREADS, "Max Threads", "The number of threads to use. Cannot be higher than the threads allocated to the job.", "ldk-integerfield", new JSONObject(){{
                    put("minValue", 0);
-               }}, 8)
-       ), Arrays.asList("/sequenceanalysis/field/TrimmingTextArea.js"), null);
+               }}, 8),
+               ToolParameterDescriptor.create("storeGoEnrichment", "Perform/Store GO Enrichment", null, "checkbox", null, true),
+               SeuratToolParameter.create("fieldNames", "Fields To Plot", "Enter one field name per line", "sequenceanalysis-trimmingtextarea", new JSONObject(){{
+                   put("allowBlank", false);
+                   put("height", 150);
+                   put("delimiter", ",");
+                   put("stripCharsRe", "/['\"]/g");
+               }}, null, null, true).delimiter(",")
+       ), Arrays.asList("/sequenceanalysis/field/TrimmingTextArea.js"), null);
    }
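The new fieldNames parameter is serialized as a comma-delimited list (note the .delimiter(",") call), which is what the RunSDA.R chunk above consumes. The storeGoEnrichment checkbox would presumably be read from the job parameters in the same way the demuxEM flag is parsed from the "methods" parameter in CellHashingServiceImpl; a minimal sketch (the optBoolean read mirrors the existing optString pattern and is an assumption, not code from this commit):

    // Hypothetical: read the new checkbox from job params, defaulting to true as declared above.
    boolean storeGoEnrichment = ctx.getParams().optBoolean("storeGoEnrichment", true);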
