Skip to content

Commit b970a7d

Browse files
committed
Generalize error handling in seurat steps and make it easier to debug
1 parent a7cdde0 commit b970a7d

File tree

12 files changed

+60
-47
lines changed

12 files changed

+60
-47
lines changed

singlecell/api-src/org/labkey/api/singlecell/pipeline/AbstractSingleCellPipelineStep.java

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import java.io.File;
2424
import java.io.IOException;
2525
import java.io.PrintWriter;
26+
import java.nio.file.Files;
2627
import java.util.ArrayList;
2728
import java.util.Arrays;
2829
import java.util.Collection;
@@ -251,7 +252,7 @@ protected void executeR(SequenceOutputHandler.JobContext ctx, File rmd, String o
251252

252253
executeR(ctx, getDockerContainerName(), outputPrefix, lines);
253254

254-
handlePossibleFailure(ctx);
255+
handlePossibleFailure(ctx, outputPrefix);
255256
}
256257

257258
public static void executeR(SequenceOutputHandler.JobContext ctx, String dockerContainerName, String outputPrefix, List<String> lines) throws PipelineJobException
@@ -526,7 +527,7 @@ protected void onFailure(SequenceOutputHandler.JobContext ctx) throws PipelineJo
526527
// This allows subclasses to implement tool-specific failure handling
527528
}
528529

529-
private void handlePossibleFailure(SequenceOutputHandler.JobContext ctx) throws PipelineJobException
530+
private void handlePossibleFailure(SequenceOutputHandler.JobContext ctx, String outputPrefix) throws PipelineJobException
530531
{
531532
File errorFile = getSeuratErrorFile(ctx);
532533
if (errorFile.exists())
@@ -538,6 +539,19 @@ private void handlePossibleFailure(SequenceOutputHandler.JobContext ctx) throws
538539

539540
onFailure(ctx);
540541

542+
File html = getExpectedHtmlFile(ctx, outputPrefix);
543+
if (html.exists())
544+
{
545+
// Concatenate the file name into the message: the text has no format placeholder, so a separate argument would not appear in the log line
ctx.getLogger().info("Copying HTML locally for debugging: " + html.getName());
546+
File target = new File(ctx.getSourceDirectory(), html.getName());
547+
if (target.exists())
548+
{
549+
target.delete();
550+
}
551+
552+
Files.copy(html.toPath(), target.toPath());
553+
}
554+
541555
throw new PipelineJobException(getProvider().getName() + " Errors: " + StringUtils.join(errors, ";"));
542556
}
543557
catch (IOException e)
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
package org.labkey.api.singlecell.pipeline;
2+
3+
/**
4+
* NOTE: this is a special interface designed to encompass the steps related to processing raw count data from the 10x/loupe matrix
5+
*/
6+
public interface SingleCellRawDataStep extends SingleCellStep
7+
{
8+
public final String STEP_TYPE = "singleCellRawData";
9+
}

singlecell/api-src/org/labkey/api/singlecell/pipeline/SingleCellStep.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
public interface SingleCellStep extends PipelineStep
1717
{
1818
public static final String STEP_TYPE = "singleCell";
19-
public static final String SEURAT_PROCESSING = "seuratProcessing";
2019

2120
public Collection<String> getRLibraries();
2221

singlecell/resources/chunks/Functions.R

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,10 @@ saveData <- function(seuratObj, datasetId) {
8686
}
8787

8888
intermediateFiles <- c()
89-
addIntermediateFile <- function(f) { intermediateFiles <<- c(intermediateFiles, f) }
89+
# Registers a file path in the global intermediateFiles vector.
# NOTE: this must use <<- ; a plain <- would only modify a function-local copy that is discarded when the function returns.
addIntermediateFile <- function(f) { intermediateFiles <<- c(intermediateFiles, f) }
90+
91+
errorMessages <- c()
92+
# Appends a message to the global errorMessages vector.
# NOTE: this must use <<- ; a plain <- would only modify a function-local copy, so no error would ever be recorded.
addErrorMessage <- function(f) { errorMessages <<- c(errorMessages, f) }
9093

9194
print('Updating future.globals.maxSize')
9295
options(future.globals.maxSize = Inf)

singlecell/resources/chunks/PrepareRawCounts.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ for (datasetId in names(seuratObjects)) {
55
seuratObj <- CellMembrane::ReadAndFilter10xData(dataDir = rawCountDir, datasetId = datasetId, datasetName = datasetName, emptyDropsLower = emptyDropsLower, emptyDropsFdrThreshold = emptyDropsFdrThreshold, useEmptyDropsCellRanger = useEmptyDropsCellRanger, nExpectedCells = nExpectedCells)
66

77
if (!is.null(maxAllowableCells) && ncol(seuratObj) > maxAllowableCells) {
8-
stop(paste0('The seurat object has ', ncol(seuratObj), ' cells, which is more than the max allowable cells (', maxAllowableCells, '). Please review emptyDrops results as this probably means thresholds were suboptimal.'))
8+
addErrorMessage(paste0('The seurat object has ', ncol(seuratObj), ' cells, which is more than the max allowable cells (', maxAllowableCells, '). Please review emptyDrops results as this probably means thresholds were suboptimal.'))
99
}
1010

1111
saveData(seuratObj, datasetId)

singlecell/resources/chunks/RunCellHashing.R

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
errors <- c()
21
for (datasetId in names(seuratObjects)) {
32
seuratObj <- readRDS(seuratObjects[[datasetId]])
43

@@ -13,12 +12,12 @@ for (datasetId in names(seuratObjects)) {
1312

1413
fractionFailedHashing <- 1 - (sum(seuratObj@meta.data$HTO.Classification %in% c('Singlet', 'Doublet')) / nrow(seuratObj@meta.data))
1514
if (!is.null(maxHashingPctFail) && fractionFailedHashing > maxHashingPctFail) {
16-
errors <- c(errors, paste0('Fraction failing cell hashing was : ', fractionFailedHashing, ' for dataset: ', datasetId, ', above threshold of: ', maxHashingPctFail))
15+
addErrorMessage(paste0('Fraction failing cell hashing was : ', fractionFailedHashing, ' for dataset: ', datasetId, ', above threshold of: ', maxHashingPctFail))
1716
}
1817

1918
# Fraction of cells whose HTO.Classification is 'Discordant' (previously computed as 1 minus this value, i.e. the non-discordant fraction)
fractionDiscordantHashing <- sum(seuratObj@meta.data$HTO.Classification == 'Discordant') / nrow(seuratObj@meta.data)
2019
if (!is.null(maxHashingPctDiscordant) && fractionDiscordantHashing > maxHashingPctDiscordant) {
21-
errors <- c(errors, paste0('Discordant hashing rate was: ', fractionDiscordantHashing, ' for dataset: ', datasetId, ', above threshold of: ', maxHashingPctDiscordant))
20+
addErrorMessage(paste0('Discordant hashing rate was: ', fractionDiscordantHashing, ' for dataset: ', datasetId, ', above threshold of: ', maxHashingPctDiscordant))
2221
}
2322
} else {
2423
# Add empty columns to keep objects consistent
@@ -45,13 +44,4 @@ for (datasetId in names(seuratObjects)) {
4544
# Cleanup
4645
rm(seuratObj)
4746
gc()
48-
}
49-
50-
if (length(errors) > 0) {
51-
print('There were errors:')
52-
for (msg in errors) {
53-
print(msg)
54-
}
55-
56-
write(errors, file = 'seuratErrors.txt')
5747
}
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,12 @@
11
if (length(intermediateFiles) > 0) {
22
# write.table takes 'sep' (not 'delim') for the field separator; an unknown argument name raises an 'unused argument' error
write.table(data.frame(file = intermediateFiles), file = 'intermediateFiles.txt', quote = FALSE, sep = '\t', row.names = FALSE, col.names = FALSE)
3+
}
4+
5+
if (length(errorMessages) > 0) {
6+
print('There were errors:')
7+
for (msg in errorMessages) {
8+
print(msg)
9+
}
10+
11+
write(errorMessages, file = 'seuratErrors.txt')
312
}
Lines changed: 12 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,47 +1,46 @@
11
metricData <- data.frame(dataId = integer(), readsetId = integer(), metricname = character(), metricvalue = numeric())
22

3-
errors <- c()
43
for (datasetId in names(seuratObjects)) {
54
seuratObj <- readRDS(seuratObjects[[datasetId]])
65
metricData <- rbind(metricData, data.frame(dataId = datasetId, readsetId = datasetIdToReadset[[datasetId]], metricname = 'TotalCells', metricvalue = ncol(seuratObj)))
76

87
if (length(unique(seuratObj$DatasetId)) > 1) {
9-
errors <- c(errors, paste0('Seurat data prototypes must be a single dataset. Problem ID: ', datasetId))
8+
addErrorMessage(paste0('Seurat data prototypes must be a single dataset. Problem ID: ', datasetId))
109
}
1110

1211
if (is.null(usesHashing[[datasetId]])) {
13-
errors <- c(errors, paste0('No hashing context provided for: ', datasetId))
12+
addErrorMessage(paste0('No hashing context provided for: ', datasetId))
1413
break
1514
}
1615

1716
if (usesHashing[[datasetId]]) {
1817
if (requireHashing && !('HTO.Classification' %in% names(seuratObj@meta.data))) {
19-
errors <- c(errors, paste0('Missing cell hashing calls for dataset: ', datasetId))
18+
addErrorMessage(paste0('Missing cell hashing calls for dataset: ', datasetId))
2019
}
2120

2221
fractionFailedHashing <- 1 - (sum(seuratObj@meta.data$HTO.Classification %in% c('Singlet', 'Doublet')) / nrow(seuratObj@meta.data))
2322
if (!is.null(maxHashingPctFail) && fractionFailedHashing > maxHashingPctFail) {
24-
errors <- c(errors, paste0('Fraction failing cell hashing was : ', fractionFailedHashing, ' for dataset: ', datasetId, ', above threshold of: ', maxHashingPctFail))
23+
addErrorMessage(paste0('Fraction failing cell hashing was : ', fractionFailedHashing, ' for dataset: ', datasetId, ', above threshold of: ', maxHashingPctFail))
2524
}
2625

2726
metricData <- rbind(metricData, data.frame(dataId = datasetId, readsetId = datasetIdToReadset[[datasetId]], metricname = 'FractionFailedHashing', metricvalue = fractionFailedHashing))
2827

2928
# Fraction of cells whose HTO.Classification is 'Discordant' (previously computed as 1 minus this value, i.e. the non-discordant fraction)
fractionDiscordantHashing <- sum(seuratObj@meta.data$HTO.Classification == 'Discordant') / nrow(seuratObj@meta.data)
3029
if (!is.null(maxHashingPctDiscordant) && fractionDiscordantHashing > maxHashingPctDiscordant) {
31-
errors <- c(errors, paste0('Discordant hashing rate was: ', fractionDiscordantHashing, ' for dataset: ', datasetId, ', above threshold of: ', maxHashingPctDiscordant))
30+
addErrorMessage(paste0('Discordant hashing rate was: ', fractionDiscordantHashing, ' for dataset: ', datasetId, ', above threshold of: ', maxHashingPctDiscordant))
3231
}
3332

3433
metricData <- rbind(metricData, data.frame(dataId = datasetId, readsetId = datasetIdToReadset[[datasetId]], metricname = 'FractionDiscordantHashing', metricvalue = fractionDiscordantHashing))
3534
}
3635

3736
if (is.null(usesCiteSeq[[datasetId]])) {
38-
errors <- c(errors, paste0('No CITE-seq context provided for: ', datasetId))
37+
addErrorMessage(paste0('No CITE-seq context provided for: ', datasetId))
3938
break
4039
}
4140

4241
if (usesCiteSeq[[datasetId]] && requireCiteSeq) {
4342
if (!'ADT' %in% names(seuratObj@assays)) {
44-
errors <- c(errors, paste0('Missing ADT data for dataset: ', datasetId))
43+
addErrorMessage(paste0('Missing ADT data for dataset: ', datasetId))
4544
}
4645

4746
fractionADTGT0 <- sum(seuratObj@meta.data$nCount_ADT > 0) / nrow(seuratObj@meta.data)
@@ -52,29 +51,29 @@ for (datasetId in names(seuratObjects)) {
5251
}
5352

5453
if (requireSaturation && !'Saturation.RNA' %in% names(seuratObj@meta.data)) {
55-
errors <- c(errors, paste0('Missing per-cell RNA saturation data for dataset: ', datasetId))
54+
addErrorMessage(paste0('Missing per-cell RNA saturation data for dataset: ', datasetId))
5655
}
5756

5857
if (!is.null(minSaturation)) {
5958
if (!'Saturation.RNA' %in% names(seuratObj@meta.data)) {
60-
errors <- c(errors, paste0('Min saturation provided, but missing per-cell RNA saturation data for dataset: ', datasetId))
59+
addErrorMessage(paste0('Min saturation provided, but missing per-cell RNA saturation data for dataset: ', datasetId))
6160
}
6261
}
6362

6463
if ('Saturation.RNA' %in% names(seuratObj@meta.data)) {
6564
meanSaturation.RNA <- mean(seuratObj$Saturation.RNA)
6665
if (!is.null(minSaturation) && meanSaturation.RNA < minSaturation) {
67-
errors <- c(errors, paste0('Mean RNA saturation was: ', meanSaturation.RNA, ' for dataset: ', datasetId, ', below threshold of: ', minSaturation))
66+
addErrorMessage(paste0('Mean RNA saturation was: ', meanSaturation.RNA, ' for dataset: ', datasetId, ', below threshold of: ', minSaturation))
6867
}
6968

7069
metricData <- rbind(metricData, data.frame(dataId = datasetId, readsetId = datasetIdToReadset[[datasetId]], metricname = 'MeanSaturation.RNA', metricvalue = meanSaturation.RNA))
7170
}
7271

7372
if (requireSingleR && !'dice.label' %in% names(seuratObj@meta.data)) {
74-
errors <- c(errors, paste0('Missing SingleR DICE labels for dataset: ', datasetId))
73+
addErrorMessage(paste0('Missing SingleR DICE labels for dataset: ', datasetId))
7574
}
7675

77-
if (length(errors) > 0) {
76+
if (length(errorMessages) > 0) {
7877
break
7978
}
8079

@@ -90,15 +89,6 @@ for (datasetId in names(seuratObjects)) {
9089
gc()
9190
}
9291

93-
if (length(errors) > 0) {
94-
print('There were errors:')
95-
for (msg in errors) {
96-
print(msg)
97-
}
98-
99-
write(errors, file = 'seuratErrors.txt')
100-
}
101-
10292
if (nrow(metricData) > 0) {
10393
write.table(metricData, file = 'seurat.metrics.txt', sep = '\t', quote = F, row.names = F, col.names = F)
10494
}

singlecell/resources/chunks/SubsetSeurat.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,5 +21,5 @@ for (datasetId in names(seuratObjects)) {
2121
}
2222

2323
if (totalPassed == 0) {
24-
stop('No cells remained in any seurat objects after subsetting')
24+
addErrorMessage('No cells remained in any seurat objects after subsetting')
2525
}

singlecell/resources/web/singlecell/panel/SingleCellProcessingPanel.js

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -218,12 +218,12 @@ Ext4.define('SingleCell.panel.SingleCellProcessingPanel', {
218218
items.push({
219219
xtype: 'sequenceanalysis-analysissectionpanel',
220220
title: 'Prepare Raw Data',
221-
stepType: 'singleCell',
221+
stepType: 'singleCellRawData',
222222
singleTool: true,
223223
comboValue: 'PrepareRawCounts',
224224
sectionDescription: 'This section allows you to control the parsing of the raw 10x count data',
225225
toolConfig: {
226-
singleCell: [{
226+
singleCellRawData: [{
227227
description: 'Options related to processing the 10x matrix into a seurat object',
228228
label: 'Prepare Raw Counts',
229229
name: 'PrepareRawCounts',

0 commit comments

Comments
 (0)