Skip to content

Commit 4501fe3

Browse files
committed
Add ability to conditionally use multiseq on large datasets
1 parent 23c3a30 commit 4501fe3

File tree

2 files changed

+56
-4
lines changed

2 files changed

+56
-4
lines changed

singlecell/api-src/org/labkey/api/singlecell/CellHashingService.java

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ public static CellHashingService.CellHashingParameters createFromStep(SequenceOu
153153
if (methodStr2 != null)
154154
{
155155
ret.consensusMethods = extractMethods(methodStr2);
156-
if (!ret.methods.containsAll(ret.consensusMethods))
156+
if (!new HashSet<>(ret.methods).containsAll(ret.consensusMethods))
157157
{
158158
throw new PipelineJobException("All consensusMethods must be present in methods: " + methodStr2);
159159
}
@@ -189,7 +189,7 @@ public static CellHashingParameters createFromJson(BARCODE_TYPE type, File webse
189189

190190
if (ret.consensusMethods != null && !ret.consensusMethods.isEmpty())
191191
{
192-
if (!ret.methods.containsAll(ret.consensusMethods))
192+
if (!new HashSet<>(ret.methods).containsAll(ret.consensusMethods))
193193
{
194194
throw new PipelineJobException("All consensusMethods must be present in methods: " + ret.consensusMethods.stream().map(CALLING_METHOD::name).collect(Collectors.joining(",")));
195195
}
@@ -326,6 +326,7 @@ public Set<String> getAllowableBarcodeNames() throws PipelineJobException
326326
public enum CALLING_METHOD
327327
{
328328
multiseq(true, false),
329+
multiseqOnLargeData(true, true, false, 10000, "multiseq"),
329330
htodemux(false, false),
330331
dropletutils(true, true),
331332
gmm_demux(true, true),
@@ -337,17 +338,26 @@ public enum CALLING_METHOD
337338
boolean isDefaultRun;
338339
boolean isDefaultConsensus;
339340
boolean requiresH5;
341+
int minCells;
342+
String label;
340343

341344
CALLING_METHOD(boolean isDefaultRun, boolean isDefaultConsensus)
342345
{
343346
this(isDefaultRun, isDefaultConsensus, false);
344347
}
345348

346349
CALLING_METHOD(boolean isDefaultRun, boolean isDefaultConsensus, boolean requiresH5)
350+
{
351+
this(isDefaultRun, isDefaultConsensus, requiresH5, 0, null);
352+
}
353+
354+
CALLING_METHOD(boolean isDefaultRun, boolean isDefaultConsensus, boolean requiresH5, int minCells, String label)
347355
{
348356
this.isDefaultRun = isDefaultRun;
349357
this.isDefaultConsensus = isDefaultConsensus;
350358
this.requiresH5 = requiresH5;
359+
this.minCells = minCells;
360+
this.label = label;
351361
}
352362

353363
public boolean isDefaultRun()
@@ -360,6 +370,16 @@ public boolean isDefaultConsensus()
360370
return isDefaultConsensus;
361371
}
362372

373+
public int getMinCells()
374+
{
375+
return minCells;
376+
}
377+
378+
public String getLabel()
379+
{
380+
return label == null ? name() : label;
381+
}
382+
363383
public boolean isRequiresH5()
364384
{
365385
return requiresH5;

singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@
6969
import java.util.concurrent.atomic.AtomicBoolean;
7070
import java.util.concurrent.atomic.AtomicInteger;
7171
import java.util.stream.Collectors;
72+
import java.util.stream.Stream;
7273

7374
import static org.labkey.singlecell.run.CellRangerGexCountStep.LOUPE_CATEGORY;
7475

@@ -1214,13 +1215,44 @@ public File generateCellHashingCalls(File citeSeqCountOutDir, File outputDir, St
12141215
File localRScript = new File(outputDir, "generateCallsWrapper.R");
12151216
try (PrintWriter writer = PrintWriters.getPrintWriter(localRScript))
12161217
{
1217-
List<String> methodNames = parameters.methods.stream().map(Enum::name).collect(Collectors.toList());
1218-
List<String> consensusMethodNames = parameters.consensusMethods == null ? Collections.emptyList() : parameters.consensusMethods.stream().map(Enum::name).collect(Collectors.toList());
12191218
String cellbarcodeWhitelist = cellBarcodeWhitelistFile != null ? "'" + cellBarcodeWhitelistFile.getPath() + "'" : "NULL";
1219+
long totalCellBarcodes;
1220+
if (cellBarcodeWhitelistFile != null)
1221+
{
1222+
try (Stream<String> st = Files.lines(cellBarcodeWhitelistFile.toPath()))
1223+
{
1224+
totalCellBarcodes = st.count();
1225+
}
1226+
}
1227+
else
1228+
{
1229+
totalCellBarcodes = 99999L;
1230+
}
1231+
ctx.getLogger().debug("Total input cell barcodes: " + totalCellBarcodes);
12201232

12211233
Set<String> allowableBarcodes = parameters.getAllowableBarcodeNames();
12221234
String allowableBarcodeParam = allowableBarcodes != null ? "c('" + StringUtils.join(allowableBarcodes, "','") + "')" : "NULL";
12231235

1236+
List<String> methodNames = parameters.methods.stream().filter(m -> {
1237+
if (totalCellBarcodes > m.getMinCells())
1238+
{
1239+
ctx.getLogger().debug("Dropping method due to insufficient cells: " + m.name());
1240+
return false;
1241+
}
1242+
1243+
return true;
1244+
}).map(CALLING_METHOD::getLabel).distinct().toList();
1245+
1246+
List<String> consensusMethodNames = parameters.consensusMethods == null ? Collections.emptyList() : parameters.consensusMethods.stream().filter(m -> {
1247+
if (totalCellBarcodes > m.getMinCells())
1248+
{
1249+
ctx.getLogger().debug("Dropping consensus method due to insufficient cells: " + m.name());
1250+
return false;
1251+
}
1252+
1253+
return true;
1254+
}).map(CALLING_METHOD::getLabel).distinct().toList();
1255+
12241256
String skipNormalizationQcString = parameters.skipNormalizationQc ? "TRUE" : "FALSE";
12251257
String keepMarkdown = parameters.keepMarkdown ? "TRUE" : "FALSE";
12261258
String doTSNE = parameters.doTSNE ? "TRUE" : "FALSE";

0 commit comments

Comments
 (0)