
Commit 6b6efeb

Allow saturation calculation to run on merged objects, and to run on ADT/HTO data
1 parent 92d7e61 commit 6b6efeb

3 files changed: +137 −17 lines changed

singlecell/resources/chunks/AppendSaturation.R

Lines changed: 1 addition & 11 deletions

```diff
@@ -1,22 +1,12 @@
 for (datasetId in names(seuratObjects)) {
   seuratObj <- seuratObjects[[datasetId]]
   seuratObjects[[datasetId]] <- NULL
-  molInfoFile <- molInfoFiles[[datasetId]]
-  if (is.null(molInfoFile)) {
-    stop(paste0('Unable to find molInfo file for: ', datasetId))
-  }
 
   if (!'DatasetId' %in% names(seuratObj@meta.data)) {
     stop('Seurat object lacks a DatasetId field!')
   }
 
-  datasetId <- unique(seuratObj$DatasetId)
-  if (length(datasetId) != 1) {
-    stop('Saturation can only be computed from single-dataset seurat objects!')
-  }
-
-  seuratObj <- CellMembrane::AppendPerCellSaturation(seuratObj, molInfoFile = molInfoFile, cellbarcodePrefix = paste0(datasetId, '_'))
-
+  seuratObj <- CellMembrane::AppendPerCellSaturationInBulk(seuratObj, molInfoList = molInfoFiles[[datasetId]])
   newSeuratObjects[[datasetId]] <- seuratObj
 
   # Cleanup
```
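
This removes the single-dataset restriction: rather than locating one `molInfoFile` per object, the chunk hands `CellMembrane::AppendPerCellSaturationInBulk` the full set of molecule-info files for each dataset, so saturation can be appended to merged objects and to ADT/HTO assays in one pass. A minimal sketch of the input shape this implies, assuming the chunk scaffolding (not shown in this diff) groups the entries emitted by `getChunks()` below under each dataset; all names and paths are illustrative:

```r
# Hypothetical structure: entries keyed '<outputfile rowid>-<assay>',
# matching the list lines generated in AppendSaturation.java below.
molInfoFiles <- list(
  '12345' = list(
    '12345-RNA' = '12345.RNA.molInfo.h5',
    '12345-HTO' = '12345.HTO.molInfo.h5'
  )
)

# One call now annotates per-cell saturation for every assay of the dataset:
seuratObj <- CellMembrane::AppendPerCellSaturationInBulk(seuratObj, molInfoList = molInfoFiles[['12345']])
```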

singlecell/resources/chunks/SaveData.R

Lines changed: 3 additions & 1 deletion

```diff
@@ -19,7 +19,9 @@ for (datasetId in names(newSeuratObjects)) {
   savedFiles <- rbind(savedFiles, data.frame(datasetId = datasetId, datasetName = datasetName, filename = fn, outputFileId = outputFileId))
 
   # Write cell barcodes and metadata:
-  write.table(seuratObj@meta.data, file = metaFile, quote = F, row.names = F, sep = ',', col.names = F)
+  metaDf <- seuratObj@meta.data
+  metaDf$cellbarcode <- colnames(seuratObj)
+  write.table(metaDf, file = metaFile, quote = F, row.names = F, sep = ',', col.names = T)
   write.table(data.frame(CellBarcode = colnames(seuratObj)), file = barcodeFile, quote = F, row.names = F, sep = ',', col.names = F)
 }
 
```
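
The metadata file now includes a header row and an explicit `cellbarcode` column, so downstream consumers can join metadata back to cells without depending on row order. A hypothetical two-row example of the new output (every column except `cellbarcode` is illustrative):

```
DatasetId,Saturation,cellbarcode
12345,0.42,12345_ACGTACGTACGTACGT-1
12345,0.57,12345_TTGCATGCATGCATGC-1
```

The separate barcode file (the final `write.table` above) keeps its headerless single-column format; it is presumably the `.cellBarcodes.csv` that `AppendSaturation.java` parses below.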
singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendSaturation.java

Lines changed: 133 additions & 5 deletions

```diff
@@ -3,20 +3,34 @@
 import au.com.bytecode.opencsv.CSVReader;
 import au.com.bytecode.opencsv.CSVWriter;
 import org.apache.commons.io.FileUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.labkey.api.data.Container;
+import org.labkey.api.data.SimpleFilter;
+import org.labkey.api.data.Sort;
+import org.labkey.api.data.TableSelector;
+import org.labkey.api.pipeline.PipelineJob;
 import org.labkey.api.pipeline.PipelineJobException;
+import org.labkey.api.query.FieldKey;
+import org.labkey.api.query.QueryService;
 import org.labkey.api.reader.Readers;
 import org.labkey.api.sequenceanalysis.SequenceOutputFile;
 import org.labkey.api.sequenceanalysis.pipeline.AbstractPipelineStepProvider;
 import org.labkey.api.sequenceanalysis.pipeline.PipelineContext;
 import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler;
 import org.labkey.api.singlecell.pipeline.SingleCellStep;
+import org.labkey.api.util.PageFlowUtil;
 import org.labkey.api.writer.PrintWriters;
+import org.labkey.singlecell.SingleCellSchema;
 
 import java.io.File;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
+import java.util.Map;
+import java.util.Set;
 
 import static org.labkey.singlecell.analysis.ProcessSingleCellHandler.LOUPE_TYPE;
 
```
```diff
@@ -48,20 +62,72 @@ public void init(SequenceOutputHandler.JobContext ctx, List<SequenceOutputFile> inputFiles)
     {
         try (CSVWriter writer = new CSVWriter(PrintWriters.getPrintWriter(getMolInfoTable(ctx))))
         {
+            Map<Integer, SequenceOutputFile> loupeOutputs = new HashMap<>();
             for (SequenceOutputFile so : inputFiles)
             {
                 if (!LOUPE_TYPE.isType(so.getFile()))
                 {
-                    throw new PipelineJobException("All input files must be loupe files to use sequence saturation");
+                    File meta = new File(so.getFile().getPath().replaceAll(".rds", ".cellBarcodes.csv"));
+                    if (!meta.exists())
+                    {
+                        throw new PipelineJobException("Cannot find expected metadata file: " + meta.getPath());
+                    }
+
+                    Set<Integer> uniqueIds = new HashSet<>();
+                    try (CSVReader reader = new CSVReader(Readers.getReader(meta), '_'))
+                    {
+                        String[] line;
+                        while ((line = reader.readNext()) != null)
+                        {
+                            if (line.length != 2)
+                            {
+                                throw new PipelineJobException("Unexpected barcode line: " + StringUtils.join(line, "_"));
+                            }
+
+                            try
+                            {
+                                uniqueIds.add(Integer.parseInt(line[0]));
+                            }
+                            catch (NumberFormatException e)
+                            {
+                                throw new PipelineJobException("Non-numeric barcode prefix: " + StringUtils.join(line, "_"));
+                            }
+                        }
+                    }
+
+                    for (Integer rowId : uniqueIds)
+                    {
+                        SequenceOutputFile loupeObj = SequenceOutputFile.getForId(rowId);
+                        if (loupeObj == null)
+                        {
+                            throw new PipelineJobException("Unable to find loupe output file with ID: " + rowId);
+                        }
+                        loupeOutputs.put(rowId, loupeObj);
+                    }
                 }
+                else
+                {
+                    loupeOutputs.put(so.getRowid(), so);
+                }
+            }
 
-                File molInfo = new File(so.getFile().getParentFile(), "molecule_info.h5");
+            for (Integer rowId : loupeOutputs.keySet())
+            {
+                SequenceOutputFile loupeFile = loupeOutputs.get(rowId);
+                File molInfo = new File(loupeFile.getFile().getParentFile(), "molecule_info.h5");
                 if (!molInfo.exists())
                 {
                     throw new PipelineJobException("Cannot find file: " + molInfo.getPath());
                 }
 
-                writer.writeNext(new String[]{String.valueOf(so.getRowid()), molInfo.getPath()});
+                if (loupeFile.getReadset() == null)
+                {
+                    throw new PipelineJobException("Loupe file lacks a readset: " + rowId);
+                }
+
+                writer.writeNext(new String[]{String.valueOf(loupeFile.getRowid()), molInfo.getPath(), "RNA"});
+
+                findAdditionalData(loupeFile, writer, ctx.getJob());
             }
         }
         catch (IOException e)
```
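
Instead of rejecting non-loupe inputs, `init()` now accepts merged or processed Seurat objects: it swaps the `.rds` suffix for `.cellBarcodes.csv`, reads that file with `'_'` as the separator, and recovers the originating loupe outputs from the numeric barcode prefixes. Because upstream steps prefix every cell barcode with its dataset's output-file row ID, each line splits into exactly two fields (values illustrative):

```
12345_ACGTACGTACGTACGT-1
12346_TTGCATGCATGCATGC-1
```

Each unique prefix is resolved through `SequenceOutputFile.getForId()`, contributes one `RNA` row to the molInfo table, and is then passed to `findAdditionalData()` to pick up any HTO/ADT data.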
```diff
@@ -93,14 +159,15 @@ protected List<Chunk> getChunks(SequenceOutputHandler.JobContext ctx) throws PipelineJobException
             while ((line = reader.readNext()) != null)
             {
                 File source = new File(line[1]);
-                File dest = new File(ctx.getWorkingDirectory(), line[0] + ".molInfo.h5");
+                String assay = line[2];
+                File dest = new File(ctx.getWorkingDirectory(), line[0] + "." + assay + ".molInfo.h5");
                 if (dest.exists())
                 {
                     dest.delete();
                 }
                 FileUtils.copyFile(source, dest);
 
-                lines.add("\t'" + line[0] + "' = '" + dest.getName() + "',");
+                lines.add("\t'" + line[0] + "-" + assay + "' = '" + dest.getName() + "',");
                 ctx.getFileManager().addIntermediateFile(dest);
             }
         }
```
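
`getChunks()` reads the three-column table written by `init()` (row ID, molecule-info path, assay) and stages a uniquely named copy of each file in the working directory. For a dataset with hashing data, table rows such as these (paths illustrative):

```
12345,/data/gex/outs/molecule_info.h5,RNA
12345,/data/hashing/outs/molecule_info.h5,HTO
```

would be staged as `12345.RNA.molInfo.h5` and `12345.HTO.molInfo.h5`, with R list entries keyed `'12345-RNA'` and `'12345-HTO'` so assays from the same dataset no longer collide.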
```diff
@@ -128,4 +195,65 @@ public String getFileSuffix()
     {
         return "saturation";
     }
+
+    private void findAdditionalData(SequenceOutputFile loupeFile, CSVWriter writer, PipelineJob job) throws IOException, PipelineJobException
+    {
+        Set<Integer> hashingReadsets = new HashSet<>();
+        Set<Integer> citeReadsets = new HashSet<>();
+        Container targetContainer = job.getContainer().isWorkbook() ? job.getContainer().getParent() : job.getContainer();
+        TableSelector ts = new TableSelector(QueryService.get().getUserSchema(job.getUser(), targetContainer, SingleCellSchema.NAME).getTable(SingleCellSchema.TABLE_CDNAS), PageFlowUtil.set("hashingReadsetId", "citeseqReadsetId"), new SimpleFilter(FieldKey.fromString("readsetId"), loupeFile.getReadset()), null);
+        ts.forEachResults(rs -> {
+            if (rs.getObject(FieldKey.fromString("hashingReadsetId")) != null)
+            {
+                hashingReadsets.add(rs.getInt(FieldKey.fromString("hashingReadsetId")));
+            }
+
+            if (rs.getObject(FieldKey.fromString("citeseqReadsetId")) != null)
+            {
+                citeReadsets.add(rs.getInt(FieldKey.fromString("citeseqReadsetId")));
+            }
+        });
+
+        if (hashingReadsets.size() > 1)
+        {
+            throw new PipelineJobException("More than one hashing readset associated with GEX readset: " + loupeFile.getReadset());
+        }
+        else if (hashingReadsets.size() == 1)
+        {
+            writeExtraData(loupeFile.getRowid(), hashingReadsets.iterator().next(), job, "Cell Hashing Counts", writer, "HTO");
+        }
+
+        if (citeReadsets.size() > 1)
+        {
+            throw new PipelineJobException("More than one CITE-seq readset associated with GEX readset: " + loupeFile.getReadset());
+        }
+        else if (citeReadsets.size() == 1)
+        {
+            writeExtraData(loupeFile.getRowid(), citeReadsets.iterator().next(), job, "CITE-seq Counts", writer, "ADT");
+        }
+    }
+
+    private void writeExtraData(int datasetId, int readsetId, PipelineJob job, String category, CSVWriter writer, String assayName) throws PipelineJobException
+    {
+        Container targetContainer = job.getContainer().isWorkbook() ? job.getContainer().getParent() : job.getContainer();
+        SimpleFilter filter = new SimpleFilter(FieldKey.fromString("readsetId"), readsetId);
+        filter.addCondition(FieldKey.fromString("category"), category);
+
+        List<Integer> rowIds = new TableSelector(QueryService.get().getUserSchema(job.getUser(), targetContainer, SingleCellSchema.SEQUENCE_SCHEMA_NAME).getTable("outputfiles"), PageFlowUtil.set("rowid"), filter, new Sort("-rowid")).getArrayList(Integer.class);
+        if (!rowIds.isEmpty())
+        {
+            if (rowIds.size() > 1)
+            {
+                job.getLogger().info("More than one " + assayName + " output found for " + readsetId + ", using the most recent: " + rowIds.get(0));
+            }
+
+            File molInfo = new File(SequenceOutputFile.getForId(rowIds.get(0)).getFile().getParentFile().getParentFile(), "molecule_info.h5");
+            if (!molInfo.exists())
+            {
+                throw new PipelineJobException("Cannot find file: " + molInfo.getPath());
+            }
+
+            writer.writeNext(new String[]{String.valueOf(datasetId), molInfo.getPath(), assayName});
+        }
+    }
 }
```
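
`findAdditionalData()` queries the cDNA record linked to the loupe file's GEX readset; when a hashing or CITE-seq readset is attached, `writeExtraData()` finds the most recent matching output (category `Cell Hashing Counts` or `CITE-seq Counts`) and appends its molecule_info.h5 under the same dataset ID. One GEX input can therefore yield up to three molInfo rows (IDs and paths illustrative):

```
98765,/data/gex/outs/molecule_info.h5,RNA
98765,/data/hashing/outs/molecule_info.h5,HTO
98765,/data/citeseq/outs/molecule_info.h5,ADT
```

Note that `writeExtraData()` resolves molecule_info.h5 two directory levels above the stored output file, and when several outputs match it logs a message and uses the highest row ID rather than failing.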
