|
1 | 1 | package org.labkey.singlecell.pipeline.singlecell; |
2 | 2 |
|
| 3 | +import org.apache.commons.io.FileUtils; |
3 | 4 | import org.json.JSONObject; |
4 | 5 | import org.labkey.api.pipeline.PipelineJobException; |
5 | 6 | import org.labkey.api.sequenceanalysis.SequenceOutputFile; |
|
13 | 14 | import org.labkey.api.singlecell.pipeline.SingleCellOutput; |
14 | 15 | import org.labkey.api.singlecell.pipeline.SingleCellStep; |
15 | 16 | import org.labkey.api.util.PageFlowUtil; |
| 17 | +import org.labkey.singlecell.CellHashingServiceImpl; |
16 | 18 | import org.labkey.singlecell.analysis.CellRangerSeuratHandler; |
17 | 19 | import org.labkey.singlecell.analysis.SeuratCellHashingHandler; |
18 | 20 | import org.labkey.singlecell.analysis.SeuratCiteSeqHandler; |
19 | 21 |
|
20 | 22 | import java.io.File; |
| 23 | +import java.io.IOException; |
21 | 24 | import java.util.ArrayList; |
22 | 25 | import java.util.Collection; |
23 | 26 | import java.util.Collections; |
@@ -106,22 +109,75 @@ protected Map<Integer, File> prepareCountData(SingleCellOutput output, SequenceO |
106 | 109 | params.outputCategory = SeuratCiteSeqHandler.CATEGORY; |
107 | 110 | params.createOutputFiles = true; |
108 | 111 | params.genomeId = wrapper.getSequenceOutputFile().getLibrary_id(); |
109 | | - params.cellBarcodeWhitelistFile = cellBarcodesParsed; |
| 112 | + //params.cellBarcodeWhitelistFile = cellBarcodesParsed; |
110 | 113 | params.cells = 250000; |
111 | 114 |
|
112 | 115 | finalOutput = CellHashingService.get().processCellHashingOrCiteSeqForParent(parentReadset, output, ctx, params); |
| 116 | + |
| 117 | + File validAdt = CellHashingServiceImpl.get().getValidCiteSeqBarcodeMetadataFile(ctx.getSourceDirectory(), parentReadset.getReadsetId()); |
| 118 | + if (!validAdt.exists()) |
| 119 | + { |
| 120 | + throw new PipelineJobException("Unable to find ADT metadata. expected: " + validAdt.getPath()); |
| 121 | + } |
| 122 | + |
| 123 | + try |
| 124 | + { |
| 125 | + FileUtils.copyFile(validAdt, getAdtMetadata(finalOutput)); |
| 126 | + } |
| 127 | + catch (IOException e) |
| 128 | + { |
| 129 | + throw new PipelineJobException(e); |
| 130 | + } |
113 | 131 | } |
114 | 132 | else |
115 | 133 | { |
116 | 134 | ctx.getLogger().info("CITE-seq not used, skipping: " + parentReadset.getName()); |
117 | 135 | } |
118 | 136 |
|
119 | | - dataIdToCalls.put(wrapper.getSequenceOutputFileId(), finalOutput); |
| 137 | + dataIdToCalls.put(wrapper.getSequenceOutputFileId(), finalOutput.getParentFile()); |
120 | 138 | } |
121 | 139 |
|
122 | 140 | return dataIdToCalls; |
123 | 141 | } |
124 | 142 |
|
| 143 | + public File getAdtMetadata(File countMatrix) |
| 144 | + { |
| 145 | + return new File(countMatrix.getParentFile(), "adtMetadata.txt"); |
| 146 | + } |
| 147 | + |
| 148 | + @Override |
| 149 | + protected Chunk createDataChunk(Map<Integer, File> hashingData) |
| 150 | + { |
| 151 | + Chunk ret = super.createDataChunk(hashingData); |
| 152 | + |
| 153 | + List<String> lines = new ArrayList<>(); |
| 154 | + |
| 155 | + lines.add("featureMetadataFiles <- list("); |
| 156 | + for (Integer key : hashingData.keySet()) |
| 157 | + { |
| 158 | + if (hashingData.get(key) == null) |
| 159 | + { |
| 160 | + lines.add("\t'" + key + "' = NULL,"); |
| 161 | + } |
| 162 | + else |
| 163 | + { |
| 164 | + File meta = getAdtMetadata(hashingData.get(key)); |
| 165 | + lines.add("\t'" + key + "' = '" + meta.getName() + "',"); |
| 166 | + } |
| 167 | + } |
| 168 | + |
| 169 | + // Remove trailing comma: |
| 170 | + int lastIdx = lines.size() - 1; |
| 171 | + lines.set(lastIdx, lines.get(lastIdx).replaceAll(",$", "")); |
| 172 | + |
| 173 | + lines.add(")"); |
| 174 | + lines.add(""); |
| 175 | + |
| 176 | + ret.bodyLines.addAll(lines); |
| 177 | + |
| 178 | + return ret; |
| 179 | + } |
| 180 | + |
125 | 181 | @Override |
126 | 182 | public boolean isIncluded(SequenceOutputHandler.JobContext ctx, List<SequenceOutputFile> inputs) throws PipelineJobException |
127 | 183 | { |
|
0 commit comments