Skip to content

Commit 72411d0

Browse files
committed
Bugfix feature barcoding and add metrics capture
1 parent 22242b6 commit 72411d0

File tree

1 file changed

+120
-2
lines changed

1 file changed

+120
-2
lines changed

singlecell/src/org/labkey/singlecell/run/CellRangerFeatureBarcodeHandler.java

Lines changed: 120 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,20 @@
55
import org.apache.commons.io.FileUtils;
66
import org.apache.commons.lang3.tuple.Pair;
77
import org.json.JSONObject;
8+
import org.labkey.api.collections.CaseInsensitiveHashMap;
9+
import org.labkey.api.data.CompareType;
10+
import org.labkey.api.data.ConvertHelper;
11+
import org.labkey.api.data.DbSchema;
12+
import org.labkey.api.data.DbSchemaType;
13+
import org.labkey.api.data.SimpleFilter;
14+
import org.labkey.api.data.Table;
15+
import org.labkey.api.data.TableInfo;
16+
import org.labkey.api.data.TableSelector;
817
import org.labkey.api.module.ModuleLoader;
918
import org.labkey.api.pipeline.PipelineJob;
1019
import org.labkey.api.pipeline.PipelineJobException;
1120
import org.labkey.api.pipeline.RecordedAction;
21+
import org.labkey.api.query.FieldKey;
1222
import org.labkey.api.reader.Readers;
1323
import org.labkey.api.sequenceanalysis.SequenceOutputFile;
1424
import org.labkey.api.sequenceanalysis.model.Readset;
@@ -20,6 +30,7 @@
2030
import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor;
2131
import org.labkey.api.sequenceanalysis.run.SimpleScriptWrapper;
2232
import org.labkey.api.util.FileUtil;
33+
import org.labkey.api.util.PageFlowUtil;
2334
import org.labkey.api.writer.PrintWriters;
2435
import org.labkey.singlecell.CellHashingServiceImpl;
2536
import org.labkey.singlecell.SingleCellModule;
@@ -31,6 +42,7 @@
3142
import java.util.Arrays;
3243
import java.util.Date;
3344
import java.util.List;
45+
import java.util.Map;
3446

3547
public class CellRangerFeatureBarcodeHandler extends AbstractParameterizedOutputHandler<SequenceOutputHandler.SequenceReadsetProcessor>
3648
{
@@ -164,8 +176,8 @@ else if (rs.getApplication().equals("CITE-Seq"))
164176
List<Pair<File, File>> inputFastqs = new ArrayList<>();
165177
rs.getReadData().forEach(rd -> {
166178
inputFastqs.add(Pair.of(rd.getFile1(), rd.getFile2()));
167-
output.addIntermediateFile(rd.getFile1(), "Input FASTQ");
168-
output.addIntermediateFile(rd.getFile2(), "Input FASTQ");
179+
action.addInputIfNotPresent(rd.getFile1(), "Input FASTQ");
180+
action.addInputIfNotPresent(rd.getFile2(), "Input FASTQ");
169181
});
170182

171183
List<String> args = wrapper.prepareCountArgs(output, id, ctx.getOutputDir(), rs, inputFastqs, extraArgs, false);
@@ -366,6 +378,112 @@ private File createFeatureRefForHashing(File outputDir, File hashingWhitelist) t
366378
return featuresCsv;
367379
}
368380

381+
@Override
382+
public void complete(PipelineJob job, List<Readset> readsets, List<SequenceOutputFile> outputsCreated) throws PipelineJobException
383+
{
384+
if (outputsCreated.isEmpty())
385+
{
386+
job.getLogger().error("Expected outputs to be created");
387+
return;
388+
}
389+
390+
SequenceOutputFile so = null;
391+
for (SequenceOutputFile o : outputsCreated)
392+
{
393+
if ("10x Run Summary".equals(o.getCategory()))
394+
{
395+
continue;
396+
}
397+
398+
so = o;
399+
break;
400+
}
401+
402+
if (so == null)
403+
{
404+
throw new PipelineJobException("Unable to find count matrix as output");
405+
}
406+
407+
Readset rs = readsets.get(0);
408+
409+
File metrics = new File(so.getFile().getParentFile(), "metrics_summary.csv");
410+
if (metrics.exists())
411+
{
412+
job.getLogger().debug("adding 10x metrics");
413+
try (CSVReader reader = new CSVReader(Readers.getReader(metrics)))
414+
{
415+
String[] line;
416+
String[] header = null;
417+
String[] metricValues = null;
369418

419+
int i = 0;
420+
while ((line = reader.readNext()) != null)
421+
{
422+
if (i == 0)
423+
{
424+
header = line;
425+
}
426+
else
427+
{
428+
metricValues = line;
429+
break;
430+
}
431+
432+
i++;
433+
}
434+
435+
TableInfo ti = DbSchema.get("sequenceanalysis", DbSchemaType.Module).getTable("quality_metrics");
436+
437+
//NOTE: if this job errored and restarted, we may have duplicate records:
438+
SimpleFilter filter = new SimpleFilter(FieldKey.fromString("readset"), so.getReadset());
439+
filter.addCondition(FieldKey.fromString("dataid"), so.getDataId(), CompareType.EQUAL);
440+
filter.addCondition(FieldKey.fromString("category"), rs.getApplication(), CompareType.EQUAL);
441+
filter.addCondition(FieldKey.fromString("container"), job.getContainer().getId(), CompareType.EQUAL);
442+
TableSelector ts = new TableSelector(ti, PageFlowUtil.set("rowid"), filter, null);
443+
if (ts.exists())
444+
{
445+
job.getLogger().info("Deleting existing QC metrics (probably from prior restarted job)");
446+
ts.getArrayList(Integer.class).forEach(rowid -> {
447+
Table.delete(ti, rowid);
448+
});
449+
}
450+
451+
for (int j = 0; j < header.length; j++)
452+
{
453+
Map<String, Object> toInsert = new CaseInsensitiveHashMap<>();
454+
toInsert.put("container", job.getContainer().getId());
455+
toInsert.put("createdby", job.getUser().getUserId());
456+
toInsert.put("created", new Date());
457+
toInsert.put("readset", rs.getReadsetId());
458+
toInsert.put("dataid", so.getDataId());
459+
460+
toInsert.put("category", "Cell Ranger");
461+
toInsert.put("metricname", header[j]);
462+
463+
metricValues[j] = metricValues[j].replaceAll(",", "");
464+
Object val = metricValues[j];
465+
if (metricValues[j].contains("%"))
466+
{
467+
metricValues[j] = metricValues[j].replaceAll("%", "");
468+
Double d = ConvertHelper.convert(metricValues[j], Double.class);
469+
d = d / 100.0;
470+
val = d;
471+
}
472+
473+
toInsert.put("metricvalue", val);
474+
475+
Table.insert(job.getUser(), ti, toInsert);
476+
}
477+
}
478+
catch (IOException e)
479+
{
480+
throw new PipelineJobException(e);
481+
}
482+
}
483+
else
484+
{
485+
job.getLogger().warn("unable to find metrics file: " + metrics.getPath());
486+
}
487+
}
370488
}
371489
}

0 commit comments

Comments (0)