Skip to content

Commit f558da8

Browse files
committed
Improve CellRanger QC
1 parent 582bb55 commit f558da8

File tree

2 files changed

+156
-65
lines changed

2 files changed

+156
-65
lines changed

singlecell/src/org/labkey/singlecell/run/CellRangerFeatureBarcodeHandler.java

Lines changed: 62 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -482,82 +482,80 @@ public void complete(PipelineJob job, List<Readset> readsets, List<SequenceOutpu
482482
Readset rs = readsets.get(0);
483483

484484
File metrics = new File(so.getFile().getParentFile().getParentFile(), "metrics_summary.csv");
485-
if (metrics.exists())
485+
if (!metrics.exists())
486486
{
487-
job.getLogger().debug("adding 10x metrics");
488-
try (CSVReader reader = new CSVReader(Readers.getReader(metrics)))
489-
{
490-
String[] line;
491-
String[] header = null;
492-
String[] metricValues = null;
493-
494-
int i = 0;
495-
while ((line = reader.readNext()) != null)
496-
{
497-
if (i == 0)
498-
{
499-
header = line;
500-
}
501-
else
502-
{
503-
metricValues = line;
504-
break;
505-
}
506-
507-
i++;
508-
}
487+
throw new PipelineJobException("unable to find metrics file: " + metrics.getPath());
488+
}
509489

510-
TableInfo ti = DbSchema.get("sequenceanalysis", DbSchemaType.Module).getTable("quality_metrics");
490+
job.getLogger().debug("adding 10x metrics");
491+
try (CSVReader reader = new CSVReader(Readers.getReader(metrics)))
492+
{
493+
String[] line;
494+
String[] header = null;
495+
String[] metricValues = null;
511496

512-
//NOTE: if this job errored and restarted, we may have duplicate records:
513-
SimpleFilter filter = new SimpleFilter(FieldKey.fromString("readset"), so.getReadset());
514-
filter.addCondition(FieldKey.fromString("dataid"), so.getDataId(), CompareType.EQUAL);
515-
filter.addCondition(FieldKey.fromString("category"), rs.getApplication(), CompareType.EQUAL);
516-
filter.addCondition(FieldKey.fromString("container"), job.getContainer().getId(), CompareType.EQUAL);
517-
TableSelector ts = new TableSelector(ti, PageFlowUtil.set("rowid"), filter, null);
518-
if (ts.exists())
497+
int i = 0;
498+
while ((line = reader.readNext()) != null)
499+
{
500+
if (i == 0)
519501
{
520-
job.getLogger().info("Deleting existing QC metrics (probably from prior restarted job)");
521-
ts.getArrayList(Integer.class).forEach(rowid -> {
522-
Table.delete(ti, rowid);
523-
});
502+
header = line;
524503
}
525-
526-
for (int j = 0; j < header.length; j++)
504+
else
527505
{
528-
Map<String, Object> toInsert = new CaseInsensitiveHashMap<>();
529-
toInsert.put("container", job.getContainer().getId());
530-
toInsert.put("createdby", job.getUser().getUserId());
531-
toInsert.put("created", new Date());
532-
toInsert.put("readset", rs.getReadsetId());
533-
toInsert.put("dataid", so.getDataId());
534-
535-
toInsert.put("category", "Cell Ranger");
536-
toInsert.put("metricname", header[j]);
537-
538-
metricValues[j] = metricValues[j].replaceAll(",", "");
539-
Object val = metricValues[j];
540-
if (metricValues[j].contains("%"))
541-
{
542-
metricValues[j] = metricValues[j].replaceAll("%", "");
543-
Double d = ConvertHelper.convert(metricValues[j], Double.class);
544-
d = d / 100.0;
545-
val = d;
546-
}
547-
548-
toInsert.put("metricvalue", val);
549-
550-
Table.insert(job.getUser(), ti, toInsert);
506+
metricValues = line;
507+
break;
551508
}
509+
510+
i++;
552511
}
553-
catch (IOException e)
512+
513+
TableInfo ti = DbSchema.get("sequenceanalysis", DbSchemaType.Module).getTable("quality_metrics");
514+
515+
//NOTE: if this job errored and restarted, we may have duplicate records:
516+
SimpleFilter filter = new SimpleFilter(FieldKey.fromString("readset"), so.getReadset());
517+
filter.addCondition(FieldKey.fromString("dataid"), so.getDataId(), CompareType.EQUAL);
518+
filter.addCondition(FieldKey.fromString("category"), rs.getApplication(), CompareType.EQUAL);
519+
filter.addCondition(FieldKey.fromString("container"), job.getContainer().getId(), CompareType.EQUAL);
520+
TableSelector ts = new TableSelector(ti, PageFlowUtil.set("rowid"), filter, null);
521+
if (ts.exists())
554522
{
555-
throw new PipelineJobException(e);
523+
job.getLogger().info("Deleting existing QC metrics (probably from prior restarted job)");
524+
ts.getArrayList(Integer.class).forEach(rowid -> {
525+
Table.delete(ti, rowid);
526+
});
527+
}
528+
529+
for (int j = 0; j < header.length; j++)
530+
{
531+
Map<String, Object> toInsert = new CaseInsensitiveHashMap<>();
532+
toInsert.put("container", job.getContainer().getId());
533+
toInsert.put("createdby", job.getUser().getUserId());
534+
toInsert.put("created", new Date());
535+
toInsert.put("readset", rs.getReadsetId());
536+
toInsert.put("dataid", so.getDataId());
537+
538+
toInsert.put("category", "Cell Ranger");
539+
toInsert.put("metricname", header[j]);
540+
541+
metricValues[j] = metricValues[j].replaceAll(",", "");
542+
Object val = metricValues[j];
543+
if (metricValues[j].contains("%"))
544+
{
545+
metricValues[j] = metricValues[j].replaceAll("%", "");
546+
Double d = ConvertHelper.convert(metricValues[j], Double.class);
547+
d = d / 100.0;
548+
val = d;
549+
}
550+
551+
toInsert.put("metricvalue", val);
552+
553+
Table.insert(job.getUser(), ti, toInsert);
556554
}
557555
}
558-
else
556+
catch (IOException e)
559557
{
560-
job.getLogger().warn("unable to find metrics file: " + metrics.getPath());
558+
throw new PipelineJobException(e);
561559
}
562560
}
563561
}

singlecell/src/org/labkey/singlecell/run/CellRangerGexCountStep.java

Lines changed: 94 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -536,7 +536,7 @@ public void complete(SequenceAnalysisJobSupport support, AnalysisModel model, Co
536536
}
537537
else
538538
{
539-
getPipelineCtx().getLogger().warn("unable to find metrics file: " + metrics.getPath());
539+
throw new PipelineJobException("unable to find metrics file: " + metrics.getPath());
540540
}
541541

542542
TableInfo cDNA = SingleCellSchema.getInstance().getSchema().getTable(SingleCellSchema.TABLE_CDNAS);
@@ -571,4 +571,97 @@ public void complete(SequenceAnalysisJobSupport support, AnalysisModel model, Co
571571
}
572572
}
573573
}
574+
575+
private void addMetrics(File outDir, AnalysisModel model) throws PipelineJobException
576+
{
577+
getPipelineCtx().getLogger().debug("adding 10x metrics");
578+
579+
File metrics = new File(outDir, "metrics_summary.csv");
580+
if (!metrics.exists())
581+
{
582+
throw new PipelineJobException("Unable to find file: " + metrics.getPath());
583+
}
584+
585+
if (model.getAlignmentFile() == null)
586+
{
587+
throw new PipelineJobException("model.getAlignmentFile() was null");
588+
}
589+
590+
try (CSVReader reader = new CSVReader(Readers.getReader(metrics)))
591+
{
592+
String[] line;
593+
List<String[]> metricValues = new ArrayList<>();
594+
595+
int i = 0;
596+
while ((line = reader.readNext()) != null)
597+
{
598+
i++;
599+
if (i == 1)
600+
{
601+
continue;
602+
}
603+
604+
metricValues.add(line);
605+
}
606+
607+
int totalAdded = 0;
608+
TableInfo ti = DbSchema.get("sequenceanalysis", DbSchemaType.Module).getTable("quality_metrics");
609+
610+
//NOTE: if this job errored and restarted, we may have duplicate records:
611+
SimpleFilter filter = new SimpleFilter(FieldKey.fromString("readset"), model.getReadset());
612+
filter.addCondition(FieldKey.fromString("analysis_id"), model.getRowId(), CompareType.EQUAL);
613+
filter.addCondition(FieldKey.fromString("dataid"), model.getAlignmentFile(), CompareType.EQUAL);
614+
filter.addCondition(FieldKey.fromString("category"), "Cell Ranger VDJ", CompareType.EQUAL);
615+
filter.addCondition(FieldKey.fromString("container"), getPipelineCtx().getJob().getContainer().getId(), CompareType.EQUAL);
616+
TableSelector ts = new TableSelector(ti, PageFlowUtil.set("rowid"), filter, null);
617+
if (ts.exists())
618+
{
619+
getPipelineCtx().getLogger().info("Deleting existing QC metrics (probably from prior restarted job)");
620+
ts.getArrayList(Integer.class).forEach(rowid -> {
621+
Table.delete(ti, rowid);
622+
});
623+
}
624+
625+
for (String[] row : metricValues)
626+
{
627+
//TODO
628+
if ("Fastq ID".equals(row[2]) || "Physical library ID".equals(row[2]))
629+
{
630+
continue;
631+
}
632+
633+
Map<String, Object> toInsert = new CaseInsensitiveHashMap<>();
634+
toInsert.put("container", getPipelineCtx().getJob().getContainer().getId());
635+
toInsert.put("createdby", getPipelineCtx().getJob().getUser().getUserId());
636+
toInsert.put("created", new Date());
637+
toInsert.put("readset", model.getReadset());
638+
toInsert.put("analysis_id", model.getRowId());
639+
toInsert.put("dataid", model.getAlignmentFile());
640+
641+
toInsert.put("category", "Cell Ranger");
642+
toInsert.put("metricname", row[4]);
643+
644+
row[5] = row[5].replaceAll(",", ""); //remove commas
645+
Object val = row[5];
646+
if (row[5].contains("%"))
647+
{
648+
row[5] = row[5].replaceAll("%", "");
649+
Double d = ConvertHelper.convert(row[5], Double.class);
650+
d = d / 100.0;
651+
val = d;
652+
}
653+
654+
toInsert.put("metricvalue", val);
655+
656+
Table.insert(getPipelineCtx().getJob().getUser(), ti, toInsert);
657+
totalAdded++;
658+
}
659+
660+
getPipelineCtx().getLogger().info("total metrics added: " + totalAdded);
661+
}
662+
catch (IOException e)
663+
{
664+
throw new PipelineJobException(e);
665+
}
666+
}
574667
}

0 commit comments

Comments
 (0)