Skip to content

Commit 582bb55

Browse files
committed
Improve CellRangerVDJ metrics
1 parent 55bf920 commit 582bb55

File tree

1 file changed

+83
-74
lines changed

1 file changed

+83
-74
lines changed

singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java

Lines changed: 83 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -634,106 +634,115 @@ public void deleteSymlinks(File localFqDir) throws PipelineJobException
634634
}
635635
}
636636

637-
public void addMetrics(AnalysisModel model) throws PipelineJobException
637+
public void addMetrics(File outDir, AnalysisModel model) throws PipelineJobException
638638
{
639639
getPipelineCtx().getLogger().debug("adding 10x metrics");
640640

641-
// TODO: improve
642-
//File outputHtml = new File(outdir, "per_sample_outs/" + id + "/web_summary.html");
643-
File metrics = new File(model.getAlignmentFileObject().getParentFile(), "metrics_summary.csv");
644-
if (metrics.exists())
641+
File metrics = new File(outDir, "metrics_summary.csv");
642+
if (!metrics.exists())
645643
{
646-
try (CSVReader reader = new CSVReader(Readers.getReader(metrics)))
647-
{
648-
String[] line;
649-
String[] header = null;
650-
String[] metricValues = null;
644+
throw new PipelineJobException("Unable to find file: " + metrics.getPath());
645+
}
651646

652-
int i = 0;
653-
while ((line = reader.readNext()) != null)
654-
{
655-
if (i == 0)
656-
{
657-
header = line;
658-
}
659-
else
660-
{
661-
metricValues = line;
662-
break;
663-
}
647+
if (model.getAlignmentFile() == null)
648+
{
649+
throw new PipelineJobException("model.getAlignmentFile() was null");
650+
}
664651

665-
i++;
666-
}
652+
try (CSVReader reader = new CSVReader(Readers.getReader(metrics)))
653+
{
654+
String[] line;
655+
List<String[]> metricValues = new ArrayList<>();
667656

668-
if (model.getAlignmentFile() == null)
657+
int i = 0;
658+
while ((line = reader.readNext()) != null)
659+
{
660+
i++;
661+
if (i == 1)
669662
{
670-
throw new PipelineJobException("model.getAlignmentFile() was null");
663+
continue;
671664
}
672665

673-
int totalAdded = 0;
674-
TableInfo ti = DbSchema.get("sequenceanalysis", DbSchemaType.Module).getTable("quality_metrics");
675-
676-
//NOTE: if this job errored and restarted, we may have duplicate records:
677-
SimpleFilter filter = new SimpleFilter(FieldKey.fromString("readset"), model.getReadset());
678-
filter.addCondition(FieldKey.fromString("analysis_id"), model.getRowId(), CompareType.EQUAL);
679-
filter.addCondition(FieldKey.fromString("dataid"), model.getAlignmentFile(), CompareType.EQUAL);
680-
filter.addCondition(FieldKey.fromString("category"), "Cell Ranger VDJ", CompareType.EQUAL);
681-
filter.addCondition(FieldKey.fromString("container"), getPipelineCtx().getJob().getContainer().getId(), CompareType.EQUAL);
682-
TableSelector ts = new TableSelector(ti, PageFlowUtil.set("rowid"), filter, null);
683-
if (ts.exists())
666+
metricValues.add(line);
667+
}
668+
669+
int totalAdded = 0;
670+
TableInfo ti = DbSchema.get("sequenceanalysis", DbSchemaType.Module).getTable("quality_metrics");
671+
672+
//NOTE: if this job errored and restarted, we may have duplicate records:
673+
SimpleFilter filter = new SimpleFilter(FieldKey.fromString("readset"), model.getReadset());
674+
filter.addCondition(FieldKey.fromString("analysis_id"), model.getRowId(), CompareType.EQUAL);
675+
filter.addCondition(FieldKey.fromString("dataid"), model.getAlignmentFile(), CompareType.EQUAL);
676+
filter.addCondition(FieldKey.fromString("category"), "Cell Ranger VDJ", CompareType.EQUAL);
677+
filter.addCondition(FieldKey.fromString("container"), getPipelineCtx().getJob().getContainer().getId(), CompareType.EQUAL);
678+
TableSelector ts = new TableSelector(ti, PageFlowUtil.set("rowid"), filter, null);
679+
if (ts.exists())
680+
{
681+
getPipelineCtx().getLogger().info("Deleting existing QC metrics (probably from prior restarted job)");
682+
ts.getArrayList(Integer.class).forEach(rowid -> {
683+
Table.delete(ti, rowid);
684+
});
685+
}
686+
687+
for (String[] row : metricValues)
688+
{
689+
if ("Fastq ID".equals(row[2]) || "Physical library ID".equals(row[2]))
684690
{
685-
getPipelineCtx().getLogger().info("Deleting existing QC metrics (probably from prior restarted job)");
686-
ts.getArrayList(Integer.class).forEach(rowid -> {
687-
Table.delete(ti, rowid);
688-
});
691+
continue;
689692
}
690693

691-
for (int j = 0; j < header.length; j++)
692-
{
693-
Map<String, Object> toInsert = new CaseInsensitiveHashMap<>();
694-
toInsert.put("container", getPipelineCtx().getJob().getContainer().getId());
695-
toInsert.put("createdby", getPipelineCtx().getJob().getUser().getUserId());
696-
toInsert.put("created", new Date());
697-
toInsert.put("readset", model.getReadset());
698-
toInsert.put("analysis_id", model.getRowId());
699-
toInsert.put("dataid", model.getAlignmentFile());
700-
701-
toInsert.put("category", "Cell Ranger VDJ");
702-
toInsert.put("metricname", header[j]);
703-
704-
metricValues[j] = metricValues[j].replaceAll(",", "");
705-
Object val = metricValues[j];
706-
if (metricValues[j].contains("%"))
707-
{
708-
metricValues[j] = metricValues[j].replaceAll("%", "");
709-
Double d = ConvertHelper.convert(metricValues[j], Double.class);
710-
d = d / 100.0;
711-
val = d;
712-
}
694+
Map<String, Object> toInsert = new CaseInsensitiveHashMap<>();
695+
toInsert.put("container", getPipelineCtx().getJob().getContainer().getId());
696+
toInsert.put("createdby", getPipelineCtx().getJob().getUser().getUserId());
697+
toInsert.put("created", new Date());
698+
toInsert.put("readset", model.getReadset());
699+
toInsert.put("analysis_id", model.getRowId());
700+
toInsert.put("dataid", model.getAlignmentFile());
713701

714-
toInsert.put("metricvalue", val);
702+
toInsert.put("category", "Cell Ranger VDJ");
715703

716-
Table.insert(getPipelineCtx().getJob().getUser(), ti, toInsert);
717-
totalAdded++;
704+
String mn = row[4];
705+
if (Arrays.asList("Cells with productive V-J spanning pair", "Estimated number of cells", "Number of cells with productive V-J spanning pair", "Paired clonotype diversity").contains(mn))
706+
{
707+
mn = ("VDJ T GD".equals(row[1]) ? "Gamma/Delta" : "Alpha/Beta") + ": " + mn;
718708
}
709+
toInsert.put("metricname", mn);
719710

720-
getPipelineCtx().getLogger().info("total metrics added: " + totalAdded);
721-
}
722-
catch (IOException e)
723-
{
724-
throw new PipelineJobException(e);
711+
row[5] = row[5].replaceAll(",", ""); //remove commas
712+
Object val = row[5];
713+
if (row[5].contains("%"))
714+
{
715+
row[5] = row[5].replaceAll("%", "");
716+
Double d = ConvertHelper.convert(row[5], Double.class);
717+
d = d / 100.0;
718+
val = d;
719+
}
720+
721+
toInsert.put("metricvalue", val);
722+
723+
Table.insert(getPipelineCtx().getJob().getUser(), ti, toInsert);
724+
totalAdded++;
725725
}
726+
727+
getPipelineCtx().getLogger().info("total metrics added: " + totalAdded);
726728
}
727-
else
729+
catch (IOException e)
728730
{
729-
getPipelineCtx().getLogger().warn("unable to find metrics file: " + metrics.getPath());
731+
throw new PipelineJobException(e);
730732
}
731733
}
732734

733735
@Override
734736
public void complete(SequenceAnalysisJobSupport support, AnalysisModel model, Collection<SequenceOutputFile> outputFilesCreated) throws PipelineJobException
735737
{
736-
addMetrics(model);
738+
if (outputFilesCreated == null || outputFilesCreated.isEmpty())
739+
{
740+
throw new PipelineJobException("Expected sequence outputs to be created");
741+
}
742+
743+
File html = outputFilesCreated.stream().filter(x -> "10x Run Summary".equals(x.getCategory())).findFirst().orElseThrow().getFile();
744+
745+
addMetrics(html.getParentFile(), model);
737746

738747
File bam = model.getAlignmentData().getFile();
739748
if (!bam.exists())

0 commit comments

Comments
 (0)