Skip to content

Commit d11cec0

Browse files
committed
Update CellRanger metrics for BAM-less jobs
1 parent beb1bac commit d11cec0

File tree

2 files changed

+25
-117
lines changed

2 files changed

+25
-117
lines changed

singlecell/src/org/labkey/singlecell/run/CellRangerGexCountStep.java

Lines changed: 12 additions & 101 deletions
Original file line numberDiff line numberDiff line change
@@ -475,7 +475,16 @@ public boolean alwaysCopyIndexToWorkingDir()
475475
@Override
476476
public void complete(SequenceAnalysisJobSupport support, AnalysisModel model, Collection<SequenceOutputFile> outputFilesCreated) throws PipelineJobException
477477
{
478-
File metrics = new File(model.getAlignmentFileObject().getParentFile(), "metrics_summary.csv");
478+
SequenceOutputFile outputForData = outputFilesCreated.stream().filter(x -> LOUPE_CATEGORY.equals(x.getCategory())).findFirst().orElse(null);
479+
if (outputForData == null)
480+
{
481+
outputForData = outputFilesCreated.stream().filter(x -> "10x Run Summary".equals(x.getCategory())).findFirst().orElseThrow();
482+
}
483+
484+
File outsDir = outputForData.getFile().getParentFile();
485+
Integer dataId = outputForData.getDataId();
486+
487+
File metrics = new File(outsDir, "metrics_summary.csv");
479488
if (metrics.exists())
480489
{
481490
getPipelineCtx().getLogger().debug("adding 10x metrics");
@@ -501,17 +510,12 @@ public void complete(SequenceAnalysisJobSupport support, AnalysisModel model, Co
501510
i++;
502511
}
503512

504-
if (model.getAlignmentFile() == null)
505-
{
506-
throw new PipelineJobException("model.getAlignmentFile() was null");
507-
}
508-
509513
TableInfo ti = DbSchema.get("sequenceanalysis", DbSchemaType.Module).getTable("quality_metrics");
510514

511515
//NOTE: if this job errored and restarted, we may have duplicate records:
512516
SimpleFilter filter = new SimpleFilter(FieldKey.fromString("readset"), model.getReadset());
513517
filter.addCondition(FieldKey.fromString("analysis_id"), model.getRowId(), CompareType.EQUAL);
514-
filter.addCondition(FieldKey.fromString("dataid"), model.getAlignmentFile(), CompareType.EQUAL);
518+
filter.addCondition(FieldKey.fromString("dataid"), dataId, CompareType.EQUAL);
515519
filter.addCondition(FieldKey.fromString("category"), "Cell Ranger", CompareType.EQUAL);
516520
filter.addCondition(FieldKey.fromString("container"), getPipelineCtx().getJob().getContainer().getId(), CompareType.EQUAL);
517521
TableSelector ts = new TableSelector(ti, PageFlowUtil.set("rowid"), filter, null);
@@ -531,7 +535,7 @@ public void complete(SequenceAnalysisJobSupport support, AnalysisModel model, Co
531535
toInsert.put("created", new Date());
532536
toInsert.put("readset", model.getReadset());
533537
toInsert.put("analysis_id", model.getRowId());
534-
toInsert.put("dataid", model.getAlignmentFile());
538+
toInsert.put("dataid", dataId);
535539

536540
toInsert.put("category", "Cell Ranger");
537541
toInsert.put("metricname", header[j]);
@@ -593,97 +597,4 @@ public void complete(SequenceAnalysisJobSupport support, AnalysisModel model, Co
593597
}
594598
}
595599
}
596-
597-
private void addMetrics(File outDir, AnalysisModel model) throws PipelineJobException
598-
{
599-
getPipelineCtx().getLogger().debug("adding 10x metrics");
600-
601-
File metrics = new File(outDir, "metrics_summary.csv");
602-
if (!metrics.exists())
603-
{
604-
throw new PipelineJobException("Unable to find file: " + metrics.getPath());
605-
}
606-
607-
if (model.getAlignmentFile() == null)
608-
{
609-
throw new PipelineJobException("model.getAlignmentFile() was null");
610-
}
611-
612-
try (CSVReader reader = new CSVReader(Readers.getReader(metrics)))
613-
{
614-
String[] line;
615-
List<String[]> metricValues = new ArrayList<>();
616-
617-
int i = 0;
618-
while ((line = reader.readNext()) != null)
619-
{
620-
i++;
621-
if (i == 1)
622-
{
623-
continue;
624-
}
625-
626-
metricValues.add(line);
627-
}
628-
629-
int totalAdded = 0;
630-
TableInfo ti = DbSchema.get("sequenceanalysis", DbSchemaType.Module).getTable("quality_metrics");
631-
632-
//NOTE: if this job errored and restarted, we may have duplicate records:
633-
SimpleFilter filter = new SimpleFilter(FieldKey.fromString("readset"), model.getReadset());
634-
filter.addCondition(FieldKey.fromString("analysis_id"), model.getRowId(), CompareType.EQUAL);
635-
filter.addCondition(FieldKey.fromString("dataid"), model.getAlignmentFile(), CompareType.EQUAL);
636-
filter.addCondition(FieldKey.fromString("category"), "Cell Ranger VDJ", CompareType.EQUAL);
637-
filter.addCondition(FieldKey.fromString("container"), getPipelineCtx().getJob().getContainer().getId(), CompareType.EQUAL);
638-
TableSelector ts = new TableSelector(ti, PageFlowUtil.set("rowid"), filter, null);
639-
if (ts.exists())
640-
{
641-
getPipelineCtx().getLogger().info("Deleting existing QC metrics (probably from prior restarted job)");
642-
ts.getArrayList(Integer.class).forEach(rowid -> {
643-
Table.delete(ti, rowid);
644-
});
645-
}
646-
647-
for (String[] row : metricValues)
648-
{
649-
//TODO
650-
if ("Fastq ID".equals(row[2]) || "Physical library ID".equals(row[2]))
651-
{
652-
continue;
653-
}
654-
655-
Map<String, Object> toInsert = new CaseInsensitiveHashMap<>();
656-
toInsert.put("container", getPipelineCtx().getJob().getContainer().getId());
657-
toInsert.put("createdby", getPipelineCtx().getJob().getUser().getUserId());
658-
toInsert.put("created", new Date());
659-
toInsert.put("readset", model.getReadset());
660-
toInsert.put("analysis_id", model.getRowId());
661-
toInsert.put("dataid", model.getAlignmentFile());
662-
663-
toInsert.put("category", "Cell Ranger");
664-
toInsert.put("metricname", row[4]);
665-
666-
row[5] = row[5].replaceAll(",", ""); //remove commas
667-
Object val = row[5];
668-
if (row[5].contains("%"))
669-
{
670-
row[5] = row[5].replaceAll("%", "");
671-
Double d = ConvertHelper.convert(row[5], Double.class);
672-
d = d / 100.0;
673-
val = d;
674-
}
675-
676-
toInsert.put("metricvalue", val);
677-
678-
Table.insert(getPipelineCtx().getJob().getUser(), ti, toInsert);
679-
totalAdded++;
680-
}
681-
682-
getPipelineCtx().getLogger().info("total metrics added: " + totalAdded);
683-
}
684-
catch (IOException e)
685-
{
686-
throw new PipelineJobException(e);
687-
}
688-
}
689600
}

singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java

Lines changed: 13 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ public CellRangerVDJWrapper(@Nullable Logger logger)
7070
}
7171

7272
public static final String INNER_ENRICHMENT_PRIMERS = "innerEnrichmentPrimers";
73+
public static final String VLOUPE_CATEGORY = "10x VLoupe";
7374

7475
public static class VDJProvider extends AbstractAlignmentStepProvider<AlignmentStep>
7576
{
@@ -527,7 +528,7 @@ private File processOutputsForType(String sampleId, Readset rs, ReferenceGenome
527528
else if (isPrimaryDir)
528529
{
529530
String versionString = "Version: " + getWrapper().getVersionString();
530-
output.addSequenceOutput(outputVloupe, rs.getName() + " 10x VLoupe", "10x VLoupe", rs.getRowId(), null, referenceGenome.getGenomeId(), versionString);
531+
output.addSequenceOutput(outputVloupe, rs.getName() + " 10x VLoupe", VLOUPE_CATEGORY, rs.getRowId(), null, referenceGenome.getGenomeId(), versionString);
531532
}
532533

533534
output.addIntermediateFile(new File(sampleDir, "airr_rearrangement.tsv"));
@@ -678,7 +679,7 @@ public void deleteSymlinks(File localFqDir) throws PipelineJobException
678679
}
679680
}
680681

681-
public void addMetrics(File outDir, AnalysisModel model) throws PipelineJobException
682+
public void addMetrics(File outDir, AnalysisModel model, int dataId) throws PipelineJobException
682683
{
683684
getPipelineCtx().getLogger().debug("adding 10x metrics");
684685

@@ -688,11 +689,6 @@ public void addMetrics(File outDir, AnalysisModel model) throws PipelineJobExcep
688689
throw new PipelineJobException("Unable to find file: " + metrics.getPath());
689690
}
690691

691-
if (model.getAlignmentFile() == null)
692-
{
693-
throw new PipelineJobException("model.getAlignmentFile() was null");
694-
}
695-
696692
try (CSVReader reader = new CSVReader(Readers.getReader(metrics)))
697693
{
698694
String[] line;
@@ -716,7 +712,7 @@ public void addMetrics(File outDir, AnalysisModel model) throws PipelineJobExcep
716712
//NOTE: if this job errored and restarted, we may have duplicate records:
717713
SimpleFilter filter = new SimpleFilter(FieldKey.fromString("readset"), model.getReadset());
718714
filter.addCondition(FieldKey.fromString("analysis_id"), model.getRowId(), CompareType.EQUAL);
719-
filter.addCondition(FieldKey.fromString("dataid"), model.getAlignmentFile(), CompareType.EQUAL);
715+
filter.addCondition(FieldKey.fromString("dataid"), dataId, CompareType.EQUAL);
720716
filter.addCondition(FieldKey.fromString("category"), "Cell Ranger VDJ", CompareType.EQUAL);
721717
filter.addCondition(FieldKey.fromString("container"), getPipelineCtx().getJob().getContainer().getId(), CompareType.EQUAL);
722718
TableSelector ts = new TableSelector(ti, PageFlowUtil.set("rowid"), filter, null);
@@ -741,7 +737,7 @@ public void addMetrics(File outDir, AnalysisModel model) throws PipelineJobExcep
741737
toInsert.put("created", new Date());
742738
toInsert.put("readset", model.getReadset());
743739
toInsert.put("analysis_id", model.getRowId());
744-
toInsert.put("dataid", model.getAlignmentFile());
740+
toInsert.put("dataid", dataId);
745741

746742
toInsert.put("category", "Cell Ranger VDJ");
747743

@@ -784,15 +780,16 @@ public void complete(SequenceAnalysisJobSupport support, AnalysisModel model, Co
784780
throw new PipelineJobException("Expected sequence outputs to be created");
785781
}
786782

787-
File html = outputFilesCreated.stream().filter(x -> "10x Run Summary".equals(x.getCategory())).findFirst().orElseThrow().getFile();
788-
789-
addMetrics(html.getParentFile(), model);
790-
791-
File bam = model.getAlignmentData().getFile();
792-
if (!bam.exists())
783+
SequenceOutputFile outputForData = outputFilesCreated.stream().filter(x -> VLOUPE_CATEGORY.equals(x.getCategory())).findFirst().orElse(null);
784+
if (outputForData == null)
793785
{
794-
getPipelineCtx().getLogger().warn("BAM not found, expected: " + bam.getPath());
786+
outputForData = outputFilesCreated.stream().filter(x -> "10x Run Summary".equals(x.getCategory())).findFirst().orElseThrow();
795787
}
788+
789+
File outsDir = outputForData.getFile().getParentFile();
790+
Integer dataId = outputForData.getDataId();
791+
792+
addMetrics(outsDir, model, dataId);
796793
}
797794

798795
private static final Pattern FILE_PATTERN = Pattern.compile("^(.+?)(_S[0-9]+){0,1}_L(.+?)_(R){0,1}([0-9])(_[0-9]+){0,1}(.*?)(\\.f(ast){0,1}q)(\\.gz)?$");

0 commit comments

Comments (0)