@@ -5,10 +5,20 @@
 import org.apache.commons.io.FileUtils;
 import org.apache.commons.lang3.tuple.Pair;
 import org.json.JSONObject;
+import org.labkey.api.collections.CaseInsensitiveHashMap;
+import org.labkey.api.data.CompareType;
+import org.labkey.api.data.ConvertHelper;
+import org.labkey.api.data.DbSchema;
+import org.labkey.api.data.DbSchemaType;
+import org.labkey.api.data.SimpleFilter;
+import org.labkey.api.data.Table;
+import org.labkey.api.data.TableInfo;
+import org.labkey.api.data.TableSelector;
 import org.labkey.api.module.ModuleLoader;
 import org.labkey.api.pipeline.PipelineJob;
 import org.labkey.api.pipeline.PipelineJobException;
 import org.labkey.api.pipeline.RecordedAction;
+import org.labkey.api.query.FieldKey;
 import org.labkey.api.reader.Readers;
 import org.labkey.api.sequenceanalysis.SequenceOutputFile;
 import org.labkey.api.sequenceanalysis.model.Readset;
@@ -20,6 +30,7 @@
 import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor;
 import org.labkey.api.sequenceanalysis.run.SimpleScriptWrapper;
 import org.labkey.api.util.FileUtil;
+import org.labkey.api.util.PageFlowUtil;
 import org.labkey.api.writer.PrintWriters;
 import org.labkey.singlecell.CellHashingServiceImpl;
 import org.labkey.singlecell.SingleCellModule;
@@ -31,6 +42,7 @@
 import java.util.Arrays;
 import java.util.Date;
 import java.util.List;
+import java.util.Map;
 
 public class CellRangerFeatureBarcodeHandler extends AbstractParameterizedOutputHandler<SequenceOutputHandler.SequenceReadsetProcessor>
 {
@@ -164,8 +176,8 @@ else if (rs.getApplication().equals("CITE-Seq")) |
                 List<Pair<File, File>> inputFastqs = new ArrayList<>();
                 rs.getReadData().forEach(rd -> {
                     inputFastqs.add(Pair.of(rd.getFile1(), rd.getFile2()));
-                    output.addIntermediateFile(rd.getFile1(), "Input FASTQ");
-                    output.addIntermediateFile(rd.getFile2(), "Input FASTQ");
+                    action.addInputIfNotPresent(rd.getFile1(), "Input FASTQ");
+                    action.addInputIfNotPresent(rd.getFile2(), "Input FASTQ");
                 });
 
                 List<String> args = wrapper.prepareCountArgs(output, id, ctx.getOutputDir(), rs, inputFastqs, extraArgs, false);
@@ -366,6 +378,124 @@ private File createFeatureRefForHashing(File outputDir, File hashingWhitelist) t
             return featuresCsv;
         }
 
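+        // Invoked after the pipeline job completes: parse Cell Ranger's
+        // metrics_summary.csv and import one quality_metrics row per metric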
+        @Override
+        public void complete(PipelineJob job, List<Readset> readsets, List<SequenceOutputFile> outputsCreated) throws PipelineJobException
+        {
+            if (outputsCreated.isEmpty())
+            {
+                job.getLogger().error("Expected outputs to be created");
+                return;
+            }
+
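+            // Use the first non-summary output (the count matrix); metrics_summary.csv sits beside it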
+            SequenceOutputFile so = null;
+            for (SequenceOutputFile o : outputsCreated)
+            {
+                if ("10x Run Summary".equals(o.getCategory()))
+                {
+                    continue;
+                }
+
+                so = o;
+                break;
+            }
+
+            if (so == null)
+            {
+                throw new PipelineJobException("Unable to find count matrix as output");
+            }
+
+            Readset rs = readsets.get(0);
+
+            File metrics = new File(so.getFile().getParentFile(), "metrics_summary.csv");
+            if (metrics.exists())
+            {
+                job.getLogger().debug("adding 10x metrics");
+                try (CSVReader reader = new CSVReader(Readers.getReader(metrics)))
+                {
+                    String[] line;
+                    String[] header = null;
+                    String[] metricValues = null;
 
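+                    // metrics_summary.csv has a single header row followed by one row of values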
+                    int i = 0;
+                    while ((line = reader.readNext()) != null)
+                    {
+                        if (i == 0)
+                        {
+                            header = line;
+                        }
+                        else
+                        {
+                            metricValues = line;
+                            break;
+                        }
+
+                        i++;
+                    }
+
+                    if (header == null || metricValues == null)
+                    {
+                        job.getLogger().warn("metrics file did not contain a header row and a row of values: " + metrics.getPath());
+                        return;
+                    }
+
+                    TableInfo ti = DbSchema.get("sequenceanalysis", DbSchemaType.Module).getTable("quality_metrics");
+
+                    //NOTE: if this job errored and restarted, we may have duplicate records:
+                    SimpleFilter filter = new SimpleFilter(FieldKey.fromString("readset"), so.getReadset());
+                    filter.addCondition(FieldKey.fromString("dataid"), so.getDataId(), CompareType.EQUAL);
+                    filter.addCondition(FieldKey.fromString("category"), "Cell Ranger", CompareType.EQUAL);  //must match the category inserted below
+                    filter.addCondition(FieldKey.fromString("container"), job.getContainer().getId(), CompareType.EQUAL);
+                    TableSelector ts = new TableSelector(ti, PageFlowUtil.set("rowid"), filter, null);
+                    if (ts.exists())
+                    {
+                        job.getLogger().info("Deleting existing QC metrics (probably from prior restarted job)");
+                        ts.getArrayList(Integer.class).forEach(rowid -> {
+                            Table.delete(ti, rowid);
+                        });
+                    }
+
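+                    // Insert one quality_metrics row per column of the metrics file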
+                    for (int j = 0; j < header.length; j++)
+                    {
+                        Map<String, Object> toInsert = new CaseInsensitiveHashMap<>();
+                        toInsert.put("container", job.getContainer().getId());
+                        toInsert.put("createdby", job.getUser().getUserId());
+                        toInsert.put("created", new Date());
+                        toInsert.put("readset", rs.getReadsetId());
+                        toInsert.put("dataid", so.getDataId());
+
+                        toInsert.put("category", "Cell Ranger");
+                        toInsert.put("metricname", header[j]);
+
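+                        // Strip thousands separators; store percentages as fractions (0-1)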
+                        metricValues[j] = metricValues[j].replaceAll(",", "");
+                        Object val = metricValues[j];
+                        if (metricValues[j].contains("%"))
+                        {
+                            metricValues[j] = metricValues[j].replaceAll("%", "");
+                            Double d = ConvertHelper.convert(metricValues[j], Double.class);
+                            d = d / 100.0;
+                            val = d;
+                        }
+
+                        toInsert.put("metricvalue", val);
+
+                        Table.insert(job.getUser(), ti, toInsert);
+                    }
+                }
+                catch (IOException e)
+                {
+                    throw new PipelineJobException(e);
+                }
+            }
+            else
+            {
+                job.getLogger().warn("unable to find metrics file: " + metrics.getPath());
+            }
+        }
     }
 }