
Commit 1598add

Merge discvr-21.3 to discvr-21.7
2 parents: 5c1bd01 + 1f3470e

15 files changed: +226 additions, -91 deletions

SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/AlignmentStep.java

Lines changed: 19 additions & 3 deletions
@@ -22,6 +22,7 @@
 import org.labkey.api.sequenceanalysis.model.Readset;
 
 import java.io.File;
+import java.util.List;
 
 /**
  * User: bimber
@@ -43,12 +44,27 @@ default public String getIndexCachedDirName(PipelineJob job)
 
     /**
      * Performs a reference guided alignment using the provided files.
-     * @param inputFastq1 The forward FASTQ file
-     * @param inputFastq2 The second FASTQ, if using paired end data
+     * @param inputFastqs1 The forward FASTQ file(s). In most cases this is a single FASTQ. The aligner must return true for canAlignMultiplePairsAtOnce() otherwise.
+     * @param inputFastqs2 The second FASTQ(s), if using paired end data
      * @param basename The basename to use as the output
      * @throws PipelineJobException
      */
-    public AlignmentOutput performAlignment(Readset rs, File inputFastq1, @Nullable File inputFastq2, File outputDirectory, ReferenceGenome referenceGenome, String basename, String readGroupId, @Nullable String platformUnit) throws PipelineJobException;
+    public AlignmentOutput performAlignment(Readset rs, List<File> inputFastqs1, @Nullable List<File> inputFastqs2, File outputDirectory, ReferenceGenome referenceGenome, String basename, String readGroupId, @Nullable String platformUnit) throws PipelineJobException;
+
+    default boolean canAlignMultiplePairsAtOnce()
+    {
+        return false;
+    }
+
+    default File assertSingleFile(List<File> inputs)
+    {
+        if (inputs != null && inputs.size() > 1)
+        {
+            throw new IllegalArgumentException("This aligner only supports a single pair of input FASTQs");
+        }
+
+        return inputs == null ? null : inputs.get(0);
+    }
 
     public boolean doAddReadGroups();
 
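Not part of the commit, but for context: a minimal sketch of how an aligner that only supports one FASTQ pair per alignment might adopt the new List-based signature. The class name SinglePairAligner, the alignOnePair() helper, and the import paths are illustrative assumptions; only performAlignment(), the canAlignMultiplePairsAtOnce() default (false), and assertSingleFile() come from the diff above, and the remaining AlignmentStep methods are omitted.

import java.io.File;
import java.util.List;

import org.jetbrains.annotations.Nullable;
import org.labkey.api.pipeline.PipelineJobException;
import org.labkey.api.sequenceanalysis.model.Readset;
import org.labkey.api.sequenceanalysis.pipeline.AlignmentStep;
import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome;

// Hypothetical aligner that handles a single FASTQ pair per performAlignment() call.
// canAlignMultiplePairsAtOnce() is left at its default (false), so the pipeline should
// not pass more than one pair; assertSingleFile() enforces that defensively.
public class SinglePairAligner implements AlignmentStep
{
    @Override
    public AlignmentOutput performAlignment(Readset rs, List<File> inputFastqs1, @Nullable List<File> inputFastqs2, File outputDirectory, ReferenceGenome referenceGenome, String basename, String readGroupId, @Nullable String platformUnit) throws PipelineJobException
    {
        // Collapse the lists back to single files; throws IllegalArgumentException if more than one pair was supplied
        File inputFastq1 = assertSingleFile(inputFastqs1);
        File inputFastq2 = assertSingleFile(inputFastqs2);

        return alignOnePair(rs, inputFastq1, inputFastq2, outputDirectory, referenceGenome, basename, readGroupId, platformUnit);
    }

    // Placeholder for the tool-specific invocation (bwa, bowtie2, etc.)
    private AlignmentOutput alignOnePair(Readset rs, File fq1, @Nullable File fq2, File outputDirectory, ReferenceGenome referenceGenome, String basename, String readGroupId, @Nullable String platformUnit) throws PipelineJobException
    {
        throw new UnsupportedOperationException("illustrative placeholder");
    }

    // ...other AlignmentStep methods (doAddReadGroups(), getIndexCachedDirName(), etc.) omitted...
}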

SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/ReadsetCreationTask.java

Lines changed: 60 additions & 14 deletions
@@ -57,6 +57,7 @@
 import java.util.Collections;
 import java.util.Date;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -155,6 +156,7 @@ private void importReadsets() throws PipelineJobException
 
         List<SequenceReadsetImpl> newReadsets = new ArrayList<>();
 
+        Set<Integer> fileIdsWithExistingMetrics = new HashSet<>();
         try (DbScope.Transaction transaction = schema.getScope().ensureTransaction())
         {
             TableInfo readsetTable = schema.getTable(SequenceAnalysisSchema.TABLE_READSETS);
@@ -233,7 +235,7 @@ private void importReadsets() throws PipelineJobException
                 }
 
                 //now add readData created in this run:
-                List<ReadDataImpl> readDatas = new ArrayList<>();
+                List<ReadDataImpl> readDatasToCreate = new ArrayList<>();
                 for (ReadDataImpl rd : r.getReadDataImpl())
                 {
                     File f1 = rd.getFile1();
@@ -295,7 +297,7 @@ private void importReadsets() throws PipelineJobException
                     }
 
                     rd.setRunId(runId);
-                    readDatas.add(rd);
+                    readDatasToCreate.add(rd);
                 }
 
                 List<Map<String, Object>> qualMetricsToAdd = new ArrayList<>();
@@ -333,6 +335,8 @@ private void importReadsets() throws PipelineJobException
 
                         qualMetricsToAdd.add(map);
                     });
+
+                    fileIdsWithExistingMetrics.add(rd.getFileId1());
                 });
             }
 
@@ -341,12 +345,12 @@ private void importReadsets() throws PipelineJobException
                 row.setModifiedBy(getJob().getUser().getUserId());
 
                 //then import
-                if (readDatas.isEmpty())
+                if (readDatasToCreate.isEmpty())
                 {
                     getJob().getLogger().info("no reads found for readset: " + r.getName() + ", skipping import");
                     continue;
                 }
-                row.setReadData(readDatas);
+                row.setReadData(readDatasToCreate);
 
                 SequenceReadsetImpl newRow;
                 if (!readsetExists)
@@ -374,8 +378,8 @@ private void importReadsets() throws PipelineJobException
                 }
 
                 //create ReadData
-                getJob().getLogger().debug(readDatas.size() + " file pairs to insert");
-                for (ReadDataImpl rd : readDatas)
+                getJob().getLogger().debug(readDatasToCreate.size() + " file pairs to insert");
+                for (ReadDataImpl rd : readDatasToCreate)
                 {
                     getJob().getLogger().debug("creating read data for readset: " + newRow.getReadsetId());
                     if (newRow.getReadsetId() == null)
@@ -390,16 +394,33 @@ private void importReadsets() throws PipelineJobException
                     rd.setModified(new Date());
 
                     Table.insert(getJob().getUser(), readDataTable, rd);
+                }
 
-                    TableInfo metricsTable = SequenceAnalysisManager.get().getTable(SequenceAnalysisSchema.TABLE_QUALITY_METRICS);
-                    if (!qualMetricsToAdd.isEmpty())
+                TableInfo metricsTable = SequenceAnalysisManager.get().getTable(SequenceAnalysisSchema.TABLE_QUALITY_METRICS);
+                if (!qualMetricsToAdd.isEmpty())
+                {
+                    getJob().getLogger().info("Copying " + qualMetricsToAdd.size() + " quality metrics from pre-existing readdata");
+                    for (Map<String, Object> qm : qualMetricsToAdd)
                     {
-                        getJob().getLogger().info("Copying " + qualMetricsToAdd.size() + " quality metrics from pre-existing readdata");
-                        for (Map<String, Object> qm : qualMetricsToAdd)
-                        {
-                            qm.put("readset", newRow.getReadsetId());
-                            Table.insert(getJob().getUser(), metricsTable, qm);
-                        }
+                        qm.put("readset", newRow.getReadsetId());
+                        Table.insert(getJob().getUser(), metricsTable, qm);
+                    }
+                }
+
+                if (!preexistingReadData.isEmpty())
+                {
+                    getJob().getLogger().debug("Copying " + preexistingReadData.size() + " pre-existing read data for readset: " + newRow.getReadsetId());
+                    for (ReadDataImpl rd : preexistingReadData)
+                    {
+                        rd.setRowid(null);
+                        rd.setReadset(newRow.getReadsetId());
+                        rd.setContainer(getJob().getContainer().getId());
+                        rd.setCreatedBy(getJob().getUser().getUserId());
+                        rd.setCreated(new Date());
+                        rd.setModifiedBy(getJob().getUser().getUserId());
+                        rd.setModified(new Date());
+
+                        Table.insert(getJob().getUser(), readDataTable, rd);
                     }
                 }
             }
@@ -416,6 +437,12 @@ private void importReadsets() throws PipelineJobException
             long totalReads = 0L;
             for (ReadDataImpl d : model.getReadDataImpl())
             {
+                if (fileIdsWithExistingMetrics.contains(d.getFileId1()))
+                {
+                    totalReads += getTotalReadsForFile(d.getFileId1(), model.getReadsetId());
+                    continue;
+                }
+
                 getJob().setStatus(PipelineJob.TaskStatus.running, "CALCULATING QUALITY METRICS (" + idx + " of " + newReadsets.size() + ")");
                 totalReads += addQualityMetricsForReadset(model, d.getFileId1(), getJob());
                 if (d.getFileId2() != null)
@@ -464,6 +491,25 @@ private void runFastqcForFile(Integer fileId) throws PipelineJobException
         }
     }
 
+    private Long getTotalReadsForFile(int fileId, int readsetId)
+    {
+        TableInfo metricsTable = SequenceAnalysisManager.get().getTable(SequenceAnalysisSchema.TABLE_QUALITY_METRICS);
+
+        // Determine if we have saved metrics:
+        SimpleFilter filter = new SimpleFilter(FieldKey.fromString("readset"), readsetId);
+        filter.addCondition(FieldKey.fromString("category"), "Readset");
+        filter.addCondition(FieldKey.fromString("metricname"), "Total Reads");
+        filter.addCondition(FieldKey.fromString("dataid"), fileId);
+
+        TableSelector ts = new TableSelector(metricsTable, PageFlowUtil.set("metricvalue"), filter, null);
+        if (ts.exists())
+        {
+            return ts.getObject(Long.class);
+        }
+
+        return 0L;
+    }
+
     public static long addQualityMetricsForReadset(Readset rs, int fileId, PipelineJob job) throws PipelineJobException
     {
         try
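Also not part of the commit: an illustrative sketch of the quality-metrics row that the SimpleFilter in getTotalReadsForFile() is meant to match, i.e. the "Total Reads" metric copied from a pre-existing readdata record earlier in this task. The class name, map construction, and example value are assumptions; the field names and the "Readset"/"Total Reads" literals come from the diff above.

import java.util.HashMap;
import java.util.Map;

class TotalReadsMetricExample
{
    // Builds a row of the shape matched by getTotalReadsForFile():
    // readset + category + metricname + dataid, with metricvalue read back via ts.getObject(Long.class).
    static Map<String, Object> exampleRow(int readsetId, int fileId1)
    {
        Map<String, Object> row = new HashMap<>();
        row.put("readset", readsetId);       // stamped by the copy loop: qm.put("readset", newRow.getReadsetId())
        row.put("dataid", fileId1);          // file id of the forward FASTQ
        row.put("category", "Readset");
        row.put("metricname", "Total Reads");
        row.put("metricvalue", 1250000L);    // hypothetical read count
        return row;
    }
}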
