Commit 0edd7c9 (parent c4fd53e)

Create an option to import FASTQ data and merge it with an existing readset

5 files changed: +132, -22 lines

SequenceAnalysis/resources/web/SequenceAnalysis/panel/SequenceImportPanel.js (+36, -10)

@@ -36,6 +36,7 @@ Ext4.define('SequenceAnalysis.panel.SequenceImportPanel', {
     {name: 'fileGroupId', allowBlank: false},
     {name: 'readset', allowBlank: false},
     {name: 'readsetname', useNull: true},
+    {name: 'importType', useNull: true},
     {name: 'barcode5', useNull: true},
     {name: 'barcode3', useNull: true},
     {name: 'platform', allowBlank: false},
@@ -794,6 +795,7 @@ Ext4.define('SequenceAnalysis.panel.SequenceImportPanel', {
     barcode3: r.get('barcode3'),
     readset: r.get('readset'),
     readsetname: r.get('readsetname'),
+    importType: r.get('importType'),
     platform: r.get('platform'),
     application: r.get('application'),
     chemistry: r.get('chemistry'),
@@ -1643,6 +1645,7 @@ Ext4.define('SequenceAnalysis.panel.SequenceImportPanel', {
     Ext4.Msg.wait('Loading...');
     var doDemultiplex = this.down('#doDemultiplex').getValue();
     var showBarcodes = this.down('#showBarcodes').getValue();
+    var allowReadsetMerge = this.down('#allowReadsetMerge').getValue();

     LABKEY.Query.selectRows({
         method: 'POST',
@@ -1659,8 +1662,8 @@ Ext4.define('SequenceAnalysis.panel.SequenceImportPanel', {
     if (results && results.rows && results.rows.length){
         Ext4.Array.forEach(results.rows, function(row) {
             var records = recordsById[row.rowid];
-            if (row.totalFiles) {
-                msgs.push('Readset ' + row.rowid + 'has already been associated with files and cannot be re-used. If you would like to reanalyze this readset, load the table of readsets and look for the \'Analyze Data\' button.');
+            if (!allowReadsetMerge && row.totalFiles) {
+                msgs.push('Readset ' + row.rowid + ' has already been associated with files and cannot be re-used. If you would like to reanalyze this readset, load the table of readsets and look for the \'Analyze Data\' button.');
                 Ext4.Array.forEach(records, function(record) {
                     record.data.readset = null;
                 }, this);
@@ -1675,18 +1678,16 @@ Ext4.define('SequenceAnalysis.panel.SequenceImportPanel', {
                 return;
             }
             else if (!doDemultiplex && !showBarcodes && (row.barcode3 || row.barcode5)) {
-                msgs.push('Readset ' + row.rowid + ' has barcodes, but you have not selected to either show barcodes or perform demultiplexing');
-                Ext4.Array.forEach(records, function(record) {
-                    record.data.readset = null;
-                }, this);
-
-                return;
+                this.down('#showBarcodes').setValue(true);
+                showBarcodes = true;
             }

             //update row based on saved readset. avoid firing event
             Ext4.Array.forEach(records, function(record) {
+                var importType = !record.data.readset ? null : row.totalFiles ? 'Merge With Existing' : 'New Data';
                 Ext4.apply(record.data, {
                     readsetname: row.name,
+                    importType: importType,
                     platform: row.platform,
                     application: row.application,
                     chemistry: row.chemistry,
@@ -1716,6 +1717,7 @@ Ext4.define('SequenceAnalysis.panel.SequenceImportPanel', {
                 Ext4.apply(record.data, {
                     readset: null,
                     readsetname: null,
+                    importType: null,
                     platform: null,
                     application: null,
                     chemistry: null,
@@ -1866,7 +1868,7 @@ Ext4.define('SequenceAnalysis.panel.SequenceImportPanel', {

     var grid = btn.up('panel').down('ldk-gridpanel');
     Ext4.Array.forEach(grid.columns, function(c){
-        if (c.dataIndex === 'readset'){
+        if (c.dataIndex === 'readset' || c.dataIndex === 'importType'){
             c.setVisible(val);
         }
     }, this);
@@ -1881,12 +1883,18 @@ Ext4.define('SequenceAnalysis.panel.SequenceImportPanel', {
     hideMode: 'offsets',
     width: 'auto',
     itemId: 'existingReadsetOptions',
+    border: false,
     fieldDefaults: {
         width: 350
     },
     items: [{
         html: 'It is possible to import readset information before you import the actual read data. This is most commonly done when you plan a run upfront (such as the Illumina workflow). If you did this, just enter the readset Id below and the details will automatically populate. Note: when this option is selected the readset details are not editable through this form.',
         border: false
+    },{
+        itemId: 'allowReadsetMerge',
+        xtype: 'checkbox',
+        fieldLabel: 'Allow Merge If Existing Readset Has Data',
+        helpPopup: 'If there is already data for the selected readset(s), the original readset will be copied (same attributes), and a new readset will be created using these reads and the original data. The original readset will remain.'
     }]
 },{
     xtype: 'ldk-gridpanel',
@@ -1964,6 +1972,7 @@ Ext4.define('SequenceAnalysis.panel.SequenceImportPanel', {
     if (records && records.length) {
         records[0].set({
             readset: null,
+            importType: null,
             readsetname: null,
             platform: null,
             application: null,
@@ -1989,6 +1998,18 @@ Ext4.define('SequenceAnalysis.panel.SequenceImportPanel', {
             }
         }
     }
+}, {
+    text: 'Import Status',
+    tdCls: 'ldk-wrap-text',
+    name: 'importType',
+    hidden: true,
+    dataIndex: 'importType',
+    width: 110,
+    editable: true,
+    editor: {
+        xtype: 'displayfield',
+        allowBlank: true
+    }
 },{
     text: '5\' Barcode',
     tdCls: 'ldk-wrap-text',
@@ -2615,7 +2636,12 @@ Ext4.define('SequenceAnalysis.panel.SequenceImportPanel', {
     }

     if (obj.readset){
-        readsetsToUpdate.push(obj.readset);
+        if (!Ext4.isNumeric(obj.readset)) {
+            errors.push('Readset Id should be an integer: ' + obj.readset);
+        }
+        else {
+            readsetsToUpdate.push(obj.readset);
+        }
     }
 }, this);

SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceReadsetImpl.java (+5, -0)

@@ -72,6 +72,11 @@ public int getRowId()
         return _rowId == null ? 0 : _rowId;
     }

+    public void unsetRowId()
+    {
+        _rowId = null;
+    }
+
     public void setRowId(int rowId)
     {
         _rowId = rowId;
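
Note: this new unsetRowId() hook is what enables the clone-and-merge path in ReadsetCreationTask below. Clearing the primary key makes a later Table.insert perform a true INSERT rather than an UPDATE, so the saved bean becomes a brand-new readset row. A minimal sketch of the pattern (illustrative only; the helper name and surrounding context are assumptions, not verbatim module code):

    // Sketch: clone a loaded readset bean by clearing its PK before insert.
    // Assumes LabKey's Table.insert() assigns a fresh rowid when the bean's PK is null.
    private SequenceReadsetImpl cloneReadset(SequenceReadsetImpl original, User user, TableInfo readsetTable)
    {
        original.unsetRowId();                       // forget the old PK; the next insert creates a new row
        original.setCreated(new Date());             // reset audit columns for the new record
        original.setCreatedBy(user.getUserId());
        original.setModified(new Date());
        original.setModifiedBy(user.getUserId());
        return Table.insert(user, readsetTable, original);  // returned bean carries the newly assigned rowid
    }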

SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/ReadsetCreationTask.java (+86, -11)

@@ -17,10 +17,14 @@

 import au.com.bytecode.opencsv.CSVReader;
 import org.jetbrains.annotations.NotNull;
+import org.labkey.api.collections.CaseInsensitiveHashMap;
+import org.labkey.api.data.CompareType;
 import org.labkey.api.data.DbSchema;
 import org.labkey.api.data.DbScope;
+import org.labkey.api.data.SimpleFilter;
 import org.labkey.api.data.Table;
 import org.labkey.api.data.TableInfo;
+import org.labkey.api.data.TableSelector;
 import org.labkey.api.exp.api.ExpData;
 import org.labkey.api.exp.api.ExpProtocol;
 import org.labkey.api.exp.api.ExpRun;
@@ -31,9 +35,12 @@
 import org.labkey.api.pipeline.PipelineJobException;
 import org.labkey.api.pipeline.RecordedAction;
 import org.labkey.api.pipeline.RecordedActionSet;
+import org.labkey.api.query.FieldKey;
 import org.labkey.api.reader.Readers;
+import org.labkey.api.sequenceanalysis.SequenceAnalysisService;
 import org.labkey.api.sequenceanalysis.model.Readset;
 import org.labkey.api.util.FileType;
+import org.labkey.api.util.PageFlowUtil;
 import org.labkey.sequenceanalysis.ReadDataImpl;
 import org.labkey.sequenceanalysis.SequenceAnalysisManager;
 import org.labkey.sequenceanalysis.SequenceAnalysisSchema;
@@ -44,12 +51,15 @@

 import java.io.File;
 import java.io.IOException;
+import java.sql.SQLException;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collections;
 import java.util.Date;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;

 /**
  * User: bbimber
@@ -153,12 +163,21 @@ private void importReadsets() throws PipelineJobException
 for (Readset rs : getPipelineJob().getSequenceSupport().getCachedReadsets())
 {
     SequenceReadsetImpl r = (SequenceReadsetImpl)rs;
-    boolean updateExisting = r.getReadsetId() != null && r.getReadsetId() > 0;
-
     getJob().getLogger().info("Starting readset " + r.getName());

+    boolean readsetExists = r.getReadsetId() != null && r.getReadsetId() > 0;
+    List<ReadDataImpl> preexistingReadData;
+    if (readsetExists)
+    {
+        preexistingReadData = ((SequenceReadsetImpl)SequenceAnalysisService.get().getReadset(r.getReadsetId(), getJob().getUser())).getReadDataImpl();
+    }
+    else
+    {
+        preexistingReadData = Collections.emptyList();
+    }
+
     SequenceReadsetImpl row;
-    if (!updateExisting)
+    if (!readsetExists)
     {
         row = new SequenceReadsetImpl();

@@ -195,18 +214,25 @@ private void importReadsets() throws PipelineJobException
         throw new PipelineJobException("Readset lacks a rowid: " + r.getReadsetId());
     }

-    if (row.getReadData() != null && !row.getReadData().isEmpty())
+    if (row.getInstrumentRunId() == null)
     {
-        throw new PipelineJobException("Readset already has data imported: " + row.getReadsetId());
+        row.setInstrumentRunId(r.getInstrumentRunId());
     }

-    if (row.getInstrumentRunId() == null)
+    if (!preexistingReadData.isEmpty())
     {
-        row.setInstrumentRunId(r.getInstrumentRunId());
+        getJob().getLogger().debug("Existing readset found with " + preexistingReadData.size() + " read pairs, will clone and merge data");
+
+        row.unsetRowId();
+        row.setCreatedBy(getJob().getUser().getUserId());
+        row.setCreated(new Date());
+        row.setModifiedBy(getJob().getUser().getUserId());
+        row.setModified(new Date());
+        readsetExists = false;
     }
 }

-//now add readData
+//now add readData created in this run:
 List<ReadDataImpl> readDatas = new ArrayList<>();
 for (ReadDataImpl rd : r.getReadDataImpl())
 {
@@ -269,10 +295,47 @@ private void importReadsets() throws PipelineJobException
 }

 rd.setRunId(runId);
-
 readDatas.add(rd);
 }

+List<Map<String, Object>> qualMetricsToAdd = new ArrayList<>();
+if (!preexistingReadData.isEmpty())
+{
+    preexistingReadData.forEach(rd -> {
+        rd.setRowid(null);
+        rd.setReadset(null);
+
+        SimpleFilter filter = new SimpleFilter(FieldKey.fromString("category"), "Readset", CompareType.EQUAL);
+        filter.addCondition(FieldKey.fromString("readset"), r.getRowId());
+        if (rd.getFile2() == null)
+        {
+            filter.addCondition(FieldKey.fromString("dataid"), rd.getFileId1(), CompareType.EQUAL);
+        }
+        else
+        {
+            filter.addCondition(FieldKey.fromString("dataid"), Arrays.asList(rd.getFileId1(), rd.getFileId2()), CompareType.IN);
+        }
+
+        final Set<String> fields = PageFlowUtil.set("dataid", "category", "metricname", "metricvalue", "qualvalue", "comment", "container", "created", "createdby", "modified", "modifiedby");
+        new TableSelector(SequenceAnalysisSchema.getTable(SequenceAnalysisSchema.TABLE_QUALITY_METRICS), fields, filter, null).forEachResults(results -> {
+            Map<String, Object> map = new CaseInsensitiveHashMap<>();
+
+            fields.stream().forEach(f -> {
+                try
+                {
+                    map.put(f, results.getObject(FieldKey.fromString(f)));
+                }
+                catch (SQLException e)
+                {
+                    throw new RuntimeException(e);
+                }
+            });
+
+            qualMetricsToAdd.add(map);
+        });
+    });
+}
+
 row.setRunId(runId);
 row.setModified(new Date());
 row.setModifiedBy(getJob().getUser().getUserId());
@@ -286,7 +349,7 @@ private void importReadsets() throws PipelineJobException
 row.setReadData(readDatas);

 SequenceReadsetImpl newRow;
-if (!readsetExists)
+if (!readsetExists)
 {
     newRow = Table.insert(getJob().getUser(), readsetTable, row);
     getJob().getLogger().info("Created readset: " + newRow.getReadsetId());
@@ -327,6 +390,17 @@ private void importReadsets() throws PipelineJobException
 rd.setModified(new Date());

 Table.insert(getJob().getUser(), readDataTable, rd);
+
+TableInfo metricsTable = SequenceAnalysisManager.get().getTable(SequenceAnalysisSchema.TABLE_QUALITY_METRICS);
+if (!qualMetricsToAdd.isEmpty())
+{
+    getJob().getLogger().info("Copying " + qualMetricsToAdd.size() + " quality metrics from pre-existing readdata");
+    for (Map<String, Object> qm : qualMetricsToAdd)
+    {
+        qm.put("readset", newRow.getReadsetId());
+        Table.insert(getJob().getUser(), metricsTable, qm);
+    }
+}
 }
 }

@@ -417,6 +491,7 @@ public static long addQualityMetricsForReadset(Readset rs, int fileId, PipelineJ
     metricsMap = FastqUtils.getQualityMetrics(d.getFile(), job.getLogger());
 }

+TableInfo metricsTable = SequenceAnalysisManager.get().getTable(SequenceAnalysisSchema.TABLE_QUALITY_METRICS);
 for (String metricName : metricsMap.keySet())
 {
     Map<String, Object> r = new HashMap<>();
@@ -428,7 +503,7 @@ public static long addQualityMetricsForReadset(Readset rs, int fileId, PipelineJ
 r.put("container", rs.getContainer() == null ? job.getContainer() : rs.getContainer());
 r.put("createdby", job.getUser().getUserId());

-Table.insert(job.getUser(), SequenceAnalysisManager.get().getTable(SequenceAnalysisSchema.TABLE_QUALITY_METRICS), r);
+Table.insert(job.getUser(), metricsTable, r);
 }

 if (cachedMetrics.exists())
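
Taken together, the hunks above implement the merge as follows: if the selected readset already has read data, the task clones it (PK cleared, audit fields reset) instead of appending, attaches both the pre-existing and the newly imported read data to the clone, and copies the per-file quality metrics across, leaving the original readset untouched. A condensed, illustrative restatement of that decision flow (simplified; `user` stands in for getJob().getUser(); not the verbatim source):

    boolean readsetExists = r.getReadsetId() != null && r.getReadsetId() > 0;
    List<ReadDataImpl> preexisting = readsetExists
            ? ((SequenceReadsetImpl) SequenceAnalysisService.get().getReadset(r.getReadsetId(), user)).getReadDataImpl()
            : Collections.emptyList();

    if (readsetExists && !preexisting.isEmpty())
    {
        // Never append to a readset that already has data: clone it so the original
        // is left intact and the clone ends up holding old reads + new reads.
        row.unsetRowId();             // force an INSERT rather than an UPDATE
        row.setCreated(new Date());
        row.setCreatedBy(user.getUserId());
        readsetExists = false;        // downstream logic now takes the create-new path
    }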

SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/ReadsetInitTask.java (+1, -1)

@@ -70,7 +70,7 @@ public class ReadsetInitTask extends WorkDirectoryTask<ReadsetInitTask.Factory>
     private FileAnalysisJobSupport _support;

     private static final String ACTION_NAME = "IMPORTING READSET";
-    private static String COMPRESS_INPUT_ACTIONNAME = "Compressing FASTQ Files";
+    private static final String COMPRESS_INPUT_ACTIONNAME = "Compressing FASTQ Files";

     protected ReadsetInitTask(Factory factory, PipelineJob job)
     {

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/util/FastqcRunner.java (+4, -0)

@@ -122,6 +122,10 @@ public String execute(List<File> sequenceFiles, @Nullable Map<File, String> file

         filesCreated.add(expectedHtml);
     }
+    else
+    {
+        _logger.debug("Existing FASTQC output found, will not re-run");
+    }
 }

 return processOutput(uniqueFiles, filesCreated, fileLabels);
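
The added else branch makes the FastQC step idempotent: when the expected report already exists, the runner logs and skips the expensive re-run. Roughly, the surrounding logic looks like this (an illustrative sketch; runFastqc and the output-name convention are assumptions, not the module's actual helpers):

    File expectedHtml = new File(outputDir, baseName + "_fastqc.html");
    if (!expectedHtml.exists())
    {
        runFastqc(fastq, outputDir);           // expensive step, run only when output is missing
        filesCreated.add(expectedHtml);
    }
    else
    {
        _logger.debug("Existing FASTQC output found, will not re-run");
    }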
