Skip to content

Commit 7306b2d

Browse files
authored
Major refactor of underlying SBT SQL queries (#381)
1 parent d356f84 commit 7306b2d

13 files changed

+479
-122
lines changed

SequenceAnalysis/resources/queries/sequenceanalysis/alignment_summary_by_lineage.query.xml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,14 @@
99
<isKeyField>true</isKeyField>
1010
<isHidden>true</isHidden>
1111
</column>
12+
<!-- analysis_id is declared as a foreign key to the rowid column of sequenceanalysis.sequence_analyses. -->
<column columnName="analysis_id">
13+
<columnTitle>Analysis Id</columnTitle>
14+
<fk>
15+
<fkDbSchema>sequenceanalysis</fkDbSchema>
16+
<fkTable>sequence_analyses</fkTable>
17+
<fkColumnName>rowid</fkColumnName>
18+
</fk>
19+
</column>
1220
<!-- lineages column: only a display width (150) is configured here. -->
<column columnName="lineages">
1321
<displayWidth>150</displayWidth>
1422
</column>
Lines changed: 43 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -1,51 +1,45 @@
1-
-- Previous version (removed lines, marked N-): takes an AnalysisId parameter and
-- groups alignment_summary rows by analysis, lineage list and locus list.
PARAMETERS(AnalysisId INTEGER)
2-
3-
select
4-
(CAST(AnalysisId as varchar) || '<>' || a.lineages) as key,
5-
a.analysis_id,
6-
a.lineages,
7-
max(a.totalLineages) as totalLineages,
8-
a.loci,
9-
10-
sum(a.total) as total,
11-
max(a.total_reads) as total_reads,
12-
round(100 * (cast(sum(a.total) as float) / cast(max(a.total_reads) as float)), 2) as percent,
13-
group_concat(distinct a.haplotypesWithAllele) as haplotypesWithAllele,
14-
15-
-- Per-locus read total, computed with a correlated subquery over
-- alignment_summary and alignment_summary_junction.
CAST((select sum(s.total) as total FROM sequenceanalysis.alignment_summary s WHERE s.analysis_id = AnalysisId AND s.rowid IN (
16-
SELECT distinct asj.alignment_id from sequenceanalysis.alignment_summary_junction asj WHERE asj.analysis_id = AnalysisId AND asj.ref_nt_id.locus = a.loci and asj.status = true
17-
)
18-
) as integer) as total_reads_from_locus,
19-
20-
round(100 * (cast(sum(a.total) as float) / cast((select sum(s.total) as total FROM sequenceanalysis.alignment_summary s WHERE s.analysis_id = AnalysisId AND s.rowid IN (
21-
SELECT distinct asj.alignment_id from sequenceanalysis.alignment_summary_junction asj WHERE asj.analysis_id = AnalysisId AND asj.ref_nt_id.locus = a.loci and asj.status = true
22-
)
23-
) as float)), 2) as percent_from_locus,
24-
group_concat(distinct a.rowid, ',') as rowids
1+
-- New version (added lines, marked N+): the outer SELECT passes through every
-- column of subquery t and adds percent_from_locus.
SELECT
2+
t.*,
3+
-- percent_from_locus = 100 * total_reads / total_reads_from_locus, rounded to
-- 2 places; 0 when total_reads_from_locus is 0.
-- NOTE(review): a NULL total_reads_from_locus does not match the = 0 test and
-- falls through to the ELSE branch - confirm that outcome is intended.
CASE WHEN t.total_reads_from_locus = 0 THEN 0 ELSE round(100 * (cast(t.total_reads as float) / cast(t.total_reads_from_locus as float)), 2) END as percent_from_locus,
254

265
FROM (
27-
28-
select
29-
a.analysis_id,
30-
a.rowid,
31-
32-
group_concat(distinct coalesce(j.ref_nt_id.lineage, j.ref_nt_id.name), chr(10)) as lineages,
33-
count(distinct j.ref_nt_id.lineage) as totalLineages,
34-
group_concat(distinct coalesce(j.ref_nt_id.locus, j.ref_nt_id.name), chr(10)) as loci,
35-
36-
total,
37-
cast((select sum(total) as total FROM sequenceanalysis.alignment_summary s WHERE s.analysis_id = AnalysisId) as integer) as total_reads,
38-
group_concat(distinct hs.haplotype, chr(10)) as haplotypesWithAllele
39-
40-
from sequenceanalysis.alignment_summary a
41-
join sequenceanalysis.alignment_summary_junction j ON (j.analysis_id = AnalysisId AND j.alignment_id = a.rowid and j.status = true)
42-
left join sequenceanalysis.haplotype_sequences hs ON ((
43-
(hs.name = j.ref_nt_id.lineage AND hs.type = 'Lineage') OR
44-
(hs.name = j.ref_nt_id.name AND hs.type = 'Allele')
45-
) AND hs.haplotype.datedisabled IS NULL)
46-
WHERE a.analysis_id = AnalysisId
47-
group by a.analysis_id, a.rowid, a.total
48-
49-
) a
50-
51-
GROUP BY a.analysis_id, a.lineages, a.loci
6+
SELECT
7+
-- Row key: analysis_id cast to varchar, then the separator <>, then the lineage list.
(CAST(a.analysis_id as varchar) || '<>' || a.lineages) as key,
8+
a.analysis_id,
9+
max(a.lineages) as lineages,
10+
coalesce(max(a.totalLineages), 0) as totalLineages,
11+
a.loci,
12+
13+
sum(a.total) as total_reads,
14+
max(a.total_reads_in_analysis) as total_reads_in_analysis,
15+
-- percent = 100 * sum(total) / max(total_reads_in_analysis), rounded to 2 places;
-- 0 when the analysis-wide total is 0.
CASE WHEN max(a.total_reads_in_analysis) = 0 THEN 0 ELSE round(100 * (cast(sum(a.total) as float) / cast(max(a.total_reads_in_analysis) as float)), 2) END as percent,
16+
17+
group_concat(a.rowid, ',') as rowids,
18+
group_concat(distinct a.haplotypesWithAllele) as haplotypesWithAllele,
19+
20+
max(a.total_reads_from_locus) as total_reads_from_locus,
21+
max(a.lastModified) as lastModified,
22+
count(distinct a.rowid) as nAlignments
23+
24+
FROM (
25+
26+
-- Inner level: one row per (analysis_id, rowid, total) group of
-- alignment_summary_combined; lineage, locus and haplotype values are
-- concatenated with chr(10) as the separator.
select
27+
ac.analysis_id,
28+
ac.rowid,
29+
30+
group_concat(distinct coalesce(ac.lineage, ac.ntName), chr(10)) as lineages,
31+
count(distinct ac.lineage) as totalLineages,
32+
group_concat(distinct coalesce(ac.locus, ac.ntName), chr(10)) as loci,
33+
34+
group_concat(distinct haplotypesWithAllele, chr(10)) as haplotypesWithAllele,
35+
36+
max(ac.total) as total,
37+
max(ac.total_reads_in_analysis) as total_reads_in_analysis,
38+
max(ac.total_reads_from_locus) as total_reads_from_locus,
39+
max(ac.modified) as lastModified
40+
from sequenceanalysis.alignment_summary_combined ac
41+
group by ac.analysis_id, ac.rowid, ac.total
42+
) a
43+
44+
-- Middle level: groups the per-alignment rows by analysis, lineage list and locus list.
GROUP BY a.analysis_id, a.lineages, a.loci
45+
) t

SequenceAnalysis/resources/queries/sequenceanalysis/alignment_summary_by_lineage/.qview.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@
1919
</properties>
2020
</column>
2121
<!-- Column list of this view: the total entry is removed and total_reads_in_analysis is added after total_reads. -->
<column name="totalLineages" />
22-
<column name="total" />
2322
<column name="total_reads" />
23+
<column name="total_reads_in_analysis" />
2424
<column name="percent"/>
2525
<column name="total_reads_from_locus" />
2626
<column name="percent_from_locus"/>

SequenceAnalysis/resources/queries/sequenceanalysis/alignment_summary_by_lineage/With Haplotype Matches.qview.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@
1919
</properties>
2020
</column>
2121
<!-- Column list of this view: the total entry is removed and total_reads_in_analysis is added after total_reads. -->
<column name="totalLineages" />
22-
<column name="total" />
2322
<column name="total_reads" />
23+
<column name="total_reads_in_analysis" />
2424
<column name="percent"/>
2525
<column name="total_reads_from_locus" />
2626
<column name="percent_from_locus"/>
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
-- Declares the AnalysisId query parameter; every alignment_summary and
-- alignment_summary_junction lookup below is filtered by it.
PARAMETERS(AnalysisId INTEGER)
2+
3+
select
4+
-- Row key: the AnalysisId parameter cast to varchar, then the separator <>, then the lineage list.
(CAST(AnalysisId as varchar) || '<>' || a.lineages) as key,
5+
a.analysis_id,
6+
a.lineages,
7+
max(a.totalLineages) as totalLineages,
8+
a.loci,
9+
10+
sum(a.total) as total,
11+
max(a.total_reads) as total_reads,
12+
-- percent = 100 * sum(total) / max(total_reads), rounded to 2 places.
-- NOTE(review): there is no guard here for a zero or NULL denominator.
round(100 * (cast(sum(a.total) as float) / cast(max(a.total_reads) as float)), 2) as percent,
13+
group_concat(distinct a.haplotypesWithAllele) as haplotypesWithAllele,
14+
15+
-- Sum of total over alignments of this analysis that have a status = true
-- junction row whose ref_nt_id.locus equals a.loci.
CAST((select sum(s.total) as total FROM sequenceanalysis.alignment_summary s WHERE s.analysis_id = AnalysisId AND s.rowid IN (
16+
SELECT distinct asj.alignment_id from sequenceanalysis.alignment_summary_junction asj WHERE asj.analysis_id = AnalysisId AND asj.ref_nt_id.locus = a.loci and asj.status = true
17+
)
18+
) as integer) as total_reads_from_locus,
19+
20+
-- The same per-locus sum is repeated as the denominator of percent_from_locus.
round(100 * (cast(sum(a.total) as float) / cast((select sum(s.total) as total FROM sequenceanalysis.alignment_summary s WHERE s.analysis_id = AnalysisId AND s.rowid IN (
21+
SELECT distinct asj.alignment_id from sequenceanalysis.alignment_summary_junction asj WHERE asj.analysis_id = AnalysisId AND asj.ref_nt_id.locus = a.loci and asj.status = true
22+
)
23+
) as float)), 2) as percent_from_locus,
24+
group_concat(distinct a.rowid, ',') as rowids
25+
26+
FROM (
27+
28+
-- Inner level: one row per alignment_summary row of the analysis, with lineage,
-- locus and haplotype values concatenated using chr(10) as the separator.
select
29+
a.analysis_id,
30+
a.rowid,
31+
32+
group_concat(distinct coalesce(j.ref_nt_id.lineage, j.ref_nt_id.name), chr(10)) as lineages,
33+
count(distinct j.ref_nt_id.lineage) as totalLineages,
34+
group_concat(distinct coalesce(j.ref_nt_id.locus, j.ref_nt_id.name), chr(10)) as loci,
35+
36+
total,
37+
-- Total of all alignment_summary.total values for the analysis.
cast((select sum(total) as total FROM sequenceanalysis.alignment_summary s WHERE s.analysis_id = AnalysisId) as integer) as total_reads,
38+
group_concat(distinct hs.haplotype, chr(10)) as haplotypesWithAllele
39+
40+
from sequenceanalysis.alignment_summary a
41+
-- Inner join: alignments with no status = true junction row for this analysis are dropped.
join sequenceanalysis.alignment_summary_junction j ON (j.analysis_id = AnalysisId AND j.alignment_id = a.rowid and j.status = true)
42+
-- Haplotype rows are matched by lineage name or by allele name, and only when
-- hs.haplotype.datedisabled is NULL.
left join sequenceanalysis.haplotype_sequences hs ON ((
43+
(hs.name = j.ref_nt_id.lineage AND hs.type = 'Lineage') OR
44+
(hs.name = j.ref_nt_id.name AND hs.type = 'Allele')
45+
) AND hs.haplotype.datedisabled IS NULL)
46+
WHERE a.analysis_id = AnalysisId
47+
group by a.analysis_id, a.rowid, a.total
48+
49+
) a
50+
51+
-- Outer level: one output row per analysis, lineage list and locus list.
GROUP BY a.analysis_id, a.lineages, a.loci
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
<!-- Query metadata for alignment_summary_combined: sets the table title and supplies titles and descriptions for the total_forward, total_reverse and valid_pairs columns. -->
<query xmlns="http://labkey.org/data/xml/query">
2+
<metadata>
3+
<tables xmlns="http://labkey.org/data/xml">
4+
<table tableName="alignment_summary_combined" tableDbType="TABLE">
5+
<tableTitle>Alignment Summary, Combined</tableTitle>
6+
<columns>
7+
<column columnName="total_forward">
8+
<columnTitle>Total First-Mate Reads</columnTitle>
9+
<description>The column shows the number of alignments that contain a first-mate or singleton read</description>
10+
</column>
11+
<column columnName="total_reverse">
12+
<columnTitle>Total Second-Mate Reads</columnTitle>
13+
<description>The column shows the number of alignments that contain a second-mate read</description>
14+
</column>
15+
<column columnName="valid_pairs">
16+
<columnTitle>Total Valid Pairs</columnTitle>
17+
<description>The column shows the number of alignments that contain valid paired reads (both forward and reverse)</description>
18+
</column>
19+
</columns>
20+
</table>
21+
</tables>
22+
</metadata>
23+
</query>

SequenceAnalysis/resources/queries/sequenceanalysis/alignment_summary_grouped.query.xml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,14 @@
66
<tableTitle>Alignment Summary</tableTitle>
77
<pkColumnName>rowids</pkColumnName>
88
<columns>
9+
<!-- analysis_id is declared as a foreign key to the rowid column of sequenceanalysis.sequence_analyses. -->
<column columnName="analysis_id">
10+
<columnTitle>Analysis Id</columnTitle>
11+
<fk>
12+
<fkDbSchema>sequenceanalysis</fkDbSchema>
13+
<fkTable>sequence_analyses</fkTable>
14+
<fkColumnName>rowid</fkColumnName>
15+
</fk>
16+
</column>
917
<column columnName="rowids">
1018
<columnTitle>RowIds</columnTitle>
1119
<isHidden>true</isHidden>
Lines changed: 53 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -1,70 +1,55 @@
1-
-- Previous version (removed lines, marked N-): takes an AnalysisId parameter and
-- groups alignment_summary rows by analysis, allele list, allele id list and locus list.
PARAMETERS(AnalysisId INTEGER)
2-
3-
select
4-
a.analysis_id,
5-
a.alleles,
6-
a.alleleIds,
7-
max(a.lineages) as lineages,
8-
coalesce(max(a.totalLineages), 0) as totalLineages,
9-
a.loci,
10-
11-
sum(a.total) as total_reads,
12-
sum(a.total_forward) as total_forward,
13-
sum(a.total_reverse) as total_reverse,
14-
sum(a.valid_pairs) as valid_pairs,
15-
max(cast(a.total_reads as integer)) as total_reads_in_analysis,
16-
--max(a.loci_total_reads) as total_reads_in_analysis_from_locus,
17-
CASE WHEN max(a.total_reads) = 0 THEN 0 ELSE round(100 * (cast(sum(a.total) as float) / cast(max(a.total_reads) as float)), 2) END as percent,
18-
-- case
19-
-- when (cast(sum(a.total) as float) / cast(max(a.total_reads) as float)) >= .04 THEN 'Major'
20-
-- else 'Minor'
21-
-- end as category,
22-
23-
group_concat(a.rowid, ',') as rowids,
24-
group_concat(distinct a.haplotypesWithAllele) as haplotypesWithAllele,
25-
26-
-- Per-locus read total, computed with a correlated subquery over
-- alignment_summary and alignment_summary_junction.
CAST((select sum(s.total) as total FROM sequenceanalysis.alignment_summary s WHERE s.analysis_id = AnalysisId AND s.rowid IN (
27-
SELECT distinct asj.alignment_id from sequenceanalysis.alignment_summary_junction asj WHERE asj.analysis_id = AnalysisId AND asj.ref_nt_id.locus = a.loci and asj.status = true
28-
)
29-
) as INTEGER) as total_reads_from_locus,
30-
31-
-- When the group has no lineage values the analysis-wide total is used as the
-- denominator instead of the per-locus sum.
round(100 * (cast(sum(a.total) as float) / CASE WHEN count(a.lineages) = 0 THEN max(a.total_reads) ELSE cast((select sum(s.total) as total FROM sequenceanalysis.alignment_summary s WHERE s.analysis_id = AnalysisId AND s.rowid IN (
32-
SELECT distinct asj.alignment_id from sequenceanalysis.alignment_summary_junction asj WHERE asj.analysis_id = AnalysisId AND asj.ref_nt_id.locus = a.loci and asj.status = true
33-
)
34-
) as float) END), 2) as percent_from_locus,
35-
max(lastModified) as lastModified,
36-
count(distinct a.rowid) as nAlignments,
37-
max(a.nloci) as nLoci
1+
-- New version (added lines, marked N+): the outer SELECT passes through every
-- column of subquery t and adds percent_from_locus.
SELECT
2+
t.*,
3+
-- percent_from_locus = 100 * total_reads / total_reads_from_locus, rounded to
-- 2 places; 0 when total_reads_from_locus is 0.
-- NOTE(review): a NULL total_reads_from_locus does not match the = 0 test and
-- falls through to the ELSE branch - confirm that outcome is intended.
CASE WHEN t.total_reads_from_locus = 0 THEN 0 ELSE round(100 * (cast(t.total_reads as float) / cast(t.total_reads_from_locus as float)), 2) END as percent_from_locus,
384

395
FROM (
40-
41-
select
42-
a.analysis_id,
43-
a.rowid,
44-
45-
group_concat(distinct j.ref_nt_id) as alleleIds,
46-
group_concat(distinct j.ref_nt_id.name, chr(10)) as alleles,
47-
group_concat(distinct j.ref_nt_id.lineage, chr(10)) as lineages,
48-
count(distinct j.ref_nt_id.lineage) as totalLineages,
49-
group_concat(distinct j.ref_nt_id.locus, chr(10)) as loci,
50-
count(distinct j.ref_nt_id.locus) as nloci,
51-
group_concat(distinct hs.haplotype, chr(10)) as haplotypesWithAllele,
52-
53-
total,
54-
total_forward,
55-
total_reverse,
56-
valid_pairs,
57-
(select sum(total) as total FROM sequenceanalysis.alignment_summary s WHERE s.analysis_id = AnalysisId) as total_reads,
58-
max(j.modified) as lastModified
59-
from sequenceanalysis.alignment_summary a
60-
left join sequenceanalysis.alignment_summary_junction j ON (j.analysis_id = AnalysisId AND j.alignment_id = a.rowid and j.status = true)
61-
left join sequenceanalysis.haplotype_sequences hs ON ((
62-
(hs.name = j.ref_nt_id.lineage AND hs.type = 'Lineage') OR
63-
(hs.name = j.ref_nt_id.name AND hs.type = 'Allele')
64-
) AND hs.haplotype.datedisabled IS NULL)
65-
WHERE a.analysis_id = AnalysisId
66-
group by a.analysis_id, a.rowid, a.total, total_forward, total_reverse, valid_pairs
67-
68-
) a
69-
70-
GROUP BY a.analysis_id, a.alleles, a.alleleIds, a.loci
6+
SELECT
7+
a.analysis_id,
8+
a.alleles,
9+
a.alleleIds,
10+
max(a.lineages) as lineages,
11+
coalesce(max(a.totalLineages), 0) as totalLineages,
12+
a.loci,
13+
14+
sum(a.total) as total_reads,
15+
sum(a.total_forward) as total_forward,
16+
sum(a.total_reverse) as total_reverse,
17+
sum(a.valid_pairs) as valid_pairs,
18+
max(a.total_reads_in_analysis) as total_reads_in_analysis,
19+
-- percent = 100 * sum(total) / max(total_reads_in_analysis), rounded to 2 places;
-- 0 when the analysis-wide total is 0.
CASE WHEN max(a.total_reads_in_analysis) = 0 THEN 0 ELSE round(100 * (cast(sum(a.total) as float) / cast(max(a.total_reads_in_analysis) as float)), 2) END as percent,
20+
21+
group_concat(a.rowid, ',') as rowids,
22+
group_concat(distinct a.haplotypesWithAllele) as haplotypesWithAllele,
23+
24+
max(a.total_reads_from_locus) as total_reads_from_locus,
25+
max(a.lastModified) as lastModified,
26+
count(distinct a.rowid) as nAlignments,
27+
max(a.nloci) as nLoci
28+
29+
FROM (
30+
31+
-- Inner level: one row per (analysis_id, rowid) of alignment_summary_combined;
-- allele, lineage, locus and haplotype values are concatenated (chr(10) as the
-- separator where one is given) and the read counts are taken with max().
select
32+
ac.analysis_id,
33+
ac.rowid,
34+
35+
group_concat(distinct ac.ref_nt_id) as alleleIds,
36+
group_concat(distinct ac.ntName, chr(10)) as alleles,
37+
group_concat(distinct ac.lineage, chr(10)) as lineages,
38+
count(distinct ac.lineage) as totalLineages,
39+
group_concat(distinct ac.locus, chr(10)) as loci,
40+
count(distinct ac.locus) as nloci,
41+
group_concat(distinct haplotypesWithAllele, chr(10)) as haplotypesWithAllele,
42+
43+
max(ac.total) as total,
44+
max(ac.total_forward) as total_forward,
45+
max(ac.total_reverse) as total_reverse,
46+
max(ac.valid_pairs) as valid_pairs,
47+
max(ac.total_reads_in_analysis) as total_reads_in_analysis,
48+
max(ac.total_reads_from_locus) as total_reads_from_locus,
49+
max(ac.modified) as lastModified
50+
from sequenceanalysis.alignment_summary_combined ac
51+
group by ac.analysis_id, ac.rowid
52+
) a
53+
54+
-- Middle level: groups the per-alignment rows by analysis, allele list, allele id list and locus list.
GROUP BY a.analysis_id, a.alleles, a.alleleIds, a.loci
55+
) t

0 commit comments

Comments
 (0)