Skip to content

Commit 2f7f15b

Browse files
committed
Expand ETL/presentation of SIV data
1 parent b320081 commit 2f7f15b

File tree

11 files changed

+202
-41
lines changed

11 files changed

+202
-41
lines changed

SivStudies/resources/queries/study/demographics/Expanded.qview.xml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,12 @@
66
<column name="calculated_status"/>
77
<column name="birth"/>
88
<column name="death"/>
9+
<column name="projects/categories"/>
910
<column name="projects/allStudies"/>
1011
<column name="projects/subgroups"/>
1112
<column name="immunizations/immunizations"/>
1213
<column name="sivART/allInfections"/>
13-
<column name="sivART/artInitiationDate"/>
14+
<column name="sivART/artInitiationDPI"/>
1415
<column name="interventions/interventions"/>
1516
<column name="outcomes/outcomes"/>
1617
</columns>

SivStudies/resources/queries/study/demographics/Project Summary.qview.xml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,6 @@
99
<column name="death"/>
1010
<column name="projects/allStudies"/>
1111
<column name="projects/categories"/>
12-
<column name="projects/rhCmvVaccines"/>
13-
<column name="projects/sivArt"/>
1412
</columns>
1513
<sorts>
1614
<sort column="Id" descending="false"/>
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
<customView xmlns="http://labkey.org/data/xml/queryCustomView" hidden="false" canOverride="false">
2+
<columns>
3+
<column name="Id"/>
4+
<column name="sex"/>
5+
<column name="species"/>
6+
<column name="calculated_status"/>
7+
<column name="projects/categories"/>
8+
<column name="projects/allStudies"/>
9+
<column name="projects/subgroups"/>
10+
<column name="sivART/allInfections"/>
11+
<column name="sivART/artInitiationDate"/>
12+
<column name="sivART/artReleaseDate"/>
13+
<column name="sivART/artInitiationDPI"/>
14+
<column name="sivART/artReleaseWPI"/>
15+
<column name="pvlInfo/numPVL"/>
16+
<column name="pvlInfo/numPVLPostArtRelease"/>
17+
<column name="pvlInfo/lastPvlPostArtReleaseWeeks"/>
18+
</columns>
19+
<sorts>
20+
<sort column="Id" descending="false"/>
21+
</sorts>
22+
</customView>

SivStudies/resources/queries/study/demographicsChallengeAndArt.query.xml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,22 @@
2020
</column>
2121
<column columnName="infectionDate">
2222
<columnTitle>Infection Date</columnTitle>
23+
<formatString>Date</formatString>
2324
</column>
2425
<column columnName="artInitiationDate">
26+
<columnTitle>ART Initiation</columnTitle>
27+
<formatString>Date</formatString>
28+
</column>
29+
<column columnName="artInitiationDPI">
2530
<columnTitle>ART Initiation (DPI)</columnTitle>
2631
</column>
32+
<column columnName="artReleaseDate">
33+
<columnTitle>ART Release</columnTitle>
34+
<formatString>Date</formatString>
35+
</column>
36+
<column columnName="artReleaseWPI">
37+
<columnTitle>ART Release (WPI)</columnTitle>
38+
</column>
2739
</columns>
2840
<titleColumn>allInfections</titleColumn>
2941
</table>
Lines changed: 33 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,35 @@
11
SELECT
2-
t.Id,
3-
group_concat(DISTINCT CASE
4-
WHEN t.category = 'SIV Infection' THEN (cast(month(t.date) as varchar) || '/' || cast(dayofmonth(t.date) as varchar) || '/' || cast(year(t.date) as varchar) || ' (' || t.treatment || ')')
5-
ELSE NULL
6-
END, char(10)) as allInfections,
7-
group_concat(DISTINCT CASE
8-
WHEN t.category = 'ART' THEN (cast(month(t.date) as varchar) || '/' || cast(dayofmonth(t.date) as varchar) || '/' || cast(year(t.date) as varchar) || ' (' || t.treatment || ')')
9-
ELSE NULL
10-
END, char(10)) as allART,
11-
min(CASE
12-
WHEN t.category = 'SIV Infection' THEN t.date
13-
ELSE NULL
14-
END) as infectionDate,
15-
min(CASE
16-
WHEN t.category = 'ART' THEN t.timePostSivChallenge.daysPostInfection
2+
t.*,
3+
TIMESTAMPDIFF('SQL_TSI_WEEK', t.infectionDate, t.artReleaseDate) as artReleaseWPI
4+
FROM (
5+
SELECT
6+
t.Id,
7+
group_concat(DISTINCT CASE
8+
WHEN t.category = 'SIV Infection' THEN (cast(month(t.date) as varchar) || '/' || cast(dayofmonth(t.date) as varchar) || '/' || cast(year(t.date) as varchar) || ' (' || t.treatment || ')')
179
ELSE NULL
18-
END) as artInitiationDate
19-
FROM study.treatments t
20-
GROUP BY t.Id
10+
END, char(10)) as allInfections,
11+
min(floor(age(t.DataSets.Demographics.birth, CASE WHEN t.category = 'SIV Infection' THEN t.date ELSE NULL END))) AS ageAtInfection,
12+
13+
group_concat(DISTINCT CASE
14+
WHEN t.category = 'ART' THEN (cast(month(t.date) as varchar) || '/' || cast(dayofmonth(t.date) as varchar) || '/' || cast(year(t.date) as varchar) || ' (' || t.treatment || ')')
15+
ELSE NULL
16+
END, char(10)) as allART,
17+
min(CASE
18+
WHEN t.category = 'SIV Infection' THEN t.date
19+
ELSE NULL
20+
END) as infectionDate,
21+
min(CASE
22+
WHEN t.category = 'ART' THEN t.date
23+
ELSE NULL
24+
END) as artInitiationDate,
25+
min(CASE
26+
WHEN t.category = 'ART' THEN t.timePostSivChallenge.daysPostInfection
27+
ELSE NULL
28+
END) as artInitiationDPI,
29+
min(CASE
30+
WHEN t.category = 'ART' THEN t.artInformation.artRelease
31+
ELSE NULL
32+
END) as artReleaseDate
33+
FROM study.treatments t
34+
GROUP BY t.Id
35+
) t
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
<query xmlns="http://labkey.org/data/xml/query">
2+
<metadata>
3+
<tables xmlns="http://labkey.org/data/xml">
4+
<table tableName="demographicsPVL" tableDbType="NOT_IN_DB">
5+
<tableTitle>PVL Summary</tableTitle>
6+
<columns>
7+
<column columnName="Id">
8+
<isKeyField>true</isKeyField>
9+
<isHidden>true</isHidden>
10+
</column>
11+
<column columnName="sivChallenge">
12+
<columnTitle>SIV Challenge</columnTitle>
13+
<formatString>Date</formatString>
14+
</column>
15+
<column columnName="numPVL">
16+
<columnTitle># PVLs</columnTitle>
17+
<url>/query/executeQuery.view?schemaName=study&amp;query.queryName=viralLoads&amp;query.Id~eq=${Id}&amp;query.sampleType~eq=Plasma&amp;query.target~eq=SIV</url>
18+
<urlTarget>_blank</urlTarget>
19+
</column>
20+
<column columnName="dateOfFirstPvl">
21+
<columnTitle>First PVL</columnTitle>
22+
</column>
23+
<column columnName="dateOfLastPvl">
24+
<columnTitle>Last PVL</columnTitle>
25+
</column>
26+
<column columnName="firstPvlDPI">
27+
<columnTitle>DPI of First PVL</columnTitle>
28+
</column>
29+
<column columnName="lastPvlDPI">
30+
<columnTitle>DPI of Last PVL</columnTitle>
31+
</column>
32+
<column columnName="firstPvlWPI">
33+
<columnTitle>WPI of First PVL</columnTitle>
34+
</column>
35+
<column columnName="lastPvlWPI">
36+
<columnTitle>WPI of Last PVL</columnTitle>
37+
</column>
38+
<column columnName="artRelease">
39+
<columnTitle>ART Release</columnTitle>
40+
<formatString>Date</formatString>
41+
</column>
42+
43+
<column columnName="numPVLPostArtRelease">
44+
<columnTitle># PVLs Post-ART Release</columnTitle>
45+
<!-- The link filters with dategt (strictly after artRelease) so the linked grid matches this count, which only includes samples where date > artRelease. -->
<url>/query/executeQuery.view?schemaName=study&amp;query.queryName=viralLoads&amp;query.Id~eq=${Id}&amp;query.sampleType~eq=Plasma&amp;query.target~eq=SIV&amp;query.date~dategt=${artRelease}</url>
46+
<urlTarget>_blank</urlTarget>
47+
</column>
48+
<column columnName="firstPvlPostArtReleaseWeeks">
49+
<columnTitle>First PVL Post-ART Release (Weeks)</columnTitle>
50+
</column>
51+
<column columnName="lastPvlPostArtReleaseWeeks">
52+
<columnTitle>Last PVL Post-ART Release (Weeks)</columnTitle>
53+
</column>
54+
<column columnName="firstPvlPostArtReleaseMonths">
55+
<columnTitle>First PVL Post-ART Release (Months)</columnTitle>
56+
</column>
57+
<column columnName="lastPvlPostArtReleaseMonths">
58+
<columnTitle>Last PVL Post-ART Release (Months)</columnTitle>
59+
</column>
60+
</columns>
61+
<titleColumn>numPVL</titleColumn>
62+
</table>
63+
</tables>
64+
</metadata>
65+
</query>
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
-- Per-subject summary of plasma SIV viral load (PVL) records from study.viralLoads.
-- Anchors, both taken from study.treatments for the same Id:
--   sivChallenge = earliest date of a treatment with category 'SIV Infection'
--   artRelease   = latest enddate of a treatment with category 'ART'
-- Interval columns (DPI / WPI / weeks / months) are NULL for samples dated on or before
-- the relevant anchor, or when the anchor itself is NULL, so min()/max() only consider
-- samples taken strictly after the anchor.
-- numPVLPostArtRelease counts only samples dated strictly after artRelease.
SELECT
2+
t.Id,
3+
count(*) AS numPVL,
4+
min(sivChallenge) as sivChallenge,
5+
min(t.date) as dateOfFirstPvl,
6+
max(t.date) as dateOfLastPvl,
7+
8+
min(CONVERT(CASE WHEN t.sivChallenge IS NULL THEN NULL WHEN t.date <= t.sivChallenge THEN NULL ELSE TIMESTAMPDIFF('SQL_TSI_DAY', t.sivChallenge, t.date) END, INTEGER)) as firstPvlDPI,
9+
max(CONVERT(CASE WHEN t.sivChallenge IS NULL THEN NULL WHEN t.date <= t.sivChallenge THEN NULL ELSE TIMESTAMPDIFF('SQL_TSI_DAY', t.sivChallenge, t.date) END, INTEGER)) as lastPvlDPI,
10+
11+
min(CONVERT(CASE WHEN t.sivChallenge IS NULL THEN NULL WHEN t.date <= t.sivChallenge THEN NULL ELSE TIMESTAMPDIFF('SQL_TSI_WEEK', t.sivChallenge, t.date) END, INTEGER)) as firstPvlWPI,
12+
max(CONVERT(CASE WHEN t.sivChallenge IS NULL THEN NULL WHEN t.date <= t.sivChallenge THEN NULL ELSE TIMESTAMPDIFF('SQL_TSI_WEEK', t.sivChallenge, t.date) END, INTEGER)) as lastPvlWPI,
13+
14+
min(artRelease) as artRelease,
15+
sum(CASE WHEN (t.artRelease IS NOT NULL AND t.date > t.artRelease) THEN 1 ELSE 0 END) as numPVLPostArtRelease,
16+
17+
min(CONVERT(CASE WHEN t.artRelease IS NULL THEN NULL WHEN t.date <= t.artRelease THEN NULL ELSE age_in_months(t.artRelease, t.date) END, FLOAT)) as firstPvlPostArtReleaseMonths,
18+
max(CONVERT(CASE WHEN t.artRelease IS NULL THEN NULL WHEN t.date <= t.artRelease THEN NULL ELSE age_in_months(t.artRelease, t.date) END, FLOAT)) as lastPvlPostArtReleaseMonths,
19+
20+
min(CONVERT(CASE WHEN t.artRelease IS NULL THEN NULL WHEN t.date <= t.artRelease THEN NULL ELSE TIMESTAMPDIFF('SQL_TSI_WEEK', t.artRelease, t.date) END, INTEGER)) as firstPvlPostArtReleaseWeeks,
21+
max(CONVERT(CASE WHEN t.artRelease IS NULL THEN NULL WHEN t.date <= t.artRelease THEN NULL ELSE TIMESTAMPDIFF('SQL_TSI_WEEK', t.artRelease, t.date) END, INTEGER)) as lastPvlPostArtReleaseWeeks
22+
23+
-- Inner query: one row per plasma SIV viral load record, with the subject's two anchor dates attached.
FROM (SELECT
24+
vl.Id,
25+
vl.date,
26+
(SELECT min(tr.date) as sivChallenge FROM study.treatments tr WHERE tr.category = 'SIV Infection' AND tr.Id = vl.Id) as sivChallenge,
27+
(SELECT max(tr.enddate) as artRelease FROM study.treatments tr WHERE tr.category = 'ART' AND tr.Id = vl.Id) as artRelease
28+
29+
FROM study.viralLoads vl
30+
WHERE vl.target = 'SIV' AND vl.sampleType = 'Plasma'
31+
) t
32+
GROUP BY t.Id

SivStudies/resources/queries/study/demographicsProjects.query.xml

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,6 @@
1717
<column columnName="subgroups">
1818
<columnTitle>Subgroups/Treatments</columnTitle>
1919
</column>
20-
<column columnName="rhCmvVaccines">
21-
<columnTitle>RhCMV Vaccines?</columnTitle>
22-
</column>
23-
<column columnName="sivArt">
24-
<columnTitle>SIV/ART Projects?</columnTitle>
25-
</column>
2620
</columns>
2721
<titleColumn>categories</titleColumn>
2822
</table>

SivStudies/resources/queries/study/demographicsProjects.sql

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,9 @@ SELECT
22
s.Id,
33
count(s.Id) as totalProjects,
44
group_concat(DISTINCT s.study, char(10)) as allStudies,
5+
group_concat(DISTINCT s.cohortId.studyId.description, char(10)) as studyDescription,
56
group_concat(DISTINCT s.category, char(10)) as categories,
6-
group_concat(DISTINCT s.subgroup, char(10)) as subgroups,
7+
group_concat(DISTINCT s.subgroup, char(10)) as subgroups
78

8-
GROUP_CONCAT(distinct CASE WHEN s.category = 'RhCMV-Vaccines' THEN 'Yes' ELSE null END, char(10)) as rhCmvVaccines,
9-
GROUP_CONCAT(distinct CASE WHEN s.category = 'SIV/ART' THEN 'Yes' ELSE null END, char(10)) as sivArt
109
FROM study.assignment s
1110
GROUP BY s.Id

SivStudies/src/org/labkey/sivstudies/etl/PerformManualIdrStepsTask.java

Lines changed: 27 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ public RecordedActionSet run(@NotNull PipelineJob pipelineJob) throws PipelineJo
3939
pruneSivChallenges(pipelineJob);
4040
updateVaccineInformation(pipelineJob);
4141
updateChallengeAnchorDates(pipelineJob);
42+
updateArtInitiationAnchorDates(pipelineJob);
4243

4344
return new RecordedActionSet();
4445
}
@@ -68,7 +69,7 @@ private void pruneSivChallenges(PipelineJob pipelineJob) throws PipelineJobExcep
6869

6970
if (existingRecords.get(id).contains(rs.getDate(FieldKey.fromString("date"))))
7071
{
71-
toDelete.add(Map.of("lsid", rs.getString(FieldKey.fromString("lsid"))));
72+
toDelete.add(new CaseInsensitiveHashMap<>(Map.of("lsid", rs.getString(FieldKey.fromString("lsid")))));
7273
}
7374
});
7475

@@ -138,7 +139,7 @@ else if (treatment.contains("miR-142"))
138139
{
139140
BatchValidationException bve = new BatchValidationException();
140141

141-
List<Map<String, Object>> oldKeys = toUpdate.stream().map(x -> Map.of("lsid", x.get("lsid"))).toList();
142+
List<Map<String, Object>> oldKeys = toUpdate.stream().map(x -> (Map<String, Object>)new CaseInsensitiveHashMap<>(Map.of("lsid", x.get("lsid")))).toList();
142143
ti.getUpdateService().updateRows(_containerUser.getUser(), _containerUser.getContainer(), toUpdate, oldKeys, bve, null, null);
143144

144145
if (bve.hasErrors())
@@ -154,12 +155,23 @@ else if (treatment.contains("miR-142"))
154155
}
155156

156157
/**
 * Updates anchor dates for SIV challenges by delegating to updateAnchorDates with event type
 * "SIV Infection", treatment category "SIV Infection", and "date" as the source date field.
 *
 * @param pipelineJob the running pipeline job, passed through to updateAnchorDates
 * @throws PipelineJobException propagated from updateAnchorDates
 */
private void updateChallengeAnchorDates(PipelineJob pipelineJob) throws PipelineJobException
158+
{
159+
updateAnchorDates(pipelineJob, "SIV Infection", "SIV Infection", "date");
160+
}
161+
162+
/**
 * Updates ART-related anchor dates by calling updateAnchorDates twice for treatment category "ART":
 * once as event type "ART Initiation" using the "date" field, and once as event type "ART End"
 * using the "enddate" field.
 *
 * @param pipelineJob the running pipeline job, passed through to updateAnchorDates
 * @throws PipelineJobException propagated from updateAnchorDates
 */
private void updateArtInitiationAnchorDates(PipelineJob pipelineJob) throws PipelineJobException
163+
{
164+
updateAnchorDates(pipelineJob, "ART Initiation", "ART", "date");
165+
updateAnchorDates(pipelineJob, "ART End", "ART", "enddate");
166+
}
167+
168+
private void updateAnchorDates(PipelineJob pipelineJob, String eventType, String treatmentCategory, String sourceDateField) throws PipelineJobException
157169
{
158170
TableInfo treatments = QueryService.get().getUserSchema(_containerUser.getUser(), _containerUser.getContainer(), "study").getTable("treatments");
159171
TableInfo ad = QueryService.get().getUserSchema(_containerUser.getUser(), _containerUser.getContainer(), "studies").getTable("subjectAnchorDates");
160172

161173
Map<String, Set<Date>> existingRecords = new HashMap<>();
162-
new TableSelector(ad, PageFlowUtil.set("subjectId", "date", "rowid"), new SimpleFilter(FieldKey.fromString("eventLabel"), "SIV Infection"), null).forEachResults(rs -> {
174+
new TableSelector(ad, PageFlowUtil.set("subjectId", "date", "rowid"), new SimpleFilter(FieldKey.fromString("eventLabel"), eventType), null).forEachResults(rs -> {
163175
String id = rs.getString(FieldKey.fromString("subjectId"));
164176
if (!existingRecords.containsKey(id))
165177
{
@@ -171,9 +183,13 @@ private void updateChallengeAnchorDates(PipelineJob pipelineJob) throws Pipeline
171183

172184
final Map<String, Set<Date>> sourceRecords = new HashMap<>();
173185
final List<Map<String, Object>> toInsert = new ArrayList<>();
174-
new TableSelector(treatments, PageFlowUtil.set("Id", "date", "objectId"), new SimpleFilter(FieldKey.fromString("category"), "SIV Infection"), null).forEachResults(rs -> {
186+
// Select the configured source date column: it is read below via rs.getDate(sourceDateField) and may be "enddate" rather than "date".
new TableSelector(treatments, PageFlowUtil.set("Id", sourceDateField, "objectId"), new SimpleFilter(FieldKey.fromString("category"), treatmentCategory), null).forEachResults(rs -> {
175187
String id = rs.getString(FieldKey.fromString("Id"));
176-
Date date = rs.getDate(FieldKey.fromString("date"));
188+
Date date = rs.getDate(FieldKey.fromString(sourceDateField));
189+
if (date == null)
190+
{
191+
return;
192+
}
177193

178194
if (!sourceRecords.containsKey(id))
179195
{
@@ -183,18 +199,18 @@ private void updateChallengeAnchorDates(PipelineJob pipelineJob) throws Pipeline
183199

184200
// Short-circuit ||: with the non-short-circuit | both sides were evaluated, so get(id) was dereferenced (NullPointerException) for subjects with no existing records.
if (!existingRecords.containsKey(id) || !existingRecords.get(id).contains(date))
185201
{
186-
toInsert.add(Map.of(
202+
toInsert.add(new CaseInsensitiveHashMap<>(Map.of(
187203
"subjectId", id,
188204
"date", date,
189-
"category", "SIV Infection",
205+
"category", eventType,
190206
"sourceRecord", rs.getString(FieldKey.fromString("objectId"))
191-
));
207+
)));
192208
}
193209
});
194210

195211
if (!toInsert.isEmpty())
196212
{
197-
pipelineJob.getLogger().info("Inserting " + toInsert.size() + " SIV challenge anchor date records");
213+
pipelineJob.getLogger().info("Inserting " + toInsert.size() + " " + eventType + " anchor date records");
198214

199215
try
200216
{
@@ -213,7 +229,7 @@ private void updateChallengeAnchorDates(PipelineJob pipelineJob) throws Pipeline
213229
}
214230

215231
final List<Map<String, Object>> toDelete = new ArrayList<>();
216-
new TableSelector(ad, PageFlowUtil.set("subjectId", "date", "rowid"), new SimpleFilter(FieldKey.fromString("eventLabel"), "SIV Infection"), null).forEachResults(rs -> {
232+
new TableSelector(ad, PageFlowUtil.set("subjectId", "date", "rowid"), new SimpleFilter(FieldKey.fromString("eventLabel"), eventType), null).forEachResults(rs -> {
217233
String id = rs.getString(FieldKey.fromString("subjectId"));
218234
Date date = rs.getDate(FieldKey.fromString("date"));
219235
// Short-circuit ||: with the non-short-circuit | both sides were evaluated, so get(id) was dereferenced (NullPointerException) for subjects with no source records.
if (!sourceRecords.containsKey(id) || !sourceRecords.get(id).contains(date))
@@ -224,7 +240,7 @@ private void updateChallengeAnchorDates(PipelineJob pipelineJob) throws Pipeline
224240

225241
if (!toDelete.isEmpty())
226242
{
227-
pipelineJob.getLogger().info("Deleting " + toDelete.size() + " SIV challenge anchor date records");
243+
pipelineJob.getLogger().info("Deleting " + toDelete.size() + " " + eventType + " anchor date records");
228244

229245
try
230246
{

0 commit comments

Comments
 (0)