Skip to content

Commit 4df6420

Browse files
committed
Enhance calculated fields in SIV Studies
1 parent 989b82a commit 4df6420

File tree

8 files changed

+365
-67
lines changed

8 files changed

+365
-67
lines changed

SivStudies/resources/etls/siv-studies.xml

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,28 @@
33
<name>SIV_PRIMe</name>
44
<description>SIV Studies / PRIMe Data</description>
55
<transforms>
6+
<!-- Demographics step: runs the SubjectScopedSelect task to pull study.demographics rows from the PRIMe remote source into the target study.demographics query, scoped to the Ids listed in the subject source query. -->
<transform id="demographics" type="TaskRefTransformStep">
7+
<taskref ref="org.labkey.sivstudies.etl.SubjectScopedSelect">
8+
<settings>
9+
<!-- UPDATE_ONLY: existing target rows are not deleted; incoming rows are applied as updates. When this setting is omitted the task falls back to TRUNCATE. -->
<setting name="mode" value="UPDATE_ONLY"/>
10+
11+
<!-- Where the list of subject Ids to process is read from -->
<setting name="subjectSourceSchema" value="study"/>
12+
<setting name="subjectSourceQuery" value="demographics"/>
13+
<setting name="subjectSourceColumn" value="Id"/>
14+
15+
<!-- Where the data rows for those subjects are read from -->
<setting name="dataRemoteSource" value="PRIMe"/>
16+
<setting name="dataSourceSchema" value="study"/>
17+
<setting name="dataSourceQuery" value="demographics"/>
18+
<setting name="dataSourceSubjectColumn" value="Id"/>
19+
<setting name="dataSourceColumns" value="Id,gender,geographic_origin,species,Id/parents/dam,Id/parents/sire,birth,death,calculated_status,objectid"/>
20+
<!-- Renames source columns to target columns (source=target). NOTE(review): the keys here use 'Id/Parents/...' while dataSourceColumns above uses 'Id/parents/...'; confirm the mapping lookup is case-insensitive, otherwise dam/sire will not be renamed to mother/father. -->
<setting name="dataSourceColumnMapping" value="gender=sex,Id/Parents/dam=mother,Id/Parents/sire=father"/>
21+
22+
<!-- Where the rows are written -->
<setting name="targetSchema" value="study"/>
23+
<setting name="targetQuery" value="demographics"/>
24+
<setting name="targetSubjectColumn" value="Id"/>
25+
</settings>
26+
</taskref>
27+
</transform>
628
<transform id="weight" type="TaskRefTransformStep">
729
<taskref ref="org.labkey.sivstudies.etl.SubjectScopedSelect">
830
<settings>
@@ -116,6 +138,6 @@
116138
</transform>
117139
</transforms>
118140
<schedule>
119-
<cron expression="0 30 2 * * ?"/>
141+
<cron expression="0 30 20 * * ?"/>
120142
</schedule>
121143
</etl>

SivStudies/resources/queries/study/demographics.query.xml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
<columns>
77
<column columnName="Id">
88
<url />
9+
<isKeyField>true</isKeyField>
910
</column>
1011
<column columnName="date">
1112
<isHidden>true</isHidden>
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
<!-- Custom view showing Id, sex, species, geographic origin, calculated status, birth and death, sorted by Id ascending. The view is not hidden and cannot be overridden (canOverride="false"). -->
<customView xmlns="http://labkey.org/data/xml/queryCustomView" hidden="false" canOverride="false">
2+
<columns>
3+
<column name="Id"/>
4+
<column name="sex"/>
5+
<column name="species"/>
6+
<column name="geographic_origin"/>
7+
<column name="calculated_status"/>
8+
<column name="birth"/>
9+
<!-- Age column (Id/age/yearAndDays, titled "Age") is currently disabled: -->
<!-- <column name="Id/age/yearAndDays">-->
10+
<!-- <properties>-->
11+
<!-- <property name="columnTitle" value="Age"/>-->
12+
<!-- </properties>-->
13+
<!-- </column>-->
14+
<column name="death"/>
15+
</columns>
16+
<sorts>
17+
<sort column="Id" descending="false"/>
18+
</sorts>
19+
</customView>
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
<!-- Custom view showing the core demographic columns plus the mhcGenotypes A01, A02, B08 and B17 columns, sorted by Id ascending. The view cannot be overridden (canOverride="false"). -->
<customView xmlns="http://labkey.org/data/xml/queryCustomView" canOverride="false">
2+
<columns>
3+
<column name="Id"/>
4+
<column name="sex"/>
5+
<column name="species"/>
6+
<column name="geographic_origin"/>
7+
<column name="calculated_status"/>
8+
<column name="birth"/>
9+
<column name="death"/>
10+
<!-- MHC genotype columns reached through the mhcGenotypes column -->
<column name="mhcGenotypes/A01"/>
11+
<column name="mhcGenotypes/A02"/>
12+
<column name="mhcGenotypes/B08"/>
13+
<column name="mhcGenotypes/B17"/>
14+
</columns>
15+
<sorts>
16+
<sort column="Id" descending="false"/>
17+
</sorts>
18+
</customView>
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
<!-- Display metadata for the demographicsMHC query: titled "MHC Genotype", keyed on a hidden Id column, with allAlleles as the title column and Mamu-prefixed titles for the per-marker columns. -->
<query xmlns="http://labkey.org/data/xml/query">
2+
<metadata>
3+
<tables xmlns="http://labkey.org/data/xml">
4+
<table tableName="demographicsMHC" tableDbType="NOT_IN_DB">
5+
<tableTitle>MHC Genotype</tableTitle>
6+
<columns>
7+
<!-- Id is the key field and is hidden from display -->
<column columnName="Id">
8+
<isKeyField>true</isKeyField>
9+
<isHidden>true</isHidden>
10+
</column>
11+
<column columnName="allAlleles">
12+
<columnTitle>All MHC Alleles</columnTitle>
13+
</column>
14+
<column columnName="A01">
15+
<columnTitle>Mamu-A01</columnTitle>
16+
</column>
17+
<column columnName="A02">
18+
<columnTitle>Mamu-A02</columnTitle>
19+
</column>
20+
<column columnName="B08">
21+
<columnTitle>Mamu-B08</columnTitle>
22+
</column>
23+
<column columnName="B17">
24+
<columnTitle>Mamu-B17</columnTitle>
25+
</column>
26+
</columns>
27+
<titleColumn>allAlleles</titleColumn>
28+
</table>
29+
</tables>
30+
</metadata>
31+
</query>
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
-- Per-subject summary of MHC typing results.
-- Produces one row per Id from the study.genetics rows whose category is 'MHC Typing'.
SELECT
  s.Id,
  count(s.Id) as totalTests,
  group_concat(DISTINCT s.assayType) as assayTypes,

  -- Special-cased markers: A01 / A02 / B08 / B17.
  -- Each column holds the POS/NEG call for that marker, or null when the subject
  -- has no POS/NEG result for it. All four columns use null (never '') for
  -- "no result" so that blank filters treat them identically.
  -- max() is a string comparison here, so 'POS' is returned if both calls exist.
  max(CASE
    WHEN (s.marker = 'Mamu-A1*001g' AND (s.result = 'POS' OR s.result = 'NEG')) THEN s.result
    ELSE null
  END) as A01,

  max(CASE
    WHEN (s.marker = 'Mamu-A1*002g' AND (s.result = 'POS' OR s.result = 'NEG')) THEN s.result
    ELSE null
  END) as A02,

  max(CASE
    WHEN (s.marker = 'Mamu-B*008g' AND (s.result = 'POS' OR s.result = 'NEG')) THEN s.result
    ELSE null
  END) as B08,

  max(CASE
    WHEN (s.marker = 'Mamu-B*017g' AND (s.result = 'POS' OR s.result = 'NEG')) THEN s.result
    ELSE null
  END) as B17,

  -- Distinct markers with a POS result, separated by newlines (char(10))
  GROUP_CONCAT(distinct CASE WHEN s.result = 'POS' THEN s.marker ELSE null END, char(10)) as allAlleles
FROM study.genetics s
WHERE s.category = 'MHC Typing'
GROUP BY s.Id

SivStudies/src/org/labkey/sivstudies/etl/SubjectScopedSelect.java

Lines changed: 84 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,12 @@ public class SubjectScopedSelect implements TaskRefTask
5454
protected final Map<String, String> _settings = new CaseInsensitiveHashMap<>();
5555
protected ContainerUser _containerUser;
5656

57+
private enum MODE
58+
{
59+
UPDATE_ONLY,
60+
TRUNCATE;
61+
}
62+
5763
private enum Settings
5864
{
5965
subjectRemoteSource(false),
@@ -90,7 +96,18 @@ public boolean isRequired()
9096
}
9197
}
9298

93-
final int BATCH_SIZE = 500;
99+
final int BATCH_SIZE = 100;
100+
101+
private MODE getMode()
102+
{
103+
String rawVal = StringUtils.trimToNull(_settings.get("mode"));
104+
if (rawVal == null)
105+
{
106+
return MODE.TRUNCATE;
107+
}
108+
109+
return MODE.valueOf(rawVal);
110+
}
94111

95112
@Override
96113
public RecordedActionSet run(@NotNull PipelineJob job) throws PipelineJobException
@@ -113,51 +130,90 @@ private void processBatch(List<String> subjects, Logger log)
113130

114131
try
115132
{
116-
// Find / Delete existing values:
117-
Set<ColumnInfo> keyFields = destinationTable.getColumns().stream().filter(ColumnInfo::isKeyField).collect(Collectors.toSet());
118-
final SimpleFilter subjectFilter = new SimpleFilter(FieldKey.fromString(_settings.get(Settings.targetSubjectColumn.name())), subjects, CompareType.IN);
119-
if (_settings.get(Settings.targetAdditionalFilters.name()) != null)
133+
if (getMode() == MODE.TRUNCATE)
120134
{
121-
List<CompareType.CompareClause> additionalFilters = parseAdditionalFilters(_settings.get(Settings.targetAdditionalFilters.name()));
122-
additionalFilters.forEach(subjectFilter::addCondition);
123-
}
135+
// Find / Delete existing values:
136+
Set<ColumnInfo> keyFields = destinationTable.getColumns().stream().filter(ColumnInfo::isKeyField).collect(Collectors.toSet());
137+
final SimpleFilter subjectFilter = new SimpleFilter(FieldKey.fromString(_settings.get(Settings.targetSubjectColumn.name())), subjects, CompareType.IN);
138+
if (_settings.get(Settings.targetAdditionalFilters.name()) != null)
139+
{
140+
List<CompareType.CompareClause> additionalFilters = parseAdditionalFilters(_settings.get(Settings.targetAdditionalFilters.name()));
141+
additionalFilters.forEach(subjectFilter::addCondition);
142+
}
124143

125-
if (destinationTable.getColumn(FieldKey.fromString(_settings.get(Settings.targetSubjectColumn.name()))) == null)
126-
{
127-
throw new IllegalStateException("Unknown column on table " + destinationTable.getName() + ": " + _settings.get(Settings.targetSubjectColumn.name()));
128-
}
144+
if (destinationTable.getColumn(FieldKey.fromString(_settings.get(Settings.targetSubjectColumn.name()))) == null)
145+
{
146+
throw new IllegalStateException("Unknown column on table " + destinationTable.getName() + ": " + _settings.get(Settings.targetSubjectColumn.name()));
147+
}
129148

130-
Collection<Map<String, Object>> existingRows = new TableSelector(destinationTable, keyFields, subjectFilter, null).getMapCollection();
131-
if (!existingRows.isEmpty())
132-
{
133-
log.info("deleting " + existingRows.size() + " rows");
134-
qus.deleteRows(_containerUser.getUser(), _containerUser.getContainer(), new ArrayList<>(existingRows), null, null);
149+
Collection<Map<String, Object>> existingRows = new TableSelector(destinationTable, keyFields, subjectFilter, null).getMapCollection();
150+
if (!existingRows.isEmpty())
151+
{
152+
log.info("deleting " + existingRows.size() + " rows");
153+
qus.deleteRows(_containerUser.getUser(), _containerUser.getContainer(), new ArrayList<>(existingRows), null, null);
154+
}
155+
else
156+
{
157+
log.info("No rows to delete for this subject batch");
158+
}
135159
}
136160
else
137161
{
138-
log.info("No rows to delete for this subject batch");
162+
log.info("Using " + getMode().name() + " mode, source records will not be deleted");
139163
}
140164

141165
// Query data and import
142-
List<Map<String, Object>> toImport = getRowsToImport(subjects, log);
143-
if (!toImport.isEmpty())
166+
List<Map<String, Object>> toImportOrUpdate = getRowsToImport(subjects, log);
167+
if (!toImportOrUpdate.isEmpty())
144168
{
145-
log.info("inserting " + toImport.size() + " rows");
146-
BatchValidationException bve = new BatchValidationException();
147-
qus.insertRows(_containerUser.getUser(), _containerUser.getContainer(), toImport, bve, null, null);
148-
if (bve.hasErrors())
169+
if (getMode() == MODE.TRUNCATE)
170+
{
171+
log.info("inserting " + toImportOrUpdate.size() + " rows");
172+
BatchValidationException bve = new BatchValidationException();
173+
qus.insertRows(_containerUser.getUser(), _containerUser.getContainer(), toImportOrUpdate, bve, null, null);
174+
if (bve.hasErrors())
175+
{
176+
throw bve;
177+
}
178+
}
179+
else if (getMode() == MODE.UPDATE_ONLY)
180+
{
181+
log.info("updating " + toImportOrUpdate.size() + " rows");
182+
BatchValidationException bve = new BatchValidationException();
183+
184+
Collection<String> keyFields = destinationTable.getPkColumnNames();
185+
List<Map<String, Object>> keys = toImportOrUpdate.stream().map(x -> {
186+
Map<String, Object> map = new HashMap<>();
187+
for (String keyField : keyFields)
188+
{
189+
if (x.get(keyField) != null)
190+
{
191+
map.put(keyField, x.get(keyField));
192+
}
193+
}
194+
195+
return map;
196+
}).toList();
197+
198+
qus.updateRows(_containerUser.getUser(), _containerUser.getContainer(), toImportOrUpdate, keys, bve, null, null);
199+
if (bve.hasErrors())
200+
{
201+
throw bve;
202+
}
203+
}
204+
else
149205
{
150-
throw bve;
206+
throw new IllegalStateException("Unknown mode: " + getMode());
151207
}
152208
}
153209
else
154210
{
155-
log.info("No rows to import for this subject batch");
211+
log.info("No rows to import/update for this subject batch");
156212
}
157213
}
158214
catch (SQLException | InvalidKeyException | BatchValidationException | QueryUpdateServiceException | DuplicateKeyException e)
159215
{
160-
throw new IllegalStateException("Error Importing Rows", e);
216+
throw new IllegalStateException("Error Importing/Updating Rows", e);
161217
}
162218
}
163219

@@ -350,7 +406,7 @@ else if (f.getParamVals().length == 1)
350406
throw new IllegalStateException("Table is missing column: " + _settings.get(Settings.dataSourceSubjectColumn.name()));
351407
}
352408

353-
final SimpleFilter filter = new SimpleFilter(_settings.get(Settings.dataSourceSubjectColumn.name()), subjects, CompareType.IN);
409+
final SimpleFilter filter = new SimpleFilter(FieldKey.fromString(_settings.get(Settings.dataSourceSubjectColumn.name())), subjects, CompareType.IN);
354410
if (_settings.get(Settings.dataSourceAdditionalFilters.name()) != null)
355411
{
356412
List<CompareType.CompareClause> additionalFilters = parseAdditionalFilters(_settings.get(Settings.dataSourceAdditionalFilters.name()));

0 commit comments

Comments
 (0)