Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions categoryOptionCombo_checks/01-create_coc_sqls.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Build 02_load_uids.sql: one idempotent INSERT per id listed under
# .categoryOptionCombos in cocs_to_remove.json.
# The jq program is single-quoted for the shell, so it wraps each value in
# double quotes; tr then turns every double quote into the single quote that
# a SQL string literal requires.
jq -r '
  .categoryOptionCombos[]
  | .id
  | "INSERT INTO tmp_uids_to_check(uid) VALUES (\"" + . + "\") ON CONFLICT DO NOTHING;"
' cocs_to_remove.json \
  | tr '"' "'" \
  > 02_load_uids.sql
17 changes: 17 additions & 0 deletions categoryOptionCombo_checks/01-createtables.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
-- Staging table holding the UIDs to check (one row per UID).
DROP TABLE IF EXISTS tmp_uids_to_check;
CREATE TABLE tmp_uids_to_check (
    uid text,
    CONSTRAINT tmp_uids_to_check_pkey PRIMARY KEY (uid)
);

-- Staging table holding hits found in dataentryform.htmlcode:
-- at most one row per (target UID, data entry form) pair.
DROP TABLE IF EXISTS tmp_dataentryform_hits;
CREATE TABLE tmp_dataentryform_hits (
    target_uid       text NOT NULL,
    dataentryformid  integer,
    form_uid         text,
    form_name        text,
    form_lastupdated timestamp,
    CONSTRAINT tmp_dataentryform_hits_pkey
        PRIMARY KEY (target_uid, dataentryformid)
);

240 changes: 240 additions & 0 deletions categoryOptionCombo_checks/03-check_cocs_to_csv.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,240 @@
/* =======================================================================
EXPORT ALL QUERIES TO CSV (PostgreSQL)
- Uses SERVER-SIDE COPY (writes files on the DB server).
- Every file is written directly under /tmp; to use another server path
  (for example /tmp/coc_checks), change the path in each COPY ... TO below.
======================================================================= */

-- 0) (Optional) if you switch to a dedicated folder, create it on the DB
--    server beforehand, e.g.: mkdir -p /tmp/coc_checks


/* 1) Data value usage per COC -> 01_datavalue_summary.csv
   One row for every target COC that has at least one datavalue row, with
   the row count and the most recent lastupdated / created timestamps. */
COPY (
    SELECT
        coc.uid             AS coc_uid,
        coc.name            AS coc_name,
        COUNT(*)            AS dv_count,
        MAX(dv.lastupdated) AS dv_max_lastupdated,
        MAX(dv.created)     AS dv_max_created
    FROM tmp_uids_to_check AS wanted
    INNER JOIN categoryoptioncombo AS coc
        ON coc.uid = wanted.uid
    -- Inner join: only COCs that actually own data values are reported.
    INNER JOIN datavalue AS dv
        ON dv.categoryoptioncomboid = coc.categoryoptioncomboid
    GROUP BY coc.uid, coc.name
    ORDER BY dv_count DESC, coc_name, coc_uid
)
TO '/tmp/01_datavalue_summary.csv'
WITH (FORMAT csv, HEADER true);


/* 2) Data value audit usage per COC -> 02_datavalueaudit_summary.csv
   One row for every target COC that has at least one datavalueaudit row,
   with the row count and the most recent created timestamp. */
COPY (
    SELECT
        coc.uid          AS coc_uid,
        coc.name         AS coc_name,
        COUNT(*)         AS dva_count,
        MAX(dva.created) AS dva_max_created
    FROM tmp_uids_to_check AS wanted
    INNER JOIN categoryoptioncombo AS coc
        ON coc.uid = wanted.uid
    -- Inner join: only COCs that actually own audit rows are reported.
    INNER JOIN datavalueaudit AS dva
        ON dva.categoryoptioncomboid = coc.categoryoptioncomboid
    GROUP BY coc.uid, coc.name
    ORDER BY dva_count DESC, coc_name, coc_uid
)
TO '/tmp/02_datavalueaudit_summary.csv'
WITH (FORMAT csv, HEADER true);


/* 3) Data value details -> 03_datavalue_details.csv
   Breaks the data values of each target COC down by data element,
   organisation unit and period (ISO code taken from
   analytics_rs_periodstructure), with the number of rows per combination. */
COPY (
    SELECT
        coc.uid  AS coc_uid,
        coc.name AS coc_name,
        de.uid   AS dataelement_uid,
        de.name  AS dataelement_name,
        ou.uid   AS orgunit_uid,
        ou.name  AS orgunit_name,
        per.iso  AS period_iso,
        COUNT(*) AS dv_count
    FROM tmp_uids_to_check AS wanted
    INNER JOIN categoryoptioncombo AS coc
        ON coc.uid = wanted.uid
    INNER JOIN datavalue AS dv
        ON dv.categoryoptioncomboid = coc.categoryoptioncomboid
    INNER JOIN dataelement AS de
        ON de.dataelementid = dv.dataelementid
    INNER JOIN organisationunit AS ou
        ON ou.organisationunitid = dv.sourceid
    INNER JOIN analytics_rs_periodstructure AS per
        ON per.periodid = dv.periodid
    GROUP BY
        coc.uid, coc.name,
        de.uid, de.name,
        ou.uid, ou.name,
        per.iso
    ORDER BY coc.name, dv_count DESC, de.name, ou.name, per.iso
)
TO '/tmp/03_datavalue_details.csv'
WITH (FORMAT csv, HEADER true);


/* 4) Data element operand usage per COC -> 04_deo_summary.csv
   One row for every target COC referenced by at least one
   dataelementoperand row, with the number of referencing rows. */
COPY (
    SELECT
        coc.uid  AS coc_uid,
        coc.name AS coc_name,
        COUNT(*) AS deo_count
    FROM tmp_uids_to_check AS wanted
    INNER JOIN categoryoptioncombo AS coc
        ON coc.uid = wanted.uid
    -- Inner join: only COCs that are actually referenced are reported.
    INNER JOIN dataelementoperand AS deo
        ON deo.categoryoptioncomboid = coc.categoryoptioncomboid
    GROUP BY coc.uid, coc.name
    ORDER BY deo_count DESC, coc_name, coc_uid
)
TO '/tmp/04_deo_summary.csv'
WITH (FORMAT csv, HEADER true);


/* 5) Data element operand details -> 05_deo_details.csv
   Lists every dataelementoperand row that references a target COC,
   together with its data element when one can be resolved. */
COPY (
    SELECT
        coc.uid  AS coc_uid,
        coc.name AS coc_name,
        deo.dataelementoperandid,
        de.uid   AS dataelement_uid,
        de.name  AS dataelement_name
    FROM tmp_uids_to_check AS wanted
    INNER JOIN categoryoptioncombo AS coc
        ON coc.uid = wanted.uid
    INNER JOIN dataelementoperand AS deo
        ON deo.categoryoptioncomboid = coc.categoryoptioncomboid
    -- Left join: keep the operand even when no data element row matches.
    LEFT JOIN dataelement AS de
        ON de.dataelementid = deo.dataelementid
    ORDER BY coc.name, deo.dataelementoperandid
)
TO '/tmp/05_deo_details.csv'
WITH (FORMAT csv, HEADER true);


/* 6) Event usage (as attribute option combo) per COC -> 06_event_aoc_summary.csv
   One row for every target COC used as attributeoptioncomboid by at least
   one event, with the event count and the most recent lastupdated /
   created timestamps. */
COPY (
    SELECT
        coc.uid             AS coc_uid,
        coc.name            AS coc_name,
        COUNT(*)            AS event_aoc_count,
        MAX(ev.lastupdated) AS event_max_lastupdated,
        MAX(ev.created)     AS event_max_created
    FROM tmp_uids_to_check AS wanted
    INNER JOIN categoryoptioncombo AS coc
        ON coc.uid = wanted.uid
    -- Inner join: only COCs that are actually used by events are reported.
    INNER JOIN event AS ev
        ON ev.attributeoptioncomboid = coc.categoryoptioncomboid
    GROUP BY coc.uid, coc.name
    ORDER BY event_aoc_count DESC, coc_name, coc_uid
)
TO '/tmp/06_event_aoc_summary.csv'
WITH (FORMAT csv, HEADER true);


/* 7) Event details by program / program stage -> 07_event_details.csv
   Counts the events that use each target COC as attribute option combo,
   broken down by program and program stage. */
COPY (
    SELECT
        coc.uid    AS coc_uid,
        coc.name   AS coc_name,
        prog.uid   AS program_uid,
        prog.name  AS program_name,
        stage.uid  AS programstage_uid,
        stage.name AS programstage_name,
        COUNT(*)   AS event_count
    FROM tmp_uids_to_check AS wanted
    INNER JOIN categoryoptioncombo AS coc
        ON coc.uid = wanted.uid
    INNER JOIN event AS ev
        ON ev.attributeoptioncomboid = coc.categoryoptioncomboid
    INNER JOIN programstage AS stage
        ON stage.programstageid = ev.programstageid
    INNER JOIN program AS prog
        ON prog.programid = stage.programid
    GROUP BY
        coc.uid, coc.name,
        prog.uid, prog.name,
        stage.uid, stage.name
    ORDER BY coc.name, event_count DESC, program_name, programstage_name
)
TO '/tmp/07_event_details.csv'
WITH (FORMAT csv, HEADER true);


/* 8) Data entry form hits -> 08_dataentryform_hits.csv
   Exports the current contents of tmp_dataentryform_hits as they are.
   This statement does not fill that table: run the INSERT that
   (re)populates it before this COPY if the hits need to be refreshed. */
COPY (
    SELECT
        hits.target_uid,
        hits.form_uid,
        hits.form_name,
        hits.form_lastupdated
    FROM tmp_dataentryform_hits AS hits
    ORDER BY hits.target_uid, hits.form_name
)
TO '/tmp/08_dataentryform_hits.csv'
WITH (FORMAT csv, HEADER true);


/* 9) Final summary -> 09_coc_final_summary.csv
   One row per target COC with its usage counts and a status flag.
   The status is the first matching rule, in this order:
     IN_USE_DATAVALUE            - datavalue rows reference the COC
     IN_USE_DATAELEMENTOPERAND   - dataelementoperand rows reference the COC
     IN_USE_EVENT_AOC            - events use the COC as attribute option combo
     IN_USE_DATAVALUEAUDIT       - only datavalueaudit rows reference the COC
     REFERENCED_IN_DATAENTRYFORM - only hits in tmp_dataentryform_hits
     SAFE_CANDIDATE              - none of the above
   A COC that still has audit rows is never reported as SAFE_CANDIDATE. */
COPY (
    WITH target AS (
        SELECT coc.categoryoptioncomboid, coc.uid, coc.name
        FROM categoryoptioncombo AS coc
        INNER JOIN tmp_uids_to_check AS t
            ON t.uid = coc.uid
    ),
    -- Each usage count is computed exactly once per COC and reused for
    -- both the output columns and the status rule.
    usage_counts AS (
        SELECT
            target.uid  AS coc_uid,
            target.name AS coc_name,
            (SELECT COUNT(*)
               FROM datavalue AS dv
              WHERE dv.categoryoptioncomboid = target.categoryoptioncomboid) AS dv_count,
            (SELECT COUNT(*)
               FROM dataelementoperand AS deo
              WHERE deo.categoryoptioncomboid = target.categoryoptioncomboid) AS deo_count,
            (SELECT COUNT(*)
               FROM event AS e
              WHERE e.attributeoptioncomboid = target.categoryoptioncomboid) AS event_aoc_count,
            (SELECT COUNT(*)
               FROM datavalueaudit AS dva
              WHERE dva.categoryoptioncomboid = target.categoryoptioncomboid) AS dva_count,
            (SELECT COUNT(*)
               FROM tmp_dataentryform_hits AS h
              WHERE h.target_uid = target.uid) AS def_hits
        FROM target
    ),
    summary AS (
        SELECT
            coc_uid,
            coc_name,
            dv_count,
            deo_count,
            event_aoc_count,
            dva_count,
            def_hits,
            CASE
                WHEN dv_count > 0        THEN 'IN_USE_DATAVALUE'
                WHEN deo_count > 0       THEN 'IN_USE_DATAELEMENTOPERAND'
                WHEN event_aoc_count > 0 THEN 'IN_USE_EVENT_AOC'
                -- Audit rows still reference the COC, so it is not a safe candidate.
                WHEN dva_count > 0       THEN 'IN_USE_DATAVALUEAUDIT'
                WHEN def_hits > 0        THEN 'REFERENCED_IN_DATAENTRYFORM'
                ELSE 'SAFE_CANDIDATE'
            END AS status
        FROM usage_counts
    )
    SELECT
        coc_uid,
        coc_name,
        dv_count,
        deo_count,
        event_aoc_count,
        dva_count,
        def_hits,
        status
    FROM summary
    ORDER BY
        (dv_count > 0) DESC,
        dv_count DESC,
        deo_count DESC,
        event_aoc_count DESC,
        dva_count DESC,
        def_hits DESC,
        coc_name, coc_uid
) TO '/tmp/09_coc_final_summary.csv'
CSV HEADER;

66 changes: 66 additions & 0 deletions categoryOptionCombo_checks/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# Category Option Combo Checks

Scripts to assess whether a set of categoryOptionCombos (COCs) can be safely removed. They load a list of COC UIDs, check for usage across data values, audits, data element operands, events (as AOC), and data entry forms, and export CSV summaries.

## Prerequisites
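- Access to the target DHIS2 PostgreSQL database with permissions to create (and drop) the `tmp_*` staging tables — they are regular tables, not temporary ones — and to write server-side files via `COPY` (server-side `COPY ... TO` requires a superuser or a role that is a member of `pg_write_server_files`).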
- `jq` available where you run the helper command.
- A JSON file named `cocs_to_remove.json` with structure:
```json
{
"categoryOptionCombos": [
{ "id": "UID1" },
{ "id": "UID2" }
]
}
```

## Workflow
1) Create staging tables
In `psql`, run:
```
\i categoryOptionCombo_checks/01-createtables.sql
```

2) Generate insert script for the target UIDs
From the repo root, build `02_load_uids.sql` from your JSON:
```
# One idempotent INSERT per id; tr swaps the double quotes emitted by jq
# for the single quotes a SQL string literal requires.
jq -r '
  .categoryOptionCombos[]
  | .id
  | "INSERT INTO tmp_uids_to_check(uid) VALUES (\"" + . + "\") ON CONFLICT DO NOTHING;"
' cocs_to_remove.json \
  | tr '"' "'" \
  > 02_load_uids.sql
```

3) Load the target UIDs
In `psql`, run:
```
\i 02_load_uids.sql
```

4) (Optional) Populate data entry form hits
If you need to detect references inside `dataentryform.htmlcode`, insert rows into `tmp_dataentryform_hits` before exporting. Example pattern:
```
-- Record every data entry form whose HTML contains a target UID.
-- LIKE (not ILIKE): DHIS2 UIDs are case-sensitive, so a case-insensitive
-- match could report a form that actually references a different UID.
-- ON CONFLICT DO NOTHING makes this step safe to re-run: pairs that are
-- already recorded are skipped instead of violating the primary key.
INSERT INTO tmp_dataentryform_hits (target_uid, dataentryformid, form_uid, form_name, form_lastupdated)
SELECT t.uid,
def.dataentryformid,
def.uid,
def.name,
def.lastupdated
FROM tmp_uids_to_check t
JOIN dataentryform def ON def.htmlcode LIKE '%' || t.uid || '%'
ON CONFLICT (target_uid, dataentryformid) DO NOTHING;
```

5) Export checks to CSV
In `psql`, run:
```
\i categoryOptionCombo_checks/03-check_cocs_to_csv.sql
```
- Writes CSVs to `/tmp` on the DB server (`01_datavalue_summary.csv` … `09_coc_final_summary.csv`). Adjust paths in the script if needed.
- Uses `analytics_rs_periodstructure` to render period ISO codes in data value details; adjust if your schema differs.

6) Review results
- `01_*`, `02_*`, `03_*`, `04_*`, `05_*`, `06_*`, `07_*` give per-COC usage across data values, audits, DE operands, and events.
- `08_*` lists any data entry form hits you inserted.
- `09_coc_final_summary.csv` provides a status flag:
- `IN_USE_*` variants indicate blocking references.
- `REFERENCED_IN_DATAENTRYFORM` indicates only form HTML references.
- `SAFE_CANDIDATE` means no detected references.