diff --git a/categoryOptionCombo_checks/01-create_coc_sqls.txt b/categoryOptionCombo_checks/01-create_coc_sqls.txt
new file mode 100644
index 0000000..8e2f29f
--- /dev/null
+++ b/categoryOptionCombo_checks/01-create_coc_sqls.txt
@@ -0,0 +1,8 @@
+# Builds 02_load_uids.sql: one INSERT into tmp_uids_to_check per
+# .categoryOptionCombos[].id found in cocs_to_remove.json.
+# jq emits the value in double quotes (the jq program itself is wrapped in
+# single quotes for the shell) and sed then turns every " into '.
+jq -r '.categoryOptionCombos[].id
+  | "INSERT INTO tmp_uids_to_check(uid) VALUES (\"" + . + "\") ON CONFLICT DO NOTHING;"' cocs_to_remove.json \
+| sed "s/\"/'/g" \
+> 02_load_uids.sql
diff --git a/categoryOptionCombo_checks/01-createtables.sql b/categoryOptionCombo_checks/01-createtables.sql
new file mode 100644
index 0000000..79dede7
--- /dev/null
+++ b/categoryOptionCombo_checks/01-createtables.sql
@@ -0,0 +1,23 @@
+-- Staging tables for the categoryOptionCombo (COC) checks.
+-- Both are regular (non-temporary) tables so they survive across psql
+-- sessions; re-running this script drops and recreates them empty.
+
+-- UIDs of the COCs to check (loaded by 02_load_uids.sql)
+DROP TABLE IF EXISTS tmp_uids_to_check;
+CREATE TABLE tmp_uids_to_check (
+    uid TEXT PRIMARY KEY
+);
+
+-- Hits found in dataentryform.htmlcode: one row per (checked UID, form)
+DROP TABLE IF EXISTS tmp_dataentryform_hits;
+CREATE TABLE tmp_dataentryform_hits (
+    target_uid       TEXT NOT NULL,
+    -- BIGINT accepts both 32-bit and 64-bit dataentryform ids; NOT NULL is
+    -- already implied by the primary key and is spelled out for clarity.
+    dataentryformid  BIGINT NOT NULL,
+    form_uid         TEXT,
+    form_name        TEXT,
+    form_lastupdated TIMESTAMP,
+    PRIMARY KEY (target_uid, dataentryformid)
+);
+
diff --git a/categoryOptionCombo_checks/03-check_cocs_to_csv.sql b/categoryOptionCombo_checks/03-check_cocs_to_csv.sql
new file mode 100644
index 0000000..ef32e36
--- /dev/null
+++ b/categoryOptionCombo_checks/03-check_cocs_to_csv.sql
@@ -0,0 +1,279 @@
+/* =======================================================================
+   EXPORT ALL QUERIES TO CSV (PostgreSQL)
+   - Uses SERVER-SIDE COPY: files are written on the DB server, so the role
+     running this script needs superuser or pg_write_server_files rights.
+   - Files are written directly under /tmp. To use another server directory,
+     edit the TO '...' path of each COPY below.
+   - Every query starts from the "target" CTE: the categoryoptioncombo rows
+     whose uid is listed in tmp_uids_to_check.
+   ======================================================================= */
+
+-- 0) (Optional) if you point the COPY paths at a sub-folder, create it on the
+--    DB server beforehand, e.g.:
+--      mkdir -p /tmp/coc_checks
+
+
+/* 1) DataValue hits per COC (dv_count > 0) -> 01_datavalue_summary.csv
+      One row per target COC that has datavalue rows, with the row count and
+      the latest lastupdated / created timestamps. */
+COPY (
+    WITH target AS (
+        SELECT coc.categoryoptioncomboid, coc.uid, coc.name
+        FROM categoryoptioncombo coc
+        INNER JOIN tmp_uids_to_check t ON t.uid = coc.uid
+    )
+    SELECT
+        target.uid          AS coc_uid,
+        target.name         AS coc_name,
+        COUNT(*)            AS dv_count,
+        MAX(dv.lastupdated) AS dv_max_lastupdated,
+        MAX(dv.created)     AS dv_max_created
+    FROM target
+    -- Inner join on purpose: COCs without datavalue rows are left out.
+    INNER JOIN datavalue dv
+        ON dv.categoryoptioncomboid = target.categoryoptioncomboid
+    GROUP BY target.uid, target.name
+    ORDER BY dv_count DESC, coc_name, coc_uid
+) TO '/tmp/01_datavalue_summary.csv'
+CSV HEADER;
+
+
+/* 2) DataValueAudit hits per COC (dva_count > 0) -> 02_datavalueaudit_summary.csv
+      One row per target COC that has datavalueaudit rows. */
+COPY (
+    WITH target AS (
+        SELECT coc.categoryoptioncomboid, coc.uid, coc.name
+        FROM categoryoptioncombo coc
+        INNER JOIN tmp_uids_to_check t ON t.uid = coc.uid
+    )
+    SELECT
+        target.uid       AS coc_uid,
+        target.name      AS coc_name,
+        COUNT(*)         AS dva_count,
+        MAX(dva.created) AS dva_max_created
+    FROM target
+    -- Inner join on purpose: COCs without audit rows are left out.
+    INNER JOIN datavalueaudit dva
+        ON dva.categoryoptioncomboid = target.categoryoptioncomboid
+    GROUP BY target.uid, target.name
+    ORDER BY dva_count DESC, coc_name, coc_uid
+) TO '/tmp/02_datavalueaudit_summary.csv'
+CSV HEADER;
+
+
+/* 3) DataValue details -> 03_datavalue_details.csv
+      datavalue rows of the target COCs, counted per data element, org unit
+      and period. */
+COPY (
+    WITH target AS (
+        SELECT coc.categoryoptioncomboid, coc.uid, coc.name
+        FROM categoryoptioncombo coc
+        INNER JOIN tmp_uids_to_check t ON t.uid = coc.uid
+    )
+    SELECT
+        target.uid  AS coc_uid,
+        target.name AS coc_name,
+        de.uid      AS dataelement_uid,
+        de.name     AS dataelement_name,
+        ou.uid      AS orgunit_uid,
+        ou.name     AS orgunit_name,
+        ps.iso      AS period_iso,
+        COUNT(*)    AS dv_count
+    FROM datavalue dv
+    INNER JOIN target
+        ON target.categoryoptioncomboid = dv.categoryoptioncomboid
+    INNER JOIN dataelement de
+        ON de.dataelementid = dv.dataelementid
+    INNER JOIN organisationunit ou
+        ON ou.organisationunitid = dv.sourceid
+    -- LEFT JOIN: the period structure table is only used to render the ISO
+    -- code. A data value whose period is missing from it must still be
+    -- listed (with an empty period_iso) instead of silently disappearing.
+    LEFT JOIN analytics_rs_periodstructure ps
+        ON ps.periodid = dv.periodid
+    GROUP BY target.uid, target.name, de.uid, de.name, ou.uid, ou.name, ps.iso
+    ORDER BY target.name, dv_count DESC, de.name, ou.name, ps.iso
+) TO '/tmp/03_datavalue_details.csv'
+CSV HEADER;
+
+
+/* 4) DataElementOperand hits per COC (deo_count > 0) -> 04_deo_summary.csv
+      One row per target COC referenced by dataelementoperand rows. */
+COPY (
+    WITH target AS (
+        SELECT coc.categoryoptioncomboid, coc.uid, coc.name
+        FROM categoryoptioncombo coc
+        INNER JOIN tmp_uids_to_check t ON t.uid = coc.uid
+    )
+    SELECT
+        target.uid  AS coc_uid,
+        target.name AS coc_name,
+        COUNT(*)    AS deo_count
+    FROM target
+    -- Inner join on purpose: COCs without operand rows are left out.
+    INNER JOIN dataelementoperand deo
+        ON deo.categoryoptioncomboid = target.categoryoptioncomboid
+    GROUP BY target.uid, target.name
+    ORDER BY deo_count DESC, coc_name, coc_uid
+) TO '/tmp/04_deo_summary.csv'
+CSV HEADER;
+
+
+/* 5) DataElementOperand details -> 05_deo_details.csv
+      One row per dataelementoperand that references a target COC. */
+COPY (
+    WITH target AS (
+        SELECT coc.categoryoptioncomboid, coc.uid, coc.name
+        FROM categoryoptioncombo coc
+        INNER JOIN tmp_uids_to_check t ON t.uid = coc.uid
+    )
+    SELECT
+        target.uid  AS coc_uid,
+        target.name AS coc_name,
+        deo.dataelementoperandid,
+        de.uid      AS dataelement_uid,
+        de.name     AS dataelement_name
+    FROM dataelementoperand deo
+    INNER JOIN target
+        ON target.categoryoptioncomboid = deo.categoryoptioncomboid
+    -- LEFT JOIN keeps operands even if no dataelement row matches.
+    LEFT JOIN dataelement de
+        ON de.dataelementid = deo.dataelementid
+    ORDER BY target.name, deo.dataelementoperandid
+) TO '/tmp/05_deo_details.csv'
+CSV HEADER;
+
+
+/* 6) Event hits by AOC (event_aoc_count > 0) -> 06_event_aoc_summary.csv
+      One row per target COC used as attribute option combo by event rows. */
+COPY (
+    WITH target AS (
+        SELECT coc.categoryoptioncomboid, coc.uid, coc.name
+        FROM categoryoptioncombo coc
+        INNER JOIN tmp_uids_to_check t ON t.uid = coc.uid
+    )
+    SELECT
+        target.uid         AS coc_uid,
+        target.name        AS coc_name,
+        COUNT(*)           AS event_aoc_count,
+        MAX(e.lastupdated) AS event_max_lastupdated,
+        MAX(e.created)     AS event_max_created
+    FROM target
+    -- Inner join on purpose: COCs without events are left out.
+    INNER JOIN event e
+        ON e.attributeoptioncomboid = target.categoryoptioncomboid
+    GROUP BY target.uid, target.name
+    ORDER BY event_aoc_count DESC, coc_name, coc_uid
+) TO '/tmp/06_event_aoc_summary.csv'
+CSV HEADER;
+
+
+/* 7) Event details by program/stage -> 07_event_details.csv
+      Events of the target COCs (as AOC), counted per program and stage. */
+COPY (
+    WITH target AS (
+        SELECT coc.categoryoptioncomboid, coc.uid, coc.name
+        FROM categoryoptioncombo coc
+        INNER JOIN tmp_uids_to_check t ON t.uid = coc.uid
+    )
+    SELECT
+        target.uid  AS coc_uid,
+        target.name AS coc_name,
+        p.uid       AS program_uid,
+        p.name      AS program_name,
+        ps.uid      AS programstage_uid,
+        ps.name     AS programstage_name,
+        COUNT(*)    AS event_count
+    FROM event e
+    INNER JOIN target
+        ON target.categoryoptioncomboid = e.attributeoptioncomboid
+    INNER JOIN programstage ps
+        ON ps.programstageid = e.programstageid
+    INNER JOIN program p
+        ON p.programid = ps.programid
+    GROUP BY target.uid, target.name, p.uid, p.name, ps.uid, ps.name
+    ORDER BY target.name, event_count DESC, program_name, programstage_name
+) TO '/tmp/07_event_details.csv'
+CSV HEADER;
+
+
+/* 8) DataEntryForm hits -> 08_dataentryform_hits.csv
+      IMPORTANT: This exports what's already in tmp_dataentryform_hits.
+      If you need to (re)populate it, run your INSERT before this COPY.
+*/
+COPY (
+    SELECT
+        target_uid,
+        form_uid,
+        form_name,
+        form_lastupdated
+    FROM tmp_dataentryform_hits
+    ORDER BY target_uid, form_name
+) TO '/tmp/08_dataentryform_hits.csv'
+CSV HEADER;
+
+
+/* 9) Final summary -> 09_coc_final_summary.csv
+      One row per target COC with all usage counts and a status flag.
+      The status is the first matching rule, in this order:
+        dv_count > 0        -> IN_USE_DATAVALUE
+        deo_count > 0       -> IN_USE_DATAELEMENTOPERAND
+        event_aoc_count > 0 -> IN_USE_EVENT_AOC
+        dva_count > 0       -> IN_USE_DATAVALUEAUDIT
+        def_hits > 0        -> REFERENCED_IN_DATAENTRYFORM
+        otherwise           -> SAFE_CANDIDATE
+*/
+COPY (
+    WITH target AS (
+        SELECT coc.categoryoptioncomboid, coc.uid, coc.name
+        FROM categoryoptioncombo coc
+        INNER JOIN tmp_uids_to_check t ON t.uid = coc.uid
+    ),
+    -- Each count is computed once here; the status below reuses the columns
+    -- instead of running every sub-select a second time.
+    summary AS (
+        SELECT
+            target.uid  AS coc_uid,
+            target.name AS coc_name,
+            (SELECT COUNT(*) FROM datavalue dv
+              WHERE dv.categoryoptioncomboid = target.categoryoptioncomboid) AS dv_count,
+            (SELECT COUNT(*) FROM dataelementoperand deo
+              WHERE deo.categoryoptioncomboid = target.categoryoptioncomboid) AS deo_count,
+            (SELECT COUNT(*) FROM event e
+              WHERE e.attributeoptioncomboid = target.categoryoptioncomboid) AS event_aoc_count,
+            (SELECT COUNT(*) FROM datavalueaudit dva
+              WHERE dva.categoryoptioncomboid = target.categoryoptioncomboid) AS dva_count,
+            (SELECT COUNT(*) FROM tmp_dataentryform_hits h
+              WHERE h.target_uid = target.uid) AS def_hits
+        FROM target
+    )
+    SELECT
+        coc_uid,
+        coc_name,
+        dv_count,
+        deo_count,
+        event_aoc_count,
+        dva_count,
+        def_hits,
+        CASE
+            WHEN dv_count > 0        THEN 'IN_USE_DATAVALUE'
+            WHEN deo_count > 0       THEN 'IN_USE_DATAELEMENTOPERAND'
+            WHEN event_aoc_count > 0 THEN 'IN_USE_EVENT_AOC'
+            -- Audit rows still point at the COC, so a COC that only has
+            -- audit history must not be reported as SAFE_CANDIDATE.
+            WHEN dva_count > 0       THEN 'IN_USE_DATAVALUEAUDIT'
+            WHEN def_hits > 0        THEN 'REFERENCED_IN_DATAENTRYFORM'
+            ELSE 'SAFE_CANDIDATE'
+        END AS status
+    FROM summary
+    ORDER BY
+        dv_count DESC,
+        deo_count DESC,
+        event_aoc_count DESC,
+        dva_count DESC,
+        def_hits DESC,
+        coc_name,
+        coc_uid
+) TO '/tmp/09_coc_final_summary.csv'
+CSV HEADER;
+
diff --git a/categoryOptionCombo_checks/README.md b/categoryOptionCombo_checks/README.md new file mode 100644 index
0000000..f1d01c4 --- /dev/null +++ b/categoryOptionCombo_checks/README.md @@ -0,0 +1,66 @@ +# Category Option Combo Checks + +Scripts to assess whether a set of categoryOptionCombos (COCs) can be safely removed. They load a list of COC UIDs, check for usage across data values, audits, data element operands, events (as AOC), and data entry forms, and export CSV summaries. + +## Prerequisites +- Access to the target DHIS2 PostgreSQL database with permissions to create the staging tables (regular tables named `tmp_*`, not temporary ones) and to write server-side files via `COPY ... TO`. +- `jq` available where you run the helper command. +- A JSON file named `cocs_to_remove.json` with structure: + ```json + { + "categoryOptionCombos": [ + { "id": "UID1" }, + { "id": "UID2" } + ] + } + ``` + +## Workflow +1) Create staging tables + In `psql`, run: + ``` + \i categoryOptionCombo_checks/01-createtables.sql + ``` + +2) Generate insert script for the target UIDs + From the repo root, build `02_load_uids.sql` from your JSON: + ``` + jq -r '.categoryOptionCombos[].id | "INSERT INTO tmp_uids_to_check(uid) VALUES (\"" + . + "\") ON CONFLICT DO NOTHING;"' cocs_to_remove.json \ + | sed "s/\"/'/g" \ + > 02_load_uids.sql + ``` + +3) Load the target UIDs + In `psql`, run: + ``` + \i 02_load_uids.sql + ``` + +4) (Optional) Populate data entry form hits + If you need to detect references inside `dataentryform.htmlcode`, insert rows into `tmp_dataentryform_hits` before exporting. If you skip this step, `08_*` contains no rows and form references do not influence the final status. Example pattern: + ``` + INSERT INTO tmp_dataentryform_hits (target_uid, dataentryformid, form_uid, form_name, form_lastupdated) + SELECT t.uid, + def.dataentryformid, + def.uid, + def.name, + def.lastupdated + FROM tmp_uids_to_check t + JOIN dataentryform def ON def.htmlcode ILIKE '%' || t.uid || '%'; + ``` + +5) Export checks to CSV + In `psql`, run: + ``` + \i categoryOptionCombo_checks/03-check_cocs_to_csv.sql + ``` + - Writes CSVs to `/tmp` on the DB server (`01_datavalue_summary.csv` … `09_coc_final_summary.csv`). Adjust paths in the script if needed. 
+ - Uses `analytics_rs_periodstructure` to render period ISO codes in the data value details (`03_*`); adjust the table name in that query if your schema differs. + +6) Review results + - `01_*`, `02_*`, `03_*`, `04_*`, `05_*`, `06_*`, `07_*` give per-COC usage across data values, audits, DE operands, and events. + - `08_*` lists any data entry form hits you inserted. + - `09_coc_final_summary.csv` provides the usage counts per COC plus a status flag (the first rule that matches): + - `IN_USE_*` variants indicate blocking references. + - `REFERENCED_IN_DATAENTRYFORM` indicates form HTML references and no `IN_USE_*` match. + - `SAFE_CANDIDATE` means none of the status rules matched; still review the count columns (for example `dva_count`) before deleting anything.