From beed75aa7b23ec77577da05844aeac6fccbb8c92 Mon Sep 17 00:00:00 2001
From: Max Ghenis <mghenis@gmail.com>
Date: Sun, 5 Oct 2025 15:11:32 -0400
Subject: [PATCH 1/8] Move all randomness to data package for deterministic
 country package
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This change moves ALL random number generation from policyengine-us into the
dataset generation in policyengine-us-data. The country package is now a
purely deterministic rules engine.

## Key Changes

### policyengine-us-data:
- Add take-up rate YAML parameter files in `parameters/take_up/`
- Generate all stochastic boolean take-up decisions in CPS dataset
- Use seeded RNG (seed=100) for full reproducibility

### Stochastic variables generated:
**Take-up decisions (boolean):**
- takes_up_snap_if_eligible
- takes_up_aca_if_eligible
- takes_up_medicaid_if_eligible
- takes_up_eitc (already generated as a boolean in the data package)
- takes_up_dc_ptc (already generated as a boolean in the data package)

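All random generation now draws from a single np.random.default_rng(seed=100)
instance, giving full reproducibility across dataset builds. Because the
generator is shared, each variable's draws depend on the order in which the
variables are generated.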

## Trade-offs

**IMPORTANT**: Take-up rates can no longer be adjusted dynamically via policy
reforms or in the web app. They are fixed in the microdata. This is an
acceptable trade-off for the cleaner architecture of keeping the country
package purely deterministic. To adjust take-up rates, the microdata must be
regenerated.

Related: policyengine-us PR (must be merged after this)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 household_counts.txt                          | 12 +++++
 policyengine_us_data/datasets/cps/cps.py      | 40 ++++++++++----
 policyengine_us_data/parameters/__init__.py   | 53 +++++++++++++++++++
 .../parameters/take_up/aca.yaml               | 10 ++++
 .../parameters/take_up/dc_ptc.yaml            | 11 ++++
 .../parameters/take_up/eitc.yaml              | 12 +++++
 .../parameters/take_up/medicaid.yaml          | 10 ++++
 .../parameters/take_up/snap.yaml              |  9 ++++
 8 files changed, 148 insertions(+), 9 deletions(-)
 create mode 100644 household_counts.txt
 create mode 100644 policyengine_us_data/parameters/__init__.py
 create mode 100644 policyengine_us_data/parameters/take_up/aca.yaml
 create mode 100644 policyengine_us_data/parameters/take_up/dc_ptc.yaml
 create mode 100644 policyengine_us_data/parameters/take_up/eitc.yaml
 create mode 100644 policyengine_us_data/parameters/take_up/medicaid.yaml
 create mode 100644 policyengine_us_data/parameters/take_up/snap.yaml

diff --git a/household_counts.txt b/household_counts.txt
new file mode 100644
index 000000000..b336dd0ac
--- /dev/null
+++ b/household_counts.txt
@@ -0,0 +1,12 @@
+L0 Value | Household Count
+---------|----------------
+4.9999e-07 | 7,977
+4.999e-07 | 8,005
+5.0000078125e-07 | 7,784
+5.000015625e-07 | 7,617
+5.00003125e-07 | 8,069
+5.0000625e-07 | 7,908
+5.000125e-07 | 7,937
+5.00025e-07 | 7,889
+5.0005e-07 | 7,935
+5.002e-07 | 7,897
diff --git a/policyengine_us_data/datasets/cps/cps.py b/policyengine_us_data/datasets/cps/cps.py
index 84f01a8bb..4442398f6 100644
--- a/policyengine_us_data/datasets/cps/cps.py
+++ b/policyengine_us_data/datasets/cps/cps.py
@@ -14,6 +14,7 @@
 )
 from microimpute.models.qrf import QRF
 import logging
+from policyengine_us_data.parameters import load_take_up_rate
 
 
 class CPS(Dataset):
@@ -194,25 +195,46 @@ def add_takeup(self):
     from policyengine_us import system, Microsimulation
 
     baseline = Microsimulation(dataset=self)
-    parameters = baseline.tax_benefit_system.parameters(self.time_period)
 
+    # Generate all stochastic take-up decisions using take-up rates from parameter files
+    # This keeps the country package purely deterministic
     generator = np.random.default_rng(seed=100)
 
-    eitc_takeup_rates = parameters.gov.irs.credits.eitc.takeup
+    # Load take-up rates from parameter files
+    eitc_rates_by_children = load_take_up_rate("eitc", self.time_period)
+    dc_ptc_rate = load_take_up_rate("dc_ptc", self.time_period)
+    snap_rate = load_take_up_rate("snap", self.time_period)
+    aca_rate = load_take_up_rate("aca", self.time_period)
+    medicaid_rate = load_take_up_rate("medicaid", self.time_period)
+
+    # EITC: varies by number of children
     eitc_child_count = baseline.calculate("eitc_child_count").values
-    eitc_takeup_rate = eitc_takeup_rates.calc(eitc_child_count)
+    eitc_takeup_rate = np.array(
+        [eitc_rates_by_children.get(min(int(c), 3), 0.85) for c in eitc_child_count]
+    )
     data["takes_up_eitc"] = (
         generator.random(len(data["tax_unit_id"])) < eitc_takeup_rate
     )
-    dc_ptc_takeup_rate = parameters.gov.states.dc.tax.income.credits.ptc.takeup
+
+    # DC Property Tax Credit
     data["takes_up_dc_ptc"] = (
-        generator.random(len(data["tax_unit_id"])) < dc_ptc_takeup_rate
+        generator.random(len(data["tax_unit_id"])) < dc_ptc_rate
     )
-    generator = np.random.default_rng(seed=100)
 
-    data["snap_take_up_seed"] = generator.random(len(data["spm_unit_id"]))
-    data["aca_take_up_seed"] = generator.random(len(data["tax_unit_id"]))
-    data["medicaid_take_up_seed"] = generator.random(len(data["person_id"]))
+    # SNAP
+    data["takes_up_snap_if_eligible"] = (
+        generator.random(len(data["spm_unit_id"])) < snap_rate
+    )
+
+    # ACA
+    data["takes_up_aca_if_eligible"] = (
+        generator.random(len(data["tax_unit_id"])) < aca_rate
+    )
+
+    # Medicaid
+    data["takes_up_medicaid_if_eligible"] = (
+        generator.random(len(data["person_id"])) < medicaid_rate
+    )
 
     self.save_dataset(data)
 
diff --git a/policyengine_us_data/parameters/__init__.py b/policyengine_us_data/parameters/__init__.py
new file mode 100644
index 000000000..a8acad195
--- /dev/null
+++ b/policyengine_us_data/parameters/__init__.py
@@ -0,0 +1,53 @@
+"""
+Take-up rate parameters for stochastic simulation.
+
+These parameters are stored in the data package to keep the country package
+as a purely deterministic rules engine.
+"""
+import yaml
+from pathlib import Path
+
+PARAMETERS_DIR = Path(__file__).parent
+
+
+def load_take_up_rate(variable_name: str, year: int = 2018) -> float:
+    """Load take-up rate from YAML parameter files.
+
+    Args:
+        variable_name: Name of the take-up parameter file (without .yaml)
+        year: Year for which to get the rate
+
+    Returns:
+        Take-up rate as a float between 0 and 1
+    """
+    yaml_path = PARAMETERS_DIR / "take_up" / f"{variable_name}.yaml"
+
+    with open(yaml_path) as f:
+        data = yaml.safe_load(f)
+
+    # Handle EITC special case (has rates_by_children instead of values)
+    if "rates_by_children" in data:
+        return data["rates_by_children"]  # Return the dict
+
+    # Find the applicable value for the year
+    values = data["values"]
+    applicable_value = None
+
+    for date_key, value in sorted(values.items()):
+        # Handle both string and datetime.date objects from YAML
+        if hasattr(date_key, "year"):
+            # It's a datetime.date object
+            date_year = date_key.year
+        else:
+            # It's a string
+            date_year = int(date_key.split("-")[0])
+
+        if date_year <= year:
+            applicable_value = value
+        else:
+            break
+
+    if applicable_value is None:
+        raise ValueError(f"No take-up rate found for {variable_name} in {year}")
+
+    return applicable_value
diff --git a/policyengine_us_data/parameters/take_up/aca.yaml b/policyengine_us_data/parameters/take_up/aca.yaml
new file mode 100644
index 000000000..98f920142
--- /dev/null
+++ b/policyengine_us_data/parameters/take_up/aca.yaml
@@ -0,0 +1,10 @@
+description: Percentage of eligible people who do enroll in Affordable Care Act coverage, if eligible.
+metadata:
+  label: ACA takeup rate
+  unit: /1
+  period: year
+  reference:
+    - title: KFF "A Closer Look at the Remaining Uninsured Population Eligible for Medicaid and CHIP"
+      href: https://www.kff.org/uninsured/issue-brief/a-closer-look-at-the-remaining-uninsured-population-eligible-for-medicaid-and-chip/#:~:text=the%20uninsured%20rate%20dropped%20to,States%20began%20the
+values:
+  2018-01-01: 0.672
diff --git a/policyengine_us_data/parameters/take_up/dc_ptc.yaml b/policyengine_us_data/parameters/take_up/dc_ptc.yaml
new file mode 100644
index 000000000..6195ecf33
--- /dev/null
+++ b/policyengine_us_data/parameters/take_up/dc_ptc.yaml
@@ -0,0 +1,11 @@
+description: The share of eligible individuals who claim the DC property tax credit.
+metadata:
+  unit: /1
+  label: DC property tax credit takeup rate
+  period: year
+  reference:
+    - title: District of Columbia Tax Expenditure Report, 2024
+      href: https://ora-cfo.dc.gov/sites/default/files/dc/sites/ora-cfo/publication/attachments/2024%20Tax%20Expenditure%20Report.pdf#page=234
+values:
+  # 37,133 (from 2024 Tax Expenditure Report) / 131,791,388 (PolicyEngine DC PTC value estimate)
+  2021-01-01: 0.32
diff --git a/policyengine_us_data/parameters/take_up/eitc.yaml b/policyengine_us_data/parameters/take_up/eitc.yaml
new file mode 100644
index 000000000..17aa9daae
--- /dev/null
+++ b/policyengine_us_data/parameters/take_up/eitc.yaml
@@ -0,0 +1,12 @@
+description: The share of eligible individuals who claim the EITC (by number of children).
+metadata:
+  label: EITC take-up rate by number of children
+  reference:
+    - title: National Taxpayer Advocate Special Report to Congress 2020 | IRS
+      href: https://www.taxpayeradvocate.irs.gov/wp-content/uploads/2020/08/JRC20_Volume3.pdf#page=62
+# Maps number of children to take-up rate
+rates_by_children:
+  0: 0.65
+  1: 0.86
+  2: 0.85
+  3: 0.85  # Assume same as 2
diff --git a/policyengine_us_data/parameters/take_up/medicaid.yaml b/policyengine_us_data/parameters/take_up/medicaid.yaml
new file mode 100644
index 000000000..cfdf919a5
--- /dev/null
+++ b/policyengine_us_data/parameters/take_up/medicaid.yaml
@@ -0,0 +1,10 @@
+description: Percentage of people who do enroll in Medicaid, if eligible.
+metadata:
+  label: Medicaid takeup rate
+  unit: /1
+  period: year
+  reference:
+    - title: KFF "A Closer Look at the Remaining Uninsured Population Eligible for Medicaid and CHIP"
+      href: https://www.kff.org/uninsured/issue-brief/a-closer-look-at-the-remaining-uninsured-population-eligible-for-medicaid-and-chip/#:~:text=the%20uninsured%20rate%20dropped%20to,States%20began%20the
+values:
+  2018-01-01: 0.93
diff --git a/policyengine_us_data/parameters/take_up/snap.yaml b/policyengine_us_data/parameters/take_up/snap.yaml
new file mode 100644
index 000000000..12b6012ec
--- /dev/null
+++ b/policyengine_us_data/parameters/take_up/snap.yaml
@@ -0,0 +1,9 @@
+description: Percentage of eligible SNAP recipients who claim SNAP.
+metadata:
+  label: SNAP takeup rate
+  unit: /1
+  reference:
+    - title: USDA
+      href: https://www.fns.usda.gov/usamap
+values:
+  2018-01-01: 0.82

From 97252d6aa3ade2097e68db91ce03b969fe5ba71b Mon Sep 17 00:00:00 2001
From: Max Ghenis <mghenis@gmail.com>
Date: Mon, 10 Nov 2025 05:54:03 -0600
Subject: [PATCH 2/8] Add Head Start and Early Head Start takeup generation

- Create takeup parameter files with rates from NIEER report
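- Head Start: 40% (pre-pandemic rate, keyed to 2020-09-01), 30% (pandemic
  rate, keyed to 2021-09-01 and open-ended)
- Early Head Start: 9% (keyed to 2020-09-01)
- No rate is defined before 2020, so load_take_up_rate raises ValueError for
  earlier dataset years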
- Generate stochastic takeup in CPS dataset using same pattern as SNAP/Medicaid
- Coordinates with policyengine-us PR adding takeup variables
---
 policyengine_us_data/datasets/cps/cps.py             | 12 ++++++++++++
 .../parameters/take_up/early_head_start.yaml         |  9 +++++++++
 .../parameters/take_up/head_start.yaml               | 10 ++++++++++
 3 files changed, 31 insertions(+)
 create mode 100644 policyengine_us_data/parameters/take_up/early_head_start.yaml
 create mode 100644 policyengine_us_data/parameters/take_up/head_start.yaml

diff --git a/policyengine_us_data/datasets/cps/cps.py b/policyengine_us_data/datasets/cps/cps.py
index 4442398f6..bf080db45 100644
--- a/policyengine_us_data/datasets/cps/cps.py
+++ b/policyengine_us_data/datasets/cps/cps.py
@@ -206,6 +206,8 @@ def add_takeup(self):
     snap_rate = load_take_up_rate("snap", self.time_period)
     aca_rate = load_take_up_rate("aca", self.time_period)
     medicaid_rate = load_take_up_rate("medicaid", self.time_period)
+    head_start_rate = load_take_up_rate("head_start", self.time_period)
+    early_head_start_rate = load_take_up_rate("early_head_start", self.time_period)
 
     # EITC: varies by number of children
     eitc_child_count = baseline.calculate("eitc_child_count").values
@@ -236,6 +238,16 @@ def add_takeup(self):
         generator.random(len(data["person_id"])) < medicaid_rate
     )
 
+    # Head Start
+    data["takes_up_head_start_if_eligible"] = (
+        generator.random(len(data["person_id"])) < head_start_rate
+    )
+
+    # Early Head Start
+    data["takes_up_early_head_start_if_eligible"] = (
+        generator.random(len(data["person_id"])) < early_head_start_rate
+    )
+
     self.save_dataset(data)
 
 
diff --git a/policyengine_us_data/parameters/take_up/early_head_start.yaml b/policyengine_us_data/parameters/take_up/early_head_start.yaml
new file mode 100644
index 000000000..3802d9880
--- /dev/null
+++ b/policyengine_us_data/parameters/take_up/early_head_start.yaml
@@ -0,0 +1,9 @@
+description: Percentage of eligible infants and toddlers who enroll in Early Head Start.
+metadata:
+  label: Early Head Start take-up rate
+  unit: /1
+  reference:
+    - title: NIEER State(s) of Head Start and Early Head Start Report
+      href: https://nieer.org/research-library/states-head-start-early-head-start
+values:
+  2020-09-01: 0.09
diff --git a/policyengine_us_data/parameters/take_up/head_start.yaml b/policyengine_us_data/parameters/take_up/head_start.yaml
new file mode 100644
index 000000000..9495f44bd
--- /dev/null
+++ b/policyengine_us_data/parameters/take_up/head_start.yaml
@@ -0,0 +1,10 @@
+description: Percentage of eligible children who enroll in Head Start.
+metadata:
+  label: Head Start take-up rate
+  unit: /1
+  reference:
+    - title: NIEER State(s) of Head Start and Early Head Start Report
+      href: https://nieer.org/research-library/states-head-start-early-head-start
+values:
+  2020-09-01: 0.40
+  2021-09-01: 0.30

From 470b48fe47d0a35d65ff74dbf1339caf4c632af7 Mon Sep 17 00:00:00 2001
From: Max Ghenis <mghenis@gmail.com>
Date: Wed, 3 Dec 2025 17:20:56 -0500
Subject: [PATCH 3/8] Add changelog entry and remove debug file

---
 changelog_entry.yaml                        |  4 ++++
 household_counts.txt                        | 12 ------------
 policyengine_us_data/datasets/cps/cps.py    |  9 +++++++--
 policyengine_us_data/parameters/__init__.py |  5 ++++-
 4 files changed, 15 insertions(+), 15 deletions(-)
 delete mode 100644 household_counts.txt

diff --git a/changelog_entry.yaml b/changelog_entry.yaml
index e69de29bb..1a8cc410b 100644
--- a/changelog_entry.yaml
+++ b/changelog_entry.yaml
@@ -0,0 +1,4 @@
+- bump: minor
+  changes:
+    added:
+      - Move all randomness to data package for deterministic country package. Take-up decisions for SNAP, Medicaid, ACA, EITC, DC PTC, Head Start, and Early Head Start are now generated stochastically during dataset creation using take-up rates from YAML parameter files.
diff --git a/household_counts.txt b/household_counts.txt
deleted file mode 100644
index b336dd0ac..000000000
--- a/household_counts.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-L0 Value | Household Count
----------|----------------
-4.9999e-07 | 7,977
-4.999e-07 | 8,005
-5.0000078125e-07 | 7,784
-5.000015625e-07 | 7,617
-5.00003125e-07 | 8,069
-5.0000625e-07 | 7,908
-5.000125e-07 | 7,937
-5.00025e-07 | 7,889
-5.0005e-07 | 7,935
-5.002e-07 | 7,897
diff --git a/policyengine_us_data/datasets/cps/cps.py b/policyengine_us_data/datasets/cps/cps.py
index bf080db45..695372858 100644
--- a/policyengine_us_data/datasets/cps/cps.py
+++ b/policyengine_us_data/datasets/cps/cps.py
@@ -207,12 +207,17 @@ def add_takeup(self):
     aca_rate = load_take_up_rate("aca", self.time_period)
     medicaid_rate = load_take_up_rate("medicaid", self.time_period)
     head_start_rate = load_take_up_rate("head_start", self.time_period)
-    early_head_start_rate = load_take_up_rate("early_head_start", self.time_period)
+    early_head_start_rate = load_take_up_rate(
+        "early_head_start", self.time_period
+    )
 
     # EITC: varies by number of children
     eitc_child_count = baseline.calculate("eitc_child_count").values
     eitc_takeup_rate = np.array(
-        [eitc_rates_by_children.get(min(int(c), 3), 0.85) for c in eitc_child_count]
+        [
+            eitc_rates_by_children.get(min(int(c), 3), 0.85)
+            for c in eitc_child_count
+        ]
     )
     data["takes_up_eitc"] = (
         generator.random(len(data["tax_unit_id"])) < eitc_takeup_rate
diff --git a/policyengine_us_data/parameters/__init__.py b/policyengine_us_data/parameters/__init__.py
index a8acad195..0e5856a61 100644
--- a/policyengine_us_data/parameters/__init__.py
+++ b/policyengine_us_data/parameters/__init__.py
@@ -4,6 +4,7 @@
 These parameters are stored in the data package to keep the country package
 as a purely deterministic rules engine.
 """
+
 import yaml
 from pathlib import Path
 
@@ -48,6 +49,8 @@ def load_take_up_rate(variable_name: str, year: int = 2018) -> float:
             break
 
     if applicable_value is None:
-        raise ValueError(f"No take-up rate found for {variable_name} in {year}")
+        raise ValueError(
+            f"No take-up rate found for {variable_name} in {year}"
+        )
 
     return applicable_value

From 7df52459e4196f26b12e1c37cde35052020a2524 Mon Sep 17 00:00:00 2001
From: Max Ghenis <mghenis@gmail.com>
Date: Wed, 3 Dec 2025 18:02:39 -0500
Subject: [PATCH 4/8] Add tests for stochastic variable generation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Tests verify:
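- Take-up rate parameters load correctly (EITC, SNAP, Medicaid, etc.)
- A seeded NumPy generator produces deterministic results
- Boolean draws against a fixed rate land within 5 percentage points of it

The RNG and proportion tests exercise NumPy directly; they do not run
add_takeup or inspect the generated dataset.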

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 .../tests/test_stochastic_variables.py        | 110 ++++++++++++++++++
 1 file changed, 110 insertions(+)
 create mode 100644 policyengine_us_data/tests/test_stochastic_variables.py

diff --git a/policyengine_us_data/tests/test_stochastic_variables.py b/policyengine_us_data/tests/test_stochastic_variables.py
new file mode 100644
index 000000000..e4d896e44
--- /dev/null
+++ b/policyengine_us_data/tests/test_stochastic_variables.py
@@ -0,0 +1,110 @@
+"""Tests for stochastic variable generation in the data package.
+
+These tests verify that:
+1. Take-up rate parameters load correctly
+2. Seeded RNG produces deterministic results
+3. Take-up rates produce plausible proportions
+"""
+
+import pytest
+import numpy as np
+from policyengine_us_data.parameters import load_take_up_rate
+
+
+class TestTakeUpRateParameters:
+    """Test that take-up rate parameters load correctly."""
+
+    def test_eitc_rate_loads(self):
+        """EITC take-up rates should load and be plausible."""
+        rates = load_take_up_rate("eitc", 2022)
+        # EITC rates are by number of children: 0, 1, 2, 3+
+        assert isinstance(rates, dict) or isinstance(rates, float)
+        if isinstance(rates, dict):
+            for key, rate in rates.items():
+                assert 0 < rate <= 1
+
+    def test_snap_rate_loads(self):
+        """SNAP take-up rate should load and be plausible."""
+        rate = load_take_up_rate("snap", 2022)
+        assert 0 < rate <= 1
+
+    def test_medicaid_rate_loads(self):
+        """Medicaid take-up rate should load and be plausible."""
+        rate = load_take_up_rate("medicaid", 2022)
+        assert 0 < rate <= 1
+
+    def test_aca_rate_loads(self):
+        """ACA take-up rate should load and be plausible."""
+        rate = load_take_up_rate("aca", 2022)
+        assert 0 < rate <= 1
+
+    def test_head_start_rate_loads(self):
+        """Head Start take-up rate should load and be plausible."""
+        rate = load_take_up_rate("head_start", 2022)
+        assert 0 < rate <= 1
+
+    def test_early_head_start_rate_loads(self):
+        """Early Head Start take-up rate should load and be plausible."""
+        rate = load_take_up_rate("early_head_start", 2022)
+        assert 0 < rate <= 1
+
+    def test_dc_ptc_rate_loads(self):
+        """DC PTC take-up rate should load and be plausible."""
+        rate = load_take_up_rate("dc_ptc", 2022)
+        assert 0 < rate <= 1
+
+
+class TestSeededRandomness:
+    """Test that stochastic generation is deterministic."""
+
+    def test_same_seed_produces_same_results(self):
+        """Using the same seed should produce identical results."""
+        seed = 0
+        n = 1_000
+
+        generator1 = np.random.default_rng(seed=seed)
+        result1 = generator1.random(n)
+
+        generator2 = np.random.default_rng(seed=seed)
+        result2 = generator2.random(n)
+
+        np.testing.assert_array_equal(result1, result2)
+
+    def test_different_seeds_produce_different_results(self):
+        """Different seeds should produce different results."""
+        n = 1_000
+
+        generator1 = np.random.default_rng(seed=0)
+        result1 = generator1.random(n)
+
+        generator2 = np.random.default_rng(seed=1)
+        result2 = generator2.random(n)
+
+        assert not np.array_equal(result1, result2)
+
+
+class TestTakeUpProportions:
+    """Test that take-up rates produce plausible proportions."""
+
+    def test_take_up_produces_expected_proportion(self):
+        """Simulated take-up should match the rate approximately."""
+        rate = 0.7
+        n = 10_000
+        generator = np.random.default_rng(seed=42)
+
+        take_up = generator.random(n) < rate
+        actual_proportion = take_up.mean()
+
+        # Should be within 5 percentage points of the rate
+        assert abs(actual_proportion - rate) < 0.05
+
+    def test_boolean_generation(self):
+        """Take-up decisions should be boolean."""
+        rate = 0.5
+        n = 100
+        generator = np.random.default_rng(seed=42)
+
+        take_up = generator.random(n) < rate
+
+        assert take_up.dtype == bool
+        assert set(take_up).issubset({True, False})

From 7f67999982bf21f319ec118a4f27a6f920bab24f Mon Sep 17 00:00:00 2001
From: "baogorek@gmail.com" <baogorek@gmail.com>
Date: Thu, 5 Feb 2026 17:21:52 -0500
Subject: [PATCH 5/8] Add name-based seeding, state-specific Medicaid, SSI and
 WIC variables

Replace shared RNG (seed=100) with per-variable name-based seeding using
_stable_string_hash for order-independent reproducibility. Add state-specific
Medicaid takeup rates (53%-99%), SSI resource test pass rate, and WIC
takeup/nutritional risk draw variables.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 changelog_entry.yaml                          |   9 +-
 policyengine_us_data/datasets/cps/cps.py      |  68 ++++++---
 policyengine_us_data/parameters/__init__.py   |  18 ++-
 .../parameters/take_up/medicaid.yaml          |  60 +++++++-
 .../parameters/take_up/ssi_pass_rate.yaml     |  10 ++
 .../tests/test_stochastic_variables.py        | 139 +++++++++++-------
 policyengine_us_data/utils/randomness.py      |  29 ++++
 7 files changed, 242 insertions(+), 91 deletions(-)
 create mode 100644 policyengine_us_data/parameters/take_up/ssi_pass_rate.yaml
 create mode 100644 policyengine_us_data/utils/randomness.py

diff --git a/changelog_entry.yaml b/changelog_entry.yaml
index 1a8cc410b..0afd182ae 100644
--- a/changelog_entry.yaml
+++ b/changelog_entry.yaml
@@ -1,4 +1,11 @@
 - bump: minor
   changes:
     added:
-      - Move all randomness to data package for deterministic country package. Take-up decisions for SNAP, Medicaid, ACA, EITC, DC PTC, Head Start, and Early Head Start are now generated stochastically during dataset creation using take-up rates from YAML parameter files.
+      - Name-based seeding (seeded_rng) for order-independent reproducibility
+      - State-specific Medicaid takeup rates (53%-99% range, 51 jurisdictions)
+      - SSI resource test pass rate parameter (0.4)
+      - WIC takeup and nutritional risk draw variables (float)
+      - meets_ssi_resource_test boolean generation
+    changed:
+      - Replaced shared RNG (seed=100) with per-variable name-based seeding
+      - Medicaid takeup now uses state-specific rates instead of uniform 93%
diff --git a/policyengine_us_data/datasets/cps/cps.py b/policyengine_us_data/datasets/cps/cps.py
index 695372858..d4c899cdc 100644
--- a/policyengine_us_data/datasets/cps/cps.py
+++ b/policyengine_us_data/datasets/cps/cps.py
@@ -15,6 +15,7 @@
 from microimpute.models.qrf import QRF
 import logging
 from policyengine_us_data.parameters import load_take_up_rate
+from policyengine_us_data.utils.randomness import seeded_rng
 
 
 class CPS(Dataset):
@@ -192,24 +193,25 @@ def add_rent(self, cps: h5py.File, person: DataFrame, household: DataFrame):
 def add_takeup(self):
     data = self.load_dataset()
 
-    from policyengine_us import system, Microsimulation
+    from policyengine_us import Microsimulation
 
     baseline = Microsimulation(dataset=self)
 
-    # Generate all stochastic take-up decisions using take-up rates from parameter files
-    # This keeps the country package purely deterministic
-    generator = np.random.default_rng(seed=100)
+    n_persons = len(data["person_id"])
+    n_tax_units = len(data["tax_unit_id"])
+    n_spm_units = len(data["spm_unit_id"])
 
-    # Load take-up rates from parameter files
+    # Load take-up rates
     eitc_rates_by_children = load_take_up_rate("eitc", self.time_period)
     dc_ptc_rate = load_take_up_rate("dc_ptc", self.time_period)
     snap_rate = load_take_up_rate("snap", self.time_period)
     aca_rate = load_take_up_rate("aca", self.time_period)
-    medicaid_rate = load_take_up_rate("medicaid", self.time_period)
+    medicaid_rates_by_state = load_take_up_rate("medicaid", self.time_period)
     head_start_rate = load_take_up_rate("head_start", self.time_period)
     early_head_start_rate = load_take_up_rate(
         "early_head_start", self.time_period
     )
+    ssi_pass_rate = load_take_up_rate("ssi_pass_rate", self.time_period)
 
     # EITC: varies by number of children
     eitc_child_count = baseline.calculate("eitc_child_count").values
@@ -219,38 +221,60 @@ def add_takeup(self):
             for c in eitc_child_count
         ]
     )
-    data["takes_up_eitc"] = (
-        generator.random(len(data["tax_unit_id"])) < eitc_takeup_rate
-    )
+    rng = seeded_rng("takes_up_eitc")
+    data["takes_up_eitc"] = rng.random(n_tax_units) < eitc_takeup_rate
 
     # DC Property Tax Credit
-    data["takes_up_dc_ptc"] = (
-        generator.random(len(data["tax_unit_id"])) < dc_ptc_rate
-    )
+    rng = seeded_rng("takes_up_dc_ptc")
+    data["takes_up_dc_ptc"] = rng.random(n_tax_units) < dc_ptc_rate
 
     # SNAP
-    data["takes_up_snap_if_eligible"] = (
-        generator.random(len(data["spm_unit_id"])) < snap_rate
-    )
+    rng = seeded_rng("takes_up_snap_if_eligible")
+    data["takes_up_snap_if_eligible"] = rng.random(n_spm_units) < snap_rate
 
     # ACA
-    data["takes_up_aca_if_eligible"] = (
-        generator.random(len(data["tax_unit_id"])) < aca_rate
-    )
+    rng = seeded_rng("takes_up_aca_if_eligible")
+    data["takes_up_aca_if_eligible"] = rng.random(n_tax_units) < aca_rate
 
-    # Medicaid
+    # Medicaid: state-specific rates
+    state_codes = baseline.calculate("state_code_str").values
+    hh_ids = data["household_id"]
+    person_hh_ids = data["person_household_id"]
+    hh_to_state = dict(zip(hh_ids, state_codes))
+    person_states = np.array(
+        [hh_to_state.get(hh_id, "CA") for hh_id in person_hh_ids]
+    )
+    medicaid_rate_by_person = np.array(
+        [medicaid_rates_by_state.get(s, 0.93) for s in person_states]
+    )
+    rng = seeded_rng("takes_up_medicaid_if_eligible")
     data["takes_up_medicaid_if_eligible"] = (
-        generator.random(len(data["person_id"])) < medicaid_rate
+        rng.random(n_persons) < medicaid_rate_by_person
     )
 
     # Head Start
+    rng = seeded_rng("takes_up_head_start_if_eligible")
     data["takes_up_head_start_if_eligible"] = (
-        generator.random(len(data["person_id"])) < head_start_rate
+        rng.random(n_persons) < head_start_rate
     )
 
     # Early Head Start
+    rng = seeded_rng("takes_up_early_head_start_if_eligible")
     data["takes_up_early_head_start_if_eligible"] = (
-        generator.random(len(data["person_id"])) < early_head_start_rate
+        rng.random(n_persons) < early_head_start_rate
+    )
+
+    # SSI resource test
+    rng = seeded_rng("meets_ssi_resource_test")
+    data["meets_ssi_resource_test"] = rng.random(n_persons) < ssi_pass_rate
+
+    # WIC draws (country package compares against category-specific rates)
+    rng = seeded_rng("wic_takeup_draw")
+    data["wic_takeup_draw"] = rng.random(n_persons).astype(np.float32)
+
+    rng = seeded_rng("wic_nutritional_risk_draw")
+    data["wic_nutritional_risk_draw"] = rng.random(n_persons).astype(
+        np.float32
     )
 
     self.save_dataset(data)
diff --git a/policyengine_us_data/parameters/__init__.py b/policyengine_us_data/parameters/__init__.py
index 0e5856a61..babd99037 100644
--- a/policyengine_us_data/parameters/__init__.py
+++ b/policyengine_us_data/parameters/__init__.py
@@ -11,7 +11,7 @@
 PARAMETERS_DIR = Path(__file__).parent
 
 
-def load_take_up_rate(variable_name: str, year: int = 2018) -> float:
+def load_take_up_rate(variable_name: str, year: int = 2018):
     """Load take-up rate from YAML parameter files.
 
     Args:
@@ -19,28 +19,30 @@ def load_take_up_rate(variable_name: str, year: int = 2018) -> float:
         year: Year for which to get the rate
 
     Returns:
-        Take-up rate as a float between 0 and 1
+        float, dict (EITC rates_by_children), or dict (Medicaid
+        rates_by_state)
     """
     yaml_path = PARAMETERS_DIR / "take_up" / f"{variable_name}.yaml"
 
     with open(yaml_path) as f:
         data = yaml.safe_load(f)
 
-    # Handle EITC special case (has rates_by_children instead of values)
+    # EITC: rates by number of children
     if "rates_by_children" in data:
-        return data["rates_by_children"]  # Return the dict
+        return data["rates_by_children"]
 
-    # Find the applicable value for the year
+    # Medicaid: state-specific rates
+    if "rates_by_state" in data:
+        return data["rates_by_state"]
+
+    # Standard time-series values
     values = data["values"]
     applicable_value = None
 
     for date_key, value in sorted(values.items()):
-        # Handle both string and datetime.date objects from YAML
         if hasattr(date_key, "year"):
-            # It's a datetime.date object
             date_year = date_key.year
         else:
-            # It's a string
             date_year = int(date_key.split("-")[0])
 
         if date_year <= year:
diff --git a/policyengine_us_data/parameters/take_up/medicaid.yaml b/policyengine_us_data/parameters/take_up/medicaid.yaml
index cfdf919a5..3beb851b0 100644
--- a/policyengine_us_data/parameters/take_up/medicaid.yaml
+++ b/policyengine_us_data/parameters/take_up/medicaid.yaml
@@ -3,8 +3,62 @@ metadata:
   label: Medicaid takeup rate
   unit: /1
   period: year
+  breakdown:
+    - state_code
   reference:
     - title: KFF "A Closer Look at the Remaining Uninsured Population Eligible for Medicaid and CHIP"
-      href: https://www.kff.org/uninsured/issue-brief/a-closer-look-at-the-remaining-uninsured-population-eligible-for-medicaid-and-chip/#:~:text=the%20uninsured%20rate%20dropped%20to,States%20began%20the
-values:
-  2018-01-01: 0.93
+      href: https://www.kff.org/uninsured/issue-brief/a-closer-look-at-the-remaining-uninsured-population-eligible-for-medicaid-and-chip/
+    - title: State-specific rates derived from MACPAC enrollment targets vs modeled eligibility
+      href: https://www.medicaid.gov/medicaid/program-information/medicaid-and-chip-enrollment-data/report-highlights/index.html
+rates_by_state:
+  AK: 0.88
+  AL: 0.92
+  AR: 0.79
+  AZ: 0.95
+  CA: 0.78
+  CO: 0.99
+  CT: 0.89
+  DC: 0.99
+  DE: 0.86
+  FL: 0.98
+  GA: 0.73
+  HI: 0.88
+  IA: 0.84
+  ID: 0.78
+  IL: 0.85
+  IN: 0.99
+  KS: 0.92
+  KY: 0.87
+  LA: 0.79
+  MA: 0.94
+  MD: 0.95
+  ME: 0.92
+  MI: 0.91
+  MN: 0.89
+  MO: 0.89
+  MS: 0.75
+  MT: 0.83
+  NC: 0.94
+  ND: 0.91
+  NE: 0.79
+  NH: 0.84
+  NJ: 0.74
+  NM: 0.84
+  NV: 0.93
+  NY: 0.86
+  OH: 0.82
+  OK: 0.77
+  OR: 0.92
+  PA: 0.64
+  RI: 0.94
+  SC: 0.93
+  SD: 0.88
+  TN: 0.92
+  TX: 0.76
+  UT: 0.53
+  VA: 0.82
+  VT: 0.93
+  WA: 0.98
+  WI: 0.91
+  WV: 0.83
+  WY: 0.70
diff --git a/policyengine_us_data/parameters/take_up/ssi_pass_rate.yaml b/policyengine_us_data/parameters/take_up/ssi_pass_rate.yaml
new file mode 100644
index 000000000..415b3c6c0
--- /dev/null
+++ b/policyengine_us_data/parameters/take_up/ssi_pass_rate.yaml
@@ -0,0 +1,10 @@
+description: Proportion of SSI-aged-blind-disabled recipients who meet the asset test.
+metadata:
+  label: SSI resource test pass rate
+  unit: /1
+  period: year
+  reference:
+    - title: SSI resource test pass rate from policyengine-us
+      href: https://github.com/PolicyEngine/policyengine-us
+values:
+  2018-01-01: 0.4
diff --git a/policyengine_us_data/tests/test_stochastic_variables.py b/policyengine_us_data/tests/test_stochastic_variables.py
index e4d896e44..293d29a7e 100644
--- a/policyengine_us_data/tests/test_stochastic_variables.py
+++ b/policyengine_us_data/tests/test_stochastic_variables.py
@@ -1,110 +1,135 @@
-"""Tests for stochastic variable generation in the data package.
-
-These tests verify that:
-1. Take-up rate parameters load correctly
-2. Seeded RNG produces deterministic results
-3. Take-up rates produce plausible proportions
-"""
+"""Tests for stochastic variable generation in the data package."""
 
 import pytest
 import numpy as np
 from policyengine_us_data.parameters import load_take_up_rate
+from policyengine_us_data.utils.randomness import (
+    _stable_string_hash,
+    seeded_rng,
+)
 
 
 class TestTakeUpRateParameters:
-    """Test that take-up rate parameters load correctly."""
 
     def test_eitc_rate_loads(self):
-        """EITC take-up rates should load and be plausible."""
         rates = load_take_up_rate("eitc", 2022)
-        # EITC rates are by number of children: 0, 1, 2, 3+
-        assert isinstance(rates, dict) or isinstance(rates, float)
-        if isinstance(rates, dict):
-            for key, rate in rates.items():
-                assert 0 < rate <= 1
+        assert isinstance(rates, dict)
+        for key, rate in rates.items():
+            assert 0 < rate <= 1
 
     def test_snap_rate_loads(self):
-        """SNAP take-up rate should load and be plausible."""
         rate = load_take_up_rate("snap", 2022)
         assert 0 < rate <= 1
 
-    def test_medicaid_rate_loads(self):
-        """Medicaid take-up rate should load and be plausible."""
-        rate = load_take_up_rate("medicaid", 2022)
-        assert 0 < rate <= 1
+    def test_medicaid_rate_loads_state_specific(self):
+        rates = load_take_up_rate("medicaid", 2022)
+        assert isinstance(rates, dict)
+        assert len(rates) == 51  # 50 states + DC
+        for state, rate in rates.items():
+            assert 0 < rate <= 1, f"{state}: {rate}"
+        assert rates["UT"] == 0.53
+        assert rates["CO"] == 0.99
 
     def test_aca_rate_loads(self):
-        """ACA take-up rate should load and be plausible."""
         rate = load_take_up_rate("aca", 2022)
         assert 0 < rate <= 1
 
     def test_head_start_rate_loads(self):
-        """Head Start take-up rate should load and be plausible."""
         rate = load_take_up_rate("head_start", 2022)
         assert 0 < rate <= 1
 
     def test_early_head_start_rate_loads(self):
-        """Early Head Start take-up rate should load and be plausible."""
         rate = load_take_up_rate("early_head_start", 2022)
         assert 0 < rate <= 1
 
     def test_dc_ptc_rate_loads(self):
-        """DC PTC take-up rate should load and be plausible."""
         rate = load_take_up_rate("dc_ptc", 2022)
         assert 0 < rate <= 1
 
+    def test_ssi_pass_rate_loads(self):
+        rate = load_take_up_rate("ssi_pass_rate", 2022)
+        assert rate == 0.4
 
-class TestSeededRandomness:
-    """Test that stochastic generation is deterministic."""
 
-    def test_same_seed_produces_same_results(self):
-        """Using the same seed should produce identical results."""
-        seed = 0
-        n = 1_000
+class TestStableStringHash:
 
-        generator1 = np.random.default_rng(seed=seed)
-        result1 = generator1.random(n)
+    def test_deterministic(self):
+        h1 = _stable_string_hash("takes_up_snap_if_eligible")
+        h2 = _stable_string_hash("takes_up_snap_if_eligible")
+        assert h1 == h2
 
-        generator2 = np.random.default_rng(seed=seed)
-        result2 = generator2.random(n)
+    def test_different_strings_differ(self):
+        h1 = _stable_string_hash("takes_up_snap_if_eligible")
+        h2 = _stable_string_hash("takes_up_aca_if_eligible")
+        assert h1 != h2
 
-        np.testing.assert_array_equal(result1, result2)
+    def test_returns_uint64(self):
+        h = _stable_string_hash("test")
+        assert h.dtype == np.uint64
 
-    def test_different_seeds_produce_different_results(self):
-        """Different seeds should produce different results."""
-        n = 1_000
 
-        generator1 = np.random.default_rng(seed=0)
-        result1 = generator1.random(n)
+class TestSeededRng:
 
-        generator2 = np.random.default_rng(seed=1)
-        result2 = generator2.random(n)
+    def test_same_name_same_results(self):
+        rng1 = seeded_rng("takes_up_snap_if_eligible")
+        result1 = rng1.random(1000)
+        rng2 = seeded_rng("takes_up_snap_if_eligible")
+        result2 = rng2.random(1000)
+        np.testing.assert_array_equal(result1, result2)
 
+    def test_different_names_different_results(self):
+        rng1 = seeded_rng("takes_up_snap_if_eligible")
+        result1 = rng1.random(1000)
+        rng2 = seeded_rng("takes_up_aca_if_eligible")
+        result2 = rng2.random(1000)
         assert not np.array_equal(result1, result2)
 
+    def test_order_independence(self):
+        """Generating variables in different order produces same values."""
+        # Order A: SNAP then ACA
+        rng_snap_a = seeded_rng("takes_up_snap_if_eligible")
+        snap_a = rng_snap_a.random(1000)
+        rng_aca_a = seeded_rng("takes_up_aca_if_eligible")
+        aca_a = rng_aca_a.random(1000)
+
+        # Order B: ACA then SNAP
+        rng_aca_b = seeded_rng("takes_up_aca_if_eligible")
+        aca_b = rng_aca_b.random(1000)
+        rng_snap_b = seeded_rng("takes_up_snap_if_eligible")
+        snap_b = rng_snap_b.random(1000)
+
+        np.testing.assert_array_equal(snap_a, snap_b)
+        np.testing.assert_array_equal(aca_a, aca_b)
+
 
 class TestTakeUpProportions:
-    """Test that take-up rates produce plausible proportions."""
 
     def test_take_up_produces_expected_proportion(self):
-        """Simulated take-up should match the rate approximately."""
         rate = 0.7
         n = 10_000
-        generator = np.random.default_rng(seed=42)
-
-        take_up = generator.random(n) < rate
-        actual_proportion = take_up.mean()
-
-        # Should be within 5 percentage points of the rate
-        assert abs(actual_proportion - rate) < 0.05
+        rng = seeded_rng("test_variable")
+        take_up = rng.random(n) < rate
+        assert abs(take_up.mean() - rate) < 0.05
 
     def test_boolean_generation(self):
-        """Take-up decisions should be boolean."""
-        rate = 0.5
-        n = 100
-        generator = np.random.default_rng(seed=42)
-
-        take_up = generator.random(n) < rate
-
+        rng = seeded_rng("test_bool")
+        take_up = rng.random(100) < 0.5
         assert take_up.dtype == bool
         assert set(take_up).issubset({True, False})
+
+    def test_wic_draws_are_float(self):
+        rng = seeded_rng("wic_takeup_draw")
+        draws = rng.random(1000).astype(np.float32)
+        assert draws.dtype == np.float32
+        assert np.all(draws >= 0)
+        assert np.all(draws < 1)
+
+    def test_state_specific_medicaid_proportions(self):
+        rates = load_take_up_rate("medicaid", 2022)
+        rng = seeded_rng("takes_up_medicaid_if_eligible")
+        n = 10_000
+        draws = rng.random(n)
+        # Threshold on the loaded rates so the YAML values are exercised
+        for state, expected_rate in [("UT", 0.53), ("CO", 0.99)]:
+            take_up = draws < rates[state]
+            assert abs(take_up.mean() - expected_rate) < 0.05, state
diff --git a/policyengine_us_data/utils/randomness.py b/policyengine_us_data/utils/randomness.py
new file mode 100644
index 000000000..f4f2e51d7
--- /dev/null
+++ b/policyengine_us_data/utils/randomness.py
@@ -0,0 +1,29 @@
+import warnings
+import numpy as np
+
+
+def _stable_string_hash(s: str) -> np.uint64:
+    """Return a 64-bit hash of ``s`` that is identical in every process.
+
+    Built-in hash() is not deterministic across processes (since 3.3),
+    so this is a base-31 polynomial hash of the UTF-8 bytes plus a mix
+    step; uint64 wraparound is intended, so overflow warnings are muted.
+    Ported from policyengine_core.commons.formulas._stable_string_hash.
+    """
+    with warnings.catch_warnings():
+        warnings.filterwarnings(
+            "ignore", "overflow encountered", RuntimeWarning
+        )
+        h = np.uint64(0)
+        for byte in s.encode("utf-8"):
+            h = h * np.uint64(31) + np.uint64(byte)
+        h = h ^ (h >> np.uint64(33))
+        h = h * np.uint64(0xFF51AFD7ED558CCD)
+        h = h ^ (h >> np.uint64(33))
+    return h
+
+
+def seeded_rng(variable_name: str) -> np.random.Generator:
+    """Create a per-variable RNG seeded by variable name hash."""
+    seed = int(_stable_string_hash(variable_name)) % (2**63)
+    return np.random.default_rng(seed=seed)

From 373235c8128eade0f89e509be128293472c0b83a Mon Sep 17 00:00:00 2001
From: "baogorek@gmail.com" <baogorek@gmail.com>
Date: Fri, 6 Feb 2026 09:20:12 -0500
Subject: [PATCH 6/8] Add optional salt parameter to seeded_rng

Supports per-subgroup RNG streams (e.g., congressional districts)
while keeping the unsalted default distinct from any salted variant.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 policyengine_us_data/utils/randomness.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/policyengine_us_data/utils/randomness.py b/policyengine_us_data/utils/randomness.py
index f4f2e51d7..eac015227 100644
--- a/policyengine_us_data/utils/randomness.py
+++ b/policyengine_us_data/utils/randomness.py
@@ -23,7 +23,17 @@ def _stable_string_hash(s: str) -> np.uint64:
     return h
 
 
-def seeded_rng(variable_name: str) -> np.random.Generator:
-    """Create a per-variable RNG seeded by variable name hash."""
-    seed = int(_stable_string_hash(variable_name)) % (2**63)
+def seeded_rng(variable_name: str, salt: str = None) -> np.random.Generator:
+    """Create a per-variable RNG seeded by variable name hash.
+
+    Args:
+        variable_name: Name whose stable hash seeds the generator.
+        salt: Optional qualifier. When given, "<variable_name>:<salt>"
+            is hashed instead, giving a separately seeded stream.
+
+    Returns:
+        A numpy Generator; identical arguments reproduce the same draws.
+    """
+    key = variable_name if salt is None else f"{variable_name}:{salt}"
+    seed = int(_stable_string_hash(key)) % (2**63)
     return np.random.default_rng(seed=seed)

From d90f42bc0d75e3d4bcd7395ec7212984b4ece6bc Mon Sep 17 00:00:00 2001
From: "baogorek@gmail.com" <baogorek@gmail.com>
Date: Fri, 6 Feb 2026 14:02:45 -0500
Subject: [PATCH 7/8] Convert WIC float draws to bools for consistency with
 other takeup variables

Resolves category-specific rate comparisons at data generation time so only
bools (would_claim_wic, wic_nutritional_risk_imputed) are stored in the
dataset, matching the pattern used by all other takeup variables.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 policyengine_us_data/datasets/cps/cps.py      | 23 +++++++++----
 policyengine_us_data/parameters/__init__.py   | 14 ++++++++
 .../take_up/wic_nutritional_risk.yaml         | 13 ++++++++
 .../parameters/take_up/wic_takeup.yaml        | 33 +++++++++++++++++++
 .../tests/test_stochastic_variables.py        | 31 +++++++++++++----
 5 files changed, 102 insertions(+), 12 deletions(-)
 create mode 100644 policyengine_us_data/parameters/take_up/wic_nutritional_risk.yaml
 create mode 100644 policyengine_us_data/parameters/take_up/wic_takeup.yaml

diff --git a/policyengine_us_data/datasets/cps/cps.py b/policyengine_us_data/datasets/cps/cps.py
index d4c899cdc..c6224e1c3 100644
--- a/policyengine_us_data/datasets/cps/cps.py
+++ b/policyengine_us_data/datasets/cps/cps.py
@@ -268,13 +268,24 @@ def add_takeup(self):
     rng = seeded_rng("meets_ssi_resource_test")
     data["meets_ssi_resource_test"] = rng.random(n_persons) < ssi_pass_rate
 
-    # WIC draws (country package compares against category-specific rates)
-    rng = seeded_rng("wic_takeup_draw")
-    data["wic_takeup_draw"] = rng.random(n_persons).astype(np.float32)
+    # WIC: resolve draws to bools using category-specific rates
+    wic_categories = baseline.calculate("wic_category_str").values
+    wic_takeup_rates = load_take_up_rate("wic_takeup", self.time_period)
+    wic_takeup_rate_by_person = np.array(
+        [wic_takeup_rates.get(c, 0) for c in wic_categories]
+    )
+    rng = seeded_rng("would_claim_wic")
+    data["would_claim_wic"] = rng.random(n_persons) < wic_takeup_rate_by_person
 
-    rng = seeded_rng("wic_nutritional_risk_draw")
-    data["wic_nutritional_risk_draw"] = rng.random(n_persons).astype(
-        np.float32
+    wic_risk_rates = load_take_up_rate(
+        "wic_nutritional_risk", self.time_period
+    )
+    wic_risk_rate_by_person = np.array(
+        [wic_risk_rates.get(c, 0) for c in wic_categories]
+    )
+    rng = seeded_rng("wic_nutritional_risk_imputed")
+    data["wic_nutritional_risk_imputed"] = (
+        rng.random(n_persons) < wic_risk_rate_by_person
     )
 
     self.save_dataset(data)
diff --git a/policyengine_us_data/parameters/__init__.py b/policyengine_us_data/parameters/__init__.py
index babd99037..2fcddb5af 100644
--- a/policyengine_us_data/parameters/__init__.py
+++ b/policyengine_us_data/parameters/__init__.py
@@ -35,6 +35,20 @@ def load_take_up_rate(variable_name: str, year: int = 2018):
     if "rates_by_state" in data:
         return data["rates_by_state"]
 
+    # WIC-style: rates by category (each category has a time series)
+    if "rates_by_category" in data:
+        result = {}
+        for category, time_series in data["rates_by_category"].items():
+            applicable_value = None
+            for y, value in sorted(time_series.items()):
+                if int(y) <= year:
+                    applicable_value = value
+                else:
+                    break
+            if applicable_value is not None:
+                result[category] = applicable_value
+        return result
+
     # Standard time-series values
     values = data["values"]
     applicable_value = None
diff --git a/policyengine_us_data/parameters/take_up/wic_nutritional_risk.yaml b/policyengine_us_data/parameters/take_up/wic_nutritional_risk.yaml
new file mode 100644
index 000000000..c327b4285
--- /dev/null
+++ b/policyengine_us_data/parameters/take_up/wic_nutritional_risk.yaml
@@ -0,0 +1,13 @@
+rates_by_category:
+  PREGNANT:
+    1980: 0.913
+  POSTPARTUM:
+    1980: 0.933
+  BREASTFEEDING:
+    1980: 0.889
+  INFANT:
+    1980: 0.95
+  CHILD:
+    1980: 0.752
+  NONE:
+    1980: 0
diff --git a/policyengine_us_data/parameters/take_up/wic_takeup.yaml b/policyengine_us_data/parameters/take_up/wic_takeup.yaml
new file mode 100644
index 000000000..ec0f63554
--- /dev/null
+++ b/policyengine_us_data/parameters/take_up/wic_takeup.yaml
@@ -0,0 +1,33 @@
+rates_by_category:
+  PREGNANT:
+    2018: 0.533
+    2019: 0.523
+    2020: 0.456
+    2021: 0.437
+    2022: 0.456
+  POSTPARTUM:
+    2018: 0.844
+    2019: 0.847
+    2020: 0.685
+    2021: 0.672
+    2022: 0.689
+  BREASTFEEDING:
+    2018: 0.687
+    2019: 0.684
+    2020: 0.604
+    2021: 0.608
+    2022: 0.663
+  INFANT:
+    2018: 0.978
+    2019: 0.984
+    2020: 0.817
+    2021: 0.78
+    2022: 0.784
+  CHILD:
+    2018: 0.442
+    2019: 0.448
+    2020: 0.406
+    2021: 0.432
+    2022: 0.46
+  NONE:
+    2018: 0
diff --git a/policyengine_us_data/tests/test_stochastic_variables.py b/policyengine_us_data/tests/test_stochastic_variables.py
index 293d29a7e..1819241a2 100644
--- a/policyengine_us_data/tests/test_stochastic_variables.py
+++ b/policyengine_us_data/tests/test_stochastic_variables.py
@@ -117,12 +117,31 @@ def test_boolean_generation(self):
         assert take_up.dtype == bool
         assert set(take_up).issubset({True, False})
 
-    def test_wic_draws_are_float(self):
-        rng = seeded_rng("wic_takeup_draw")
-        draws = rng.random(1000).astype(np.float32)
-        assert draws.dtype == np.float32
-        assert np.all(draws >= 0)
-        assert np.all(draws < 1)
+    def test_wic_takeup_rates_load(self):
+        rates = load_take_up_rate("wic_takeup", 2022)
+        assert isinstance(rates, dict)
+        assert rates["PREGNANT"] == 0.456
+        assert rates["INFANT"] == 0.784
+        assert rates["NONE"] == 0
+
+    def test_wic_nutritional_risk_rates_load(self):
+        rates = load_take_up_rate("wic_nutritional_risk", 2022)
+        assert isinstance(rates, dict)
+        assert rates["INFANT"] == 0.95
+        assert rates["CHILD"] == 0.752
+        assert rates["NONE"] == 0
+
+    def test_wic_category_specific_proportions(self):
+        rates = load_take_up_rate("wic_takeup", 2022)
+        rng = seeded_rng("would_claim_wic")
+        draws = rng.random(10_000)
+        # Threshold on the loaded rates so the YAML values are exercised
+        for category, expected_rate in [
+            ("INFANT", 0.784),
+            ("CHILD", 0.46),
+        ]:
+            take_up = draws < rates[category]
+            assert abs(take_up.mean() - expected_rate) < 0.05, category
 
     def test_state_specific_medicaid_proportions(self):
         rates = load_take_up_rate("medicaid", 2022)

From eafd4380d8bfac32d597b03ce1c26bed9ebe43f1 Mon Sep 17 00:00:00 2001
From: "baogorek@gmail.com" <baogorek@gmail.com>
Date: Fri, 6 Feb 2026 16:46:16 -0500
Subject: [PATCH 8/8] Resolve WIC nutritional risk fully in data, eliminate
 _imputed variable

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 policyengine_us_data/datasets/cps/cps.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/policyengine_us_data/datasets/cps/cps.py b/policyengine_us_data/datasets/cps/cps.py
index c6224e1c3..90dc3f03d 100644
--- a/policyengine_us_data/datasets/cps/cps.py
+++ b/policyengine_us_data/datasets/cps/cps.py
@@ -277,16 +277,17 @@ def add_takeup(self):
     rng = seeded_rng("would_claim_wic")
     data["would_claim_wic"] = rng.random(n_persons) < wic_takeup_rate_by_person
 
+    # WIC nutritional risk: receives_wic, or a draw below the category rate
     wic_risk_rates = load_take_up_rate(
         "wic_nutritional_risk", self.time_period
     )
     wic_risk_rate_by_person = np.array(
         [wic_risk_rates.get(c, 0) for c in wic_categories]
     )
-    rng = seeded_rng("wic_nutritional_risk_imputed")
-    data["wic_nutritional_risk_imputed"] = (
-        rng.random(n_persons) < wic_risk_rate_by_person
-    )
+    receives_wic = baseline.calculate("receives_wic").values
+    rng = seeded_rng("is_wic_at_nutritional_risk")
+    imputed_risk = rng.random(n_persons) < wic_risk_rate_by_person
+    data["is_wic_at_nutritional_risk"] = receives_wic | imputed_risk
 
     self.save_dataset(data)