From 61c8f8f271af40841a7d1a00d8f45ec4ba63f406 Mon Sep 17 00:00:00 2001
From: Vahid Ahmadi <va.vahidahmadi@gmail.com>
Date: Tue, 17 Feb 2026 14:53:25 +0000
Subject: [PATCH 1/2] Fix salary sacrifice headcount regression from uprating
 mismatch

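PR #268 added Stage 2 imputation (which produces records at exactly 2k)
and headcount calibration targets. The calibrator classified records as
above/below the 2k cap at uprated 2025 prices, but the saved h5 stores
2023 prices, where the classification differs (8.55% inflation flips
records near the boundary). The calibrator now converts SS amounts back
to base-year (2023) prices before thresholding, so the calibrated
headcounts match the values end users see.

Also lowers the Stage 2 below-cap target from 4.3M to 3M and the maximum
donor imputation rate from 0.8 to 0.5, so the calibrator upweights
gently rather than fighting an overshoot, and removes the xfail markers
from the salary sacrifice headcount tests.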

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 changelog_entry.yaml                          | 10 +++++++++
 .../datasets/imputations/salary_sacrifice.py  | 10 ++++++---
 .../tests/test_salary_sacrifice_headcount.py  | 11 ----------
 policyengine_uk_data/utils/loss.py            | 22 ++++++++++++++++---
 4 files changed, 36 insertions(+), 17 deletions(-)

diff --git a/changelog_entry.yaml b/changelog_entry.yaml
index e69de29bb..92d0ca69c 100644
--- a/changelog_entry.yaml
+++ b/changelog_entry.yaml
@@ -0,0 +1,10 @@
+- bump: patch
+  changes:
+    fixed:
+    - Fix salary sacrifice headcount regression caused by uprating mismatch. The
+      calibrator classified records above/below the 2k cap at uprated 2025 prices,
+      but the saved dataset stores values at 2023 prices where the classification
+      differs. Now evaluates SS amounts at base-year prices before applying the
+      threshold, so calibration matches what end-users see. Also reduces Stage 2
+      imputation target to let the calibrator upweight gently rather than overshoot.
+    - Remove xfail markers from salary sacrifice headcount tests.
diff --git a/policyengine_uk_data/datasets/imputations/salary_sacrifice.py b/policyengine_uk_data/datasets/imputations/salary_sacrifice.py
index 5d5ae657a..dce8a4fb9 100644
--- a/policyengine_uk_data/datasets/imputations/salary_sacrifice.py
+++ b/policyengine_uk_data/datasets/imputations/salary_sacrifice.py
@@ -194,15 +194,19 @@ def impute_salary_sacrifice(
     # Donor pool: employed pension contributors not already SS users
     is_donor = (employee_pension > 0) & ~has_ss & (employment_income > 0)
 
-    # Target ~4.3mn below-cap SS users (HMRC/ASHE estimate)
-    TARGET_BELOW_CAP = 4_300_000
+    # Create enough below-cap SS records for the calibrator to work
+    # with. The final 4.3mn target is hit by weight optimisation in
+    # loss.py; here we just need a plausible pool of records. Target
+    # ~70% of the final headcount so the calibrator can gently
+    # upweight rather than fight a large overshoot.
+    TARGET_BELOW_CAP = 3_000_000
     current_below_cap = (person_weight * below_cap_ss).sum()
     shortfall = max(0, TARGET_BELOW_CAP - current_below_cap)
 
     if shortfall > 0:
         donor_weighted = (person_weight * is_donor).sum()
         if donor_weighted > 0:
-            imputation_rate = min(0.8, shortfall / donor_weighted)
+            imputation_rate = min(0.5, shortfall / donor_weighted)
             rng = np.random.default_rng(seed=2024)
             newly_imputed = is_donor & (
                 rng.random(len(final_ss)) < imputation_rate
diff --git a/policyengine_uk_data/tests/test_salary_sacrifice_headcount.py b/policyengine_uk_data/tests/test_salary_sacrifice_headcount.py
index 10ee93da6..af0fdfd4c 100644
--- a/policyengine_uk_data/tests/test_salary_sacrifice_headcount.py
+++ b/policyengine_uk_data/tests/test_salary_sacrifice_headcount.py
@@ -5,14 +5,9 @@
 7.7mn total SS users (3.3mn above 2k cap, 4.3mn below 2k cap)
 """
 
-import pytest
-
 TOLERANCE = 0.15  # 15% relative tolerance
 
 
-@pytest.mark.xfail(
-    reason="Will pass after recalibration with new headcount targets"
-)
 def test_salary_sacrifice_total_users(baseline):
     """Test that total SS user count is close to 7.7mn."""
     ss = baseline.calculate(
@@ -33,9 +28,6 @@ def test_salary_sacrifice_total_users(baseline):
     )
 
 
-@pytest.mark.xfail(
-    reason="Will pass after recalibration with new headcount targets"
-)
 def test_salary_sacrifice_below_cap_users(baseline):
     """Test that below-cap (<=2k) SS users are close to 4.3mn."""
     ss = baseline.calculate(
@@ -57,9 +49,6 @@ def test_salary_sacrifice_below_cap_users(baseline):
     )
 
 
-@pytest.mark.xfail(
-    reason="Will pass after recalibration with new headcount targets"
-)
 def test_salary_sacrifice_above_cap_users(baseline):
     """Test that above-cap (>2k) SS users are close to 3.3mn."""
     ss = baseline.calculate(
diff --git a/policyengine_uk_data/utils/loss.py b/policyengine_uk_data/utils/loss.py
index 562897fd3..1ef9f4388 100644
--- a/policyengine_uk_data/utils/loss.py
+++ b/policyengine_uk_data/utils/loss.py
@@ -477,9 +477,25 @@ def pe_count(*variables):
     # Source: HMRC, "Salary sacrifice reform for pension contributions"
     # https://www.gov.uk/government/publications/salary-sacrifice-reform-for-pension-contributions-effective-from-6-april-2029
     # 7.7mn total SS users (3.3mn above £2k cap, 4.3mn below £2k cap)
-    ss_has_contributions = ss_contributions > 0
-    ss_below_cap = ss_has_contributions & (ss_contributions <= 2000)
-    ss_above_cap = ss_has_contributions & (ss_contributions > 2000)
+    # The £2,000 cap is defined at 2023-24 FRS prices. The dataset is
+    # uprated to 2025 for calibration then downrated back to 2023 for
+    # saving. To keep the above/below classification consistent across
+    # price years, evaluate SS amounts at 2023-24 base-year prices.
+    ss_uprating_factors = pd.read_csv(
+        STORAGE_FOLDER / "uprating_factors.csv"
+    ).set_index("Variable")
+    ss_price_adjustment = (
+        ss_uprating_factors.loc[
+            "pension_contributions_via_salary_sacrifice", "2023"
+        ]
+        / ss_uprating_factors.loc[
+            "pension_contributions_via_salary_sacrifice", str(time_period)
+        ]
+    )
+    ss_at_base_prices = ss_contributions * ss_price_adjustment
+    ss_has_contributions = ss_at_base_prices > 0
+    ss_below_cap = ss_has_contributions & (ss_at_base_prices <= 2000)
+    ss_above_cap = ss_has_contributions & (ss_at_base_prices > 2000)
 
     df["obr/salary_sacrifice_users_total"] = household_from_person(
         ss_has_contributions

From e981000be31eeb8ed6262e0b51741f89e5835916 Mon Sep 17 00:00:00 2001
From: Vahid Ahmadi <va.vahidahmadi@gmail.com>
Date: Tue, 17 Feb 2026 15:16:12 +0000
Subject: [PATCH 2/2] Stage 2: move full employee pension to SS for both
 above/below cap records

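The previous Stage 2 capped every newly imputed SS record at 2k, so all
of them were below-cap. At 2023 prices only ~1,308 QRF records were
above-cap, which is not enough for the calibrator to reach the 3.3M
above-cap target. Stage 2 now moves each selected donor's full employee
pension contribution to SS (setting the donor's employee pension to
zero), so the natural pension distribution yields both above-cap and
below-cap records. The imputation target changes accordingly from 3M
below-cap users to 5.4M total SS users (~70% of the 7.7M total).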

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 changelog_entry.yaml                          |  5 ++--
 .../datasets/imputations/salary_sacrifice.py  | 26 +++++++++----------
 2 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/changelog_entry.yaml b/changelog_entry.yaml
index 92d0ca69c..b9ca1ba36 100644
--- a/changelog_entry.yaml
+++ b/changelog_entry.yaml
@@ -5,6 +5,7 @@
       calibrator classified records above/below the 2k cap at uprated 2025 prices,
       but the saved dataset stores values at 2023 prices where the classification
       differs. Now evaluates SS amounts at base-year prices before applying the
-      threshold, so calibration matches what end-users see. Also reduces Stage 2
-      imputation target to let the calibrator upweight gently rather than overshoot.
+      threshold, so calibration matches what end-users see. Stage 2 imputation now
+      moves the full employee pension amount to SS (instead of capping at 2k) so
+      both above-cap and below-cap records are created for the calibrator.
     - Remove xfail markers from salary sacrifice headcount tests.
diff --git a/policyengine_uk_data/datasets/imputations/salary_sacrifice.py b/policyengine_uk_data/datasets/imputations/salary_sacrifice.py
index dce8a4fb9..7769f7a5c 100644
--- a/policyengine_uk_data/datasets/imputations/salary_sacrifice.py
+++ b/policyengine_uk_data/datasets/imputations/salary_sacrifice.py
@@ -179,29 +179,28 @@ def impute_salary_sacrifice(
         imputed_ss,  # Use imputed for non-respondents
     )
 
-    # Stage 2: Headcount-targeted imputation for below-cap SS users.
+    # Stage 2: Headcount-targeted imputation for SS users.
     # ASHE data shows many more SS users than the FRS captures due to
     # self-reporting bias in auto-enrolment. Impute additional SS users
     # from pension contributors to create enough records for calibration
-    # to hit OBR headcount targets (7.7mn total, 4.3mn below £2,000).
+    # to hit OBR headcount targets (7.7mn total, 3.3mn above 2k,
+    # 4.3mn below 2k). Donors keep their full employee pension amount
+    # so those above 2k become above-cap records and the rest below-cap.
     person_weight = sim.calculate("person_weight").values
     employee_pension = dataset.person[
         "employee_pension_contributions"
     ].values.copy()
     has_ss = final_ss > 0
-    below_cap_ss = has_ss & (final_ss <= 2000)
 
     # Donor pool: employed pension contributors not already SS users
     is_donor = (employee_pension > 0) & ~has_ss & (employment_income > 0)
 
-    # Create enough below-cap SS records for the calibrator to work
-    # with. The final 4.3mn target is hit by weight optimisation in
-    # loss.py; here we just need a plausible pool of records. Target
-    # ~70% of the final headcount so the calibrator can gently
+    # Create enough SS records for the calibrator to work with.
+    # Target ~70% of the 7.7mn total so the calibrator can gently
     # upweight rather than fight a large overshoot.
-    TARGET_BELOW_CAP = 3_000_000
-    current_below_cap = (person_weight * below_cap_ss).sum()
-    shortfall = max(0, TARGET_BELOW_CAP - current_below_cap)
+    TARGET_TOTAL = 5_400_000
+    current_total = (person_weight * has_ss).sum()
+    shortfall = max(0, TARGET_TOTAL - current_total)
 
     if shortfall > 0:
         donor_weighted = (person_weight * is_donor).sum()
@@ -212,14 +211,15 @@ def impute_salary_sacrifice(
                 rng.random(len(final_ss)) < imputation_rate
             )
 
-            # Move up to £2,000 of employee pension to SS
-            ss_new = np.minimum(employee_pension, 2000.0)
+            # Move full employee pension to SS so the above/below
+            # 2k split reflects the natural pension distribution
+            ss_new = employee_pension.copy()
             final_ss = np.where(newly_imputed, ss_new, final_ss)
 
             # Reduce employee pension correspondingly
             dataset.person["employee_pension_contributions"] = np.where(
                 newly_imputed,
-                employee_pension - ss_new,
+                0.0,
                 employee_pension,
             )