From 61c8f8f271af40841a7d1a00d8f45ec4ba63f406 Mon Sep 17 00:00:00 2001 From: Vahid Ahmadi Date: Tue, 17 Feb 2026 14:53:25 +0000 Subject: [PATCH 1/2] Fix salary sacrifice headcount regression from uprating mismatch PR #268 added Stage 2 imputation (records at exactly 2k) and headcount calibration targets. The calibrator classified above/below 2k at uprated 2025 prices, but the saved h5 stores 2023 prices where the classification differs (8.55% inflation flips boundary records). Now evaluates SS at base-year prices before thresholding so calibration matches end-user values. Also reduces Stage 2 target to 3M so the calibrator upweights gently rather than overshooting. Co-Authored-By: Claude Opus 4.6 --- changelog_entry.yaml | 10 +++++++++ .../datasets/imputations/salary_sacrifice.py | 10 ++++++--- .../tests/test_salary_sacrifice_headcount.py | 11 ---------- policyengine_uk_data/utils/loss.py | 22 ++++++++++++++++--- 4 files changed, 36 insertions(+), 17 deletions(-) diff --git a/changelog_entry.yaml b/changelog_entry.yaml index e69de29bb..92d0ca69c 100644 --- a/changelog_entry.yaml +++ b/changelog_entry.yaml @@ -0,0 +1,10 @@ +- bump: patch + changes: + fixed: + - Fix salary sacrifice headcount regression caused by uprating mismatch. The + calibrator classified records above/below the 2k cap at uprated 2025 prices, + but the saved dataset stores values at 2023 prices where the classification + differs. Now evaluates SS amounts at base-year prices before applying the + threshold, so calibration matches what end-users see. Also reduces Stage 2 + imputation target to let the calibrator upweight gently rather than overshoot. + - Remove xfail markers from salary sacrifice headcount tests. 
diff --git a/policyengine_uk_data/datasets/imputations/salary_sacrifice.py b/policyengine_uk_data/datasets/imputations/salary_sacrifice.py index 5d5ae657a..dce8a4fb9 100644 --- a/policyengine_uk_data/datasets/imputations/salary_sacrifice.py +++ b/policyengine_uk_data/datasets/imputations/salary_sacrifice.py @@ -194,15 +194,19 @@ def impute_salary_sacrifice( # Donor pool: employed pension contributors not already SS users is_donor = (employee_pension > 0) & ~has_ss & (employment_income > 0) - # Target ~4.3mn below-cap SS users (HMRC/ASHE estimate) - TARGET_BELOW_CAP = 4_300_000 + # Create enough below-cap SS records for the calibrator to work + # with. The final 4.3mn target is hit by weight optimisation in + # loss.py; here we just need a plausible pool of records. Target + # ~70% of the final headcount so the calibrator can gently + # upweight rather than fight a large overshoot. + TARGET_BELOW_CAP = 3_000_000 current_below_cap = (person_weight * below_cap_ss).sum() shortfall = max(0, TARGET_BELOW_CAP - current_below_cap) if shortfall > 0: donor_weighted = (person_weight * is_donor).sum() if donor_weighted > 0: - imputation_rate = min(0.8, shortfall / donor_weighted) + imputation_rate = min(0.5, shortfall / donor_weighted) rng = np.random.default_rng(seed=2024) newly_imputed = is_donor & ( rng.random(len(final_ss)) < imputation_rate diff --git a/policyengine_uk_data/tests/test_salary_sacrifice_headcount.py b/policyengine_uk_data/tests/test_salary_sacrifice_headcount.py index 10ee93da6..af0fdfd4c 100644 --- a/policyengine_uk_data/tests/test_salary_sacrifice_headcount.py +++ b/policyengine_uk_data/tests/test_salary_sacrifice_headcount.py @@ -5,14 +5,9 @@ 7.7mn total SS users (3.3mn above 2k cap, 4.3mn below 2k cap) """ -import pytest - TOLERANCE = 0.15 # 15% relative tolerance -@pytest.mark.xfail( - reason="Will pass after recalibration with new headcount targets" -) def test_salary_sacrifice_total_users(baseline): """Test that total SS user count is close to 
7.7mn.""" ss = baseline.calculate( @@ -33,9 +28,6 @@ def test_salary_sacrifice_total_users(baseline): ) -@pytest.mark.xfail( - reason="Will pass after recalibration with new headcount targets" -) def test_salary_sacrifice_below_cap_users(baseline): """Test that below-cap (<=2k) SS users are close to 4.3mn.""" ss = baseline.calculate( @@ -57,9 +49,6 @@ def test_salary_sacrifice_below_cap_users(baseline): ) -@pytest.mark.xfail( - reason="Will pass after recalibration with new headcount targets" -) def test_salary_sacrifice_above_cap_users(baseline): """Test that above-cap (>2k) SS users are close to 3.3mn.""" ss = baseline.calculate( diff --git a/policyengine_uk_data/utils/loss.py b/policyengine_uk_data/utils/loss.py index 562897fd3..1ef9f4388 100644 --- a/policyengine_uk_data/utils/loss.py +++ b/policyengine_uk_data/utils/loss.py @@ -477,9 +477,25 @@ def pe_count(*variables): # Source: HMRC, "Salary sacrifice reform for pension contributions" # https://www.gov.uk/government/publications/salary-sacrifice-reform-for-pension-contributions-effective-from-6-april-2029 # 7.7mn total SS users (3.3mn above £2k cap, 4.3mn below £2k cap) - ss_has_contributions = ss_contributions > 0 - ss_below_cap = ss_has_contributions & (ss_contributions <= 2000) - ss_above_cap = ss_has_contributions & (ss_contributions > 2000) + # The £2,000 cap is defined at 2023-24 FRS prices. The dataset is + # uprated to 2025 for calibration then downrated back to 2023 for + # saving. To keep the above/below classification consistent across + # price years, evaluate SS amounts at 2023-24 base-year prices. 
+ ss_uprating_factors = pd.read_csv( + STORAGE_FOLDER / "uprating_factors.csv" + ).set_index("Variable") + ss_price_adjustment = ( + ss_uprating_factors.loc[ + "pension_contributions_via_salary_sacrifice", "2023" + ] + / ss_uprating_factors.loc[ + "pension_contributions_via_salary_sacrifice", str(time_period) + ] + ) + ss_at_base_prices = ss_contributions * ss_price_adjustment + ss_has_contributions = ss_at_base_prices > 0 + ss_below_cap = ss_has_contributions & (ss_at_base_prices <= 2000) + ss_above_cap = ss_has_contributions & (ss_at_base_prices > 2000) df["obr/salary_sacrifice_users_total"] = household_from_person( ss_has_contributions From e981000be31eeb8ed6262e0b51741f89e5835916 Mon Sep 17 00:00:00 2001 From: Vahid Ahmadi Date: Tue, 17 Feb 2026 15:16:12 +0000 Subject: [PATCH 2/2] Stage 2: move full employee pension to SS for both above/below cap records The previous Stage 2 capped all new SS records at 2k, putting them all below-cap. At 2023 prices only ~1,308 QRF records were above-cap, not enough for the calibrator to reach 3.3M. Now each donor's full employee pension amount is moved to SS (leaving their employee pension contributions at zero) so the natural above/below split provides records in both categories. Co-Authored-By: Claude Opus 4.6 --- changelog_entry.yaml | 5 ++-- .../datasets/imputations/salary_sacrifice.py | 26 +++++++++---------- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/changelog_entry.yaml b/changelog_entry.yaml index 92d0ca69c..b9ca1ba36 100644 --- a/changelog_entry.yaml +++ b/changelog_entry.yaml @@ -5,6 +5,7 @@ calibrator classified records above/below the 2k cap at uprated 2025 prices, but the saved dataset stores values at 2023 prices where the classification differs. Now evaluates SS amounts at base-year prices before applying the - threshold, so calibration matches what end-users see. Also reduces Stage 2 - imputation target to let the calibrator upweight gently rather than overshoot. + threshold, so calibration matches what end-users see. 
Stage 2 imputation now + moves the full employee pension amount to SS (instead of capping at 2k) so + both above-cap and below-cap records are created for the calibrator. - Remove xfail markers from salary sacrifice headcount tests. diff --git a/policyengine_uk_data/datasets/imputations/salary_sacrifice.py b/policyengine_uk_data/datasets/imputations/salary_sacrifice.py index dce8a4fb9..7769f7a5c 100644 --- a/policyengine_uk_data/datasets/imputations/salary_sacrifice.py +++ b/policyengine_uk_data/datasets/imputations/salary_sacrifice.py @@ -179,29 +179,28 @@ def impute_salary_sacrifice( imputed_ss, # Use imputed for non-respondents ) - # Stage 2: Headcount-targeted imputation for below-cap SS users. + # Stage 2: Headcount-targeted imputation for SS users. # ASHE data shows many more SS users than the FRS captures due to # self-reporting bias in auto-enrolment. Impute additional SS users # from pension contributors to create enough records for calibration - # to hit OBR headcount targets (7.7mn total, 4.3mn below £2,000). + # to hit OBR headcount targets (7.7mn total, 3.3mn above 2k, + # 4.3mn below 2k). Each donor's full employee pension amount moves + # to SS, so those above 2k become above-cap and the rest below-cap. person_weight = sim.calculate("person_weight").values employee_pension = dataset.person[ "employee_pension_contributions" ].values.copy() has_ss = final_ss > 0 - below_cap_ss = has_ss & (final_ss <= 2000) # Donor pool: employed pension contributors not already SS users is_donor = (employee_pension > 0) & ~has_ss & (employment_income > 0) - # Create enough below-cap SS records for the calibrator to work - # with. The final 4.3mn target is hit by weight optimisation in - # loss.py; here we just need a plausible pool of records. Target - # ~70% of the final headcount so the calibrator can gently + # Create enough SS records for the calibrator to work with. 
+ # Target ~70% of the 7.7mn total so the calibrator can gently # upweight rather than fight a large overshoot. - TARGET_BELOW_CAP = 3_000_000 - current_below_cap = (person_weight * below_cap_ss).sum() - shortfall = max(0, TARGET_BELOW_CAP - current_below_cap) + TARGET_TOTAL = 5_400_000 + current_total = (person_weight * has_ss).sum() + shortfall = max(0, TARGET_TOTAL - current_total) if shortfall > 0: donor_weighted = (person_weight * is_donor).sum() @@ -212,14 +211,15 @@ def impute_salary_sacrifice( rng.random(len(final_ss)) < imputation_rate ) - # Move up to £2,000 of employee pension to SS - ss_new = np.minimum(employee_pension, 2000.0) + # Move full employee pension to SS so the above/below + # 2k split reflects the natural pension distribution + ss_new = employee_pension.copy() final_ss = np.where(newly_imputed, ss_new, final_ss) # Reduce employee pension correspondingly dataset.person["employee_pension_contributions"] = np.where( newly_imputed, - employee_pension - ss_new, + 0.0, employee_pension, )