From de061fc166edbdbc83ba27931c9a8ad834bba8c3 Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Sun, 15 Feb 2026 13:40:07 +0000 Subject: [PATCH 1/6] Replace ad-hoc targets with structured registry and source modules The old system had ~700 lines of hardcoded targets scattered across loss.py with no provenance. This replaces it with a targets registry (sources.yaml + per-source Python scrapers) that produces 570 national targets with source URLs, and factors local area data loading into dedicated source modules. National: pydantic Target schema, YAML registry, build_loss_matrix.py computes household columns from registry. Local: 4 source modules (local_age, local_income, local_uc, local_la_extras) replace inline file reads in constituencies/loss.py and local_authorities/loss.py. Both calibrate.py files updated to import create_national_target_matrix from the new location. Full pipeline runs end-to-end with zero skipped targets. Co-Authored-By: Claude Opus 4 --- .../datasets/create_datasets.py | 4 +- .../local_areas/constituencies/calibrate.py | 4 +- .../local_areas/constituencies/loss.py | 144 ++- .../local_authorities/calibrate.py | 4 +- .../local_areas/local_authorities/loss.py | 424 +++------ policyengine_uk_data/targets/__init__.py | 15 + .../targets/build_loss_matrix.py | 880 ++++++++++++++++++ policyengine_uk_data/targets/registry.py | 69 ++ policyengine_uk_data/targets/schema.py | 49 + policyengine_uk_data/targets/sources.yaml | 43 + .../targets/sources/__init__.py | 4 + policyengine_uk_data/targets/sources/dwp.py | 265 ++++++ .../targets/sources/hmrc_salary_sacrifice.py | 135 +++ .../targets/sources/hmrc_spi.py | 297 ++++++ .../targets/sources/housing.py | 37 + .../targets/sources/local_age.py | 100 ++ .../targets/sources/local_income.py | 96 ++ .../targets/sources/local_la_extras.py | 129 +++ .../targets/sources/local_uc.py | 42 + .../targets/sources/nts_vehicles.py | 49 + policyengine_uk_data/targets/sources/obr.py | 504 ++++++++++ 
.../targets/sources/ons_demographics.py | 331 +++++++ .../targets/sources/ons_households.py | 114 +++ .../targets/sources/ons_savings.py | 72 ++ .../targets/sources/ons_tenure.py | 119 +++ .../targets/sources/scottish_government.py | 37 + .../targets/sources/voa_council_tax.py | 63 ++ .../tests/test_target_registry.py | 103 ++ .../tests/test_vehicle_ownership.py | 2 +- policyengine_uk_data/utils/loss.py | 692 +------------- pyproject.toml | 2 + 31 files changed, 3754 insertions(+), 1075 deletions(-) create mode 100644 policyengine_uk_data/targets/__init__.py create mode 100644 policyengine_uk_data/targets/build_loss_matrix.py create mode 100644 policyengine_uk_data/targets/registry.py create mode 100644 policyengine_uk_data/targets/schema.py create mode 100644 policyengine_uk_data/targets/sources.yaml create mode 100644 policyengine_uk_data/targets/sources/__init__.py create mode 100644 policyengine_uk_data/targets/sources/dwp.py create mode 100644 policyengine_uk_data/targets/sources/hmrc_salary_sacrifice.py create mode 100644 policyengine_uk_data/targets/sources/hmrc_spi.py create mode 100644 policyengine_uk_data/targets/sources/housing.py create mode 100644 policyengine_uk_data/targets/sources/local_age.py create mode 100644 policyengine_uk_data/targets/sources/local_income.py create mode 100644 policyengine_uk_data/targets/sources/local_la_extras.py create mode 100644 policyengine_uk_data/targets/sources/local_uc.py create mode 100644 policyengine_uk_data/targets/sources/nts_vehicles.py create mode 100644 policyengine_uk_data/targets/sources/obr.py create mode 100644 policyengine_uk_data/targets/sources/ons_demographics.py create mode 100644 policyengine_uk_data/targets/sources/ons_households.py create mode 100644 policyengine_uk_data/targets/sources/ons_savings.py create mode 100644 policyengine_uk_data/targets/sources/ons_tenure.py create mode 100644 policyengine_uk_data/targets/sources/scottish_government.py create mode 100644 
policyengine_uk_data/targets/sources/voa_council_tax.py create mode 100644 policyengine_uk_data/tests/test_target_registry.py diff --git a/policyengine_uk_data/datasets/create_datasets.py b/policyengine_uk_data/datasets/create_datasets.py index 641644a4..ed969d07 100644 --- a/policyengine_uk_data/datasets/create_datasets.py +++ b/policyengine_uk_data/datasets/create_datasets.py @@ -119,7 +119,9 @@ def main(): ) from policyengine_uk_data.datasets.local_areas.constituencies.loss import ( create_constituency_target_matrix, - create_national_target_matrix, + ) + from policyengine_uk_data.targets.build_loss_matrix import ( + create_target_matrix as create_national_target_matrix, ) from policyengine_uk_data.datasets.local_areas.constituencies.calibrate import ( get_performance, diff --git a/policyengine_uk_data/datasets/local_areas/constituencies/calibrate.py b/policyengine_uk_data/datasets/local_areas/constituencies/calibrate.py index b264559b..6ea99677 100644 --- a/policyengine_uk_data/datasets/local_areas/constituencies/calibrate.py +++ b/policyengine_uk_data/datasets/local_areas/constituencies/calibrate.py @@ -2,7 +2,9 @@ from policyengine_uk_data.utils.calibrate import calibrate_local_areas from policyengine_uk_data.datasets.local_areas.constituencies.loss import ( create_constituency_target_matrix, - create_national_target_matrix, +) +from policyengine_uk_data.targets.build_loss_matrix import ( + create_target_matrix as create_national_target_matrix, ) from policyengine_uk_data.storage import STORAGE_FOLDER from policyengine_uk.data import UKSingleYearDataset diff --git a/policyengine_uk_data/datasets/local_areas/constituencies/loss.py b/policyengine_uk_data/datasets/local_areas/constituencies/loss.py index 0397f0d9..0cbd291d 100644 --- a/policyengine_uk_data/datasets/local_areas/constituencies/loss.py +++ b/policyengine_uk_data/datasets/local_areas/constituencies/loss.py @@ -1,19 +1,36 @@ +"""Constituency-level calibration target matrix. 
+ +Constructs the (matrix, y, country_mask) triple for calibrating +household weights across 650 parliamentary constituencies. Target +data is loaded from source modules in the targets system. + +Sources: +- Age: ONS mid-year population estimates +- Income: HMRC SPI table 3.15 +- UC: DWP Stat-Xplore +""" + from policyengine_uk import Microsimulation import pandas as pd import numpy as np -from pathlib import Path -from policyengine_uk_data.utils.loss import ( - create_target_matrix as create_national_target_matrix, -) +from policyengine_uk.data import UKSingleYearDataset from policyengine_uk_data.storage import STORAGE_FOLDER from policyengine_uk_data.datasets.local_areas.constituencies.boundary_changes.mapping_matrix import ( mapping_matrix, ) -from policyengine_uk.data import UKSingleYearDataset -from policyengine_uk_data.utils.uc_data import uc_pc_households - -FOLDER = Path(__file__).parent +from policyengine_uk_data.targets.sources.local_age import ( + get_constituency_age_targets, + get_uk_total_population, +) +from policyengine_uk_data.targets.sources.local_income import ( + get_constituency_income_targets, + get_national_income_projections, + INCOME_VARIABLES, +) +from policyengine_uk_data.targets.sources.local_uc import ( + get_constituency_uc_targets, +) def create_constituency_target_matrix( @@ -23,26 +40,18 @@ def create_constituency_target_matrix( ): if time_period is None: time_period = dataset.time_period - ages = pd.read_csv(FOLDER / "targets" / "age.csv") - national_demographics = pd.read_csv(STORAGE_FOLDER / "demographics.csv") - incomes = pd.read_csv(FOLDER / "targets" / "spi_by_constituency.csv") sim = Microsimulation(dataset=dataset, reform=reform) sim.default_calculation_period = dataset.time_period - national_incomes = pd.read_csv(STORAGE_FOLDER / "incomes_projection.csv") - national_incomes = national_incomes[ - national_incomes.year - == max(national_incomes.year.min(), int(dataset.time_period)) - ] - matrix = pd.DataFrame() y = 
pd.DataFrame() - INCOME_VARIABLES = [ - "self_employment_income", - "employment_income", - ] + # ── Income targets ───────────────────────────────────────────── + incomes = get_constituency_income_targets() + national_incomes = get_national_income_projections( + int(dataset.time_period) + ) for income_variable in INCOME_VARIABLES: income_values = sim.calculate(income_variable).values @@ -56,84 +65,50 @@ def create_constituency_target_matrix( (national_incomes.total_income_lower_bound == 12_570) & (national_incomes.total_income_upper_bound == np.inf) ][income_variable + "_amount"].iloc[0] - national_consistency_adjustment_factor = ( - national_target / local_target_sum - ) - y[f"hmrc/{income_variable}/amount"] = ( - local_targets * national_consistency_adjustment_factor - ) + adjustment = national_target / local_target_sum + y[f"hmrc/{income_variable}/amount"] = local_targets * adjustment + matrix[f"hmrc/{income_variable}/count"] = sim.map_result( (income_values != 0) * in_spi_frame, "person", "household" ) - local_targets = incomes[f"{income_variable}_count"].values - local_target_sum = local_targets.sum() - national_target = national_incomes[ - (national_incomes.total_income_lower_bound == 12_570) - & (national_incomes.total_income_upper_bound == np.inf) - ][income_variable + "_count"].iloc[0] y[f"hmrc/{income_variable}/count"] = ( - incomes[f"{income_variable}_count"].values - * national_consistency_adjustment_factor + incomes[f"{income_variable}_count"].values * adjustment ) - uk_total_population = ( - national_demographics[national_demographics.name == "uk_population"][ - str(time_period) - ].values[0] - * 1e6 - ) + # ── Age targets ──────────────────────────────────────────────── + age_targets = get_constituency_age_targets() + uk_total_population = get_uk_total_population(int(time_period)) age = sim.calculate("age").values targets_total_pop = 0 - for lower_age in range(0, 80, 10): - upper_age = lower_age + 10 - - in_age_band = (age >= lower_age) & (age < 
upper_age) - - age_str = f"{lower_age}_{upper_age}" - matrix[f"age/{age_str}"] = sim.map_result( - in_age_band, "person", "household" - ) - - age_count = ages[ - [str(age) for age in range(lower_age, upper_age)] - ].sum(axis=1) - - age_str = f"{lower_age}_{upper_age}" - y[f"age/{age_str}"] = age_count.values - targets_total_pop += age_count.values.sum() - - # Adjust for consistency - for lower_age in range(0, 80, 10): - upper_age = lower_age + 10 - - in_age_band = (age >= lower_age) & (age < upper_age) - - age_str = f"{lower_age}_{upper_age}" - y[f"age/{age_str}"] *= uk_total_population / targets_total_pop * 0.9 - - # UC household count by constituency - y["uc_households"] = uc_pc_households.household_count.values + age_cols = [c for c in age_targets.columns if c.startswith("age/")] + for col in age_cols: + lower, upper = col.removeprefix("age/").split("_") + lower, upper = int(lower), int(upper) + in_band = (age >= lower) & (age < upper) + matrix[col] = sim.map_result(in_band, "person", "household") + y[col] = age_targets[col].values + targets_total_pop += age_targets[col].values.sum() + + # National consistency adjustment + for col in age_cols: + y[col] *= uk_total_population / targets_total_pop * 0.9 + + # ── UC targets ───────────────────────────────────────────────── + y["uc_households"] = get_constituency_uc_targets().values matrix["uc_households"] = sim.map_result( (sim.calculate("universal_credit").values > 0).astype(int), "benunit", "household", ) + # ── Boundary mapping (2010 → 2024) ──────────────────────────── const_2024 = pd.read_csv(STORAGE_FOLDER / "constituencies_2024.csv") - const_2010 = pd.read_csv(STORAGE_FOLDER / "constituencies_2010.csv") - - y_2010 = y.copy() - y_2010["name"] = const_2010["name"].values y_columns = list(y.columns) - y_values = mapping_matrix @ y.values # Transform to 2024 constituencies - + y_values = mapping_matrix @ y.values y = pd.DataFrame(y_values, columns=y_columns) - y_2024 = y.copy() - y_2024["name"] = 
const_2024["name"].values - country_mask = create_country_mask( household_countries=sim.calculate("country").values, codes=const_2024.code, @@ -144,10 +119,8 @@ def create_constituency_target_matrix( def create_country_mask( household_countries: np.ndarray, codes: pd.Series ) -> np.ndarray: - # Create a matrix R to accompany the loss matrix M s.t. (W x M) x R = Y_ - # where Y_ is the target matrix for the country where no target is constructed from weights from a different country. - - constituency_countries = codes.apply(lambda code: code[0]).map( + """Country mask: R[i,j] = 1 iff household j is in same country as area i.""" + area_countries = codes.apply(lambda code: code[0]).map( { "E": "ENGLAND", "W": "WALES", @@ -155,10 +128,7 @@ def create_country_mask( "N": "NORTHERN_IRELAND", } ) - r = np.zeros((len(codes), len(household_countries))) - for i in range(len(codes)): - r[i] = household_countries == constituency_countries[i] - + r[i] = household_countries == area_countries.iloc[i] return r diff --git a/policyengine_uk_data/datasets/local_areas/local_authorities/calibrate.py b/policyengine_uk_data/datasets/local_areas/local_authorities/calibrate.py index f6a8d7dc..588f2955 100644 --- a/policyengine_uk_data/datasets/local_areas/local_authorities/calibrate.py +++ b/policyengine_uk_data/datasets/local_areas/local_authorities/calibrate.py @@ -2,7 +2,9 @@ from policyengine_uk_data.utils.calibrate import calibrate_local_areas from policyengine_uk_data.datasets.local_areas.local_authorities.loss import ( create_local_authority_target_matrix, - create_national_target_matrix, +) +from policyengine_uk_data.targets.build_loss_matrix import ( + create_target_matrix as create_national_target_matrix, ) from policyengine_uk_data.storage import STORAGE_FOLDER from policyengine_uk.data import UKSingleYearDataset diff --git a/policyengine_uk_data/datasets/local_areas/local_authorities/loss.py b/policyengine_uk_data/datasets/local_areas/local_authorities/loss.py index 
26e58a6e..177b2883 100644 --- a/policyengine_uk_data/datasets/local_areas/local_authorities/loss.py +++ b/policyengine_uk_data/datasets/local_areas/local_authorities/loss.py @@ -1,60 +1,44 @@ +"""Local authority calibration target matrix. + +Constructs the (matrix, y, country_mask) triple for calibrating +household weights across 360 local authorities. Target data is +loaded from source modules in the targets system. + +Sources: +- Age: ONS mid-year population estimates +- Income: HMRC SPI table 3.15 +- UC: DWP Stat-Xplore +- ONS income: ONS small area income estimates +- Tenure: English Housing Survey +- Private rent: VOA/ONS private rental market statistics +""" + from policyengine_uk import Microsimulation import pandas as pd import numpy as np -from pathlib import Path -from policyengine_uk_data.utils.loss import ( - create_target_matrix as create_national_target_matrix, -) -from policyengine_uk_data.storage import STORAGE_FOLDER from policyengine_uk.data import UKSingleYearDataset -from policyengine_uk_data.utils.uc_data import uc_la_households - -FOLDER = Path(__file__).parent - -# Uprating factors from FYE 2020 to 2025 (OBR Nov 2025 EFO) -# RHDI index: 1985.1 (2025-26) / 1467.6 (2020-21) = 1.352 -UPRATING_NET_INCOME_BHC_2020_TO_2025 = 1985.1 / 1467.6 -# House price index: 103.5 (2025-26) / 84.9 (2020-21) = 1.219 -UPRATING_HOUSING_COSTS_2020_TO_2025 = 103.5 / 84.9 - - -def load_ons_la_income_targets() -> pd.DataFrame: - """Load ONS income estimates by local authority. 
- - Returns a DataFrame with columns: la_code, total_income, net_income_bhc, net_income_ahc - (mean income per household, FYE 2020) - """ - xlsx = pd.ExcelFile(STORAGE_FOLDER / "local_authority_ons_income.xlsx") - - def load_sheet(sheet_name: str, value_col: str) -> pd.DataFrame: - df = pd.read_excel(xlsx, sheet_name=sheet_name, header=3) - df.columns = [ - "msoa_code", - "msoa_name", - "la_code", - "la_name", - "region_code", - "region_name", - value_col, - "upper_ci", - "lower_ci", - "ci_width", - ] - df = df.iloc[1:].dropna(subset=["msoa_code"]) - df[value_col] = pd.to_numeric(df[value_col]) - return df[["la_code", value_col]] - - total = load_sheet("Total annual income", "total_income") - bhc = load_sheet("Net income before housing costs", "net_income_bhc") - ahc = load_sheet("Net income after housing costs", "net_income_ahc") - - # Group by LA to get mean income per household - la_total = total.groupby("la_code")["total_income"].mean().reset_index() - la_bhc = bhc.groupby("la_code")["net_income_bhc"].mean().reset_index() - la_ahc = ahc.groupby("la_code")["net_income_ahc"].mean().reset_index() - - return la_total.merge(la_bhc, on="la_code").merge(la_ahc, on="la_code") +from policyengine_uk_data.storage import STORAGE_FOLDER +from policyengine_uk_data.targets.sources.local_age import ( + get_la_age_targets, + get_uk_total_population, +) +from policyengine_uk_data.targets.sources.local_income import ( + get_la_income_targets, + get_national_income_projections, + INCOME_VARIABLES, +) +from policyengine_uk_data.targets.sources.local_uc import ( + get_la_uc_targets, +) +from policyengine_uk_data.targets.sources.local_la_extras import ( + load_ons_la_income, + load_household_counts, + load_tenure_data, + load_private_rents, + UPRATING_NET_INCOME_BHC_2020_TO_2025, + UPRATING_HOUSING_COSTS_2020_TO_2025, +) def create_local_authority_target_matrix( @@ -64,8 +48,7 @@ def create_local_authority_target_matrix( ): if time_period is None: time_period = dataset.time_period - 
ages = pd.read_csv(FOLDER / "targets" / "age.csv") - incomes = pd.read_csv(FOLDER / "targets" / "spi_by_la.csv") + la_codes = pd.read_csv(STORAGE_FOLDER / "local_authorities_2021.csv") sim = Microsimulation(dataset=dataset, reform=reform) @@ -75,16 +58,11 @@ def create_local_authority_target_matrix( matrix = pd.DataFrame() y = pd.DataFrame() - INCOME_VARIABLES = [ - "self_employment_income", - "employment_income", - ] - - national_incomes = pd.read_csv(STORAGE_FOLDER / "incomes_projection.csv") - national_incomes = national_incomes[ - national_incomes.year - == max(national_incomes.year.min(), int(dataset.time_period)) - ] + # ── Income targets ───────────────────────────────────────────── + incomes = get_la_income_targets() + national_incomes = get_national_income_projections( + int(dataset.time_period) + ) for income_variable in INCOME_VARIABLES: income_values = sim.calculate(income_variable).values @@ -98,107 +76,66 @@ def create_local_authority_target_matrix( (national_incomes.total_income_lower_bound == 12_570) & (national_incomes.total_income_upper_bound == np.inf) ][income_variable + "_amount"].iloc[0] - national_consistency_adjustment_factor = ( - national_target / local_target_sum - ) - y[f"hmrc/{income_variable}/amount"] = ( - local_targets * national_consistency_adjustment_factor - ) + adjustment = national_target / local_target_sum + y[f"hmrc/{income_variable}/amount"] = local_targets * adjustment + matrix[f"hmrc/{income_variable}/count"] = sim.map_result( (income_values != 0) * in_spi_frame, "person", "household" ) - local_targets = incomes[f"{income_variable}_count"].values - local_target_sum = local_targets.sum() - national_target = national_incomes[ - (national_incomes.total_income_lower_bound == 12_570) - & (national_incomes.total_income_upper_bound == np.inf) - ][income_variable + "_count"].iloc[0] y[f"hmrc/{income_variable}/count"] = ( - incomes[f"{income_variable}_count"].values - * national_consistency_adjustment_factor + 
incomes[f"{income_variable}_count"].values * adjustment ) - age = sim.calculate("age").values - national_demographics = pd.read_csv(STORAGE_FOLDER / "demographics.csv") - uk_total_population = ( - national_demographics[national_demographics.name == "uk_population"][ - str(time_period) - ].values[0] - * 1e6 - ) + # ── Age targets ──────────────────────────────────────────────── + age_targets = get_la_age_targets() + uk_total_population = get_uk_total_population(int(time_period)) age = sim.calculate("age").values targets_total_pop = 0 - for lower_age in range(0, 80, 10): - upper_age = lower_age + 10 - - in_age_band = (age >= lower_age) & (age < upper_age) - - age_str = f"{lower_age}_{upper_age}" - matrix[f"age/{age_str}"] = sim.map_result( - in_age_band, "person", "household" - ) - - age_count = ages[ - [str(age) for age in range(lower_age, upper_age)] - ].sum(axis=1) - - age_str = f"{lower_age}_{upper_age}" - y[f"age/{age_str}"] = age_count.values - targets_total_pop += age_count.values.sum() - - # Adjust for consistency - for lower_age in range(0, 80, 10): - upper_age = lower_age + 10 - - in_age_band = (age >= lower_age) & (age < upper_age) - - age_str = f"{lower_age}_{upper_age}" - y[f"age/{age_str}"] *= uk_total_population / targets_total_pop * 0.9 - - # UC household count by local authority - y["uc_households"] = uc_la_households.household_count.values + age_cols = [c for c in age_targets.columns if c.startswith("age/")] + for col in age_cols: + lower, upper = col.removeprefix("age/").split("_") + lower, upper = int(lower), int(upper) + in_band = (age >= lower) & (age < upper) + matrix[col] = sim.map_result(in_band, "person", "household") + y[col] = age_targets[col].values + targets_total_pop += age_targets[col].values.sum() + + for col in age_cols: + y[col] *= uk_total_population / targets_total_pop * 0.9 + + # ── UC targets ───────────────────────────────────────────────── + y["uc_households"] = get_la_uc_targets().values matrix["uc_households"] = 
sim.map_result( (sim.calculate("universal_credit").values > 0).astype(int), "benunit", "household", ) - # ONS income targets by local authority - # ONS definitions: - # total_income (ONS) = household_market_income + household_benefits (PE) - # net_income_bhc (ONS) = hbai_household_net_income (PE) - # net_income_ahc (ONS) = hbai_household_net_income_ahc (PE) - ons_income = load_ons_la_income_targets() - households_by_la = pd.read_excel( - STORAGE_FOLDER / "la_count_households.xlsx", sheet_name="Dataset" - ) - households_by_la.columns = ["la_code", "la_name", "households"] + # ── ONS income targets ───────────────────────────────────────── + ons_income = load_ons_la_income() + households_by_la = load_household_counts() - # Merge ONS income with our LA codes to get targets aligned ons_merged = la_codes.merge( ons_income, left_on="code", right_on="la_code", how="left" ).merge( - households_by_la[["la_code", "households"]], + households_by_la, left_on="code", right_on="la_code", how="left", suffixes=("", "_hh"), ) - # Calculate PE household income variables hbai_net_income = sim.calculate("equiv_hbai_household_net_income").values hbai_net_income_ahc = sim.calculate( "equiv_hbai_household_net_income_ahc" ).values housing_costs = hbai_net_income - hbai_net_income_ahc - # Add to matrix (household-level values, will be summed with weights) matrix["ons/equiv_net_income_bhc"] = hbai_net_income matrix["ons/equiv_net_income_ahc"] = hbai_net_income_ahc matrix["ons/equiv_housing_costs"] = housing_costs - # Calculate LA-level targets: mean income * households, uprated to 2025 ons_merged["equiv_net_income_bhc_target"] = ( ons_merged["net_income_bhc"] * ons_merged["households"] @@ -213,205 +150,93 @@ def create_local_authority_target_matrix( - ons_merged["equiv_housing_costs_target"] ) - country_mask = create_country_mask( - household_countries=sim.calculate("country").values, - codes=la_codes.code, - ) - - # For LAs without ONS data (or without household counts), use national - # 
average scaled by LA household count has_ons_data = ( - ons_merged["net_income_bhc"].notna() & ons_merged["households"].notna() + ons_merged["net_income_bhc"].notna() + & ons_merged["households"].notna() ).values - # For LAs without household data, use equal share (1/360) as fallback total_households = ons_merged["households"].sum() - equal_share = 1 / len(la_codes) la_household_share = np.where( ons_merged["households"].notna(), ons_merged["households"].values / total_households, - equal_share, + 1 / len(la_codes), ) - # National totals (weighted sum across all households) - national_net_income_bhc = (original_weights * hbai_net_income).sum() - national_net_income_ahc = (original_weights * hbai_net_income_ahc).sum() - national_housing_costs = (original_weights * housing_costs).sum() - - # Default targets = national total * LA's share of households - default_net_income_bhc = national_net_income_bhc * la_household_share - default_net_income_ahc = national_net_income_ahc * la_household_share - default_housing_costs = national_housing_costs * la_household_share + national_bhc = (original_weights * hbai_net_income).sum() + national_ahc = (original_weights * hbai_net_income_ahc).sum() + national_hc = (original_weights * housing_costs).sum() y["ons/equiv_net_income_bhc"] = np.where( has_ons_data, ons_merged["equiv_net_income_bhc_target"].values, - default_net_income_bhc, + national_bhc * la_household_share, ) y["ons/equiv_net_income_ahc"] = np.where( has_ons_data, ons_merged["equiv_net_income_ahc_target"].values, - default_net_income_ahc, + national_ahc * la_household_share, ) y["ons/equiv_housing_costs"] = np.where( has_ons_data, ons_merged["equiv_housing_costs_target"].values, - default_housing_costs, + national_hc * la_household_share, ) - # Tenure type targets by local authority - tenure_data = pd.read_excel( - STORAGE_FOLDER / "la_tenure.xlsx", sheet_name="data download" - ) - tenure_data.columns = [ - "region_code", - "region_name", - "la_code", - "la_name", - 
"owned_outright_pct", - "owned_mortgage_pct", - "private_rent_pct", - "social_rent_pct", - ] - - # Merge with LA codes and households + # ── Tenure targets ───────────────────────────────────────────── + tenure_data = load_tenure_data() + tenure_merged = la_codes.merge( - tenure_data[ - [ - "la_code", - "owned_outright_pct", - "owned_mortgage_pct", - "private_rent_pct", - "social_rent_pct", - ] - ], - left_on="code", - right_on="la_code", - how="left", + tenure_data, left_on="code", right_on="la_code", how="left" ).merge( - households_by_la[["la_code", "households"]], + households_by_la, left_on="code", right_on="la_code", how="left", suffixes=("", "_hh"), ) - # Calculate household counts by tenure type tenure_type = sim.calculate("tenure_type").values - - # Matrix columns for tenure (1 if household has that tenure type) - matrix["tenure/owned_outright"] = (tenure_type == "OWNED_OUTRIGHT").astype( - float - ) + matrix["tenure/owned_outright"] = ( + tenure_type == "OWNED_OUTRIGHT" + ).astype(float) matrix["tenure/owned_mortgage"] = ( tenure_type == "OWNED_WITH_MORTGAGE" ).astype(float) - matrix["tenure/private_rent"] = (tenure_type == "RENT_PRIVATELY").astype( - float - ) + matrix["tenure/private_rent"] = ( + tenure_type == "RENT_PRIVATELY" + ).astype(float) matrix["tenure/social_rent"] = ( - (tenure_type == "RENT_FROM_COUNCIL") | (tenure_type == "RENT_FROM_HA") + (tenure_type == "RENT_FROM_COUNCIL") + | (tenure_type == "RENT_FROM_HA") ).astype(float) - # Calculate targets: percentage * households - tenure_merged["owned_outright_target"] = ( - tenure_merged["owned_outright_pct"] / 100 * tenure_merged["households"] - ) - tenure_merged["owned_mortgage_target"] = ( - tenure_merged["owned_mortgage_pct"] / 100 * tenure_merged["households"] - ) - tenure_merged["private_rent_target"] = ( - tenure_merged["private_rent_pct"] / 100 * tenure_merged["households"] - ) - tenure_merged["social_rent_target"] = ( - tenure_merged["social_rent_pct"] / 100 * tenure_merged["households"] 
- ) - - # For LAs without tenure data (or without household counts), use national - # average scaled by LA household count - has_tenure_data = ( + has_tenure = ( tenure_merged["owned_outright_pct"].notna() & tenure_merged["households"].notna() ).values - # National totals for each tenure type - national_owned_outright = ( - original_weights * matrix["tenure/owned_outright"].values - ).sum() - national_owned_mortgage = ( - original_weights * matrix["tenure/owned_mortgage"].values - ).sum() - national_private_rent = ( - original_weights * matrix["tenure/private_rent"].values - ).sum() - national_social_rent = ( - original_weights * matrix["tenure/social_rent"].values - ).sum() - - # Default targets = national total * LA's share of households - default_owned_outright = national_owned_outright * la_household_share - default_owned_mortgage = national_owned_mortgage * la_household_share - default_private_rent = national_private_rent * la_household_share - default_social_rent = national_social_rent * la_household_share - - y["tenure/owned_outright"] = np.where( - has_tenure_data, - tenure_merged["owned_outright_target"].values, - default_owned_outright, - ) - y["tenure/owned_mortgage"] = np.where( - has_tenure_data, - tenure_merged["owned_mortgage_target"].values, - default_owned_mortgage, - ) - y["tenure/private_rent"] = np.where( - has_tenure_data, - tenure_merged["private_rent_target"].values, - default_private_rent, - ) - y["tenure/social_rent"] = np.where( - has_tenure_data, - tenure_merged["social_rent_target"].values, - default_social_rent, - ) + for tenure_key, pct_col in [ + ("owned_outright", "owned_outright_pct"), + ("owned_mortgage", "owned_mortgage_pct"), + ("private_rent", "private_rent_pct"), + ("social_rent", "social_rent_pct"), + ]: + targets = ( + tenure_merged[pct_col] / 100 * tenure_merged["households"] + ) + national = ( + original_weights * matrix[f"tenure/{tenure_key}"].values + ).sum() + y[f"tenure/{tenure_key}"] = np.where( + has_tenure, 
targets.values, national * la_household_share + ) - # Private rent amounts by local authority - rent_data = pd.read_excel( - STORAGE_FOLDER / "la_private_rents_median.xlsx", - sheet_name="Figure 3", - header=5, - ) - rent_data.columns = [ - "col0", - "la_code_old", - "area_code", - "area_name", - "room", - "studio", - "one_bed", - "two_bed", - "three_bed", - "four_plus", - "median_monthly_rent", - ] - # Filter to LA rows and convert to numeric - rent_data = rent_data[ - rent_data["area_code"].astype(str).str.match(r"^E0[6789]") - ] - rent_data["median_monthly_rent"] = pd.to_numeric( - rent_data["median_monthly_rent"], errors="coerce" - ) - # Convert to annual rent - rent_data["median_annual_rent"] = rent_data["median_monthly_rent"] * 12 + # ── Private rent amounts ─────────────────────────────────────── + rent_data = load_private_rents() - # Add rent data to tenure_merged (which already has tenure pcts and households) tenure_merged = tenure_merged.merge( - rent_data[["area_code", "median_annual_rent"]], - left_on="code", - right_on="area_code", - how="left", + rent_data, left_on="code", right_on="area_code", how="left" ) - # Calculate private rent variable for matrix (rent for private renters, 0 otherwise) is_private_renter = (tenure_type == "RENT_PRIVATELY").astype(float) benunit_rent = sim.calculate("benunit_rent").values household_rent = sim.map_result(benunit_rent, "benunit", "household") @@ -419,29 +244,29 @@ def create_local_authority_target_matrix( matrix["rent/private_rent"] = private_rent_amount - # Target = median rent (assumed = mean) * number of private renting households - # Number of private renters = households * private_rent_pct (from tenure data) tenure_merged["private_rent_target"] = ( tenure_merged["median_annual_rent"] - * tenure_merged["private_rent_pct"] - / 100 + * tenure_merged["private_rent_pct"] / 100 * tenure_merged["households"] ) - # For LAs without rent data (need rent, tenure, and household data), use - # national average scaled by LA 
household share - has_rent_data = ( + has_rent = ( tenure_merged["median_annual_rent"].notna() & tenure_merged["private_rent_pct"].notna() & tenure_merged["households"].notna() ).values - national_private_rent = (original_weights * private_rent_amount).sum() - default_private_rent_amount = national_private_rent * la_household_share + national_rent = (original_weights * private_rent_amount).sum() y["rent/private_rent"] = np.where( - has_rent_data, + has_rent, tenure_merged["private_rent_target"].values, - default_private_rent_amount, + national_rent * la_household_share, + ) + + # ── Country mask ─────────────────────────────────────────────── + country_mask = create_country_mask( + household_countries=sim.calculate("country").values, + codes=la_codes.code, ) return matrix, y, country_mask @@ -450,10 +275,8 @@ def create_local_authority_target_matrix( def create_country_mask( household_countries: np.ndarray, codes: pd.Series ) -> np.ndarray: - # Create a matrix R to accompany the loss matrix M s.t. (W x M) x R = Y_ - # where Y_ is the target matrix for the country where no target is constructed from weights from a different country. 
- - constituency_countries = codes.apply(lambda code: code[0]).map( + """Country mask: R[i,j] = 1 iff household j is in same country as area i.""" + area_countries = codes.apply(lambda code: code[0]).map( { "E": "ENGLAND", "W": "WALES", @@ -461,10 +284,7 @@ def create_country_mask( "N": "NORTHERN_IRELAND", } ) - r = np.zeros((len(codes), len(household_countries))) - for i in range(len(codes)): - r[i] = household_countries == constituency_countries[i] - + r[i] = household_countries == area_countries.iloc[i] return r diff --git a/policyengine_uk_data/targets/__init__.py b/policyengine_uk_data/targets/__init__.py new file mode 100644 index 00000000..1b50479a --- /dev/null +++ b/policyengine_uk_data/targets/__init__.py @@ -0,0 +1,15 @@ +"""Targets system: structured, source-traceable calibration targets.""" + +from policyengine_uk_data.targets.registry import get_all_targets +from policyengine_uk_data.targets.schema import ( + GeographicLevel, + Target, + Unit, +) + +__all__ = [ + "get_all_targets", + "GeographicLevel", + "Target", + "Unit", +] diff --git a/policyengine_uk_data/targets/build_loss_matrix.py b/policyengine_uk_data/targets/build_loss_matrix.py new file mode 100644 index 00000000..6b366594 --- /dev/null +++ b/policyengine_uk_data/targets/build_loss_matrix.py @@ -0,0 +1,880 @@ +"""Build calibration loss matrices from the targets registry. + +Bridges the targets system to the calibration pipeline by converting +each Target into a household-level column vector and a scalar target +value, producing the (matrix, targets) pair that the weight optimiser +expects. + +For most targets the column is a straightforward simulation query +(sum a variable, count recipients, filter by region/age/income band). +For targets requiring custom logic (counterfactuals, cross-tabs), the +Target's custom_compute callable is invoked instead. 
+""" + +import logging + +import numpy as np +import pandas as pd + +from policyengine_uk_data.targets import get_all_targets +from policyengine_uk_data.targets.schema import GeographicLevel, Target, Unit + +logger = logging.getLogger(__name__) + + +def create_target_matrix( + dataset, + time_period: str = None, + reform=None, +) -> tuple[pd.DataFrame, pd.Series]: + """Create (matrix, target_values) for calibration. + + Args: + dataset: a UKSingleYearDataset instance. + time_period: calendar year as string; defaults to dataset year. + reform: optional PolicyEngine reform. + + Returns: + (df, targets) where df has one column per target and one row + per household, and targets is a Series of scalar target values + indexed by target name. + """ + from policyengine_uk import Microsimulation + + if time_period is None: + time_period = dataset.time_period + + year = int(time_period) + sim = Microsimulation(dataset=dataset, reform=reform) + sim.default_calculation_period = time_period + + # Helper closures for the simulation + ctx = _SimContext(sim, time_period, dataset, reform) + + # Fetch all targets (no year filter — we resolve values below) + all_targets = [] + seen = set() + for level in ( + GeographicLevel.NATIONAL, + GeographicLevel.REGION, + GeographicLevel.COUNTRY, + ): + for t in get_all_targets(geographic_level=level): + if t.name not in seen: + seen.add(t.name) + all_targets.append(t) + + df = pd.DataFrame() + target_names = [] + target_values = [] + + for target in all_targets: + try: + val = _resolve_value(target, year) + if val is None: + continue + col = _compute_column(target, ctx, year) + if col is None: + continue + df[target.name] = col + target_names.append(target.name) + target_values.append(val) + except Exception as e: + logger.warning("Skipping target %s: %s", target.name, e) + + return df, pd.Series(target_values, index=target_names) + + +def _resolve_value(target: Target, year: int) -> float | None: + """Get the target value for a year, falling 
back to nearest year.""" + if year in target.values: + return target.values[year] + # Use nearest available year + available = sorted(target.values.keys()) + if not available: + return None + closest = min(available, key=lambda y: abs(y - year)) + # Only allow ±3 years of extrapolation + if abs(closest - year) > 3: + return None + return target.values[closest] + + +class _SimContext: + """Holds the simulation and lazily computed intermediate arrays.""" + + def __init__(self, sim, time_period, dataset, reform): + self.sim = sim + self.time_period = time_period + self.dataset = dataset + self.reform = reform + self._cache = {} + + def pe(self, variable: str): + """Calculate variable mapped to household level.""" + key = ("pe", variable) + if key not in self._cache: + self._cache[key] = self.sim.calculate( + variable, map_to="household" + ).values + return self._cache[key] + + def pe_person(self, variable: str): + """Calculate variable at person level.""" + key = ("pe_person", variable) + if key not in self._cache: + self._cache[key] = self.sim.calculate(variable).values + return self._cache[key] + + def pe_count(self, *variables): + """Count people with variable > 0, mapped to household.""" + total = 0 + for variable in variables: + entity = self.sim.tax_benefit_system.variables[variable].entity.key + total += self.sim.map_result( + self.sim.calculate(variable) > 0, + entity, + "household", + ) + return total + + def household_from_person(self, values): + return self.sim.map_result(values, "person", "household") + + def household_from_family(self, values): + return self.sim.map_result(values, "benunit", "household") + + @property + def region(self): + if "region" not in self._cache: + self._cache["region"] = self.sim.calculate( + "region", map_to="person" + ) + return self._cache["region"] + + @property + def household_region(self): + if "household_region" not in self._cache: + self._cache["household_region"] = self.sim.calculate( + "region", map_to="household" + 
).values + return self._cache["household_region"] + + @property + def age(self): + if "age" not in self._cache: + self._cache["age"] = self.sim.calculate("age").values + return self._cache["age"] + + @property + def country(self): + if "country" not in self._cache: + self._cache["country"] = self.sim.calculate("country").values + return self._cache["country"] + + @property + def counterfactual_sim(self): + """Lazily create the salary sacrifice counterfactual simulation.""" + if "counterfactual_sim" not in self._cache: + from policyengine_uk import Microsimulation + + ss = self.sim.calculate( + "pension_contributions_via_salary_sacrifice" + ) + emp = self.sim.calculate("employment_income") + cf_sim = Microsimulation( + dataset=self.dataset, reform=self.reform + ) + cf_sim.set_input( + "pension_contributions_via_salary_sacrifice", + self.time_period, + np.zeros_like(ss), + ) + cf_sim.set_input( + "employment_income", + self.time_period, + emp + ss, + ) + self._cache["counterfactual_sim"] = cf_sim + return self._cache["counterfactual_sim"] + + +# ── Region name mapping ────────────────────────────────────────────── + +_REGION_MAP = { + "NORTH_EAST": "north_east", + "SOUTH_EAST": "south_east", + "EAST_MIDLANDS": "east_midlands", + "WEST_MIDLANDS": "west_midlands", + "YORKSHIRE": "yorkshire_and_the_humber", + "EAST_OF_ENGLAND": "east", + "LONDON": "london", + "SOUTH_WEST": "south_west", + "NORTH_WEST": "north_west", + "WALES": "wales", + "SCOTLAND": "scotland", + "NORTHERN_IRELAND": "northern_ireland", +} + +_REGION_INV = {v: k for k, v in _REGION_MAP.items()} + + +# ── Column computation dispatch ────────────────────────────────────── + +def _compute_column( + target: Target, ctx: _SimContext, year: int +) -> np.ndarray | None: + """Compute the household-level column for a target. + + Returns None if the target can't be computed (e.g. missing + custom_compute for a complex target). 
+ """ + # If the target has a custom compute function, use it + if target.custom_compute is not None: + return target.custom_compute(ctx, target, year) + + # Dispatch by target name patterns and metadata + name = target.name + + # ── Regional age bands ──────────────────────────────────────── + # Names like "ons/north_east_age_0_9" + if name.startswith("ons/") and "_age_" in name: + return _compute_regional_age(target, ctx) + + # ── Gender × age bands ──────────────────────────────────────── + # Names like "ons/female_0_14" + if name.startswith("ons/") and ( + name.startswith("ons/female_") or name.startswith("ons/male_") + ): + return _compute_gender_age(target, ctx) + + # ── UK total population ─────────────────────────────────────── + if name == "ons/uk_population": + return ctx.household_from_person(ctx.age >= 0) + + # ── Scotland-specific demographics ──────────────────────────── + if name == "ons/scotland_children_under_16": + return ctx.household_from_person( + (ctx.region.values == "SCOTLAND") & (ctx.age < 16) + ) + if name == "ons/scotland_babies_under_1": + return ctx.household_from_person( + (ctx.region.values == "SCOTLAND") & (ctx.age < 1) + ) + if name == "ons/scotland_households_3plus_children": + is_child = ctx.pe_person("is_child") + children_per_hh = ctx.household_from_person(is_child) + return ( + (ctx.household_region == "SCOTLAND") & (children_per_hh >= 3) + ).astype(float) + + # ── Household type targets ──────────────────────────────────── + if target.variable == "family_type" and target.is_count: + return _compute_household_type(target, ctx) + + # ── Tenure targets ──────────────────────────────────────────── + if target.variable == "tenure_type" and target.is_count: + return _compute_tenure(target, ctx) + + # ── Income band breakdowns (HMRC SPI) ───────────────────────── + if target.breakdown_variable == "total_income": + return _compute_income_band(target, ctx) + + # ── Council tax bands by region (VOA) ───────────────────────── + if 
name.startswith("voa/council_tax/"): + return _compute_council_tax_band(target, ctx) + + # ── Vehicle ownership (NTS) ─────────────────────────────────── + if name == "nts/households_no_vehicle": + return (ctx.pe("num_vehicles") == 0).astype(float) + if name == "nts/households_one_vehicle": + return (ctx.pe("num_vehicles") == 1).astype(float) + if name == "nts/households_two_plus_vehicles": + return (ctx.pe("num_vehicles") >= 2).astype(float) + + # ── Housing targets ─────────────────────────────────────────── + if name == "housing/total_mortgage": + return ( + ctx.pe("mortgage_capital_repayment") + + ctx.pe("mortgage_interest_repayment") + ) + if name == "housing/rent_private": + tenure = ctx.sim.calculate("tenure_type", map_to="household").values + return ctx.pe("rent") * (tenure == "RENT_PRIVATELY") + + # ── Savings interest (ONS) ──────────────────────────────────── + if name == "ons/savings_interest_income": + savings = ctx.sim.calculate("savings_interest_income") + return ctx.household_from_person(savings) + + # ── Scottish child payment ──────────────────────────────────── + if name == "sss/scottish_child_payment": + scp = ctx.sim.calculate("scottish_child_payment") + return ctx.household_from_person(scp) + + # ── DWP PIP claimant splits ─────────────────────────────────── + if name == "dwp/pip_dl_standard_claimants": + pip_dl = ctx.sim.calculate("pip_dl_category") + return ctx.sim.map_result( + pip_dl == "STANDARD", "person", "household" + ) + if name == "dwp/pip_dl_enhanced_claimants": + pip_dl = ctx.sim.calculate("pip_dl_category") + return ctx.sim.map_result( + pip_dl == "ENHANCED", "person", "household" + ) + + # ── DWP benefit cap ─────────────────────────────────────────── + if name == "dwp/benefit_capped_households": + reduction = ctx.sim.calculate( + "benefit_cap_reduction", map_to="household" + ).values + return (reduction > 0).astype(float) + if name == "dwp/benefit_cap_total_reduction": + return ctx.sim.calculate( + "benefit_cap_reduction", 
map_to="household" + ).values.astype(float) + + # ── DWP Scotland UC + child under 1 ────────────────────────── + if name == "dwp/scotland_uc_households_child_under_1": + uc = ctx.sim.calculate("universal_credit") + on_uc = ctx.household_from_family(uc > 0) > 0 + child_u1 = ctx.pe_person("is_child") & (ctx.age < 1) + has_child_u1 = ctx.household_from_person(child_u1) > 0 + return ( + (ctx.household_region == "SCOTLAND") & on_uc & has_child_u1 + ).astype(float) + + # ── UC claimants by number of children ───────────────────────── + if name.startswith("dwp/uc/claimants_with_") and "_children" in name: + return _compute_uc_by_children(target, ctx) + + # ── UC claimants by family type ────────────────────────────── + if name.startswith("dwp/uc/claimants_") and not name.startswith( + "dwp/uc/claimants_with_" + ): + return _compute_uc_by_family_type(target, ctx) + + # ── UC payment distribution ─────────────────────────────────── + if name.startswith("dwp/uc_payment_dist/"): + return _compute_uc_payment_dist(target, ctx) + + # ── Salary sacrifice IT relief by tax band ──────────────────── + if name.startswith("hmrc/salary_sacrifice_it_relief_"): + return _compute_ss_it_relief(target, ctx) + + # ── Salary sacrifice NI relief ──────────────────────────────── + if name in ( + "hmrc/salary_sacrifice_employee_nics_relief", + "obr/salary_sacrifice_employee_ni_relief", + ): + ni_base = ctx.sim.calculate("ni_employee") + ni_cf = ctx.counterfactual_sim.calculate( + "ni_employee", ctx.time_period + ) + return ctx.household_from_person(ni_cf - ni_base) + if name in ( + "hmrc/salary_sacrifice_employer_nics_relief", + "obr/salary_sacrifice_employer_ni_relief", + ): + ni_base = ctx.sim.calculate("ni_employer") + ni_cf = ctx.counterfactual_sim.calculate( + "ni_employer", ctx.time_period + ) + return ctx.household_from_person(ni_cf - ni_base) + + # ── UC jobseeker / non-jobseeker splits ─────────────────────── + if name in ( + "obr/universal_credit_jobseekers", + 
"obr/universal_credit_non_jobseekers", + "obr/universal_credit_jobseekers_count", + "obr/universal_credit_non_jobseekers_count", + ): + return _compute_uc_jobseeker(target, ctx) + + # ── OBR UC outside benefit cap ──────────────────────────────── + if name == "obr/universal_credit_outside_cap": + uc = ctx.sim.calculate("universal_credit") + uc_hh = ctx.household_from_family(uc) + cap_reduction = ctx.sim.calculate( + "benefit_cap_reduction", map_to="household" + ).values + not_capped = cap_reduction == 0 + return uc_hh * not_capped + + # ── Two-child limit targets ─────────────────────────────────── + if "two_child_limit" in name: + return _compute_two_child_limit(target, ctx) + + # ── OBR council tax by country ──────────────────────────────── + if name.startswith("obr/council_tax"): + return _compute_obr_council_tax(target, ctx) + + # ── Simple GBP sum targets ──────────────────────────────────── + if target.unit == Unit.GBP and not target.is_count: + return _compute_simple_gbp(target, ctx) + + # ── Simple count targets ────────────────────────────────────── + if target.is_count and target.unit == Unit.COUNT: + return _compute_simple_count(target, ctx) + + logger.debug("No compute logic for target %s", name) + return None + + +# ── Compute implementations ────────────────────────────────────────── + +def _compute_simple_gbp(target: Target, ctx: _SimContext) -> np.ndarray: + """Sum a variable at household level.""" + variable = target.variable + try: + entity = ctx.sim.tax_benefit_system.variables[variable].entity.key + except KeyError: + return None + if entity == "household": + return ctx.pe(variable) + elif entity == "person": + return ctx.household_from_person(ctx.sim.calculate(variable)) + elif entity == "benunit": + return ctx.household_from_family(ctx.sim.calculate(variable)) + return None + + +def _compute_simple_count(target: Target, ctx: _SimContext) -> np.ndarray: + """Count recipients of a variable, mapped to household.""" + return 
ctx.pe_count(target.variable) + + +def _compute_regional_age( + target: Target, ctx: _SimContext +) -> np.ndarray: + """Compute person count in a region × age band.""" + # Parse "ons/{region_name}_age_{lower}_{upper}" from the name + name = target.name.removeprefix("ons/") + # Find the _age_ part + idx = name.index("_age_") + region_name = name[:idx] + age_part = name[idx + 5:] # e.g. "0_9" + lower, upper = age_part.split("_") + lower, upper = int(lower), int(upper) + + pe_region = _REGION_INV.get(region_name) + if pe_region is None: + return None + + person_match = ( + (ctx.region.values == pe_region) + & (ctx.age >= lower) + & (ctx.age <= upper) + ) + return ctx.household_from_person(person_match) + + +def _compute_gender_age( + target: Target, ctx: _SimContext +) -> np.ndarray: + """Compute person count in a gender × age band.""" + name = target.name.removeprefix("ons/") + # "female_0_14" or "male_75_90" + parts = name.split("_") + sex = parts[0] + lower = int(parts[1]) + upper = int(parts[2]) + + gender = ctx.sim.calculate("gender").values + sex_match = gender == ("FEMALE" if sex == "female" else "MALE") + age_match = (ctx.age >= lower) & (ctx.age <= upper) + return ctx.household_from_person(sex_match & age_match) + + +def _compute_household_type( + target: Target, ctx: _SimContext +) -> np.ndarray | None: + """Compute household type count from ONS families & households categories. + + Maps ONS household categories to PE family_type enum values and + household composition conditions. family_type is a benunit variable + so we map boolean comparisons to household level. 
+ """ + name = target.name.removeprefix("ons/") + ft = ctx.sim.calculate("family_type").values # benunit level + is_child = ctx.pe_person("is_child") + children_per_hh = ctx.household_from_person(is_child) + age_hh_head = ctx.pe("age") # head of household age + + def ft_hh(value): + """Map family_type == value from benunit to household (any).""" + return ctx.household_from_family(ft == value) > 0 + + if name == "lone_households_under_65": + return ( + ft_hh("SINGLE") + & (children_per_hh == 0) + & (age_hh_head < 65) + ).astype(float) + if name == "lone_households_over_65": + return ( + ft_hh("SINGLE") + & (children_per_hh == 0) + & (age_hh_head >= 65) + ).astype(float) + if name == "unrelated_adult_households": + people_per_hh = ctx.household_from_person( + np.ones_like(is_child) + ) + return ( + ft_hh("SINGLE") + & (children_per_hh == 0) + & (people_per_hh > 1) + ).astype(float) + if name == "couple_no_children_households": + return ft_hh("COUPLE_NO_CHILDREN").astype(float) + if name == "couple_under_3_children_households": + return ( + ft_hh("COUPLE_WITH_CHILDREN") + & (children_per_hh >= 1) + & (children_per_hh <= 2) + ).astype(float) + if name == "couple_3_plus_children_households": + return ( + ft_hh("COUPLE_WITH_CHILDREN") + & (children_per_hh >= 3) + ).astype(float) + if name == "couple_non_dependent_children_only_households": + people_per_hh = ctx.household_from_person( + np.ones_like(is_child) + ) + return ( + ft_hh("COUPLE_NO_CHILDREN") + & (people_per_hh > 2) + ).astype(float) + if name == "lone_parent_dependent_children_households": + return ( + ft_hh("LONE_PARENT") + & (children_per_hh > 0) + ).astype(float) + if name == "lone_parent_non_dependent_children_households": + people_per_hh = ctx.household_from_person( + np.ones_like(is_child) + ) + return ( + ft_hh("SINGLE") + & (children_per_hh == 0) + & (people_per_hh > 1) + & (age_hh_head >= 40) + ).astype(float) + if name == "multi_family_households": + n_benunits = ctx.pe("household_num_benunits") + 
return (n_benunits > 1).astype(float) + + return None + + +def _compute_tenure( + target: Target, ctx: _SimContext +) -> np.ndarray | None: + """Compute dwelling count by tenure type.""" + # Map ONS target name suffixes to PE tenure_type enum values + _TENURE_MAP = { + "tenure_england_owned_outright": "OWNED_OUTRIGHT", + "tenure_england_owned_with_mortgage": "OWNED_WITH_MORTGAGE", + "tenure_england_rented_privately": "RENT_PRIVATELY", + "tenure_england_social_rent": ["RENT_FROM_COUNCIL", "RENT_FROM_HA"], + "tenure_england_total": None, # all tenures + } + suffix = target.name.removeprefix("ons/") + pe_values = _TENURE_MAP.get(suffix) + if pe_values is None and suffix == "tenure_england_total": + # Total dwellings in England + return (ctx.country == "ENGLAND").astype(float) + if pe_values is None: + return None + + tenure = ctx.sim.calculate("tenure_type", map_to="household").values + in_england = ctx.country == "ENGLAND" + if isinstance(pe_values, list): + match = np.zeros_like(tenure, dtype=bool) + for v in pe_values: + match = match | (tenure == v) + else: + match = tenure == pe_values + return (match & in_england).astype(float) + + +def _compute_income_band( + target: Target, ctx: _SimContext +) -> np.ndarray: + """Compute income variable within a total income band.""" + variable = target.variable + lower = target.lower_bound + upper = target.upper_bound + + income_df = ctx.sim.calculate_dataframe( + ["total_income", variable] + ) + in_band = (income_df.total_income >= lower) & ( + income_df.total_income < upper + ) + + if target.is_count: + return ctx.household_from_person( + (income_df[variable] > 0) * in_band + ) + else: + return ctx.household_from_person( + income_df[variable] * in_band + ) + + +def _compute_council_tax_band( + target: Target, ctx: _SimContext +) -> np.ndarray: + """Compute council tax band count for a region.""" + # "voa/council_tax/{REGION}/{band}" + parts = target.name.split("/") + region = parts[2] + band = parts[3] + + in_region = 
ctx.sim.calculate("region").values == region + + if band == "total": + return in_region.astype(float) + + in_band = ctx.sim.calculate("council_tax_band") == band + return (in_band * in_region).astype(float) + + +def _compute_obr_council_tax( + target: Target, ctx: _SimContext +) -> np.ndarray: + """Compute OBR council tax receipts, optionally by country.""" + name = target.name + ct = ctx.pe("council_tax") + + if name == "obr/council_tax": + return ct + if name == "obr/council_tax_england": + return ct * (ctx.country == "ENGLAND") + if name == "obr/council_tax_scotland": + return ct * (ctx.country == "SCOTLAND") + if name == "obr/council_tax_wales": + return ct * (ctx.country == "WALES") + return ct + + +def _compute_uc_jobseeker( + target: Target, ctx: _SimContext +) -> np.ndarray: + """Compute UC jobseeker / non-jobseeker splits.""" + family = ctx.sim.populations["benunit"] + uc = ctx.sim.calculate("universal_credit") + on_uc = uc > 0 + unemployed = family.any( + ctx.sim.calculate("employment_status") == "UNEMPLOYED" + ) + + if "non_jobseekers" in target.name: + mask = on_uc * ~unemployed + else: + mask = on_uc * unemployed + + if "_count" in target.name: + return ctx.household_from_family(mask) + else: + return ctx.household_from_family(uc * mask) + + +def _compute_uc_payment_dist( + target: Target, ctx: _SimContext +) -> np.ndarray: + """Compute UC payment distribution band × family type.""" + # Parse from name: "dwp/uc_payment_dist/{family_type}_annual_payment_{lower}_to_{upper}" + name = target.name.removeprefix("dwp/uc_payment_dist/") + # Find the _annual_payment_ separator + idx = name.index("_annual_payment_") + family_type = name[:idx] + payment_part = name[idx + 16:] # e.g. 
"0_to_1_000" + lower = target.lower_bound + upper = target.upper_bound + + uc_payments = ctx.sim.calculate( + "universal_credit", map_to="benunit" + ).values + uc_family_type = ctx.sim.calculate( + "family_type", map_to="benunit" + ).values + + in_band = ( + (uc_payments >= lower) + & (uc_payments < upper) + & (uc_family_type == family_type) + ) + return ctx.household_from_family(in_band) + + +def _compute_ss_it_relief( + target: Target, ctx: _SimContext +) -> np.ndarray: + """Compute salary sacrifice IT relief by tax band.""" + it_base = ctx.sim.calculate("income_tax") + it_cf = ctx.counterfactual_sim.calculate("income_tax", ctx.time_period) + it_relief = it_cf - it_base + + adj_net_income_cf = ctx.counterfactual_sim.calculate( + "adjusted_net_income", ctx.time_period + ) + + params = ctx.sim.tax_benefit_system.parameters.gov.hmrc.income_tax.rates.uk + basic_thresh = params[0].threshold(ctx.time_period) + higher_thresh = params[1].threshold(ctx.time_period) + additional_thresh = params[2].threshold(ctx.time_period) + + name = target.name + if "basic" in name: + mask = (adj_net_income_cf > basic_thresh) & ( + adj_net_income_cf <= higher_thresh + ) + elif "higher" in name: + mask = (adj_net_income_cf > higher_thresh) & ( + adj_net_income_cf <= additional_thresh + ) + elif "additional" in name: + mask = adj_net_income_cf > additional_thresh + else: + # Total — no mask + mask = np.ones_like(it_relief, dtype=bool) + + return ctx.household_from_person(it_relief * mask) + + +def _compute_two_child_limit( + target: Target, ctx: _SimContext +) -> np.ndarray | None: + """Compute two-child limit targets. + + These involve cross-tabulations of UC eligibility, child count, + disability status, etc. Complex enough to need specific logic + per target name. 
+ """ + name = target.name + sim = ctx.sim + + is_child = sim.calculate("is_child").values + child_is_affected = ( + sim.map_result( + sim.calculate("uc_is_child_limit_affected", map_to="household"), + "household", + "person", + ) + > 0 + ) * is_child + child_in_uc = sim.calculate("universal_credit", map_to="person").values > 0 + children_in_capped = sim.map_result( + child_is_affected * child_in_uc, "person", "household" + ) + capped_hh = (children_in_capped > 0) * 1.0 + + if name == "dwp/uc/two_child_limit/households_affected": + return capped_hh + if name == "dwp/uc/two_child_limit/children_affected": + return children_in_capped + if name == "dwp/uc/two_child_limit/children_in_affected_households": + # Total children (not just affected ones) in capped households + total_children = sim.map_result( + is_child * child_in_uc, "person", "household" + ) + return total_children * capped_hh + + # By number of children: "dwp/uc/two_child_limit/{n}_children_households" + if "_children_households_total_children" in name: + n = int(name.split("/")[-1].split("_")[0]) + children_count = sim.map_result(is_child, "person", "household") + return ( + capped_hh * (children_count == n) * children_count + ).astype(float) + if "_children_households" in name and "total" not in name: + n = int(name.split("/")[-1].split("_")[0]) + children_count = sim.map_result(is_child, "person", "household") + match = n if n < 6 else slice(6, None) + if isinstance(match, int): + return (capped_hh * (children_count == n)).astype(float) + else: + return (capped_hh * (children_count >= 6)).astype(float) + + # Disability cross-tabs + if "adult_pip_households" in name: + pip = sim.calculate("pip", map_to="household").values + return (capped_hh * (pip > 0)).astype(float) + if "adult_pip_children" in name: + pip = sim.calculate("pip", map_to="household").values + return (children_in_capped * (pip > 0)).astype(float) + if "disabled_child_element_households" in name: + dce = sim.calculate( + 
"uc_individual_disabled_child_element", map_to="household" + ).values + return (capped_hh * (dce > 0)).astype(float) + if "disabled_child_element_children" in name: + dce = sim.calculate( + "uc_individual_disabled_child_element", map_to="household" + ).values + return (children_in_capped * (dce > 0)).astype(float) + + return None + + +def _compute_uc_by_children( + target: Target, ctx: _SimContext +) -> np.ndarray: + """Compute UC claimant households filtered by number of dependent children.""" + # Parse "dwp/uc/claimants_with_{n}_children" + name = target.name + n_str = name.split("claimants_with_")[1].split("_children")[0] + + uc = ctx.sim.calculate("universal_credit") + on_uc = ctx.household_from_family(uc > 0) > 0 + + is_child = ctx.pe_person("is_child") + children_per_hh = ctx.household_from_person(is_child) + + if n_str.endswith("+"): + n = int(n_str[:-1]) + match = children_per_hh >= n + else: + n = int(n_str) + match = children_per_hh == n + + return (on_uc & match).astype(float) + + +def _compute_uc_by_family_type( + target: Target, ctx: _SimContext +) -> np.ndarray: + """Compute UC claimant households filtered by family type.""" + name = target.name + ft_str = name.split("dwp/uc/claimants_")[1] + + uc = ctx.sim.calculate("universal_credit") + on_uc = ctx.household_from_family(uc > 0) > 0 + + ft = ctx.sim.calculate("family_type").values # benunit level + + def ft_hh(value): + return ctx.household_from_family(ft == value) > 0 + + is_child = ctx.pe_person("is_child") + children_per_hh = ctx.household_from_person(is_child) + + if ft_str == "single_no_children": + match = ft_hh("SINGLE") & (children_per_hh == 0) + elif ft_str == "single_with_children": + match = (ft_hh("SINGLE") | ft_hh("LONE_PARENT")) & ( + children_per_hh > 0 + ) + elif ft_str == "couple_no_children": + match = ft_hh("COUPLE_NO_CHILDREN") + elif ft_str == "couple_with_children": + match = ft_hh("COUPLE_WITH_CHILDREN") + else: + return None + + return (on_uc & match).astype(float) diff --git 
a/policyengine_uk_data/targets/registry.py b/policyengine_uk_data/targets/registry.py new file mode 100644 index 00000000..909fd85d --- /dev/null +++ b/policyengine_uk_data/targets/registry.py @@ -0,0 +1,69 @@ +"""Target registry: discovers source modules and collects targets.""" + +import importlib +import pkgutil +from pathlib import Path + +import yaml + +from policyengine_uk_data.targets.schema import ( + GeographicLevel, + Target, +) +from policyengine_uk_data.targets import sources as sources_pkg + + +def load_sources_config() -> dict: + """Load the sources.yaml URL configuration.""" + config_path = Path(__file__).parent / "sources.yaml" + with open(config_path) as f: + return yaml.safe_load(f) + + +def discover_source_modules() -> list: + """Import all modules under targets.sources.""" + modules = [] + package_path = Path(sources_pkg.__file__).parent + for importer, modname, ispkg in pkgutil.iter_modules( + [str(package_path)] + ): + mod = importlib.import_module( + f"policyengine_uk_data.targets.sources.{modname}" + ) + if hasattr(mod, "get_targets"): + modules.append(mod) + return modules + + +def get_all_targets( + year: int | None = None, + geographic_level: GeographicLevel | None = None, +) -> list[Target]: + """Collect targets from all source modules. + + Args: + year: if provided, only return targets that have a value for + this year. + geographic_level: if provided, filter to this geographic level. + + Returns: + De-duplicated list of Target objects. 
+ """ + all_targets: list[Target] = [] + seen_names: set[str] = set() + + for mod in discover_source_modules(): + for target in mod.get_targets(): + if target.name in seen_names: + continue + if year is not None and year not in target.values: + continue + if ( + geographic_level is not None + and target.geographic_level != geographic_level + ): + continue + seen_names.add(target.name) + all_targets.append(target) + + return all_targets diff --git a/policyengine_uk_data/targets/schema.py b/policyengine_uk_data/targets/schema.py new file mode 100644 index 00000000..97b81467 --- /dev/null +++ b/policyengine_uk_data/targets/schema.py @@ -0,0 +1,49 @@ +"""Pydantic schema for calibration targets.""" + +from enum import Enum +from typing import Callable +from pydantic import BaseModel, Field + + +class GeographicLevel(str, Enum): + NATIONAL = "national" + COUNTRY = "country" + REGION = "region" + CONSTITUENCY = "constituency" + LOCAL_AUTHORITY = "local_authority" + + +class Unit(str, Enum): + GBP = "gbp" + COUNT = "count" + RATE = "rate" + + +class Target(BaseModel): + """A single calibration target from an official statistical source. + + Each target represents one number that the microsimulation should + reproduce when household weights are correctly calibrated, e.g. + "total income tax receipts in 2025 = £328.4bn". + """ + + name: str + variable: str + source: str + unit: Unit + geographic_level: GeographicLevel = GeographicLevel.NATIONAL + geo_code: str | None = None + geo_name: str | None = None + values: dict[int, float] + breakdown_variable: str | None = None + lower_bound: float | None = None + upper_bound: float | None = None + is_count: bool = False + reference_url: str | None = None + forecast_vintage: str | None = None + + # For targets needing custom simulation logic (UC splits, + # counterfactuals). Excluded from serialisation. 
+ custom_compute: Callable | None = Field(default=None, exclude=True) + + model_config = {"arbitrary_types_allowed": True} diff --git a/policyengine_uk_data/targets/sources.yaml b/policyengine_uk_data/targets/sources.yaml new file mode 100644 index 00000000..8bb87679 --- /dev/null +++ b/policyengine_uk_data/targets/sources.yaml @@ -0,0 +1,43 @@ +# Official source URLs for calibration targets. +# Update these when new vintages are published. + +obr: + efo_receipts: "https://obr.uk/download/november-2025-economic-and-fiscal-outlook-detailed-forecast-tables-receipts/" + efo_expenditure: "https://obr.uk/download/november-2025-economic-and-fiscal-outlook-detailed-forecast-tables-expenditure/" + vintage: "november_2025" + +hmrc: + spi_collated: "https://assets.publishing.service.gov.uk/media/67cabb37ade26736dbf9ffe5/Collated_Tables_3_1_to_3_17_2223.ods" + spi_geography: "https://assets.publishing.service.gov.uk/media/67cabb7f8c1076c796a45bec/Collated_Tables_3_12_to_3_15a_2223.ods" + income_tax_liabilities: "https://www.gov.uk/government/statistics/income-tax-liabilities-statistics-tax-year-2022-to-2023-to-tax-year-2025-to-2026" + salary_sacrifice_table_6: "https://assets.publishing.service.gov.uk/media/687a294e312ee8a5f0806b6d/Tables_6_1_and_6_2.csv" + +dwp: + stat_xplore_api: "https://stat-xplore.dwp.gov.uk/webapi/rest/v1" + two_child_limit: "https://www.gov.uk/government/statistics/universal-credit-and-child-tax-credit-claimants-statistics-related-to-the-policy-to-provide-support-for-a-maximum-of-2-children-april-2024" + benefit_cap: "https://www.gov.uk/government/statistics/benefit-cap-number-of-households-capped-to-february-2025" + uc_national_payment_dist: "https://stat-xplore.dwp.gov.uk" + uc_pc_households: "https://stat-xplore.dwp.gov.uk" + uc_la_households: "https://stat-xplore.dwp.gov.uk" + ni_uc_stats: "https://www.communities-ni.gov.uk/publications/universal-credit-statistics" + +ons: + population_projections: 
"https://www.ons.gov.uk/peoplepopulationandcommunity/populationandmigration/populationprojections/datasets/z1zippedpopulationprojectionsdatafilesuk" + savings_interest: "https://www.ons.gov.uk/economy/grossdomesticproductgdp/timeseries/haxv/ukea" + households: "https://www.ons.gov.uk/peoplepopulationandcommunity/birthsdeathsandmarriages/families/datasets/familiesandhouseholdsfamiliesandhouseholds" + la_income: "https://www.ons.gov.uk/employmentandlabourmarket/peopleinwork/earningsandworkinghours/datasets/smallareaincomeestimatesformiddlelayersuperoutputareasenglandandwales" + +voa: + council_tax: "https://www.gov.uk/government/statistics/council-tax-stock-of-properties-2024" + +nts: + vehicle_ownership: "https://www.gov.uk/government/statistics/national-travel-survey-2024" + +nrs: + population_estimates: "https://www.nrscotland.gov.uk/statistics-and-data/statistics/statistics-by-theme/population/population-estimates/mid-year-population-estimates" + +scottish_government: + budget: "https://www.gov.scot/publications/scottish-budget-2026-2027/pages/6/" + +nomis: + earnings_by_constituency: "https://www.nomisweb.co.uk/api/v01/" diff --git a/policyengine_uk_data/targets/sources/__init__.py b/policyengine_uk_data/targets/sources/__init__.py new file mode 100644 index 00000000..e161e156 --- /dev/null +++ b/policyengine_uk_data/targets/sources/__init__.py @@ -0,0 +1,4 @@ +"""Target source modules. + +Each module exposes get_targets() -> list[Target]. +""" diff --git a/policyengine_uk_data/targets/sources/dwp.py b/policyengine_uk_data/targets/sources/dwp.py new file mode 100644 index 00000000..67c23c0a --- /dev/null +++ b/policyengine_uk_data/targets/sources/dwp.py @@ -0,0 +1,265 @@ +"""DWP benefit targets. + +PIP daily living standard/enhanced claimant counts, benefit cap, +UC payment distribution, UC claimant counts by children/family type, +two-child limit breakdowns, and Scotland UC households with child under 1. 
def get_targets() -> list[Target]:
    """Assemble the full set of DWP benefit calibration targets.

    Covers PIP daily living claimant counts, the benefit cap, Scotland
    UC households with a child under 1, UC claimant counts by number of
    children and family type, two-child limit breakdowns, and the UC
    payment distribution.
    """
    druk_ref = (
        "https://www.disabilityrightsuk.org/news/90-pip-standard-daily-"
        "living-component-recipients-would-fail-new-green-paper-test"
    )
    cap_ref = (
        "https://www.gov.uk/government/statistics/benefit-cap-number-of-"
        "households-capped-to-february-2025/benefit-cap-number-of-"
        "households-capped-to-february-2025"
    )
    tcl_ref = (
        "https://www.gov.uk/government/statistics/universal-credit-and-"
        "child-tax-credit-claimants-statistics-related-to-the-policy-to-"
        "provide-support-for-a-maximum-of-2-children-april-2024"
    )
    statx_ref = "https://stat-xplore.dwp.gov.uk/"

    targets: list[Target] = []

    def count_target(name, variable, year, value, ref) -> Target:
        # Shorthand for the common COUNT-unit single-year target shape.
        return Target(
            name=name,
            variable=variable,
            source="dwp",
            unit=Unit.COUNT,
            values={year: value},
            is_count=True,
            reference_url=ref,
        )

    # PIP daily living standard and enhanced claimant counts
    # From Disability Rights UK analysis of DWP data
    targets.append(
        count_target(
            "dwp/pip_dl_standard_claimants",
            "pip_dl_category",
            2025,
            1_283_000,
            druk_ref,
        )
    )
    targets.append(
        count_target(
            "dwp/pip_dl_enhanced_claimants",
            "pip_dl_category",
            2025,
            1_608_000,
            druk_ref,
        )
    )

    # Benefit cap: capped household count and total annual reduction
    targets.append(
        count_target(
            "dwp/benefit_capped_households",
            "benefit_cap_reduction",
            2025,
            115_000,
            cap_ref,
        )
    )
    targets.append(
        Target(
            name="dwp/benefit_cap_total_reduction",
            variable="benefit_cap_reduction",
            source="dwp",
            unit=Unit.GBP,
            # £60/week reduction × 52 weeks × capped households
            values={2025: 60 * 52 * 115_000},
            reference_url=cap_ref,
        )
    )

    # Scotland UC households with child under 1
    targets.append(
        count_target(
            "dwp/scotland_uc_households_child_under_1",
            "universal_credit",
            2025,
            14_000,
            statx_ref,
        )
    )

    # UC claimant counts by number of children
    uc_by_children = {
        "1": 1_222_944,
        "2": 1_058_967,
        "3": 473_500,
        "4": 166_790,
        "5+": 74_050 + 1_860,
    }
    for num_children, count in uc_by_children.items():
        targets.append(
            count_target(
                f"dwp/uc/claimants_with_{num_children}_children",
                "universal_credit",
                2025,
                count,
                statx_ref,
            )
        )

    # UC claimant counts by family type (thousands), scaled by the
    # relative undercount before converting to persons.
    uc_by_family_type = {
        "single_no_children": 2868.011,
        "single_with_children": 2156.879,
        "couple_no_children": 231.368,
        "couple_with_children": 839.379,
    }
    undercount_relative = 1.27921 / sum(uc_by_family_type.values())
    for family_type, count_k in uc_by_family_type.items():
        targets.append(
            count_target(
                f"dwp/uc/claimants_{family_type}",
                "universal_credit",
                2025,
                count_k * (1 + undercount_relative) * 1e3,
                statx_ref,
            )
        )

    # Two-child limit headline statistics (2026 data)
    for name, variable, value in [
        (
            "dwp/uc/two_child_limit/households_affected",
            "uc_is_child_limit_affected",
            453_600,
        ),
        (
            "dwp/uc/two_child_limit/children_in_affected_households",
            "is_child",
            1_613_980,
        ),
        (
            "dwp/uc/two_child_limit/children_affected",
            "uc_is_child_limit_affected",
            580_400,
        ),
    ]:
        targets.append(count_target(name, variable, 2026, value, tcl_ref))

    # Two-child limit by number of children: (n, households, children)
    for num_children, households, children in [
        (3, 283_290, 849_860),
        (4, 115_630, 462_520),
        (5, 36_590, 182_940),
        (6, 18_090, 118_670),
    ]:
        targets.append(
            count_target(
                f"dwp/uc/two_child_limit/{num_children}_children_households",
                "uc_is_child_limit_affected",
                2026,
                households,
                tcl_ref,
            )
        )
        targets.append(
            count_target(
                f"dwp/uc/two_child_limit/{num_children}_children_households_total_children",
                "is_child",
                2026,
                children,
                tcl_ref,
            )
        )

    # Two-child limit by disability
    for name, variable, value in [
        ("dwp/uc/two_child_limit/adult_pip_households", "pip", 62_260),
        ("dwp/uc/two_child_limit/adult_pip_children", "is_child", 225_320),
        (
            "dwp/uc/two_child_limit/disabled_child_element_households",
            "uc_individual_disabled_child_element",
            124_560,
        ),
        (
            "dwp/uc/two_child_limit/disabled_child_element_children",
            "is_child",
            462_660,
        ),
    ]:
        targets.append(count_target(name, variable, 2026, value, tcl_ref))

    # UC national payment distribution from xlsx
    targets.extend(_uc_payment_distribution_targets())

    return targets
def _load_config():
    """Read sources.yaml to obtain the download URL."""
    with open(_SOURCES_YAML) as f:
        return yaml.safe_load(f)


def _to_float(val) -> float:
    """Convert CSV value to float, handling suppressed '[z]' etc."""
    try:
        return float(val)
    except (ValueError, TypeError):
        return 0.0


def get_targets() -> list[Target]:
    """Build salary sacrifice IT and NICs relief targets from Table 6.2.

    Downloads the HMRC CSV, keeps salary-sacrificed contribution rows,
    and emits one GBP target per income tax rate band plus one each for
    employee and employer NICs relief, uprated 3% per year from the
    2023-24 base year. Returns whatever was parsed so far (possibly an
    empty list) if the download or parse fails; the failure is logged.
    """
    config = _load_config()
    ref = config["hmrc"]["salary_sacrifice_table_6"]
    targets: list[Target] = []

    def uprated(base: float) -> dict[int, float]:
        # Compound wage growth from the base year through 2031.
        return {
            y: base * _GROWTH ** max(0, y - _BASE_YEAR)
            for y in range(_BASE_YEAR, 2032)
        }

    try:
        response = requests.get(
            ref, headers=_HEADERS, allow_redirects=True, timeout=30
        )
        response.raise_for_status()
        df = pd.read_csv(io.StringIO(response.content.decode("utf-8-sig")))

        ss = df[df["contribution_type"] == "Salary sacrificed contributions"]
        totals = ss[
            (ss["sector_scheme"] == "Total")
            & (ss["scheme_type"] == "Total")
        ]

        # IT relief by tax band
        it_rows = totals[totals["income_tax_nics"] == "Income Tax"]
        for _, row in it_rows.iterrows():
            relief = _to_float(row["value_of_relief"])
            if relief <= 0:
                continue
            rate_key = row["tax_rate"].lower().replace(" ", "_")
            targets.append(
                Target(
                    name=f"hmrc/salary_sacrifice_it_relief_{rate_key}",
                    variable="income_tax",
                    source="hmrc",
                    unit=Unit.GBP,
                    values=uprated(relief * 1e6),
                    reference_url=ref,
                )
            )

        # NICs relief (employee + employer); only the first row seen
        # for each class is kept.
        nics_rows = totals[totals["income_tax_nics"] == "NICs"]
        for _, row in nics_rows.iterrows():
            relief = _to_float(row["value_of_relief"])
            if relief <= 0:
                continue
            nics_class = str(row["nics_relief_class"]).lower()
            if "employee" in nics_class:
                name = "hmrc/salary_sacrifice_employee_nics_relief"
                variable = "ni_employee"
            elif "employer" in nics_class:
                name = "hmrc/salary_sacrifice_employer_nics_relief"
                variable = "ni_employer"
            else:
                continue

            if name in {t.name for t in targets}:
                continue

            targets.append(
                Target(
                    name=name,
                    variable=variable,
                    source="hmrc",
                    unit=Unit.GBP,
                    values=uprated(relief * 1e6),
                    reference_url=ref,
                )
            )

    except Exception as e:
        logger.error(
            "Failed to download/parse HMRC salary sacrifice CSV: %s", e
        )

    return targets
"""HMRC Survey of Personal Incomes targets.

Downloads and parses the SPI ODS (Tables 3.6 and 3.7) to get income
distributions by total income band and income type for 2022-23.

For future year projections, the microsimulation uprates these base
year distributions forward using PolicyEngine's uprating factors.
That projection logic is in utils/incomes_projection.py and is not
part of the target download — it's a simulation step.

Source: https://www.gov.uk/government/statistics/income-tax-summarised-accounts-statistics
"""

import io
import logging
from functools import lru_cache
from pathlib import Path

import pandas as pd
import requests
import yaml

from policyengine_uk_data.targets.schema import Target, Unit

logger = logging.getLogger(__name__)

_SOURCES_YAML = Path(__file__).parent.parent / "sources.yaml"
_STORAGE = Path(__file__).parents[2] / "storage"

_HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
        "AppleWebKit/537.36"
    ),
}

# Income bands in the SPI tables (lower bounds)
_BAND_LOWER = [
    12_570,
    15_000,
    20_000,
    30_000,
    40_000,
    50_000,
    70_000,
    100_000,
    150_000,
    200_000,
    300_000,
    500_000,
    1_000_000,
]
_BAND_UPPER = _BAND_LOWER[1:] + [float("inf")]

# SPI year: the ODS is for tax year 2022-23, mapped to calendar 2023
_SPI_YEAR = 2023


def _load_config():
    """Load the sources.yaml URL configuration."""
    with open(_SOURCES_YAML) as f:
        return yaml.safe_load(f)


@lru_cache(maxsize=1)
def _download_ods(url: str) -> bytes:
    """Download an ODS file (cached for the process lifetime)."""
    r = requests.get(url, headers=_HEADERS, allow_redirects=True, timeout=60)
    r.raise_for_status()
    return r.content


def _parse_table_36(ods_bytes: bytes) -> pd.DataFrame:
    """Parse Table 3.6: employment, self-employment, pensions by band.

    Args:
        ods_bytes: raw bytes of the HMRC SPI collated-tables ODS file.

    Returns:
        DataFrame with columns: lower_bound,
        self_employment_income_count/amount, employment_income_count/amount,
        state_pension_count/amount, private_pension_income_count/amount.
    """
    df = pd.read_excel(
        io.BytesIO(ods_bytes),
        sheet_name="Table_3_6",
        engine="odf",
        header=None,
    )
    # Data rows start at row 5 and end at the first non-numeric first
    # column (e.g. the "All ranges" total row). NaN must be rejected
    # explicitly: it IS a float, and int(nan) would raise ValueError,
    # aborting the whole parse via the caller's except clause. Bools
    # are excluded too since bool is a subclass of int.
    data_rows = []
    for i in range(5, len(df)):
        lower = df.iloc[i, 0]
        if (
            not isinstance(lower, (int, float))
            or isinstance(lower, bool)
            or pd.isna(lower)
        ):
            break
        data_rows.append(
            {
                "lower_bound": int(lower),
                "self_employment_income_count": _to_float(df.iloc[i, 1]),
                "self_employment_income_amount": _to_float(df.iloc[i, 2]),
                "employment_income_count": _to_float(df.iloc[i, 4]),
                "employment_income_amount": _to_float(df.iloc[i, 5]),
                "state_pension_count": _to_float(df.iloc[i, 7]),
                "state_pension_amount": _to_float(df.iloc[i, 8]),
                "private_pension_income_count": _to_float(df.iloc[i, 10]),
                "private_pension_income_amount": _to_float(df.iloc[i, 11]),
            }
        )
    return pd.DataFrame(data_rows)
+ """ + df = pd.read_excel( + io.BytesIO(ods_bytes), + sheet_name="Table_3_7", + engine="odf", + header=None, + ) + data_rows = [] + for i in range(5, len(df)): + lower = df.iloc[i, 0] + if not isinstance(lower, (int, float)): + break + data_rows.append( + { + "lower_bound": int(lower), + "property_income_count": _to_float(df.iloc[i, 1]), + "property_income_amount": _to_float(df.iloc[i, 2]), + "savings_interest_income_count": _to_float(df.iloc[i, 4]), + "savings_interest_income_amount": _to_float(df.iloc[i, 5]), + "dividend_income_count": _to_float(df.iloc[i, 7]), + "dividend_income_amount": _to_float(df.iloc[i, 8]), + } + ) + return pd.DataFrame(data_rows) + + +def _to_float(val) -> float: + """Convert cell value to float, handling '[Not available]' etc.""" + if isinstance(val, (int, float)): + return float(val) + return 0.0 + + +INCOME_VARIABLES = [ + "employment_income", + "self_employment_income", + "state_pension", + "private_pension_income", + "property_income", + "dividend_income", +] + + +def get_targets() -> list[Target]: + """Build income-band targets from the live HMRC SPI ODS. + + Also reads incomes_projection.csv if available, which contains + projected future year data generated by the microsimulation. 
+ """ + config = _load_config() + ref = config["hmrc"]["spi_collated"] + targets = [] + + # Parse base year from official ODS + try: + ods_bytes = _download_ods(ref) + t36 = _parse_table_36(ods_bytes) + t37 = _parse_table_37(ods_bytes) + merged = t36.merge(t37, on="lower_bound", how="outer") + + for idx, row in merged.iterrows(): + lower = int(row["lower_bound"]) + upper = ( + _BAND_UPPER[idx] if idx < len(_BAND_UPPER) else float("inf") + ) + band_label = f"{lower:_}_to_{upper:_}" + + for variable in INCOME_VARIABLES: + amount_col = f"{variable}_amount" + count_col = f"{variable}_count" + + if amount_col in row.index and row[amount_col] > 0: + # SPI amounts are in £millions, counts in thousands + targets.append( + Target( + name=f"hmrc/{variable}_income_band_{band_label}", + variable=variable, + source="hmrc_spi", + unit=Unit.GBP, + values={ + _SPI_YEAR: float(row[amount_col]) * 1e6 + }, + breakdown_variable="total_income", + lower_bound=float(lower), + upper_bound=float(upper), + reference_url=ref, + ) + ) + + if count_col in row.index and row[count_col] > 0: + targets.append( + Target( + name=( + f"hmrc/{variable}_count_income_band" + f"_{band_label}" + ), + variable=variable, + source="hmrc_spi", + unit=Unit.COUNT, + values={ + _SPI_YEAR: float(row[count_col]) * 1e3 + }, + is_count=True, + breakdown_variable="total_income", + lower_bound=float(lower), + upper_bound=float(upper), + reference_url=ref, + ) + ) + except Exception as e: + logger.error("Failed to download/parse HMRC SPI ODS: %s", e) + + # Also read projected future years from incomes_projection.csv + # if it exists (generated by utils/incomes_projection.py) + proj_path = _STORAGE / "incomes_projection.csv" + if proj_path.exists(): + targets.extend(_read_projection_csv(proj_path, ref)) + + return targets + + +def _read_projection_csv( + csv_path: Path, ref: str +) -> list[Target]: + """Read projected future year targets from incomes_projection.csv.""" + incomes = pd.read_csv(csv_path) + targets = [] + 
def _read_projection_csv(
    csv_path: Path, ref: str
) -> list[Target]:
    """Read projected future year targets from incomes_projection.csv.

    Rows for the base SPI year are skipped (actuals come from the ODS).
    Targets that share a name across years are merged into one Target
    holding a multi-year values dict; first-occurrence order is kept.
    """
    incomes = pd.read_csv(csv_path)
    merged: dict[str, Target] = {}

    def add(name: str, **kwargs) -> None:
        # Merge the values dict into an existing target of the same
        # name, or register a new one.
        if name in merged:
            merged[name].values.update(kwargs["values"])
        else:
            merged[name] = Target(name=name, **kwargs)

    for year in incomes.year.unique():
        if year <= _SPI_YEAR:
            continue  # Skip base year — we have actuals from ODS

        for _, row in incomes[incomes.year == year].iterrows():
            lower = row.total_income_lower_bound
            upper = row.total_income_upper_bound
            band_label = f"{lower:_.0f}_to_{upper:_.0f}"

            for variable in INCOME_VARIABLES:
                amount_col = f"{variable}_amount"
                count_col = f"{variable}_count"
                common = dict(
                    variable=variable,
                    source="hmrc_spi",
                    breakdown_variable="total_income",
                    lower_bound=float(lower),
                    upper_bound=float(upper),
                    reference_url=ref,
                )

                if amount_col in row.index and pd.notna(row[amount_col]):
                    add(
                        f"hmrc/{variable}_income_band_{band_label}",
                        unit=Unit.GBP,
                        values={int(year): float(row[amount_col])},
                        **common,
                    )

                if count_col in row.index and pd.notna(row[count_col]):
                    add(
                        f"hmrc/{variable}_count_income_band_{band_label}",
                        unit=Unit.COUNT,
                        values={int(year): float(row[count_col])},
                        is_count=True,
                        **common,
                    )

    return list(merged.values())
def get_targets() -> list[Target]:
    """Return total mortgage payment and private rent GBP targets."""
    ref = (
        "https://www.ons.gov.uk/economy/inflationandpriceindices/"
        "bulletins/privaterentandhousepricesuk/january2025"
    )
    spec = [
        (
            "housing/total_mortgage",
            "mortgage_capital_repayment",
            _MORTGAGE_TOTAL,
        ),
        ("housing/rent_private", "rent", _PRIVATE_RENT_TOTAL),
    ]
    return [
        Target(
            name=name,
            variable=variable,
            source="ons",
            unit=Unit.GBP,
            values={2025: total},
            reference_url=ref,
        )
        for name, variable, total in spec
    ]
+ +Source: ONS mid-year population estimates +https://www.ons.gov.uk/peoplepopulationandcommunity/populationandmigration/populationestimates +""" + +import logging +from pathlib import Path + +import numpy as np +import pandas as pd + +logger = logging.getLogger(__name__) + +_CONST_DIR = ( + Path(__file__).parents[2] + / "datasets" + / "local_areas" + / "constituencies" + / "targets" +) +_LA_DIR = ( + Path(__file__).parents[2] + / "datasets" + / "local_areas" + / "local_authorities" + / "targets" +) +_STORAGE = Path(__file__).parents[2] / "storage" + +_REF = ( + "https://www.ons.gov.uk/peoplepopulationandcommunity/" + "populationandmigration/populationestimates" +) + +_AGE_BANDS = list(range(0, 80, 10)) # [0, 10, 20, ..., 70] + + +def _load_age_csv(path: Path) -> pd.DataFrame: + """Load age.csv, returning code + single-year columns.""" + if not path.exists(): + logger.warning("Age CSV not found: %s", path) + return pd.DataFrame() + return pd.read_csv(path) + + +def _aggregate_to_bands(ages: pd.DataFrame) -> pd.DataFrame: + """Sum single-year ages into 10-year bands. + + Returns DataFrame with columns: code, name, age/0_10, age/10_20, etc. + """ + result = ages[["code", "name"]].copy() + for lower in _AGE_BANDS: + upper = lower + 10 + cols = [str(a) for a in range(lower, upper) if str(a) in ages.columns] + result[f"age/{lower}_{upper}"] = ages[cols].sum(axis=1) + return result + + +def get_constituency_age_targets() -> pd.DataFrame: + """Age targets for 650 constituencies (2010 boundary codes). + + Returns DataFrame with 650 rows × (code, name, age/0_10, ..., age/70_80). + Caller must apply mapping_matrix to transform to 2024 boundaries. + """ + ages = _load_age_csv(_CONST_DIR / "age.csv") + if ages.empty: + return ages + return _aggregate_to_bands(ages) + + +def get_la_age_targets() -> pd.DataFrame: + """Age targets for 360 local authorities. + + Returns DataFrame with 360 rows × (code, name, age/0_10, ..., age/70_80). 
+ """ + ages = _load_age_csv(_LA_DIR / "age.csv") + if ages.empty: + return ages + return _aggregate_to_bands(ages) + + +def get_uk_total_population(year: int) -> float: + """UK total population from demographics.csv (in persons, not thousands).""" + csv_path = _STORAGE / "demographics.csv" + if not csv_path.exists(): + return 69.9e6 # fallback + demographics = pd.read_csv(csv_path) + row = demographics[demographics.name == "uk_population"] + col = str(year) + if col in row.columns and not row[col].isna().all(): + return float(row[col].values[0]) * 1e6 + return 69.9e6 + + +REFERENCE_URL = _REF diff --git a/policyengine_uk_data/targets/sources/local_income.py b/policyengine_uk_data/targets/sources/local_income.py new file mode 100644 index 00000000..1e418313 --- /dev/null +++ b/policyengine_uk_data/targets/sources/local_income.py @@ -0,0 +1,96 @@ +"""Local area income targets from HMRC SPI table 3.15. + +Reads pre-processed SPI CSV files for constituencies and local authorities, +extracting employment and self-employment income (count + amount) per area. + +National consistency adjustment (scaling local totals to match national SPI +projections) is applied by the caller, not here. 
import logging
from pathlib import Path

import pandas as pd

logger = logging.getLogger(__name__)

_CONST_DIR = (
    Path(__file__).parents[2]
    / "datasets"
    / "local_areas"
    / "constituencies"
    / "targets"
)
_LA_DIR = (
    Path(__file__).parents[2]
    / "datasets"
    / "local_areas"
    / "local_authorities"
    / "targets"
)
_STORAGE = Path(__file__).parents[2] / "storage"

_REF = (
    "https://www.gov.uk/government/statistics/"
    "income-and-tax-by-county-and-region-and-by-parliamentary-constituency"
)

_INCOME_VARIABLES = ["self_employment_income", "employment_income"]


def _load_spi(path: Path) -> pd.DataFrame:
    """Load an SPI CSV, or an empty DataFrame (warned) if missing."""
    if not path.exists():
        logger.warning("SPI CSV not found: %s", path)
        return pd.DataFrame()
    return pd.read_csv(path)


def _income_columns() -> list[str]:
    """Column selection shared by both area types."""
    cols = ["code", "name"]
    for v in _INCOME_VARIABLES:
        cols.extend([f"{v}_count", f"{v}_amount"])
    return cols


def get_constituency_income_targets() -> pd.DataFrame:
    """Income targets for 650 constituencies (2010 codes).

    Returns DataFrame with columns: code, name, and for each income
    variable: {var}_count, {var}_amount. Empty if the CSV is missing.
    """
    spi = _load_spi(_CONST_DIR / "spi_by_constituency.csv")
    if spi.empty:
        return spi
    return spi[_income_columns()]


def get_la_income_targets() -> pd.DataFrame:
    """Income targets for 360 local authorities.

    Returns DataFrame with columns: code, name, and for each income
    variable: {var}_count, {var}_amount. Empty if the CSV is missing.
    """
    spi = _load_spi(_LA_DIR / "spi_by_la.csv")
    if spi.empty:
        return spi
    return spi[_income_columns()]


def get_national_income_projections(year: int) -> pd.DataFrame:
    """National income projections for consistency adjustment.

    Returns the incomes_projection.csv rows for the requested year,
    clamped into the range of years present in the file. (The previous
    implementation only clamped from below, so a year beyond the
    projection horizon silently returned an empty frame.)
    """
    path = _STORAGE / "incomes_projection.csv"
    if not path.exists():
        return pd.DataFrame()
    df = pd.read_csv(path)
    if df.empty:
        return df
    target_year = min(max(year, df.year.min()), df.year.max())
    return df[df.year == target_year]


INCOME_VARIABLES = _INCOME_VARIABLES
REFERENCE_URL = _REF
"privaterentalmarketsummarystatisticsinengland" +) + + +def load_ons_la_income() -> pd.DataFrame: + """Load ONS income estimates by local authority. + + Returns DataFrame with columns: la_code, total_income, net_income_bhc, + net_income_ahc (mean income per household, FYE 2020). + """ + xlsx_path = _STORAGE / "local_authority_ons_income.xlsx" + if not xlsx_path.exists(): + logger.warning("ONS LA income file not found: %s", xlsx_path) + return pd.DataFrame() + + xlsx = pd.ExcelFile(xlsx_path) + + def load_sheet(sheet_name: str, value_col: str) -> pd.DataFrame: + df = pd.read_excel(xlsx, sheet_name=sheet_name, header=3) + df.columns = [ + "msoa_code", "msoa_name", "la_code", "la_name", + "region_code", "region_name", value_col, + "upper_ci", "lower_ci", "ci_width", + ] + df = df.iloc[1:].dropna(subset=["msoa_code"]) + df[value_col] = pd.to_numeric(df[value_col]) + return df[["la_code", value_col]] + + total = load_sheet("Total annual income", "total_income") + bhc = load_sheet("Net income before housing costs", "net_income_bhc") + ahc = load_sheet("Net income after housing costs", "net_income_ahc") + + la_total = total.groupby("la_code")["total_income"].mean().reset_index() + la_bhc = bhc.groupby("la_code")["net_income_bhc"].mean().reset_index() + la_ahc = ahc.groupby("la_code")["net_income_ahc"].mean().reset_index() + + return la_total.merge(la_bhc, on="la_code").merge(la_ahc, on="la_code") + + +def load_household_counts() -> pd.DataFrame: + """Load household counts by LA (Census 2021). + + Returns DataFrame with columns: la_code, households. + """ + path = _STORAGE / "la_count_households.xlsx" + if not path.exists(): + logger.warning("LA household count file not found: %s", path) + return pd.DataFrame() + df = pd.read_excel(path, sheet_name="Dataset") + df.columns = ["la_code", "la_name", "households"] + return df[["la_code", "households"]] + + +def load_tenure_data() -> pd.DataFrame: + """Load tenure percentages by LA. 
+ + Returns DataFrame with columns: la_code, owned_outright_pct, + owned_mortgage_pct, private_rent_pct, social_rent_pct. + """ + path = _STORAGE / "la_tenure.xlsx" + if not path.exists(): + logger.warning("LA tenure file not found: %s", path) + return pd.DataFrame() + df = pd.read_excel(path, sheet_name="data download") + df.columns = [ + "region_code", "region_name", "la_code", "la_name", + "owned_outright_pct", "owned_mortgage_pct", + "private_rent_pct", "social_rent_pct", + ] + return df[["la_code", "owned_outright_pct", "owned_mortgage_pct", + "private_rent_pct", "social_rent_pct"]] + + +def load_private_rents() -> pd.DataFrame: + """Load median monthly private rents by LA. + + Returns DataFrame with columns: area_code, median_annual_rent. + """ + path = _STORAGE / "la_private_rents_median.xlsx" + if not path.exists(): + logger.warning("LA private rent file not found: %s", path) + return pd.DataFrame() + df = pd.read_excel(path, sheet_name="Figure 3", header=5) + df.columns = [ + "col0", "la_code_old", "area_code", "area_name", "room", + "studio", "one_bed", "two_bed", "three_bed", "four_plus", + "median_monthly_rent", + ] + df = df[df["area_code"].astype(str).str.match(r"^E0[6789]")] + df["median_monthly_rent"] = pd.to_numeric( + df["median_monthly_rent"], errors="coerce" + ) + df["median_annual_rent"] = df["median_monthly_rent"] * 12 + return df[["area_code", "median_annual_rent"]] diff --git a/policyengine_uk_data/targets/sources/local_uc.py b/policyengine_uk_data/targets/sources/local_uc.py new file mode 100644 index 00000000..c326498d --- /dev/null +++ b/policyengine_uk_data/targets/sources/local_uc.py @@ -0,0 +1,42 @@ +"""Local area UC household targets from DWP Stat-Xplore. + +UC household counts by parliamentary constituency and local authority, +loaded from pre-downloaded Stat-Xplore exports and scaled to match +national UC payment distribution totals. 
+ +Source: DWP Stat-Xplore +https://stat-xplore.dwp.gov.uk +""" + +import logging + +import pandas as pd + +logger = logging.getLogger(__name__) + +_REF = "https://stat-xplore.dwp.gov.uk" + + +def get_constituency_uc_targets() -> pd.Series: + """UC household counts for 650 constituencies (positional order). + + Returns Series of household_count values, aligned to the same + ordering as the constituency age.csv. + """ + from policyengine_uk_data.utils.uc_data import uc_pc_households + + return uc_pc_households.household_count + + +def get_la_uc_targets() -> pd.Series: + """UC household counts for 360 local authorities (positional order). + + Returns Series of household_count values, aligned to the same + ordering as the LA age.csv. + """ + from policyengine_uk_data.utils.uc_data import uc_la_households + + return uc_la_households.household_count + + +REFERENCE_URL = _REF diff --git a/policyengine_uk_data/targets/sources/nts_vehicles.py b/policyengine_uk_data/targets/sources/nts_vehicles.py new file mode 100644 index 00000000..18fd8df8 --- /dev/null +++ b/policyengine_uk_data/targets/sources/nts_vehicles.py @@ -0,0 +1,49 @@ +"""NTS vehicle ownership targets. + +From the National Travel Survey 2024. 
+Source: https://www.gov.uk/government/statistics/national-travel-survey-2024 +""" + +from policyengine_uk_data.targets.schema import Target, Unit + +_REF = "https://www.gov.uk/government/statistics/national-travel-survey-2024" + +# NTS 2024: 22% no car, 44% one car, 34% two+ cars +NTS_NO_VEHICLE_RATE = 0.22 +NTS_ONE_VEHICLE_RATE = 0.44 +NTS_TWO_PLUS_VEHICLE_RATE = 0.34 + +# ~29.6m total UK households (from VOA/ONS council tax stock 2024) +_TOTAL_HOUSEHOLDS = 29.6e6 + + +def get_targets() -> list[Target]: + return [ + Target( + name="nts/households_no_vehicle", + variable="num_vehicles", + source="nts", + unit=Unit.COUNT, + values={2024: _TOTAL_HOUSEHOLDS * NTS_NO_VEHICLE_RATE}, + is_count=True, + reference_url=_REF, + ), + Target( + name="nts/households_one_vehicle", + variable="num_vehicles", + source="nts", + unit=Unit.COUNT, + values={2024: _TOTAL_HOUSEHOLDS * NTS_ONE_VEHICLE_RATE}, + is_count=True, + reference_url=_REF, + ), + Target( + name="nts/households_two_plus_vehicles", + variable="num_vehicles", + source="nts", + unit=Unit.COUNT, + values={2024: _TOTAL_HOUSEHOLDS * NTS_TWO_PLUS_VEHICLE_RATE}, + is_count=True, + reference_url=_REF, + ), + ] diff --git a/policyengine_uk_data/targets/sources/obr.py b/policyengine_uk_data/targets/sources/obr.py new file mode 100644 index 00000000..6867eb1f --- /dev/null +++ b/policyengine_uk_data/targets/sources/obr.py @@ -0,0 +1,504 @@ +"""OBR Economic and Fiscal Outlook targets. + +Downloads and parses the OBR's detailed supplementary tables (receipts +and expenditure xlsx) to extract tax receipt forecasts, benefit +expenditure, and benefit caseloads. 
+ +Sources: +- Receipts: https://obr.uk/download/november-2025-economic-and-fiscal-outlook-detailed-forecast-tables-receipts/ +- Expenditure: https://obr.uk/download/november-2025-economic-and-fiscal-outlook-detailed-forecast-tables-expenditure/ +""" + +import io +import logging +from functools import lru_cache +from pathlib import Path + +import openpyxl +import requests +import yaml + +from policyengine_uk_data.targets.schema import Target, Unit + +logger = logging.getLogger(__name__) + +_SOURCES_YAML = Path(__file__).parent.parent / "sources.yaml" + +# Financial year columns in OBR tables: C=2024-25, D=2025-26, ..., I=2030-31 +# PolicyEngine convention: FY 2025-26 → calendar year 2025 (first year) +_FY_COL_TO_YEAR = { + "C": 2024, + "D": 2025, + "E": 2026, + "F": 2027, + "G": 2028, + "H": 2029, + "I": 2030, +} + +_HEADERS = { + "User-Agent": ( + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) " + "AppleWebKit/537.36" + ), +} + + +def _load_config(): + with open(_SOURCES_YAML) as f: + return yaml.safe_load(f) + + +@lru_cache(maxsize=1) +def _download_workbook(url: str) -> openpyxl.Workbook: + """Download an xlsx from OBR and return an openpyxl workbook.""" + r = requests.get(url, headers=_HEADERS, allow_redirects=True, timeout=60) + r.raise_for_status() + return openpyxl.load_workbook(io.BytesIO(r.content), data_only=False) + + +def _read_row_values( + ws, row_num: int, col_letters: list[str] +) -> dict[int, float]: + """Read numeric values from a row, mapped to calendar years.""" + result = {} + for col in col_letters: + cell = ws[f"{col}{row_num}"] + val = cell.value + if val is not None and isinstance(val, (int, float)): + result[_FY_COL_TO_YEAR[col]] = float(val) * 1e9 + return result + + +def _find_row(ws, label: str, col: str = "B", max_row: int = 80) -> int: + """Find the row number where a cell starts with label.""" + for row in range(1, max_row + 1): + cell_val = ws[f"{col}{row}"].value + if cell_val and str(cell_val).strip().startswith(label): + return 
row + raise ValueError(f"Row '{label}' not found in sheet") + + +def _parse_receipts(wb: openpyxl.Workbook) -> list[Target]: + """Parse tax receipts from the OBR EFO. + + Income tax uses Table 3.4 (accrued basis) for consistency with + the standard fiscal forecasting convention. Other receipts use + Table 3.9 (cash basis) since they only appear there. + """ + config = _load_config() + vintage = config["obr"]["vintage"] + ref = config["obr"]["efo_receipts"] + cols_34 = list(_FY_COL_TO_YEAR.keys()) + + # Table 3.9 columns are shifted right by one vs 3.4 + cols_39 = ["D", "E", "F", "G", "H", "I", "J"] + fy_39 = { + "D": 2024, + "E": 2025, + "F": 2026, + "G": 2027, + "H": 2028, + "I": 2029, + "J": 2030, + } + + def read_39(ws, row_num: int) -> dict[int, float]: + result = {} + for col in cols_39: + cell = ws[f"{col}{row_num}"] + val = cell.value + if val is not None and isinstance(val, (int, float)): + result[fy_39[col]] = float(val) * 1e9 + return result + + targets = [] + + # Income tax from Table 3.4 (accrued basis) + try: + ws34 = wb["3.4"] + row_num = _find_row(ws34, "Income tax (gross of tax credits)", col="B", max_row=30) + values = _read_row_values(ws34, row_num, cols_34) + if values: + targets.append( + Target( + name="obr/income_tax", + variable="income_tax", + source="obr", + unit=Unit.GBP, + values=values, + reference_url=ref, + forecast_vintage=vintage, + ) + ) + except ValueError: + logger.warning("OBR receipts: income tax row not found in 3.4") + + # Other receipts from Table 3.9 (cash basis) + ws39 = wb["3.9"] + cash_rows = { + "ni": ("National insurance contributions", "ni_employee"), + "vat": ("Value added tax", "vat"), + "fuel_duties": ("Fuel duties", "fuel_duty"), + "capital_gains_tax": ("Capital gains tax", "capital_gains_tax"), + "sdlt": ("Stamp duty land tax", "stamp_duty_land_tax"), + } + + for name, (label, variable) in cash_rows.items(): + try: + row_num = _find_row(ws39, label, col="B", max_row=80) + values = read_39(ws39, row_num) + if 
values: + targets.append( + Target( + name=f"obr/{name}", + variable=variable, + source="obr", + unit=Unit.GBP, + values=values, + reference_url=ref, + forecast_vintage=vintage, + ) + ) + except ValueError: + logger.warning("OBR receipts: row '%s' not found", label) + + return targets + + +def _parse_council_tax(wb: openpyxl.Workbook) -> list[Target]: + """Parse Table 4.1 (council tax receipts) from expenditure xlsx.""" + config = _load_config() + vintage = config["obr"]["vintage"] + ref = config["obr"]["efo_expenditure"] + ws = wb["4.1"] + + cols = ["C", "D", "E", "F", "G", "H", "I"] + fy = { + "C": 2024, + "D": 2025, + "E": 2026, + "F": 2027, + "G": 2028, + "H": 2029, + "I": 2030, + } + + def read_41(row_num: int) -> dict[int, float]: + result = {} + for col in cols: + cell = ws[f"{col}{row_num}"] + val = cell.value + if val is not None and isinstance(val, (int, float)): + result[fy[col]] = float(val) * 1e9 + return result + + ct_rows = { + "council_tax": ("Total net council tax receipts", "council_tax"), + "council_tax_england": ( + "England council tax receipts", + "council_tax", + ), + "council_tax_scotland": ( + "Scotland council tax receipts", + "council_tax", + ), + "council_tax_wales": ("Wales council tax receipts", "council_tax"), + "domestic_rates": ("NI domestic rates", "domestic_rates"), + } + + targets = [] + for name, (label, variable) in ct_rows.items(): + try: + row_num = _find_row(ws, label, col="B", max_row=30) + values = read_41(row_num) + if values: + targets.append( + Target( + name=f"obr/{name}", + variable=variable, + source="obr", + unit=Unit.GBP, + values=values, + reference_url=ref, + forecast_vintage=vintage, + ) + ) + except ValueError: + logger.warning("OBR council tax: row '%s' not found", label) + + return targets + + +def _parse_nics(wb: openpyxl.Workbook) -> list[Target]: + """Parse Table 3.4 (income tax and NICs detail) for employee/employer.""" + config = _load_config() + vintage = config["obr"]["vintage"] + ref = 
config["obr"]["efo_receipts"] + ws = wb["3.4"] + cols = list(_FY_COL_TO_YEAR.keys()) + + nic_rows = { + "ni_employee": ( + "Class 1 Employee NICs", + "ni_employee", + ), + "ni_employer": ( + "Class 1 Employer NICs", + "ni_employer", + ), + } + + targets = [] + for name, (label, variable) in nic_rows.items(): + try: + row_num = _find_row(ws, label, col="B", max_row=30) + values = _read_row_values(ws, row_num, cols) + if values: + targets.append( + Target( + name=f"obr/{name}", + variable=variable, + source="obr", + unit=Unit.GBP, + values=values, + reference_url=ref, + forecast_vintage=vintage, + ) + ) + except ValueError: + logger.warning("OBR NICs: row '%s' not found", label) + + return targets + + +def _parse_welfare(wb: openpyxl.Workbook) -> list[Target]: + """Parse Table 4.9 (welfare spending) from expenditure xlsx.""" + config = _load_config() + vintage = config["obr"]["vintage"] + ref = config["obr"]["efo_expenditure"] + ws = wb["4.9"] + + cols = ["C", "D", "E", "F", "G", "H", "I"] + fy = { + "C": 2024, + "D": 2025, + "E": 2026, + "F": 2027, + "G": 2028, + "H": 2029, + "I": 2030, + } + + def read_49(row_num: int) -> dict[int, float]: + result = {} + for col in cols: + cell = ws[f"{col}{row_num}"] + val = cell.value + if val is not None and isinstance(val, (int, float)): + result[fy[col]] = float(val) * 1e9 + return result + + benefit_rows = { + "housing_benefit": ( + "Housing benefit (not on JSA)", + "housing_benefit", + ), + "pip": ( + "Disability living allowance and personal independence p", + "pip", + ), + "esa": ("Incapacity benefits", "esa_income"), + "attendance_allowance": ( + "Attendance allowance", + "attendance_allowance", + ), + "pension_credit": ("Pension credit", "pension_credit"), + "carers_allowance": ("Carer's allowance", "carers_allowance"), + "statutory_maternity_pay": ( + "Statutory maternity pay", + "statutory_maternity_pay", + ), + "winter_fuel_allowance": ( + "Winter fuel payment", + "winter_fuel_allowance", + ), + 
"universal_credit_in_cap": ( + "Universal credit", + "universal_credit", + ), + "child_benefit": ("Child benefit", "child_benefit"), + "state_pension": ("State pension", "state_pension"), + "jobseekers_allowance": ( + "Jobseeker's allowance", + "jsa_income", + ), + } + + targets = [] + # Welfare cap section (rows 6-36) + for name, (label, variable) in benefit_rows.items(): + try: + row_num = _find_row(ws, label, col="B", max_row=55) + values = read_49(row_num) + if values: + targets.append( + Target( + name=f"obr/{name}", + variable=variable, + source="obr", + unit=Unit.GBP, + values=values, + reference_url=ref, + forecast_vintage=vintage, + ) + ) + except ValueError: + logger.warning("OBR welfare: row '%s' not found", label) + + # Universal credit outside cap (row 43) is jobseekers UC + try: + # UC outside cap = predominantly JSA-conditionality UC + uc_outside_row = _find_row( + ws, "Universal credit", col="B", max_row=55 + ) + # Find the second UC row (outside cap section) + for row in range(uc_outside_row + 1, 55): + cell_val = ws[f"B{row}"].value + if cell_val and str(cell_val).strip().startswith( + "Universal credit" + ): + values = read_49(row) + if values: + targets.append( + Target( + name="obr/universal_credit_outside_cap", + variable="universal_credit", + source="obr", + unit=Unit.GBP, + values=values, + reference_url=ref, + forecast_vintage=vintage, + ) + ) + break + except ValueError: + logger.warning("OBR welfare: UC outside cap not found") + + return targets + + +def _parse_tv_licence(wb: openpyxl.Workbook) -> list[Target]: + """Parse Table 4.19 (BBC) from expenditure xlsx.""" + config = _load_config() + vintage = config["obr"]["vintage"] + ref = config["obr"]["efo_expenditure"] + + try: + ws = wb["4.19"] + cols = ["C", "D", "E", "F", "G", "H", "I"] + fy = { + "C": 2024, + "D": 2025, + "E": 2026, + "F": 2027, + "G": 2028, + "H": 2029, + "I": 2030, + } + + # Find "Licence fee receipts" or "BBC licence fee" + for row_num in range(1, 30): + val = 
ws[f"B{row_num}"].value + if val and "licence fee" in str(val).lower(): + values = {} + for col in cols: + cell = ws[f"{col}{row_num}"] + v = cell.value + if v is not None and isinstance(v, (int, float)): + values[fy[col]] = float(v) * 1e9 + if values: + return [ + Target( + name="obr/tv_licence_fee", + variable="tv_licence", + source="obr", + unit=Unit.GBP, + values=values, + reference_url=ref, + forecast_vintage=vintage, + ) + ] + except Exception: + logger.warning("OBR: TV licence table not found") + + return [] + + +# ISC census: private school students (roughly constant at ~557k) +_PRIVATE_SCHOOL = {y: 557_000 for y in range(2018, 2032)} + +# SPP Review: salary sacrifice NI relief (uprated 3% pa from 2024 base) +_SS_EMPLOYEE_NI = { + y: 1.2e9 * 1.03 ** max(0, y - 2024) for y in range(2024, 2032) +} +_SS_EMPLOYER_NI = { + y: 2.9e9 * 1.03 ** max(0, y - 2024) for y in range(2024, 2032) +} + + +def get_targets() -> list[Target]: + config = _load_config() + targets = [] + + try: + receipts_wb = _download_workbook(config["obr"]["efo_receipts"]) + targets.extend(_parse_receipts(receipts_wb)) + targets.extend(_parse_nics(receipts_wb)) + except Exception as e: + logger.error("Failed to download/parse OBR receipts: %s", e) + + try: + expenditure_wb = _download_workbook( + config["obr"]["efo_expenditure"] + ) + targets.extend(_parse_council_tax(expenditure_wb)) + targets.extend(_parse_welfare(expenditure_wb)) + targets.extend(_parse_tv_licence(expenditure_wb)) + except Exception as e: + logger.error("Failed to download/parse OBR expenditure: %s", e) + + # Static targets that don't come from the xlsx + targets.append( + Target( + name="obr/private_school_students", + variable="attends_private_school", + source="obr", + unit=Unit.COUNT, + values=_PRIVATE_SCHOOL, + is_count=True, + reference_url="https://www.isc.co.uk/research/annual-census/", + ) + ) + targets.append( + Target( + name="obr/salary_sacrifice_employee_ni_relief", + variable="ni_employee", + source="obr", + 
unit=Unit.GBP,
+            values=_SS_EMPLOYEE_NI,
+            reference_url="https://assets.publishing.service.gov.uk/media/67ce0e7c08e764d17a5d3c21/2025_SPP_Review.pdf",
+        )
+    )
+    targets.append(
+        Target(
+            name="obr/salary_sacrifice_employer_ni_relief",
+            variable="ni_employer",
+            source="obr",
+            unit=Unit.GBP,
+            values=_SS_EMPLOYER_NI,
+            reference_url="https://assets.publishing.service.gov.uk/media/67ce0e7c08e764d17a5d3c21/2025_SPP_Review.pdf",
+        )
+    )
+
+    return targets
diff --git a/policyengine_uk_data/targets/sources/ons_demographics.py b/policyengine_uk_data/targets/sources/ons_demographics.py
new file mode 100644
index 00000000..1cf0a23b
--- /dev/null
+++ b/policyengine_uk_data/targets/sources/ons_demographics.py
@@ -0,0 +1,331 @@
+"""ONS population projections and demographic targets.
+
+Downloads the ONS 2022-based principal population projection for the
+UK to extract total population and gender × age band targets.
+
+For regional age breakdowns (12 regions × 9 age bands), reads the
+pre-existing demographics.csv which was extracted from ONS subnational
+projections. The subnational projections don't have a stable machine-
+readable download URL, so this is the pragmatic compromise.
+
+Household type and tenure targets are scraped from live ONS downloads
+by the dedicated ons_households.py and ons_tenure.py modules.
+ +Sources: +- UK projections: https://www.ons.gov.uk/peoplepopulationandcommunity/populationandmigration/populationprojections/datasets/z1zippedpopulationprojectionsdatafilesuk +- NRS Scotland: https://www.nrscotland.gov.uk/statistics-and-data/statistics/statistics-by-theme/population/population-estimates/mid-year-population-estimates +""" + +import io +import logging +import zipfile +from functools import lru_cache +from pathlib import Path + +import pandas as pd +import requests +import yaml + +from policyengine_uk_data.targets.schema import ( + GeographicLevel, + Target, + Unit, +) + +logger = logging.getLogger(__name__) + +_SOURCES_YAML = Path(__file__).parent.parent / "sources.yaml" +_STORAGE = Path(__file__).parents[2] / "storage" + +_HEADERS = { + "User-Agent": ( + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) " + "AppleWebKit/537.36" + ), +} + +_UK_ZIP_URL = ( + "https://www.ons.gov.uk/file?uri=/peoplepopulationandcommunity/" + "populationandmigration/populationprojections/datasets/" + "z1zippedpopulationprojectionsdatafilesuk/2022based/uk.zip" +) + +_REF_REGION = ( + "https://www.ons.gov.uk/peoplepopulationandcommunity/" + "populationandmigration/populationprojections/datasets/" + "z1zippedpopulationprojectionsdatafilesuk" +) +_REF_NRS = ( + "https://www.nrscotland.gov.uk/statistics-and-data/statistics/" + "statistics-by-theme/population/population-estimates/" + "mid-year-population-estimates" +) + +_YEARS = list(range(2022, 2030)) + +# Age band boundaries +_AGE_BANDS = [ + (0, 9), + (10, 19), + (20, 29), + (30, 39), + (40, 49), + (50, 59), + (60, 69), + (70, 79), + (80, 89), +] + +_GENDER_BANDS = [ + (0, 14), + (15, 29), + (30, 44), + (45, 59), + (60, 74), + (75, 90), +] + + +@lru_cache(maxsize=1) +def _download_uk_projection() -> pd.DataFrame: + """Download and parse the UK principal population projection.""" + r = requests.get( + _UK_ZIP_URL, headers=_HEADERS, allow_redirects=True, timeout=120 + ) + r.raise_for_status() + z = 
zipfile.ZipFile(io.BytesIO(r.content)) + with z.open("uk/uk_ppp_machine_readable.xlsx") as f: + df = pd.read_excel( + io.BytesIO(f.read()), + sheet_name="Population", + engine="openpyxl", + ) + return df + + +def _aggregate_ages( + df: pd.DataFrame, sex: str, low: int, high: int, years: list[int] +) -> dict[int, float]: + """Sum population for a sex and age range across years.""" + sex_filter = "Females" if sex == "female" else "Males" + mask = (df["Sex"] == sex_filter) & ( + df["Age"].apply(lambda a: isinstance(a, int) and low <= a <= high) + ) + subset = df[mask] + result = {} + for y in years: + if y in subset.columns: + result[y] = float(subset[y].sum()) + return result + + +def _parse_uk_totals(df: pd.DataFrame) -> list[Target]: + """Extract UK total population and gender × age bands.""" + targets = [] + + # UK total + uk_pop = {} + for y in _YEARS: + if y in df.columns: + uk_pop[y] = float(df[y].sum()) + if uk_pop: + targets.append( + Target( + name="ons/uk_population", + variable="age", + source="ons", + unit=Unit.COUNT, + values=uk_pop, + is_count=True, + reference_url=_REF_REGION, + ) + ) + + # Gender × age bands + for sex in ["female", "male"]: + for low, high in _GENDER_BANDS: + values = _aggregate_ages(df, sex, low, high, _YEARS) + if values: + targets.append( + Target( + name=f"ons/{sex}_{low}_{high}", + variable="age", + source="ons", + unit=Unit.COUNT, + values=values, + is_count=True, + reference_url=_REF_REGION, + ) + ) + + return targets + + +def _parse_regional_from_csv() -> list[Target]: + """Read regional age band targets from demographics.csv. + + This CSV was extracted from ONS subnational projections which + lack a stable machine-readable download URL. 
+ """ + csv_path = _STORAGE / "demographics.csv" + if not csv_path.exists(): + logger.warning("demographics.csv not found, skipping regional") + return [] + + demographics = pd.read_csv(csv_path) + targets = [] + + # Skip rows now handled by dedicated modules (ons_households.py, + # ons_tenure.py) and rows handled elsewhere in this module + _SKIP_PREFIXES = ("tenure_", "scotland_households") + _SKIP_NAMES = { + "couple_3_plus_children_households", + "couple_no_children_households", + "couple_non_dependent_children_only_households", + "couple_under_3_children_households", + "lone_households_over_65", + "lone_households_under_65", + "lone_parent_dependent_children_households", + "lone_parent_non_dependent_children_households", + "multi_family_households", + "unrelated_adult_households", + } + + for _, row in demographics.iterrows(): + name = row["name"] + if name in _SKIP_NAMES or any( + name.startswith(p) for p in _SKIP_PREFIXES + ): + continue + values = {} + for y in _YEARS: + col = str(y) + if col in row.index and pd.notna(row[col]): + # Values in CSV are in thousands + values[y] = float(row[col]) * 1e3 + if values: + targets.append( + Target( + name=f"ons/{name}", + variable="age", + source="ons", + unit=Unit.COUNT, + geographic_level=GeographicLevel.REGION, + values=values, + is_count=True, + reference_url=_REF_REGION, + ) + ) + + return targets + + +# Scotland-specific (from NRS/census — not in ONS projections) +_SCOTLAND_CHILDREN_UNDER_16 = { + y: v * 1e3 + for y, v in { + 2022: 904, + 2023: 900, + 2024: 896, + 2025: 892, + 2026: 888, + 2027: 884, + 2028: 880, + }.items() +} + +_SCOTLAND_BABIES_UNDER_1 = { + y: v * 1e3 + for y, v in { + 2022: 46, + 2023: 46, + 2024: 46, + 2025: 46, + 2026: 46, + 2027: 46, + 2028: 46, + }.items() +} + +_SCOTLAND_HOUSEHOLDS_3PLUS_CHILDREN = { + y: v * 1e3 + for y, v in { + 2022: 82, + 2023: 82, + 2024: 82, + 2025: 82, + 2026: 82, + 2027: 82, + 2028: 82, + }.items() +} + + +# Household types and tenure are now scraped from ONS 
in +# ons_households.py and ons_tenure.py respectively. + + +def get_targets() -> list[Target]: + targets = [] + + # UK total + gender × age from live download + try: + df = _download_uk_projection() + targets.extend(_parse_uk_totals(df)) + except Exception as e: + logger.error("Failed to download ONS UK projections: %s", e) + + # Regional age bands from demographics.csv + targets.extend(_parse_regional_from_csv()) + + # Scotland-specific (NRS/census — small number of static values) + targets.append( + Target( + name="ons/scotland_children_under_16", + variable="age", + source="nrs", + unit=Unit.COUNT, + values=_SCOTLAND_CHILDREN_UNDER_16, + is_count=True, + geographic_level=GeographicLevel.COUNTRY, + geo_code="S", + geo_name="Scotland", + reference_url=_REF_NRS, + ) + ) + targets.append( + Target( + name="ons/scotland_babies_under_1", + variable="age", + source="nrs", + unit=Unit.COUNT, + values=_SCOTLAND_BABIES_UNDER_1, + is_count=True, + geographic_level=GeographicLevel.COUNTRY, + geo_code="S", + geo_name="Scotland", + reference_url=( + "https://www.nrscotland.gov.uk/publications/" + "vital-events-reference-tables-2024/" + ), + ) + ) + targets.append( + Target( + name="ons/scotland_households_3plus_children", + variable="is_child", + source="scotland_census", + unit=Unit.COUNT, + values=_SCOTLAND_HOUSEHOLDS_3PLUS_CHILDREN, + is_count=True, + geographic_level=GeographicLevel.COUNTRY, + geo_code="S", + geo_name="Scotland", + reference_url=( + "https://www.scotlandscensus.gov.uk/census-results/" + "at-a-glance/household-composition/" + ), + ) + ) + + return targets diff --git a/policyengine_uk_data/targets/sources/ons_households.py b/policyengine_uk_data/targets/sources/ons_households.py new file mode 100644 index 00000000..4c68b714 --- /dev/null +++ b/policyengine_uk_data/targets/sources/ons_households.py @@ -0,0 +1,114 @@ +"""ONS families & households targets. 
+ +Downloads Table 7 from the ONS Families and Households dataset to +get household counts by type (one-person, couples, lone parents, etc). + +Source: https://www.ons.gov.uk/peoplepopulationandcommunity/birthsdeathsandmarriages/families/datasets/familiesandhouseholdsfamiliesandhouseholds +""" + +import io +import logging +from functools import lru_cache + +import openpyxl +import requests + +from policyengine_uk_data.targets.schema import Target, Unit + +logger = logging.getLogger(__name__) + +_URL = ( + "https://www.ons.gov.uk/file?uri=/peoplepopulationandcommunity/" + "birthsdeathsandmarriages/families/datasets/" + "familiesandhouseholdsfamiliesandhouseholds/" + "current/familiesandhouseholdsuk2024.xlsx" +) +_REF = ( + "https://www.ons.gov.uk/peoplepopulationandcommunity/" + "birthsdeathsandmarriages/families/datasets/" + "familiesandhouseholdsfamiliesandhouseholds" +) +_HEADERS = { + "User-Agent": ( + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) " + "AppleWebKit/537.36" + ), +} + +# Table 7 rows: (row_number, target_name) +# Row numbers are 1-indexed in the xlsx +_TABLE7_ROWS = { + 14: "lone_households_under_65", + 15: "lone_households_over_65", + 16: "unrelated_adult_households", + 19: "couple_no_children_households", + 20: "couple_under_3_children_households", + 21: "couple_3_plus_children_households", + 22: "couple_non_dependent_children_only_households", + 24: "lone_parent_dependent_children_households", + 25: "lone_parent_non_dependent_children_households", + 26: "multi_family_households", +} + +# Years we want (columns follow pattern: year_col, cv_col, ci_col, +# repeating every 3 columns from col 2 for year 1996) +_MIN_YEAR = 2018 + + +@lru_cache(maxsize=1) +def _download_workbook() -> openpyxl.Workbook: + r = requests.get( + _URL, headers=_HEADERS, allow_redirects=True, timeout=60 + ) + r.raise_for_status() + return openpyxl.load_workbook( + io.BytesIO(r.content), data_only=True + ) + + +def _find_year_columns(ws) -> dict[int, int]: + """Map 
calendar year -> column index for Estimate columns in Table 7.""" + year_cols = {} + for col in range(2, ws.max_column + 1): + header = ws.cell(row=12, column=col).value + if header and "Estimate" in str(header): + year_str = str(header).split(" ")[0] + try: + year = int(year_str) + if year >= _MIN_YEAR: + year_cols[year] = col + except ValueError: + continue + return year_cols + + +def get_targets() -> list[Target]: + targets = [] + try: + wb = _download_workbook() + ws = wb["7"] + year_cols = _find_year_columns(ws) + + for row_num, name in _TABLE7_ROWS.items(): + values = {} + for year, col in year_cols.items(): + val = ws.cell(row=row_num, column=col).value + if val is not None and isinstance(val, (int, float)): + values[year] = float(val) * 1e3 # thousands → count + if values: + targets.append( + Target( + name=f"ons/{name}", + variable="family_type", + source="ons", + unit=Unit.COUNT, + values=values, + is_count=True, + reference_url=_REF, + ) + ) + + except Exception as e: + logger.error("Failed to download ONS households xlsx: %s", e) + + return targets diff --git a/policyengine_uk_data/targets/sources/ons_savings.py b/policyengine_uk_data/targets/sources/ons_savings.py new file mode 100644 index 00000000..5f49d8c5 --- /dev/null +++ b/policyengine_uk_data/targets/sources/ons_savings.py @@ -0,0 +1,72 @@ +"""ONS savings interest income targets. + +Downloads the HAXV timeseries from the ONS National Accounts: +D.41g — Households (S.14): Interest resources. + +SPI significantly underestimates savings income because it only +captures taxable interest, not tax-free ISAs/NS&I. 
+ +Source: https://www.ons.gov.uk/economy/grossdomesticproductgdp/timeseries/haxv/ukea +""" + +import logging + +import requests + +from policyengine_uk_data.targets.schema import Target, Unit + +logger = logging.getLogger(__name__) + +_API_URL = ( + "https://www.ons.gov.uk/economy/grossdomesticproductgdp/" + "timeseries/haxv/ukea/data" +) +_REF = ( + "https://www.ons.gov.uk/economy/grossdomesticproductgdp/" + "timeseries/haxv/ukea" +) +_HEADERS = { + "User-Agent": ( + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) " + "AppleWebKit/537.36" + ), +} + + +def get_targets() -> list[Target]: + try: + r = requests.get( + _API_URL, headers=_HEADERS, allow_redirects=True, timeout=30 + ) + r.raise_for_status() + data = r.json() + + values = {} + for item in data.get("years", []): + year = int(item["year"]) + if 2018 <= year <= 2029: + values[year] = float(item["value"]) * 1e6 + + # Hold flat from last actual year for projections + if values: + last_year = max(values.keys()) + last_val = values[last_year] + for y in range(last_year + 1, 2030): + values[y] = last_val + + if values: + return [ + Target( + name="ons/savings_interest_income", + variable="savings_interest_income", + source="ons", + unit=Unit.GBP, + values=values, + reference_url=_REF, + ) + ] + + except Exception as e: + logger.error("Failed to download ONS savings timeseries: %s", e) + + return [] diff --git a/policyengine_uk_data/targets/sources/ons_tenure.py b/policyengine_uk_data/targets/sources/ons_tenure.py new file mode 100644 index 00000000..841e3f4f --- /dev/null +++ b/policyengine_uk_data/targets/sources/ons_tenure.py @@ -0,0 +1,119 @@ +"""ONS subnational dwelling stock by tenure targets. + +Downloads the ONS SPREE tenure estimates to get England-level tenure +breakdowns (owned outright, owned with mortgage, private rent, social +rent) summed across all local authorities. 
"""ONS subnational dwelling stock by tenure targets.

Downloads the ONS SPREE tenure estimates to get England-level tenure
breakdowns (owned outright, owned with mortgage, private rent, social
rent) summed across all local authorities.

Source: https://www.ons.gov.uk/peoplepopulationandcommunity/housing/datasets/subnationaldwellingstockbytenureestimates
"""

import io
import logging
from functools import lru_cache

import openpyxl
import requests

from policyengine_uk_data.targets.schema import (
    GeographicLevel,
    Target,
    Unit,
)

logger = logging.getLogger(__name__)

_URL = (
    "https://www.ons.gov.uk/file?uri=/peoplepopulationandcommunity/"
    "housing/datasets/subnationaldwellingstockbytenureestimates/"
    "current/subnationaldwellingsbytenure2024.xlsx"
)
_REF = (
    "https://www.ons.gov.uk/peoplepopulationandcommunity/"
    "housing/datasets/subnationaldwellingstockbytenureestimates"
)
_HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
        "AppleWebKit/537.36"
    ),
}

# Tenure categories in the xlsx header → target name suffix
_TENURE_COLS = {
    "Owned Outright": "tenure_england_owned_outright",
    "Owned with Mortgage or Loan": "tenure_england_owned_with_mortgage",
    "Private Rent": "tenure_england_rented_privately",
    "Social Rent": "tenure_england_social_rent",
    "Total Dwellings": "tenure_england_total",
}


@lru_cache(maxsize=1)
def _download_workbook() -> openpyxl.Workbook:
    """Fetch and cache the SPREE tenure workbook (values, not formulas)."""
    response = requests.get(
        _URL, headers=_HEADERS, allow_redirects=True, timeout=60
    )
    response.raise_for_status()
    return openpyxl.load_workbook(
        io.BytesIO(response.content), data_only=True
    )


def _parse_header_columns(ws) -> dict[tuple[int, str], int]:
    """Map (year, tenure_category) → column index from row 4 headers."""
    mapping: dict[tuple[int, str], int] = {}
    for col in range(5, ws.max_column + 1):
        raw_header = ws.cell(row=4, column=col).value
        if not raw_header:
            continue
        header = str(raw_header)
        # Headers look like "<year> <tenure category>"; match on the
        # first configured suffix (dict order), as the original did.
        matched = next(
            (suffix for suffix in _TENURE_COLS if header.endswith(suffix)),
            None,
        )
        if matched is not None:
            mapping[(int(header.split(" ")[0]), matched)] = col
    return mapping


def get_targets() -> list[Target]:
    """Build England-level tenure count targets.

    Sums each (year, tenure) column over all local-authority rows
    (data starts at row 5), then emits one count Target per tenure
    category with a per-year value dict. Logs and returns the partial
    result on any failure.
    """
    targets = []
    try:
        sheet = _download_workbook()["1a"]
        col_map = _parse_header_columns(sheet)

        # Sum across all local authorities for each (year, tenure).
        # NOTE(review): this sums every data row in sheet 1a — assumes
        # the sheet has no subtotal rows that would double-count; verify
        # against the workbook layout.
        totals: dict[tuple[int, str], float] = {}
        for key, col in col_map.items():
            numeric = [
                float(raw)
                for row in range(5, sheet.max_row + 1)
                if isinstance(
                    (raw := sheet.cell(row=row, column=col).value),
                    (int, float),
                )
            ]
            if numeric:
                totals[key] = sum(numeric)

        # Build one target per tenure category.
        for tenure_col, target_name in _TENURE_COLS.items():
            values = {
                year: total
                for (year, tenure), total in totals.items()
                if tenure == tenure_col
            }
            if values:
                targets.append(
                    Target(
                        name=f"ons/{target_name}",
                        variable="tenure_type",
                        source="ons",
                        unit=Unit.COUNT,
                        geographic_level=GeographicLevel.COUNTRY,
                        geo_code="E",
                        geo_name="England",
                        values=values,
                        is_count=True,
                        reference_url=_REF,
                    )
                )

    except Exception as e:
        logger.error("Failed to download ONS tenure data: %s", e)

    return targets
"""Scottish Government targets.

Scottish Child Payment spend from Scottish Budget.

Source: https://www.gov.scot/publications/scottish-budget-2026-2027/pages/6/
"""

from policyengine_uk_data.targets.schema import (
    GeographicLevel,
    Target,
    Unit,
)


def get_targets() -> list[Target]:
    """Return the Scottish Child Payment spend target (GBP, Scotland).

    2024-26 figures are taken from Scottish Budget 2026-27 Table 5.08;
    2027-29 are extrapolated at 3% per year from the 2025 figure.
    """
    # Scottish Child Payment from Scottish Budget 2026-27 Table 5.08
    scp_spend = {
        2024: 455.8e6,
        2025: 471.0e6,
        2026: 484.8e6,
    }
    # Extrapolate other years at 3% growth. The base is deliberately
    # the 2025 figure (not 2026), mirroring the previous loss-matrix
    # extrapolation so target values are unchanged.
    scp_spend.update(
        {
            year: 471.0e6 * (1.03 ** (year - 2025))
            for year in range(2027, 2030)
        }
    )

    return [
        Target(
            name="sss/scottish_child_payment",
            variable="scottish_child_payment",
            source="scottish_government",
            unit=Unit.GBP,
            geographic_level=GeographicLevel.COUNTRY,
            geo_code="S",
            geo_name="Scotland",
            values=scp_spend,
            reference_url="https://www.gov.scot/publications/scottish-budget-2026-2027/pages/6/",
        )
    ]
"""VOA council tax band targets.

Council tax band counts (A-H + total) by region from VOA stock of
properties data.

Source: https://www.gov.uk/government/statistics/council-tax-stock-of-properties-2024
Scotland: https://www.gov.scot/publications/council-tax-datasets/
"""

import pandas as pd
from pathlib import Path

from policyengine_uk_data.targets.schema import (
    GeographicLevel,
    Target,
    Unit,
)

_STORAGE = Path(__file__).parents[2] / "storage"
_REF = "https://www.gov.uk/government/statistics/council-tax-stock-of-properties-2024"


def get_targets() -> list[Target]:
    """Build council tax band targets from the CSV.

    Emits one 2024 count target per (region, band) plus a per-region
    total. Returns an empty list when the CSV has not been downloaded.
    Values are the raw 2024 dwelling counts — presumably uprated for
    other years downstream; verify against the loss-matrix builder.
    """
    csv_path = _STORAGE / "council_tax_bands_2024.csv"
    if not csv_path.exists():
        return []

    band_table = pd.read_csv(csv_path)

    def _band_target(region: str, label: str, count: float) -> Target:
        # One count target per (region, band-or-total) cell.
        return Target(
            name=f"voa/council_tax/{region}/{label}",
            variable="council_tax_band",
            source="voa",
            unit=Unit.COUNT,
            geographic_level=GeographicLevel.REGION,
            geo_name=region,
            values={2024: count},
            is_count=True,
            reference_url=_REF,
        )

    targets = []
    for _, row in band_table.iterrows():
        region = row["Region"]
        # Eight bands first, then the region total — same order as before.
        for band in "ABCDEFGH":
            targets.append(_band_target(region, band, float(row[band])))
        targets.append(_band_target(region, "total", float(row["Total"])))

    return targets
"""Tests for the targets registry.

Verifies that:
1. All source modules load without error
2. No duplicate target names
3. Core targets exist for key years
4. Target values match the current system's hardcoded values
"""

import pytest
from policyengine_uk_data.targets import get_all_targets, Target


def test_registry_loads():
    """All source modules should load and return targets."""
    assert len(get_all_targets()) > 0, "Registry returned no targets"


def test_no_duplicate_names():
    """Target names should be unique across all sources."""
    names = [t.name for t in get_all_targets()]
    duplicates = [n for n in names if names.count(n) > 1]
    assert len(duplicates) == 0, f"Duplicate target names: {set(duplicates)}"


def test_obr_income_tax_exists():
    """OBR income tax target should exist for 2025."""
    assert "obr/income_tax" in {t.name for t in get_all_targets(year=2025)}


def test_obr_income_tax_value():
    """OBR income tax for 2025 should be ~£329bn (Table 3.4 accrued basis)."""
    income_tax = next(
        t for t in get_all_targets(year=2025) if t.name == "obr/income_tax"
    )
    # Table 3.4 D6 = 328.96bn for FY 2025-26 → calendar 2025
    assert abs(income_tax.values[2025] - 329e9) < 1e9


def test_ons_uk_population_exists():
    """UK population target should exist."""
    assert "ons/uk_population" in {
        t.name for t in get_all_targets(year=2025)
    }


def test_hmrc_spi_targets_exist():
    """HMRC SPI income band targets should exist."""
    spi_targets = [
        t for t in get_all_targets(year=2025) if t.source == "hmrc_spi"
    ]
    # 13 bands × 6 income types × 2 (count + amount) = 156 per year
    assert len(spi_targets) >= 100, (
        f"Expected 100+ SPI targets, got {len(spi_targets)}"
    )


def test_dwp_pip_targets():
    """DWP PIP targets should exist."""
    names = {t.name for t in get_all_targets(year=2025)}
    assert "dwp/pip_dl_standard_claimants" in names
    assert "dwp/pip_dl_enhanced_claimants" in names


def test_voa_council_tax_targets():
    """VOA council tax band targets should exist."""
    voa = [t for t in get_all_targets(year=2024) if t.source == "voa"]
    # 11 regions × 9 (8 bands + total) = 99
    assert len(voa) >= 90, f"Expected 90+ VOA targets, got {len(voa)}"


def test_core_target_count():
    """Total target count should be substantial."""
    n_targets = len(get_all_targets(year=2025))
    assert n_targets >= 200, (
        f"Expected 200+ targets for 2025, got {n_targets}"
    )


def test_two_child_limit_targets():
    """Two-child limit targets should exist."""
    names = {t.name for t in get_all_targets(year=2026)}
    assert "dwp/uc/two_child_limit/households_affected" in names
    assert "dwp/uc/two_child_limit/children_affected" in names


def test_scottish_child_payment():
    """Scottish child payment should exist."""
    assert "sss/scottish_child_payment" in {
        t.name for t in get_all_targets(year=2025)
    }


def test_savings_interest():
    """ONS savings interest target should exist."""
    assert "ons/savings_interest_income" in {
        t.name for t in get_all_targets(year=2025)
    }
-This module creates target matrices comparing PolicyEngine UK model outputs -against official statistics from OBR, ONS, HMRC, DWP and other sources. -Used for calibrating household weights to match aggregate targets. +Delegates to the targets registry and build_loss_matrix module +for all target definitions and simulation column construction. """ import numpy as np import pandas as pd -from policyengine_uk_data.storage import STORAGE_FOLDER -from policyengine_uk_data.utils import uprate_values -from policyengine_uk.data import UKSingleYearDataset -from policyengine_uk_data.utils.uc_data import uc_national_payment_dist - -tax_benefit = pd.read_csv(STORAGE_FOLDER / "tax_benefit.csv") -tax_benefit["name"] = tax_benefit["name"].apply(lambda x: f"obr/{x}") -demographics = pd.read_csv(STORAGE_FOLDER / "demographics.csv") -demographics["name"] = demographics["name"].apply(lambda x: f"ons/{x}") -statistics = pd.concat([tax_benefit, demographics]) -dfs = [] - -MIN_YEAR = 2018 -MAX_YEAR = 2029 - -# NTS 2024 vehicle ownership targets -# https://www.gov.uk/government/statistics/national-travel-survey-2024 -NTS_NO_VEHICLE_RATE = 0.22 -NTS_ONE_VEHICLE_RATE = 0.44 -NTS_TWO_PLUS_VEHICLE_RATE = 0.34 - -for time_period in range(MIN_YEAR, MAX_YEAR + 1): - time_period_df = statistics[ - ["name", "unit", "reference", str(time_period)] - ].rename(columns={str(time_period): "value"}) - time_period_df["time_period"] = time_period - dfs.append(time_period_df) - -statistics = pd.concat(dfs) -statistics = statistics[statistics.value.notnull()] - - -def create_target_matrix( - dataset: UKSingleYearDataset, - time_period: str = None, - reform=None, -) -> np.ndarray: - """ - Create target matrix for calibration against official statistics. 
- - Creates a matrix A such that for household weights w, target vector b - and a perfectly calibrated PolicyEngine UK: A * w = b - - Compares model outputs against: - - OBR tax and benefit aggregates - - ONS demographic and regional statistics - - HMRC income distribution data - - DWP benefit caseload data - - VOA council tax statistics - - Args: - dataset: PolicyEngine UK dataset to analyse. - time_period: Year for target statistics (uses dataset default if None). - reform: Policy reform to apply during analysis. - - Returns: - Tuple of (target_matrix, target_values) for calibration. - """ - - # First- tax-benefit outcomes from the DWP and OBR. - - from policyengine_uk import Microsimulation - - if time_period is None: - time_period = dataset.time_period - - sim = Microsimulation(dataset=dataset, reform=reform) - sim.default_calculation_period = time_period - - family = sim.populations["benunit"] - - pe = lambda variable: sim.calculate(variable, map_to="household").values - - household_from_family = lambda values: sim.map_result( - values, "benunit", "household" - ) - household_from_person = lambda values: sim.map_result( - values, "person", "household" - ) - - def pe_count(*variables): - total = 0 - for variable in variables: - entity = sim.tax_benefit_system.variables[variable].entity.key - total += sim.map_result( - sim.calculate(variable) > 0, - entity, - "household", - ) - - return total - - df = pd.DataFrame() - - df["obr/attendance_allowance"] = pe("attendance_allowance") - df["obr/carers_allowance"] = pe("carers_allowance") - df["obr/dla"] = pe("dla") - df["obr/esa"] = pe("esa_income") + pe("esa_contrib") - df["obr/esa_contrib"] = pe("esa_contrib") - df["obr/esa_income"] = pe("esa_income") - df["obr/housing_benefit"] = pe("housing_benefit") - df["obr/pip"] = pe("pip") - df["obr/statutory_maternity_pay"] = pe("statutory_maternity_pay") - df["obr/attendance_allowance_count"] = pe_count("attendance_allowance") - df["obr/carers_allowance_count"] = 
pe_count("carers_allowance") - df["obr/dla_count"] = pe_count("dla") - df["obr/esa_count"] = pe_count("esa_income", "esa_contrib") - df["obr/housing_benefit_count"] = pe_count("housing_benefit") - df["obr/pension_credit_count"] = pe_count("pension_credit") - df["obr/pip_count"] = pe_count("pip") - - on_uc = sim.calculate("universal_credit") > 0 - unemployed = family.any(sim.calculate("employment_status") == "UNEMPLOYED") - - df["obr/universal_credit_jobseekers_count"] = household_from_family( - on_uc * unemployed - ) - df["obr/universal_credit_non_jobseekers_count"] = household_from_family( - on_uc * ~unemployed - ) - - # df["obr/winter_fuel_allowance_count"] = pe_count("winter_fuel_allowance") - df["obr/capital_gains_tax"] = pe("capital_gains_tax") - df["obr/child_benefit"] = pe("child_benefit") - - country = sim.calculate("country") - ct = pe("council_tax") - df["obr/council_tax"] = ct - df["obr/council_tax_england"] = ct * (country == "ENGLAND") - df["obr/council_tax_scotland"] = ct * (country == "SCOTLAND") - df["obr/council_tax_wales"] = ct * (country == "WALES") - - df["obr/domestic_rates"] = pe("domestic_rates") - df["obr/fuel_duties"] = pe("fuel_duty") - df["obr/income_tax"] = pe("income_tax") - df["obr/jobseekers_allowance"] = pe("jsa_income") + pe("jsa_contrib") - df["obr/pension_credit"] = pe("pension_credit") - df["obr/state_pension"] = pe("state_pension") - # df["obr/tax_credits"] = pe("tax_credits") - df["obr/tv_licence_fee"] = pe("tv_licence") - - uc = sim.calculate("universal_credit") - df["obr/universal_credit"] = household_from_family(uc) - df["obr/universal_credit_jobseekers"] = household_from_family( - uc * unemployed - ) - df["obr/universal_credit_non_jobseekers"] = household_from_family( - uc * ~unemployed - ) - - df["obr/vat"] = pe("vat") - # df["obr/winter_fuel_allowance"] = pe("winter_fuel_allowance") - - # Not strictly from the OBR but from the 2024 Independent Schools Council census. OBR will be using that. 
- df["obr/private_school_students"] = pe("attends_private_school") - - # Salary sacrifice NI relief - SPP estimates £4.1bn total (£1.2bn employee + £2.9bn employer) - # Calculate relief via counterfactual: what additional NI would be paid if SS became income - ss_contributions = sim.calculate( - "pension_contributions_via_salary_sacrifice" - ) - employment_income = sim.calculate("employment_income") - - # Run counterfactual simulation with SS converted to employment income - counterfactual_sim = Microsimulation(dataset=dataset, reform=reform) - counterfactual_sim.set_input( - "pension_contributions_via_salary_sacrifice", - time_period, - np.zeros_like(ss_contributions), - ) - counterfactual_sim.set_input( - "employment_income", - time_period, - employment_income + ss_contributions, - ) - - # NI relief = counterfactual NI - baseline NI - ni_employee_baseline = sim.calculate("ni_employee") - ni_employer_baseline = sim.calculate("ni_employer") - ni_employee_cf = counterfactual_sim.calculate("ni_employee", time_period) - ni_employer_cf = counterfactual_sim.calculate("ni_employer", time_period) - - employee_ni_relief = ni_employee_cf - ni_employee_baseline - employer_ni_relief = ni_employer_cf - ni_employer_baseline - - df["obr/salary_sacrifice_employee_ni_relief"] = household_from_person( - employee_ni_relief - ) - df["obr/salary_sacrifice_employer_ni_relief"] = household_from_person( - employer_ni_relief - ) - - # Population statistics from the ONS. 
- - region = sim.calculate("region", map_to="person") - region_to_target_name_map = { - "NORTH_EAST": "north_east", - "SOUTH_EAST": "south_east", - "EAST_MIDLANDS": "east_midlands", - "WEST_MIDLANDS": "west_midlands", - "YORKSHIRE": "yorkshire_and_the_humber", - "EAST_OF_ENGLAND": "east", - "LONDON": "london", - "SOUTH_WEST": "south_west", - "NORTH_WEST": "north_west", - "WALES": "wales", - "SCOTLAND": "scotland", - "NORTHERN_IRELAND": "northern_ireland", - } - age = sim.calculate("age") - - # Ensure local populations are consistent with national population - local_population_total = 0 - for pe_region_name, region_name in region_to_target_name_map.items(): - for lower_age in range(0, 90, 10): - upper_age = lower_age + 10 - name = f"ons/{region_name}_age_{lower_age}_{upper_age - 1}" - local_population_total += ( - demographics[demographics.name == name][ - str(time_period) - ].values[0] - * 1e3 - ) - - population_scaling_factor = ( - demographics[demographics.name == "ons/uk_population"][ - str(time_period) - ].values[0] - * 1e6 - / local_population_total - ) * 0.9 - - for pe_region_name, region_name in region_to_target_name_map.items(): - for lower_age in range(0, 90, 10): - upper_age = lower_age + 10 - name = f"ons/{region_name}_age_{lower_age}_{upper_age - 1}" - statistics.loc[ - (statistics.name == name) - & (statistics.time_period == int(time_period)), - "value", - ] *= population_scaling_factor - - for pe_region_name, region_name in region_to_target_name_map.items(): - for lower_age in range(0, 90, 10): - upper_age = lower_age + 10 - name = f"ons/{region_name}_age_{lower_age}_{upper_age - 1}" - person_in_criteria = ( - (region == pe_region_name) - & (age >= lower_age) - & (age < upper_age) - ) - df[name] = household_from_person(person_in_criteria) - - df["ons/uk_population"] = household_from_person(age >= 0) - - # Scotland-specific calibration targets - # Children under 16 in Scotland - # Source: NRS mid-year population estimates - # 
https://www.nrscotland.gov.uk/statistics-and-data/statistics/statistics-by-theme/population/population-estimates/mid-year-population-estimates - scotland_children_under_16 = (region.values == "SCOTLAND") & (age < 16) - df["ons/scotland_children_under_16"] = household_from_person( - scotland_children_under_16 - ) - - # Babies under 1 in Scotland - # Source: NRS Vital Events - births registered in Scotland - # https://www.nrscotland.gov.uk/publications/vital-events-reference-tables-2024/ - # ~46,000 births per year (45,763 in 2024) - scotland_babies_under_1 = (region.values == "SCOTLAND") & (age < 1) - df["ons/scotland_babies_under_1"] = household_from_person( - scotland_babies_under_1 - ) - - # Households with 3+ children in Scotland - # Source: Scotland Census 2022 - Household composition - # https://www.scotlandscensus.gov.uk/census-results/at-a-glance/household-composition/ - # Count children per household, filter to Scotland households with 3+ - is_child = sim.calculate("is_child").values - children_per_household = household_from_person(is_child) - household_region = sim.calculate("region", map_to="household").values - scotland_3plus_children = (household_region == "SCOTLAND") & ( - children_per_household >= 3 - ) - df["ons/scotland_households_3plus_children"] = ( - scotland_3plus_children.astype(float) - ) - - targets = ( - statistics[statistics.time_period == int(time_period)] - .set_index("name") - .loc[df.columns] - ) - - targets.value = np.select( - [ - targets.unit == "gbp-bn", - targets.unit == "person-m", - targets.unit == "person-k", - targets.unit == "benefit-unit-m", - targets.unit == "household-k", - ], - [ - targets.value * 1e9, - targets.value * 1e6, - targets.value * 1e3, - targets.value * 1e6, - targets.value * 1e3, - ], - ) - - # Finally, incomes from HMRC - - target_names = [] - target_values = [] - # Note: savings_interest_income is excluded because SPI significantly - # underestimates it. 
Savings income is calibrated from ONS National - # Accounts D.41g household interest data separately below. - INCOME_VARIABLES = [ - "employment_income", - "self_employment_income", - "state_pension", - "private_pension_income", - "property_income", - "dividend_income", - ] - - income_df = sim.calculate_dataframe(["total_income"] + INCOME_VARIABLES) - - incomes = pd.read_csv(STORAGE_FOLDER / "incomes_projection.csv") - incomes = incomes[incomes.year.astype(str) == str(time_period)] - for i, row in incomes.iterrows(): - lower = row.total_income_lower_bound - upper = row.total_income_upper_bound - in_income_band = (income_df.total_income >= lower) & ( - income_df.total_income < upper - ) - for variable in INCOME_VARIABLES: - name_amount = ( - "hmrc/" - + variable - + f"_income_band_{i}_{lower:_.0f}_to_{upper:_.0f}" - ) - df[name_amount] = household_from_person( - income_df[variable] * in_income_band - ) - target_values.append(row[variable + "_amount"]) - target_names.append(name_amount) - name_count = ( - "hmrc/" - + variable - + f"_count_income_band_{i}_{lower:_.0f}_to_{upper:_.0f}" - ) - df[name_count] = household_from_person( - (income_df[variable] > 0) * in_income_band - ) - target_values.append(row[variable + "_count"]) - target_names.append(name_count) - - # Savings interest income from ONS National Accounts D.41 - # Source: ONS HAXV - Households (S.14): Interest (D.41) Resources - # https://www.ons.gov.uk/economy/grossdomesticproductgdp/timeseries/haxv/ukea - # SPI significantly underestimates savings income (~£3bn vs £43-98bn actual) - # because it only captures taxable interest, not tax-free ISAs/NS&I - ONS_SAVINGS_INCOME = { - 2020: 16.0e9, - 2021: 19.6e9, - 2022: 43.3e9, - 2023: 86.0e9, - 2024: 98.2e9, - 2025: 98.2e9, # Projected (held flat) - 2026: 98.2e9, - 2027: 98.2e9, - 2028: 98.2e9, - 2029: 98.2e9, - } - savings_income = sim.calculate("savings_interest_income") - df["ons/savings_interest_income"] = household_from_person(savings_income) - 
target_names.append("ons/savings_interest_income") - target_values.append(ONS_SAVINGS_INCOME.get(int(time_period), 55.0e9)) - - # HMRC Table 6.2 - Salary sacrifice income tax relief by tax rate - # This helps calibrate the distribution of SS users by income level - # 2023-24 values (£m): Basic £1,600, Higher £4,400, Additional £1,200 - # Total IT relief from SS: £7,200m - # Use true counterfactual: IT relief = counterfactual IT - baseline IT - income_tax_baseline = sim.calculate("income_tax") - income_tax_cf = counterfactual_sim.calculate("income_tax", time_period) - it_relief = income_tax_cf - income_tax_baseline - - # Get tax band from counterfactual adjusted net income (where SS is wages) - adjusted_net_income_cf = counterfactual_sim.calculate( - "adjusted_net_income", time_period - ) - basic_rate_threshold = ( - sim.tax_benefit_system.parameters.gov.hmrc.income_tax.rates.uk[ - 0 - ].threshold(time_period) - ) - higher_rate_threshold = ( - sim.tax_benefit_system.parameters.gov.hmrc.income_tax.rates.uk[ - 1 - ].threshold(time_period) - ) - additional_rate_threshold = ( - sim.tax_benefit_system.parameters.gov.hmrc.income_tax.rates.uk[ - 2 - ].threshold(time_period) - ) - - # Determine tax band for each person based on counterfactual income - is_basic_rate = (adjusted_net_income_cf > basic_rate_threshold) & ( - adjusted_net_income_cf <= higher_rate_threshold - ) - is_higher_rate = (adjusted_net_income_cf > higher_rate_threshold) & ( - adjusted_net_income_cf <= additional_rate_threshold - ) - is_additional_rate = adjusted_net_income_cf > additional_rate_threshold - - # Allocate the true IT relief to tax bands - ss_it_relief_basic = it_relief * is_basic_rate - ss_it_relief_higher = it_relief * is_higher_rate - ss_it_relief_additional = it_relief * is_additional_rate - - df["hmrc/salary_sacrifice_it_relief_basic"] = household_from_person( - ss_it_relief_basic - ) - df["hmrc/salary_sacrifice_it_relief_higher"] = household_from_person( - ss_it_relief_higher - ) - 
df["hmrc/salary_sacrifice_it_relief_additional"] = household_from_person( - ss_it_relief_additional - ) - - # Total gross salary sacrifice contributions - # This is derived from the IT relief: £7.2bn IT relief at ~30% avg rate - # implies ~£24bn gross contributions (but we target the relief directly) - df["hmrc/salary_sacrifice_contributions"] = household_from_person( - ss_contributions - ) - - # HMRC Table 6.2 - Salary sacrifice income tax relief by tax rate (2023-24) - # https://assets.publishing.service.gov.uk/media/687a294e312ee8a5f0806b6d/Tables_6_1_and_6_2.csv - # Values in £bn - SS_IT_RELIEF_BASIC_2024 = 1.6e9 - SS_IT_RELIEF_HIGHER_2024 = 4.4e9 - SS_IT_RELIEF_ADDITIONAL_2024 = 1.2e9 - SS_CONTRIBUTIONS_2024 = 24e9 # £7.2bn IT relief / 0.30 avg rate - - # Uprate by ~3% per year for wage growth - years_from_2024 = max(0, int(time_period) - 2024) - uprating_factor = 1.03**years_from_2024 - - target_names.append("hmrc/salary_sacrifice_it_relief_basic") - target_values.append(SS_IT_RELIEF_BASIC_2024 * uprating_factor) - - target_names.append("hmrc/salary_sacrifice_it_relief_higher") - target_values.append(SS_IT_RELIEF_HIGHER_2024 * uprating_factor) - - target_names.append("hmrc/salary_sacrifice_it_relief_additional") - target_values.append(SS_IT_RELIEF_ADDITIONAL_2024 * uprating_factor) - - target_names.append("hmrc/salary_sacrifice_contributions") - target_values.append(SS_CONTRIBUTIONS_2024 * uprating_factor) - - # Add two-child limit targets. 
- child_is_affected = ( - sim.map_result( - sim.calculate("uc_is_child_limit_affected", map_to="household"), - "household", - "person", - ) - > 0 - ) * sim.calculate("is_child", map_to="person").values - child_in_uc_household = ( - sim.calculate("universal_credit", map_to="person").values > 0 - ) - children_in_capped_households = sim.map_result( - child_is_affected * child_in_uc_household, "person", "household" - ) - capped_households = (children_in_capped_households > 0) * 1.0 - df["dwp/uc_two_child_limit_affected_child_count"] = ( - children_in_capped_households - ) - target_names.append("dwp/uc_two_child_limit_affected_child_count") - UPRATING_24_25 = 1.12 # https://ifs.org.uk/articles/two-child-limit-poverty-incentives-and-cost, table at the end - target_values.append(1.6e6 * UPRATING_24_25) # DWP statistics for 2024/25 - # https://www.gov.uk/government/statistics/universal-credit-and-child-tax-credit-claimants-statistics-related-to-the-policy-to-provide-support-for-a-maximum-of-2-children-april-2024/universal-credit-and-child-tax-credit-claimants-statistics-related-to-the-policy-to-provide-support-for-a-maximum-of-two-children-april-2024 - df["dwp/uc_two_child_limit_affected_household_count"] = capped_households - target_names.append("dwp/uc_two_child_limit_affected_household_count") - target_values.append(440e3 * UPRATING_24_25) # DWP statistics for 2024/25 - - # PIP daily living standard and enhanced claimant counts - # https://www.disabilityrightsuk.org/news/90-pip-standard-daily-living-component-recipients-would-fail-new-green-paper-test?srsltid=AfmBOoqSq3cQwtZnQBe-qLN7PT1mUBVtZ0ZINYtoG5bG5O9_ObQ90Y0n - - pip_dl_category = sim.calculate("pip_dl_category") - on_standard = sim.map_result( - pip_dl_category == "STANDARD", "person", "household" - ) - on_enhanced = sim.map_result( - pip_dl_category == "ENHANCED", "person", "household" - ) - - df["dwp/pip_dl_standard_claimants"] = on_standard - target_names.append("dwp/pip_dl_standard_claimants") - 
target_values.append(1_283_000) - - df["dwp/pip_dl_enhanced_claimants"] = on_enhanced - target_names.append("dwp/pip_dl_enhanced_claimants") - target_values.append(1_608_000) - - # Scottish Child Payment total spend - # Source: Scottish Budget 2026-27, Table 5.08 - # https://www.gov.scot/publications/scottish-budget-2026-2027/pages/6/ - scp = sim.calculate("scottish_child_payment") - df["sss/scottish_child_payment"] = household_from_person(scp) - SCP_SPEND = { - 2024: 455.8e6, - 2025: 471.0e6, - 2026: 484.8e6, - } - # Extrapolate for other years using 3% annual growth - scp_target = SCP_SPEND.get( - int(time_period), 471.0e6 * (1.03 ** (int(time_period) - 2025)) - ) - target_names.append("sss/scottish_child_payment") - target_values.append(scp_target) - - # UC households in Scotland with child under 1 - # Source: DWP Stat-Xplore, UC Households dataset, November 2023 - # https://stat-xplore.dwp.gov.uk/ - # Filters: Scotland, Age of Youngest Child = 0 - # ~14,000 households (13,992 in November 2023) - uc_amount = sim.calculate("universal_credit") - on_uc_family = uc_amount > 0 - on_uc_household = household_from_family(on_uc_family) > 0 - - child_under_1 = is_child & (age < 1) - has_child_under_1 = household_from_person(child_under_1) > 0 - - scotland_uc_child_under_1 = ( - (household_region == "SCOTLAND") & on_uc_household & has_child_under_1 - ) - df["dwp/scotland_uc_households_child_under_1"] = ( - scotland_uc_child_under_1.astype(float) - ) - target_names.append("dwp/scotland_uc_households_child_under_1") - target_values.append(14_000) # 13,992 rounded, November 2023 - - # Council Tax band counts - - ct_data = pd.read_csv(STORAGE_FOLDER / "council_tax_bands_2024.csv") - uk_population = ( - sim.tax_benefit_system.parameters.gov.economic_assumptions.indices.ons.population - ) - uprating = uk_population(time_period) / uk_population(2024) - - # England and Wales data from https://www.gov.uk/government/statistics/council-tax-stock-of-properties-2024 - - # Scotland data 
from https://www.gov.scot/publications/council-tax-datasets/ (Number of chargeable dwellings, 2024) - - for i, row in ct_data.iterrows(): - selected_region = row["Region"] - in_region = sim.calculate("region").values == selected_region - for band in ["A", "B", "C", "D", "E", "F", "G", "H"]: - name = f"voa/council_tax/{selected_region}/{band}" - in_band = sim.calculate("council_tax_band") == band - df[name] = (in_band * in_region).astype(float) - target_names.append(name) - target_values.append(float(row[band]) * uprating) - # Add total row - name = f"voa/council_tax/{selected_region}/total" - df[name] = (in_region).astype(float) - target_names.append(name) - target_values.append(float(row["Total"]) * uprating) - - # Benefit cap counts - - benefit_cap_reduction = sim.calculate( - "benefit_cap_reduction", map_to="household" - ).values - df["dwp/benefit_capped_households"] = (benefit_cap_reduction > 0).astype( - float - ) - target_names.append("dwp/benefit_capped_households") - target_values.append( - 115_000 - ) # https://www.gov.uk/government/statistics/benefit-cap-number-of-households-capped-to-february-2025/benefit-cap-number-of-households-capped-to-february-2025 - - df["dwp/benefit_cap_total_reduction"] = benefit_cap_reduction.astype(float) - target_names.append("dwp/benefit_cap_total_reduction") - target_values.append( - 60 * 52 * 115_000 - ) # same source as above, multiply avg cap amount by total capped population - - # UC national payment distribution - - uc_payment_dist = uc_national_payment_dist - uc_payments = sim.calculate("universal_credit", map_to="benunit").values - uc_family_type = sim.calculate("family_type", map_to="benunit").values - - for i, row in uc_payment_dist.iterrows(): - lower = row.uc_annual_payment_min - upper = row.uc_annual_payment_max - family_type = row.family_type - in_band = ( - (uc_payments >= lower) - & (uc_payments < upper) - & (uc_family_type == family_type) - ) - name = 
f"dwp/uc_payment_dist/{family_type}_annual_payment_{lower:_.0f}_to_{upper:_.0f}" - df[name] = household_from_family(in_band) - target_names.append(name) - target_values.append(row.household_count) - - # Vehicle ownership calibration targets - # NTS 2024: 22% no car, 44% one car, 34% two+ cars - # https://www.gov.uk/government/statistics/national-travel-survey-2024 - # Total households (~29.6m) from council tax data (consistent with other calibration) - total_households = ct_data["Total"].sum() * uprating - num_vehicles = pe("num_vehicles") - - df["nts/households_no_vehicle"] = (num_vehicles == 0).astype(float) - target_names.append("nts/households_no_vehicle") - target_values.append(total_households * NTS_NO_VEHICLE_RATE) - - df["nts/households_one_vehicle"] = (num_vehicles == 1).astype(float) - target_names.append("nts/households_one_vehicle") - target_values.append(total_households * NTS_ONE_VEHICLE_RATE) - - df["nts/households_two_plus_vehicles"] = (num_vehicles >= 2).astype(float) - target_names.append("nts/households_two_plus_vehicles") - target_values.append(total_households * NTS_TWO_PLUS_VEHICLE_RATE) - - RENT_ESTIMATE = { - "private_renter": 1_400 - * 12 - * 4.7e6, # https://www.ons.gov.uk/economy/inflationandpriceindices/bulletins/privaterentandhousepricesuk/january2025 - "owner_mortgage": 1_100 * 12 * 7.5e6, - } - - # Housing affordability targets - # Total mortgage payments (capital + interest) - mortgage_capital = pe("mortgage_capital_repayment") - mortgage_interest = pe("mortgage_interest_repayment") - total_mortgage = mortgage_capital + mortgage_interest - df["housing/total_mortgage"] = total_mortgage - target_names.append("housing/total_mortgage") - target_values.append(RENT_ESTIMATE["owner_mortgage"]) - - # Total rent by tenure type - rent = pe("rent") - tenure_type = sim.calculate("tenure_type", map_to="household").values - - df["housing/rent_private"] = rent * (tenure_type == "RENT_PRIVATELY") - target_names.append("housing/rent_private") - 
target_values.append(RENT_ESTIMATE["private_renter"]) - - combined_targets = pd.concat( - [ - targets, - pd.DataFrame( - { - "value": target_values, - }, - index=target_names, - ), - ] - ) - - combined_targets.to_csv("test.csv") - - return df, combined_targets.value +from policyengine_uk_data.targets.build_loss_matrix import ( + create_target_matrix, +) def get_loss_results( dataset, time_period, reform=None, household_weights=None ): - """ - Calculate loss metrics comparing model outputs to targets. + """Calculate loss metrics comparing model outputs to targets. Args: dataset: PolicyEngine UK dataset to evaluate. - time_period: Year for comparison. - reform: Policy reform to apply. - household_weights: Custom weights (uses dataset weights if None). + time_period: year for comparison. + reform: policy reform to apply. + household_weights: custom weights (uses dataset weights if None). Returns: DataFrame with estimate vs target comparisons and error metrics. diff --git a/pyproject.toml b/pyproject.toml index 55255e38..9ce780d4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,6 +29,8 @@ dependencies = [ "odfpy", "pandas", "openpyxl", + "pydantic>=2.0", + "pyyaml", ] [project.optional-dependencies] From 482f87d47cffe424b86f28a6a2818cf0561eb9c6 Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Sun, 15 Feb 2026 13:42:20 +0000 Subject: [PATCH 2/6] Fix lint errors and add changelog entry Co-Authored-By: Claude Opus 4 --- changelog_entry.yaml | 4 + .../datasets/create_datasets.py | 11 +- .../local_areas/constituencies/calibrate.py | 16 +- .../local_authorities/calibrate.py | 22 +- .../targets/build_loss_matrix.py | 211 +++++------------- policyengine_uk_data/targets/registry.py | 8 +- policyengine_uk_data/targets/sources/dwp.py | 83 ++++--- .../targets/sources/hmrc_salary_sacrifice.py | 11 +- .../targets/sources/hmrc_spi.py | 28 +-- .../targets/sources/local_age.py | 1 - .../targets/sources/local_la_extras.py | 48 +++- policyengine_uk_data/targets/sources/obr.py 
| 31 +-- .../targets/sources/ons_demographics.py | 12 +- .../targets/sources/ons_households.py | 11 +- .../targets/sources/ons_savings.py | 15 +- .../targets/sources/ons_tenure.py | 11 +- policyengine_uk_data/utils/loss.py | 5 +- 17 files changed, 192 insertions(+), 336 deletions(-) diff --git a/changelog_entry.yaml b/changelog_entry.yaml index e69de29b..5a854246 100644 --- a/changelog_entry.yaml +++ b/changelog_entry.yaml @@ -0,0 +1,4 @@ +- bump: minor + changes: + changed: + - Replaced ad-hoc calibration targets with structured registry and source modules. diff --git a/policyengine_uk_data/datasets/create_datasets.py b/policyengine_uk_data/datasets/create_datasets.py index ed969d07..ded6210a 100644 --- a/policyengine_uk_data/datasets/create_datasets.py +++ b/policyengine_uk_data/datasets/create_datasets.py @@ -2,7 +2,6 @@ from policyengine_uk_data.storage import STORAGE_FOLDER import logging import os -from policyengine_uk.data import UKSingleYearDataset from policyengine_uk_data.utils.uprating import uprate_dataset from policyengine_uk_data.utils.progress import ( ProcessingProgress, @@ -44,7 +43,6 @@ def main(): update_dataset, nested_progress, ): - # Create base FRS dataset update_dataset("Create base FRS dataset", "processing") frs = create_frs( @@ -107,9 +105,6 @@ def main(): update_dataset("Uprate to 2025", "completed") # Calibrate constituency weights with nested progress - from policyengine_uk_data.datasets.local_areas.constituencies.calibrate import ( - calibrate, - ) update_dataset("Calibrate constituency weights", "processing") @@ -151,7 +146,7 @@ def main(): ) # Run calibration with verbose progress - frs_calibrated_las = calibrate_local_areas( + calibrate_local_areas( dataset=frs, epochs=epochs, matrix_fn=create_local_authority_target_matrix, @@ -170,9 +165,7 @@ def main(): # Downrate and save update_dataset("Downrate to 2023", "processing") - frs_calibrated = uprate_dataset( - frs_calibrated_constituencies, 2023 - ) + frs_calibrated = 
uprate_dataset(frs_calibrated_constituencies, 2023) update_dataset("Downrate to 2023", "completed") update_dataset("Save final dataset", "processing") diff --git a/policyengine_uk_data/datasets/local_areas/constituencies/calibrate.py b/policyengine_uk_data/datasets/local_areas/constituencies/calibrate.py index 6ea99677..24aa3c30 100644 --- a/policyengine_uk_data/datasets/local_areas/constituencies/calibrate.py +++ b/policyengine_uk_data/datasets/local_areas/constituencies/calibrate.py @@ -73,9 +73,9 @@ def get_performance(weights, m_c, y_c, m_n, y_n, excluded_targets): constituency_target_validation["estimate"] - constituency_target_validation["target"] ) - constituency_target_validation["abs_error"] = ( - constituency_target_validation["error"].abs() - ) + constituency_target_validation["abs_error"] = constituency_target_validation[ + "error" + ].abs() constituency_target_validation["rel_abs_error"] = ( constituency_target_validation["abs_error"] / constituency_target_validation["target"] @@ -91,15 +91,11 @@ def get_performance(weights, m_c, y_c, m_n, y_n, excluded_targets): national_target_validation["target"] = national_actuals.values national_target_validation["error"] = ( - national_target_validation["estimate"] - - national_target_validation["target"] + national_target_validation["estimate"] - national_target_validation["target"] ) - national_target_validation["abs_error"] = national_target_validation[ - "error" - ].abs() + national_target_validation["abs_error"] = national_target_validation["error"].abs() national_target_validation["rel_abs_error"] = ( - national_target_validation["abs_error"] - / national_target_validation["target"] + national_target_validation["abs_error"] / national_target_validation["target"] ) df = pd.concat( diff --git a/policyengine_uk_data/datasets/local_areas/local_authorities/calibrate.py b/policyengine_uk_data/datasets/local_areas/local_authorities/calibrate.py index 588f2955..746d94e7 100644 --- 
a/policyengine_uk_data/datasets/local_areas/local_authorities/calibrate.py +++ b/policyengine_uk_data/datasets/local_areas/local_authorities/calibrate.py @@ -18,12 +18,8 @@ def calibrate( ): return calibrate_local_areas( dataset=dataset, - matrix_fn=lambda ds: create_local_authority_target_matrix( - ds, ds.time_period - ), - national_matrix_fn=lambda ds: create_national_target_matrix( - ds, ds.time_period - ), + matrix_fn=lambda ds: create_local_authority_target_matrix(ds, ds.time_period), + national_matrix_fn=lambda ds: create_national_target_matrix(ds, ds.time_period), area_count=360, weight_file="local_authority_weights.h5", excluded_training_targets=excluded_training_targets, @@ -37,9 +33,7 @@ def calibrate( def get_performance(weights, m_c, y_c, m_n, y_n, excluded_targets): la_target_matrix, la_actuals = m_c, y_c national_target_matrix, national_actuals = m_n, y_n - local_authorities = pd.read_csv( - STORAGE_FOLDER / "local_authorities_2021.csv" - ) + local_authorities = pd.read_csv(STORAGE_FOLDER / "local_authorities_2021.csv") la_wide = weights @ la_target_matrix la_wide.index = local_authorities.code.values la_wide["name"] = local_authorities.name.values @@ -93,15 +87,11 @@ def get_performance(weights, m_c, y_c, m_n, y_n, excluded_targets): national_target_validation["target"] = national_actuals.values national_target_validation["error"] = ( - national_target_validation["estimate"] - - national_target_validation["target"] + national_target_validation["estimate"] - national_target_validation["target"] ) - national_target_validation["abs_error"] = national_target_validation[ - "error" - ].abs() + national_target_validation["abs_error"] = national_target_validation["error"].abs() national_target_validation["rel_abs_error"] = ( - national_target_validation["abs_error"] - / national_target_validation["target"] + national_target_validation["abs_error"] / national_target_validation["target"] ) df = pd.concat( diff --git 
a/policyengine_uk_data/targets/build_loss_matrix.py b/policyengine_uk_data/targets/build_loss_matrix.py index 6b366594..e76ff50c 100644 --- a/policyengine_uk_data/targets/build_loss_matrix.py +++ b/policyengine_uk_data/targets/build_loss_matrix.py @@ -114,9 +114,7 @@ def pe(self, variable: str): """Calculate variable mapped to household level.""" key = ("pe", variable) if key not in self._cache: - self._cache[key] = self.sim.calculate( - variable, map_to="household" - ).values + self._cache[key] = self.sim.calculate(variable, map_to="household").values return self._cache[key] def pe_person(self, variable: str): @@ -147,9 +145,7 @@ def household_from_family(self, values): @property def region(self): if "region" not in self._cache: - self._cache["region"] = self.sim.calculate( - "region", map_to="person" - ) + self._cache["region"] = self.sim.calculate("region", map_to="person") return self._cache["region"] @property @@ -178,13 +174,9 @@ def counterfactual_sim(self): if "counterfactual_sim" not in self._cache: from policyengine_uk import Microsimulation - ss = self.sim.calculate( - "pension_contributions_via_salary_sacrifice" - ) + ss = self.sim.calculate("pension_contributions_via_salary_sacrifice") emp = self.sim.calculate("employment_income") - cf_sim = Microsimulation( - dataset=self.dataset, reform=self.reform - ) + cf_sim = Microsimulation(dataset=self.dataset, reform=self.reform) cf_sim.set_input( "pension_contributions_via_salary_sacrifice", self.time_period, @@ -221,9 +213,8 @@ def counterfactual_sim(self): # ── Column computation dispatch ────────────────────────────────────── -def _compute_column( - target: Target, ctx: _SimContext, year: int -) -> np.ndarray | None: + +def _compute_column(target: Target, ctx: _SimContext, year: int) -> np.ndarray | None: """Compute the household-level column for a target. Returns None if the target can't be computed (e.g. 
missing @@ -264,9 +255,9 @@ def _compute_column( if name == "ons/scotland_households_3plus_children": is_child = ctx.pe_person("is_child") children_per_hh = ctx.household_from_person(is_child) - return ( - (ctx.household_region == "SCOTLAND") & (children_per_hh >= 3) - ).astype(float) + return ((ctx.household_region == "SCOTLAND") & (children_per_hh >= 3)).astype( + float + ) # ── Household type targets ──────────────────────────────────── if target.variable == "family_type" and target.is_count: @@ -294,9 +285,8 @@ def _compute_column( # ── Housing targets ─────────────────────────────────────────── if name == "housing/total_mortgage": - return ( - ctx.pe("mortgage_capital_repayment") - + ctx.pe("mortgage_interest_repayment") + return ctx.pe("mortgage_capital_repayment") + ctx.pe( + "mortgage_interest_repayment" ) if name == "housing/rent_private": tenure = ctx.sim.calculate("tenure_type", map_to="household").values @@ -315,14 +305,10 @@ def _compute_column( # ── DWP PIP claimant splits ─────────────────────────────────── if name == "dwp/pip_dl_standard_claimants": pip_dl = ctx.sim.calculate("pip_dl_category") - return ctx.sim.map_result( - pip_dl == "STANDARD", "person", "household" - ) + return ctx.sim.map_result(pip_dl == "STANDARD", "person", "household") if name == "dwp/pip_dl_enhanced_claimants": pip_dl = ctx.sim.calculate("pip_dl_category") - return ctx.sim.map_result( - pip_dl == "ENHANCED", "person", "household" - ) + return ctx.sim.map_result(pip_dl == "ENHANCED", "person", "household") # ── DWP benefit cap ─────────────────────────────────────────── if name == "dwp/benefit_capped_households": @@ -341,9 +327,9 @@ def _compute_column( on_uc = ctx.household_from_family(uc > 0) > 0 child_u1 = ctx.pe_person("is_child") & (ctx.age < 1) has_child_u1 = ctx.household_from_person(child_u1) > 0 - return ( - (ctx.household_region == "SCOTLAND") & on_uc & has_child_u1 - ).astype(float) + return ((ctx.household_region == "SCOTLAND") & on_uc & has_child_u1).astype( + 
float + ) # ── UC claimants by number of children ───────────────────────── if name.startswith("dwp/uc/claimants_with_") and "_children" in name: @@ -369,18 +355,14 @@ def _compute_column( "obr/salary_sacrifice_employee_ni_relief", ): ni_base = ctx.sim.calculate("ni_employee") - ni_cf = ctx.counterfactual_sim.calculate( - "ni_employee", ctx.time_period - ) + ni_cf = ctx.counterfactual_sim.calculate("ni_employee", ctx.time_period) return ctx.household_from_person(ni_cf - ni_base) if name in ( "hmrc/salary_sacrifice_employer_nics_relief", "obr/salary_sacrifice_employer_ni_relief", ): ni_base = ctx.sim.calculate("ni_employer") - ni_cf = ctx.counterfactual_sim.calculate( - "ni_employer", ctx.time_period - ) + ni_cf = ctx.counterfactual_sim.calculate("ni_employer", ctx.time_period) return ctx.household_from_person(ni_cf - ni_base) # ── UC jobseeker / non-jobseeker splits ─────────────────────── @@ -424,6 +406,7 @@ def _compute_column( # ── Compute implementations ────────────────────────────────────────── + def _compute_simple_gbp(target: Target, ctx: _SimContext) -> np.ndarray: """Sum a variable at household level.""" variable = target.variable @@ -445,16 +428,14 @@ def _compute_simple_count(target: Target, ctx: _SimContext) -> np.ndarray: return ctx.pe_count(target.variable) -def _compute_regional_age( - target: Target, ctx: _SimContext -) -> np.ndarray: +def _compute_regional_age(target: Target, ctx: _SimContext) -> np.ndarray: """Compute person count in a region × age band.""" # Parse "ons/{region_name}_age_{lower}_{upper}" from the name name = target.name.removeprefix("ons/") # Find the _age_ part idx = name.index("_age_") region_name = name[:idx] - age_part = name[idx + 5:] # e.g. "0_9" + age_part = name[idx + 5 :] # e.g. 
"0_9" lower, upper = age_part.split("_") lower, upper = int(lower), int(upper) @@ -463,16 +444,12 @@ def _compute_regional_age( return None person_match = ( - (ctx.region.values == pe_region) - & (ctx.age >= lower) - & (ctx.age <= upper) + (ctx.region.values == pe_region) & (ctx.age >= lower) & (ctx.age <= upper) ) return ctx.household_from_person(person_match) -def _compute_gender_age( - target: Target, ctx: _SimContext -) -> np.ndarray: +def _compute_gender_age(target: Target, ctx: _SimContext) -> np.ndarray: """Compute person count in a gender × age band.""" name = target.name.removeprefix("ons/") # "female_0_14" or "male_75_90" @@ -487,9 +464,7 @@ def _compute_gender_age( return ctx.household_from_person(sex_match & age_match) -def _compute_household_type( - target: Target, ctx: _SimContext -) -> np.ndarray | None: +def _compute_household_type(target: Target, ctx: _SimContext) -> np.ndarray | None: """Compute household type count from ONS families & households categories. Maps ONS household categories to PE family_type enum values and @@ -507,26 +482,18 @@ def ft_hh(value): return ctx.household_from_family(ft == value) > 0 if name == "lone_households_under_65": - return ( - ft_hh("SINGLE") - & (children_per_hh == 0) - & (age_hh_head < 65) - ).astype(float) + return (ft_hh("SINGLE") & (children_per_hh == 0) & (age_hh_head < 65)).astype( + float + ) if name == "lone_households_over_65": - return ( - ft_hh("SINGLE") - & (children_per_hh == 0) - & (age_hh_head >= 65) - ).astype(float) + return (ft_hh("SINGLE") & (children_per_hh == 0) & (age_hh_head >= 65)).astype( + float + ) if name == "unrelated_adult_households": - people_per_hh = ctx.household_from_person( - np.ones_like(is_child) + people_per_hh = ctx.household_from_person(np.ones_like(is_child)) + return (ft_hh("SINGLE") & (children_per_hh == 0) & (people_per_hh > 1)).astype( + float ) - return ( - ft_hh("SINGLE") - & (children_per_hh == 0) - & (people_per_hh > 1) - ).astype(float) if name == 
"couple_no_children_households": return ft_hh("COUPLE_NO_CHILDREN").astype(float) if name == "couple_under_3_children_households": @@ -536,27 +503,14 @@ def ft_hh(value): & (children_per_hh <= 2) ).astype(float) if name == "couple_3_plus_children_households": - return ( - ft_hh("COUPLE_WITH_CHILDREN") - & (children_per_hh >= 3) - ).astype(float) + return (ft_hh("COUPLE_WITH_CHILDREN") & (children_per_hh >= 3)).astype(float) if name == "couple_non_dependent_children_only_households": - people_per_hh = ctx.household_from_person( - np.ones_like(is_child) - ) - return ( - ft_hh("COUPLE_NO_CHILDREN") - & (people_per_hh > 2) - ).astype(float) + people_per_hh = ctx.household_from_person(np.ones_like(is_child)) + return (ft_hh("COUPLE_NO_CHILDREN") & (people_per_hh > 2)).astype(float) if name == "lone_parent_dependent_children_households": - return ( - ft_hh("LONE_PARENT") - & (children_per_hh > 0) - ).astype(float) + return (ft_hh("LONE_PARENT") & (children_per_hh > 0)).astype(float) if name == "lone_parent_non_dependent_children_households": - people_per_hh = ctx.household_from_person( - np.ones_like(is_child) - ) + people_per_hh = ctx.household_from_person(np.ones_like(is_child)) return ( ft_hh("SINGLE") & (children_per_hh == 0) @@ -570,9 +524,7 @@ def ft_hh(value): return None -def _compute_tenure( - target: Target, ctx: _SimContext -) -> np.ndarray | None: +def _compute_tenure(target: Target, ctx: _SimContext) -> np.ndarray | None: """Compute dwelling count by tenure type.""" # Map ONS target name suffixes to PE tenure_type enum values _TENURE_MAP = { @@ -601,34 +553,22 @@ def _compute_tenure( return (match & in_england).astype(float) -def _compute_income_band( - target: Target, ctx: _SimContext -) -> np.ndarray: +def _compute_income_band(target: Target, ctx: _SimContext) -> np.ndarray: """Compute income variable within a total income band.""" variable = target.variable lower = target.lower_bound upper = target.upper_bound - income_df = ctx.sim.calculate_dataframe( - 
["total_income", variable] - ) - in_band = (income_df.total_income >= lower) & ( - income_df.total_income < upper - ) + income_df = ctx.sim.calculate_dataframe(["total_income", variable]) + in_band = (income_df.total_income >= lower) & (income_df.total_income < upper) if target.is_count: - return ctx.household_from_person( - (income_df[variable] > 0) * in_band - ) + return ctx.household_from_person((income_df[variable] > 0) * in_band) else: - return ctx.household_from_person( - income_df[variable] * in_band - ) + return ctx.household_from_person(income_df[variable] * in_band) -def _compute_council_tax_band( - target: Target, ctx: _SimContext -) -> np.ndarray: +def _compute_council_tax_band(target: Target, ctx: _SimContext) -> np.ndarray: """Compute council tax band count for a region.""" # "voa/council_tax/{REGION}/{band}" parts = target.name.split("/") @@ -644,9 +584,7 @@ def _compute_council_tax_band( return (in_band * in_region).astype(float) -def _compute_obr_council_tax( - target: Target, ctx: _SimContext -) -> np.ndarray: +def _compute_obr_council_tax(target: Target, ctx: _SimContext) -> np.ndarray: """Compute OBR council tax receipts, optionally by country.""" name = target.name ct = ctx.pe("council_tax") @@ -662,16 +600,12 @@ def _compute_obr_council_tax( return ct -def _compute_uc_jobseeker( - target: Target, ctx: _SimContext -) -> np.ndarray: +def _compute_uc_jobseeker(target: Target, ctx: _SimContext) -> np.ndarray: """Compute UC jobseeker / non-jobseeker splits.""" family = ctx.sim.populations["benunit"] uc = ctx.sim.calculate("universal_credit") on_uc = uc > 0 - unemployed = family.any( - ctx.sim.calculate("employment_status") == "UNEMPLOYED" - ) + unemployed = family.any(ctx.sim.calculate("employment_status") == "UNEMPLOYED") if "non_jobseekers" in target.name: mask = on_uc * ~unemployed @@ -684,37 +618,26 @@ def _compute_uc_jobseeker( return ctx.household_from_family(uc * mask) -def _compute_uc_payment_dist( - target: Target, ctx: _SimContext -) -> 
np.ndarray: +def _compute_uc_payment_dist(target: Target, ctx: _SimContext) -> np.ndarray: """Compute UC payment distribution band × family type.""" # Parse from name: "dwp/uc_payment_dist/{family_type}_annual_payment_{lower}_to_{upper}" name = target.name.removeprefix("dwp/uc_payment_dist/") # Find the _annual_payment_ separator idx = name.index("_annual_payment_") family_type = name[:idx] - payment_part = name[idx + 16:] # e.g. "0_to_1_000" lower = target.lower_bound upper = target.upper_bound - uc_payments = ctx.sim.calculate( - "universal_credit", map_to="benunit" - ).values - uc_family_type = ctx.sim.calculate( - "family_type", map_to="benunit" - ).values + uc_payments = ctx.sim.calculate("universal_credit", map_to="benunit").values + uc_family_type = ctx.sim.calculate("family_type", map_to="benunit").values in_band = ( - (uc_payments >= lower) - & (uc_payments < upper) - & (uc_family_type == family_type) + (uc_payments >= lower) & (uc_payments < upper) & (uc_family_type == family_type) ) return ctx.household_from_family(in_band) -def _compute_ss_it_relief( - target: Target, ctx: _SimContext -) -> np.ndarray: +def _compute_ss_it_relief(target: Target, ctx: _SimContext) -> np.ndarray: """Compute salary sacrifice IT relief by tax band.""" it_base = ctx.sim.calculate("income_tax") it_cf = ctx.counterfactual_sim.calculate("income_tax", ctx.time_period) @@ -731,9 +654,7 @@ def _compute_ss_it_relief( name = target.name if "basic" in name: - mask = (adj_net_income_cf > basic_thresh) & ( - adj_net_income_cf <= higher_thresh - ) + mask = (adj_net_income_cf > basic_thresh) & (adj_net_income_cf <= higher_thresh) elif "higher" in name: mask = (adj_net_income_cf > higher_thresh) & ( adj_net_income_cf <= additional_thresh @@ -747,9 +668,7 @@ def _compute_ss_it_relief( return ctx.household_from_person(it_relief * mask) -def _compute_two_child_limit( - target: Target, ctx: _SimContext -) -> np.ndarray | None: +def _compute_two_child_limit(target: Target, ctx: _SimContext) -> 
np.ndarray | None: """Compute two-child limit targets. These involve cross-tabulations of UC eligibility, child count, @@ -780,18 +699,14 @@ def _compute_two_child_limit( return children_in_capped if name == "dwp/uc/two_child_limit/children_in_affected_households": # Total children (not just affected ones) in capped households - total_children = sim.map_result( - is_child * child_in_uc, "person", "household" - ) + total_children = sim.map_result(is_child * child_in_uc, "person", "household") return total_children * capped_hh # By number of children: "dwp/uc/two_child_limit/{n}_children_households" if "_children_households_total_children" in name: n = int(name.split("/")[-1].split("_")[0]) children_count = sim.map_result(is_child, "person", "household") - return ( - capped_hh * (children_count == n) * children_count - ).astype(float) + return (capped_hh * (children_count == n) * children_count).astype(float) if "_children_households" in name and "total" not in name: n = int(name.split("/")[-1].split("_")[0]) children_count = sim.map_result(is_child, "person", "household") @@ -822,9 +737,7 @@ def _compute_two_child_limit( return None -def _compute_uc_by_children( - target: Target, ctx: _SimContext -) -> np.ndarray: +def _compute_uc_by_children(target: Target, ctx: _SimContext) -> np.ndarray: """Compute UC claimant households filtered by number of dependent children.""" # Parse "dwp/uc/claimants_with_{n}_children" name = target.name @@ -846,9 +759,7 @@ def _compute_uc_by_children( return (on_uc & match).astype(float) -def _compute_uc_by_family_type( - target: Target, ctx: _SimContext -) -> np.ndarray: +def _compute_uc_by_family_type(target: Target, ctx: _SimContext) -> np.ndarray: """Compute UC claimant households filtered by family type.""" name = target.name ft_str = name.split("dwp/uc/claimants_")[1] @@ -867,9 +778,7 @@ def ft_hh(value): if ft_str == "single_no_children": match = ft_hh("SINGLE") & (children_per_hh == 0) elif ft_str == "single_with_children": - 
match = (ft_hh("SINGLE") | ft_hh("LONE_PARENT")) & ( - children_per_hh > 0 - ) + match = (ft_hh("SINGLE") | ft_hh("LONE_PARENT")) & (children_per_hh > 0) elif ft_str == "couple_no_children": match = ft_hh("COUPLE_NO_CHILDREN") elif ft_str == "couple_with_children": diff --git a/policyengine_uk_data/targets/registry.py b/policyengine_uk_data/targets/registry.py index 909fd85d..b4c97108 100644 --- a/policyengine_uk_data/targets/registry.py +++ b/policyengine_uk_data/targets/registry.py @@ -24,12 +24,8 @@ def discover_source_modules() -> list: """Import all modules under targets.sources.""" modules = [] package_path = Path(sources_pkg.__file__).parent - for importer, modname, ispkg in pkgutil.iter_modules( - [str(package_path)] - ): - mod = importlib.import_module( - f"policyengine_uk_data.targets.sources.{modname}" - ) + for importer, modname, ispkg in pkgutil.iter_modules([str(package_path)]): + mod = importlib.import_module(f"policyengine_uk_data.targets.sources.{modname}") if hasattr(mod, "get_targets"): modules.append(mod) return modules diff --git a/policyengine_uk_data/targets/sources/dwp.py b/policyengine_uk_data/targets/sources/dwp.py index 67c23c0a..ff4441b2 100644 --- a/policyengine_uk_data/targets/sources/dwp.py +++ b/policyengine_uk_data/targets/sources/dwp.py @@ -10,7 +10,6 @@ - DWP two-child limit: https://www.gov.uk/government/statistics/universal-credit-and-child-tax-credit-claimants-statistics-related-to-the-policy-to-provide-support-for-a-maximum-of-2-children-april-2024 """ -import pandas as pd from pathlib import Path from policyengine_uk_data.targets.schema import Target, Unit @@ -118,9 +117,7 @@ def get_targets() -> list[Target]: variable="universal_credit", source="dwp", unit=Unit.COUNT, - values={ - 2025: count_k * (1 + undercount_relative) * 1e3 - }, + values={2025: count_k * (1 + undercount_relative) * 1e3}, is_count=True, reference_url="https://stat-xplore.dwp.gov.uk/", ) @@ -193,44 +190,46 @@ def get_targets() -> list[Target]: ) # 
Two-child limit by disability - targets.extend([ - Target( - name="dwp/uc/two_child_limit/adult_pip_households", - variable="pip", - source="dwp", - unit=Unit.COUNT, - values={2026: 62_260}, - is_count=True, - reference_url="https://www.gov.uk/government/statistics/universal-credit-and-child-tax-credit-claimants-statistics-related-to-the-policy-to-provide-support-for-a-maximum-of-2-children-april-2024", - ), - Target( - name="dwp/uc/two_child_limit/adult_pip_children", - variable="is_child", - source="dwp", - unit=Unit.COUNT, - values={2026: 225_320}, - is_count=True, - reference_url="https://www.gov.uk/government/statistics/universal-credit-and-child-tax-credit-claimants-statistics-related-to-the-policy-to-provide-support-for-a-maximum-of-2-children-april-2024", - ), - Target( - name="dwp/uc/two_child_limit/disabled_child_element_households", - variable="uc_individual_disabled_child_element", - source="dwp", - unit=Unit.COUNT, - values={2026: 124_560}, - is_count=True, - reference_url="https://www.gov.uk/government/statistics/universal-credit-and-child-tax-credit-claimants-statistics-related-to-the-policy-to-provide-support-for-a-maximum-of-2-children-april-2024", - ), - Target( - name="dwp/uc/two_child_limit/disabled_child_element_children", - variable="is_child", - source="dwp", - unit=Unit.COUNT, - values={2026: 462_660}, - is_count=True, - reference_url="https://www.gov.uk/government/statistics/universal-credit-and-child-tax-credit-claimants-statistics-related-to-the-policy-to-provide-support-for-a-maximum-of-2-children-april-2024", - ), - ]) + targets.extend( + [ + Target( + name="dwp/uc/two_child_limit/adult_pip_households", + variable="pip", + source="dwp", + unit=Unit.COUNT, + values={2026: 62_260}, + is_count=True, + reference_url="https://www.gov.uk/government/statistics/universal-credit-and-child-tax-credit-claimants-statistics-related-to-the-policy-to-provide-support-for-a-maximum-of-2-children-april-2024", + ), + Target( + 
name="dwp/uc/two_child_limit/adult_pip_children", + variable="is_child", + source="dwp", + unit=Unit.COUNT, + values={2026: 225_320}, + is_count=True, + reference_url="https://www.gov.uk/government/statistics/universal-credit-and-child-tax-credit-claimants-statistics-related-to-the-policy-to-provide-support-for-a-maximum-of-2-children-april-2024", + ), + Target( + name="dwp/uc/two_child_limit/disabled_child_element_households", + variable="uc_individual_disabled_child_element", + source="dwp", + unit=Unit.COUNT, + values={2026: 124_560}, + is_count=True, + reference_url="https://www.gov.uk/government/statistics/universal-credit-and-child-tax-credit-claimants-statistics-related-to-the-policy-to-provide-support-for-a-maximum-of-2-children-april-2024", + ), + Target( + name="dwp/uc/two_child_limit/disabled_child_element_children", + variable="is_child", + source="dwp", + unit=Unit.COUNT, + values={2026: 462_660}, + is_count=True, + reference_url="https://www.gov.uk/government/statistics/universal-credit-and-child-tax-credit-claimants-statistics-related-to-the-policy-to-provide-support-for-a-maximum-of-2-children-april-2024", + ), + ] + ) # UC national payment distribution from xlsx targets.extend(_uc_payment_distribution_targets()) diff --git a/policyengine_uk_data/targets/sources/hmrc_salary_sacrifice.py b/policyengine_uk_data/targets/sources/hmrc_salary_sacrifice.py index a5f40c0d..1ff2ac82 100644 --- a/policyengine_uk_data/targets/sources/hmrc_salary_sacrifice.py +++ b/policyengine_uk_data/targets/sources/hmrc_salary_sacrifice.py @@ -21,8 +21,7 @@ _SOURCES_YAML = Path(__file__).parent.parent / "sources.yaml" _HEADERS = { "User-Agent": ( - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) " - "AppleWebKit/537.36" + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36" ), } @@ -52,9 +51,7 @@ def get_targets() -> list[Target]: targets = [] try: - r = requests.get( - ref, headers=_HEADERS, allow_redirects=True, timeout=30 - ) + r = requests.get(ref, 
headers=_HEADERS, allow_redirects=True, timeout=30) r.raise_for_status() df = pd.read_csv(io.StringIO(r.content.decode("utf-8-sig"))) @@ -128,8 +125,6 @@ def get_targets() -> list[Target]: ) except Exception as e: - logger.error( - "Failed to download/parse HMRC salary sacrifice CSV: %s", e - ) + logger.error("Failed to download/parse HMRC salary sacrifice CSV: %s", e) return targets diff --git a/policyengine_uk_data/targets/sources/hmrc_spi.py b/policyengine_uk_data/targets/sources/hmrc_spi.py index b78540c3..296965dc 100644 --- a/policyengine_uk_data/targets/sources/hmrc_spi.py +++ b/policyengine_uk_data/targets/sources/hmrc_spi.py @@ -29,8 +29,7 @@ _HEADERS = { "User-Agent": ( - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) " - "AppleWebKit/537.36" + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36" ), } @@ -171,9 +170,7 @@ def get_targets() -> list[Target]: for idx, row in merged.iterrows(): lower = int(row["lower_bound"]) - upper = ( - _BAND_UPPER[idx] if idx < len(_BAND_UPPER) else float("inf") - ) + upper = _BAND_UPPER[idx] if idx < len(_BAND_UPPER) else float("inf") band_label = f"{lower:_}_to_{upper:_}" for variable in INCOME_VARIABLES: @@ -188,9 +185,7 @@ def get_targets() -> list[Target]: variable=variable, source="hmrc_spi", unit=Unit.GBP, - values={ - _SPI_YEAR: float(row[amount_col]) * 1e6 - }, + values={_SPI_YEAR: float(row[amount_col]) * 1e6}, breakdown_variable="total_income", lower_bound=float(lower), upper_bound=float(upper), @@ -201,16 +196,11 @@ def get_targets() -> list[Target]: if count_col in row.index and row[count_col] > 0: targets.append( Target( - name=( - f"hmrc/{variable}_count_income_band" - f"_{band_label}" - ), + name=(f"hmrc/{variable}_count_income_band_{band_label}"), variable=variable, source="hmrc_spi", unit=Unit.COUNT, - values={ - _SPI_YEAR: float(row[count_col]) * 1e3 - }, + values={_SPI_YEAR: float(row[count_col]) * 1e3}, is_count=True, breakdown_variable="total_income", lower_bound=float(lower), @@ 
-230,9 +220,7 @@ def get_targets() -> list[Target]: return targets -def _read_projection_csv( - csv_path: Path, ref: str -) -> list[Target]: +def _read_projection_csv(csv_path: Path, ref: str) -> list[Target]: """Read projected future year targets from incomes_projection.csv.""" incomes = pd.read_csv(csv_path) targets = [] @@ -268,9 +256,7 @@ def _read_projection_csv( ) if count_col in row.index and pd.notna(row[count_col]): - name = ( - f"hmrc/{variable}_count_income_band_{band_label}" - ) + name = f"hmrc/{variable}_count_income_band_{band_label}" targets.append( Target( name=name, diff --git a/policyengine_uk_data/targets/sources/local_age.py b/policyengine_uk_data/targets/sources/local_age.py index 2276c173..5cd7f744 100644 --- a/policyengine_uk_data/targets/sources/local_age.py +++ b/policyengine_uk_data/targets/sources/local_age.py @@ -11,7 +11,6 @@ import logging from pathlib import Path -import numpy as np import pandas as pd logger = logging.getLogger(__name__) diff --git a/policyengine_uk_data/targets/sources/local_la_extras.py b/policyengine_uk_data/targets/sources/local_la_extras.py index c2a9e7d8..193daa53 100644 --- a/policyengine_uk_data/targets/sources/local_la_extras.py +++ b/policyengine_uk_data/targets/sources/local_la_extras.py @@ -14,7 +14,6 @@ import logging from pathlib import Path -import numpy as np import pandas as pd logger = logging.getLogger(__name__) @@ -53,9 +52,16 @@ def load_ons_la_income() -> pd.DataFrame: def load_sheet(sheet_name: str, value_col: str) -> pd.DataFrame: df = pd.read_excel(xlsx, sheet_name=sheet_name, header=3) df.columns = [ - "msoa_code", "msoa_name", "la_code", "la_name", - "region_code", "region_name", value_col, - "upper_ci", "lower_ci", "ci_width", + "msoa_code", + "msoa_name", + "la_code", + "la_name", + "region_code", + "region_name", + value_col, + "upper_ci", + "lower_ci", + "ci_width", ] df = df.iloc[1:].dropna(subset=["msoa_code"]) df[value_col] = pd.to_numeric(df[value_col]) @@ -98,12 +104,24 @@ def 
load_tenure_data() -> pd.DataFrame: return pd.DataFrame() df = pd.read_excel(path, sheet_name="data download") df.columns = [ - "region_code", "region_name", "la_code", "la_name", - "owned_outright_pct", "owned_mortgage_pct", - "private_rent_pct", "social_rent_pct", + "region_code", + "region_name", + "la_code", + "la_name", + "owned_outright_pct", + "owned_mortgage_pct", + "private_rent_pct", + "social_rent_pct", + ] + return df[ + [ + "la_code", + "owned_outright_pct", + "owned_mortgage_pct", + "private_rent_pct", + "social_rent_pct", + ] ] - return df[["la_code", "owned_outright_pct", "owned_mortgage_pct", - "private_rent_pct", "social_rent_pct"]] def load_private_rents() -> pd.DataFrame: @@ -117,8 +135,16 @@ def load_private_rents() -> pd.DataFrame: return pd.DataFrame() df = pd.read_excel(path, sheet_name="Figure 3", header=5) df.columns = [ - "col0", "la_code_old", "area_code", "area_name", "room", - "studio", "one_bed", "two_bed", "three_bed", "four_plus", + "col0", + "la_code_old", + "area_code", + "area_name", + "room", + "studio", + "one_bed", + "two_bed", + "three_bed", + "four_plus", "median_monthly_rent", ] df = df[df["area_code"].astype(str).str.match(r"^E0[6789]")] diff --git a/policyengine_uk_data/targets/sources/obr.py b/policyengine_uk_data/targets/sources/obr.py index 6867eb1f..5a5bccc3 100644 --- a/policyengine_uk_data/targets/sources/obr.py +++ b/policyengine_uk_data/targets/sources/obr.py @@ -38,8 +38,7 @@ _HEADERS = { "User-Agent": ( - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) " - "AppleWebKit/537.36" + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36" ), } @@ -57,9 +56,7 @@ def _download_workbook(url: str) -> openpyxl.Workbook: return openpyxl.load_workbook(io.BytesIO(r.content), data_only=False) -def _read_row_values( - ws, row_num: int, col_letters: list[str] -) -> dict[int, float]: +def _read_row_values(ws, row_num: int, col_letters: list[str]) -> dict[int, float]: """Read numeric values from a row, mapped to 
calendar years.""" result = {} for col in col_letters: @@ -117,7 +114,9 @@ def read_39(ws, row_num: int) -> dict[int, float]: # Income tax from Table 3.4 (accrued basis) try: ws34 = wb["3.4"] - row_num = _find_row(ws34, "Income tax (gross of tax credits)", col="B", max_row=30) + row_num = _find_row( + ws34, "Income tax (gross of tax credits)", col="B", max_row=30 + ) values = _read_row_values(ws34, row_num, cols_34) if values: targets.append( @@ -359,15 +358,11 @@ def read_49(row_num: int) -> dict[int, float]: # Universal credit outside cap (row 43) is jobseekers UC try: # UC outside cap = predominantly JSA-conditionality UC - uc_outside_row = _find_row( - ws, "Universal credit", col="B", max_row=55 - ) + uc_outside_row = _find_row(ws, "Universal credit", col="B", max_row=55) # Find the second UC row (outside cap section) for row in range(uc_outside_row + 1, 55): cell_val = ws[f"B{row}"].value - if cell_val and str(cell_val).strip().startswith( - "Universal credit" - ): + if cell_val and str(cell_val).strip().startswith("Universal credit"): values = read_49(row) if values: targets.append( @@ -439,12 +434,8 @@ def _parse_tv_licence(wb: openpyxl.Workbook) -> list[Target]: _PRIVATE_SCHOOL = {y: 557_000 for y in range(2018, 2032)} # SPP Review: salary sacrifice NI relief (uprated 3% pa from 2024 base) -_SS_EMPLOYEE_NI = { - y: 1.2e9 * 1.03 ** max(0, y - 2024) for y in range(2024, 2032) -} -_SS_EMPLOYER_NI = { - y: 2.9e9 * 1.03 ** max(0, y - 2024) for y in range(2024, 2032) -} +_SS_EMPLOYEE_NI = {y: 1.2e9 * 1.03 ** max(0, y - 2024) for y in range(2024, 2032)} +_SS_EMPLOYER_NI = {y: 2.9e9 * 1.03 ** max(0, y - 2024) for y in range(2024, 2032)} def get_targets() -> list[Target]: @@ -459,9 +450,7 @@ def get_targets() -> list[Target]: logger.error("Failed to download/parse OBR receipts: %s", e) try: - expenditure_wb = _download_workbook( - config["obr"]["efo_expenditure"] - ) + expenditure_wb = _download_workbook(config["obr"]["efo_expenditure"]) 
targets.extend(_parse_council_tax(expenditure_wb)) targets.extend(_parse_welfare(expenditure_wb)) targets.extend(_parse_tv_licence(expenditure_wb)) diff --git a/policyengine_uk_data/targets/sources/ons_demographics.py b/policyengine_uk_data/targets/sources/ons_demographics.py index 1cf0a23b..0a88d54b 100644 --- a/policyengine_uk_data/targets/sources/ons_demographics.py +++ b/policyengine_uk_data/targets/sources/ons_demographics.py @@ -24,7 +24,6 @@ import pandas as pd import requests -import yaml from policyengine_uk_data.targets.schema import ( GeographicLevel, @@ -39,8 +38,7 @@ _HEADERS = { "User-Agent": ( - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) " - "AppleWebKit/537.36" + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36" ), } @@ -89,9 +87,7 @@ @lru_cache(maxsize=1) def _download_uk_projection() -> pd.DataFrame: """Download and parse the UK principal population projection.""" - r = requests.get( - _UK_ZIP_URL, headers=_HEADERS, allow_redirects=True, timeout=120 - ) + r = requests.get(_UK_ZIP_URL, headers=_HEADERS, allow_redirects=True, timeout=120) r.raise_for_status() z = zipfile.ZipFile(io.BytesIO(r.content)) with z.open("uk/uk_ppp_machine_readable.xlsx") as f: @@ -193,9 +189,7 @@ def _parse_regional_from_csv() -> list[Target]: for _, row in demographics.iterrows(): name = row["name"] - if name in _SKIP_NAMES or any( - name.startswith(p) for p in _SKIP_PREFIXES - ): + if name in _SKIP_NAMES or any(name.startswith(p) for p in _SKIP_PREFIXES): continue values = {} for y in _YEARS: diff --git a/policyengine_uk_data/targets/sources/ons_households.py b/policyengine_uk_data/targets/sources/ons_households.py index 4c68b714..88d51cb8 100644 --- a/policyengine_uk_data/targets/sources/ons_households.py +++ b/policyengine_uk_data/targets/sources/ons_households.py @@ -30,8 +30,7 @@ ) _HEADERS = { "User-Agent": ( - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) " - "AppleWebKit/537.36" + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) 
AppleWebKit/537.36" ), } @@ -57,13 +56,9 @@ @lru_cache(maxsize=1) def _download_workbook() -> openpyxl.Workbook: - r = requests.get( - _URL, headers=_HEADERS, allow_redirects=True, timeout=60 - ) + r = requests.get(_URL, headers=_HEADERS, allow_redirects=True, timeout=60) r.raise_for_status() - return openpyxl.load_workbook( - io.BytesIO(r.content), data_only=True - ) + return openpyxl.load_workbook(io.BytesIO(r.content), data_only=True) def _find_year_columns(ws) -> dict[int, int]: diff --git a/policyengine_uk_data/targets/sources/ons_savings.py b/policyengine_uk_data/targets/sources/ons_savings.py index 5f49d8c5..a2984713 100644 --- a/policyengine_uk_data/targets/sources/ons_savings.py +++ b/policyengine_uk_data/targets/sources/ons_savings.py @@ -18,26 +18,19 @@ logger = logging.getLogger(__name__) _API_URL = ( - "https://www.ons.gov.uk/economy/grossdomesticproductgdp/" - "timeseries/haxv/ukea/data" -) -_REF = ( - "https://www.ons.gov.uk/economy/grossdomesticproductgdp/" - "timeseries/haxv/ukea" + "https://www.ons.gov.uk/economy/grossdomesticproductgdp/timeseries/haxv/ukea/data" ) +_REF = "https://www.ons.gov.uk/economy/grossdomesticproductgdp/timeseries/haxv/ukea" _HEADERS = { "User-Agent": ( - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) " - "AppleWebKit/537.36" + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36" ), } def get_targets() -> list[Target]: try: - r = requests.get( - _API_URL, headers=_HEADERS, allow_redirects=True, timeout=30 - ) + r = requests.get(_API_URL, headers=_HEADERS, allow_redirects=True, timeout=30) r.raise_for_status() data = r.json() diff --git a/policyengine_uk_data/targets/sources/ons_tenure.py b/policyengine_uk_data/targets/sources/ons_tenure.py index 841e3f4f..0ae4ccdd 100644 --- a/policyengine_uk_data/targets/sources/ons_tenure.py +++ b/policyengine_uk_data/targets/sources/ons_tenure.py @@ -33,8 +33,7 @@ ) _HEADERS = { "User-Agent": ( - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) " - "AppleWebKit/537.36" 
+ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36" ), } @@ -50,13 +49,9 @@ @lru_cache(maxsize=1) def _download_workbook() -> openpyxl.Workbook: - r = requests.get( - _URL, headers=_HEADERS, allow_redirects=True, timeout=60 - ) + r = requests.get(_URL, headers=_HEADERS, allow_redirects=True, timeout=60) r.raise_for_status() - return openpyxl.load_workbook( - io.BytesIO(r.content), data_only=True - ) + return openpyxl.load_workbook(io.BytesIO(r.content), data_only=True) def _parse_header_columns(ws) -> dict[tuple[int, str], int]: diff --git a/policyengine_uk_data/utils/loss.py b/policyengine_uk_data/utils/loss.py index 3c240ff6..27eb919f 100644 --- a/policyengine_uk_data/utils/loss.py +++ b/policyengine_uk_data/utils/loss.py @@ -4,7 +4,6 @@ for all target definitions and simulation column construction. """ -import numpy as np import pandas as pd from policyengine_uk_data.targets.build_loss_matrix import ( @@ -12,9 +11,7 @@ ) -def get_loss_results( - dataset, time_period, reform=None, household_weights=None -): +def get_loss_results(dataset, time_period, reform=None, household_weights=None): """Calculate loss metrics comparing model outputs to targets. 
Args: From 68b31d49c34624fd7271b32c423ad26afefc7b93 Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Sun, 15 Feb 2026 13:44:08 +0000 Subject: [PATCH 3/6] Format with black -l 79 Co-Authored-By: Claude Opus 4 --- .../datasets/create_datasets.py | 4 +- .../local_areas/constituencies/calibrate.py | 16 ++- .../local_authorities/calibrate.py | 22 +++- .../local_areas/local_authorities/loss.py | 25 ++--- .../targets/build_loss_matrix.py | 106 ++++++++++++------ policyengine_uk_data/targets/registry.py | 4 +- .../targets/sources/hmrc_salary_sacrifice.py | 8 +- .../targets/sources/hmrc_spi.py | 8 +- .../targets/sources/local_la_extras.py | 4 +- policyengine_uk_data/targets/sources/obr.py | 16 ++- .../targets/sources/ons_demographics.py | 8 +- .../targets/sources/ons_savings.py | 8 +- .../tests/test_target_registry.py | 12 +- policyengine_uk_data/utils/loss.py | 4 +- 14 files changed, 161 insertions(+), 84 deletions(-) diff --git a/policyengine_uk_data/datasets/create_datasets.py b/policyengine_uk_data/datasets/create_datasets.py index ded6210a..24efd652 100644 --- a/policyengine_uk_data/datasets/create_datasets.py +++ b/policyengine_uk_data/datasets/create_datasets.py @@ -165,7 +165,9 @@ def main(): # Downrate and save update_dataset("Downrate to 2023", "processing") - frs_calibrated = uprate_dataset(frs_calibrated_constituencies, 2023) + frs_calibrated = uprate_dataset( + frs_calibrated_constituencies, 2023 + ) update_dataset("Downrate to 2023", "completed") update_dataset("Save final dataset", "processing") diff --git a/policyengine_uk_data/datasets/local_areas/constituencies/calibrate.py b/policyengine_uk_data/datasets/local_areas/constituencies/calibrate.py index 24aa3c30..6ea99677 100644 --- a/policyengine_uk_data/datasets/local_areas/constituencies/calibrate.py +++ b/policyengine_uk_data/datasets/local_areas/constituencies/calibrate.py @@ -73,9 +73,9 @@ def get_performance(weights, m_c, y_c, m_n, y_n, excluded_targets): constituency_target_validation["estimate"] 
- constituency_target_validation["target"] ) - constituency_target_validation["abs_error"] = constituency_target_validation[ - "error" - ].abs() + constituency_target_validation["abs_error"] = ( + constituency_target_validation["error"].abs() + ) constituency_target_validation["rel_abs_error"] = ( constituency_target_validation["abs_error"] / constituency_target_validation["target"] @@ -91,11 +91,15 @@ def get_performance(weights, m_c, y_c, m_n, y_n, excluded_targets): national_target_validation["target"] = national_actuals.values national_target_validation["error"] = ( - national_target_validation["estimate"] - national_target_validation["target"] + national_target_validation["estimate"] + - national_target_validation["target"] ) - national_target_validation["abs_error"] = national_target_validation["error"].abs() + national_target_validation["abs_error"] = national_target_validation[ + "error" + ].abs() national_target_validation["rel_abs_error"] = ( - national_target_validation["abs_error"] / national_target_validation["target"] + national_target_validation["abs_error"] + / national_target_validation["target"] ) df = pd.concat( diff --git a/policyengine_uk_data/datasets/local_areas/local_authorities/calibrate.py b/policyengine_uk_data/datasets/local_areas/local_authorities/calibrate.py index 746d94e7..588f2955 100644 --- a/policyengine_uk_data/datasets/local_areas/local_authorities/calibrate.py +++ b/policyengine_uk_data/datasets/local_areas/local_authorities/calibrate.py @@ -18,8 +18,12 @@ def calibrate( ): return calibrate_local_areas( dataset=dataset, - matrix_fn=lambda ds: create_local_authority_target_matrix(ds, ds.time_period), - national_matrix_fn=lambda ds: create_national_target_matrix(ds, ds.time_period), + matrix_fn=lambda ds: create_local_authority_target_matrix( + ds, ds.time_period + ), + national_matrix_fn=lambda ds: create_national_target_matrix( + ds, ds.time_period + ), area_count=360, weight_file="local_authority_weights.h5", 
excluded_training_targets=excluded_training_targets, @@ -33,7 +37,9 @@ def calibrate( def get_performance(weights, m_c, y_c, m_n, y_n, excluded_targets): la_target_matrix, la_actuals = m_c, y_c national_target_matrix, national_actuals = m_n, y_n - local_authorities = pd.read_csv(STORAGE_FOLDER / "local_authorities_2021.csv") + local_authorities = pd.read_csv( + STORAGE_FOLDER / "local_authorities_2021.csv" + ) la_wide = weights @ la_target_matrix la_wide.index = local_authorities.code.values la_wide["name"] = local_authorities.name.values @@ -87,11 +93,15 @@ def get_performance(weights, m_c, y_c, m_n, y_n, excluded_targets): national_target_validation["target"] = national_actuals.values national_target_validation["error"] = ( - national_target_validation["estimate"] - national_target_validation["target"] + national_target_validation["estimate"] + - national_target_validation["target"] ) - national_target_validation["abs_error"] = national_target_validation["error"].abs() + national_target_validation["abs_error"] = national_target_validation[ + "error" + ].abs() national_target_validation["rel_abs_error"] = ( - national_target_validation["abs_error"] / national_target_validation["target"] + national_target_validation["abs_error"] + / national_target_validation["target"] ) df = pd.concat( diff --git a/policyengine_uk_data/datasets/local_areas/local_authorities/loss.py b/policyengine_uk_data/datasets/local_areas/local_authorities/loss.py index 177b2883..1b4e113e 100644 --- a/policyengine_uk_data/datasets/local_areas/local_authorities/loss.py +++ b/policyengine_uk_data/datasets/local_areas/local_authorities/loss.py @@ -151,8 +151,7 @@ def create_local_authority_target_matrix( ) has_ons_data = ( - ons_merged["net_income_bhc"].notna() - & ons_merged["households"].notna() + ons_merged["net_income_bhc"].notna() & ons_merged["households"].notna() ).values total_households = ons_merged["households"].sum() la_household_share = np.where( @@ -195,18 +194,17 @@ def 
create_local_authority_target_matrix( ) tenure_type = sim.calculate("tenure_type").values - matrix["tenure/owned_outright"] = ( - tenure_type == "OWNED_OUTRIGHT" - ).astype(float) + matrix["tenure/owned_outright"] = (tenure_type == "OWNED_OUTRIGHT").astype( + float + ) matrix["tenure/owned_mortgage"] = ( tenure_type == "OWNED_WITH_MORTGAGE" ).astype(float) - matrix["tenure/private_rent"] = ( - tenure_type == "RENT_PRIVATELY" - ).astype(float) + matrix["tenure/private_rent"] = (tenure_type == "RENT_PRIVATELY").astype( + float + ) matrix["tenure/social_rent"] = ( - (tenure_type == "RENT_FROM_COUNCIL") - | (tenure_type == "RENT_FROM_HA") + (tenure_type == "RENT_FROM_COUNCIL") | (tenure_type == "RENT_FROM_HA") ).astype(float) has_tenure = ( @@ -220,9 +218,7 @@ def create_local_authority_target_matrix( ("private_rent", "private_rent_pct"), ("social_rent", "social_rent_pct"), ]: - targets = ( - tenure_merged[pct_col] / 100 * tenure_merged["households"] - ) + targets = tenure_merged[pct_col] / 100 * tenure_merged["households"] national = ( original_weights * matrix[f"tenure/{tenure_key}"].values ).sum() @@ -246,7 +242,8 @@ def create_local_authority_target_matrix( tenure_merged["private_rent_target"] = ( tenure_merged["median_annual_rent"] - * tenure_merged["private_rent_pct"] / 100 + * tenure_merged["private_rent_pct"] + / 100 * tenure_merged["households"] ) diff --git a/policyengine_uk_data/targets/build_loss_matrix.py b/policyengine_uk_data/targets/build_loss_matrix.py index e76ff50c..ddb71593 100644 --- a/policyengine_uk_data/targets/build_loss_matrix.py +++ b/policyengine_uk_data/targets/build_loss_matrix.py @@ -114,7 +114,9 @@ def pe(self, variable: str): """Calculate variable mapped to household level.""" key = ("pe", variable) if key not in self._cache: - self._cache[key] = self.sim.calculate(variable, map_to="household").values + self._cache[key] = self.sim.calculate( + variable, map_to="household" + ).values return self._cache[key] def pe_person(self, variable: 
str): @@ -145,7 +147,9 @@ def household_from_family(self, values): @property def region(self): if "region" not in self._cache: - self._cache["region"] = self.sim.calculate("region", map_to="person") + self._cache["region"] = self.sim.calculate( + "region", map_to="person" + ) return self._cache["region"] @property @@ -174,7 +178,9 @@ def counterfactual_sim(self): if "counterfactual_sim" not in self._cache: from policyengine_uk import Microsimulation - ss = self.sim.calculate("pension_contributions_via_salary_sacrifice") + ss = self.sim.calculate( + "pension_contributions_via_salary_sacrifice" + ) emp = self.sim.calculate("employment_income") cf_sim = Microsimulation(dataset=self.dataset, reform=self.reform) cf_sim.set_input( @@ -214,7 +220,9 @@ def counterfactual_sim(self): # ── Column computation dispatch ────────────────────────────────────── -def _compute_column(target: Target, ctx: _SimContext, year: int) -> np.ndarray | None: +def _compute_column( + target: Target, ctx: _SimContext, year: int +) -> np.ndarray | None: """Compute the household-level column for a target. Returns None if the target can't be computed (e.g. 
missing @@ -255,9 +263,9 @@ def _compute_column(target: Target, ctx: _SimContext, year: int) -> np.ndarray | if name == "ons/scotland_households_3plus_children": is_child = ctx.pe_person("is_child") children_per_hh = ctx.household_from_person(is_child) - return ((ctx.household_region == "SCOTLAND") & (children_per_hh >= 3)).astype( - float - ) + return ( + (ctx.household_region == "SCOTLAND") & (children_per_hh >= 3) + ).astype(float) # ── Household type targets ──────────────────────────────────── if target.variable == "family_type" and target.is_count: @@ -327,9 +335,9 @@ def _compute_column(target: Target, ctx: _SimContext, year: int) -> np.ndarray | on_uc = ctx.household_from_family(uc > 0) > 0 child_u1 = ctx.pe_person("is_child") & (ctx.age < 1) has_child_u1 = ctx.household_from_person(child_u1) > 0 - return ((ctx.household_region == "SCOTLAND") & on_uc & has_child_u1).astype( - float - ) + return ( + (ctx.household_region == "SCOTLAND") & on_uc & has_child_u1 + ).astype(float) # ── UC claimants by number of children ───────────────────────── if name.startswith("dwp/uc/claimants_with_") and "_children" in name: @@ -355,14 +363,18 @@ def _compute_column(target: Target, ctx: _SimContext, year: int) -> np.ndarray | "obr/salary_sacrifice_employee_ni_relief", ): ni_base = ctx.sim.calculate("ni_employee") - ni_cf = ctx.counterfactual_sim.calculate("ni_employee", ctx.time_period) + ni_cf = ctx.counterfactual_sim.calculate( + "ni_employee", ctx.time_period + ) return ctx.household_from_person(ni_cf - ni_base) if name in ( "hmrc/salary_sacrifice_employer_nics_relief", "obr/salary_sacrifice_employer_ni_relief", ): ni_base = ctx.sim.calculate("ni_employer") - ni_cf = ctx.counterfactual_sim.calculate("ni_employer", ctx.time_period) + ni_cf = ctx.counterfactual_sim.calculate( + "ni_employer", ctx.time_period + ) return ctx.household_from_person(ni_cf - ni_base) # ── UC jobseeker / non-jobseeker splits ─────────────────────── @@ -444,7 +456,9 @@ def 
_compute_regional_age(target: Target, ctx: _SimContext) -> np.ndarray: return None person_match = ( - (ctx.region.values == pe_region) & (ctx.age >= lower) & (ctx.age <= upper) + (ctx.region.values == pe_region) + & (ctx.age >= lower) + & (ctx.age <= upper) ) return ctx.household_from_person(person_match) @@ -464,7 +478,9 @@ def _compute_gender_age(target: Target, ctx: _SimContext) -> np.ndarray: return ctx.household_from_person(sex_match & age_match) -def _compute_household_type(target: Target, ctx: _SimContext) -> np.ndarray | None: +def _compute_household_type( + target: Target, ctx: _SimContext +) -> np.ndarray | None: """Compute household type count from ONS families & households categories. Maps ONS household categories to PE family_type enum values and @@ -482,18 +498,18 @@ def ft_hh(value): return ctx.household_from_family(ft == value) > 0 if name == "lone_households_under_65": - return (ft_hh("SINGLE") & (children_per_hh == 0) & (age_hh_head < 65)).astype( - float - ) + return ( + ft_hh("SINGLE") & (children_per_hh == 0) & (age_hh_head < 65) + ).astype(float) if name == "lone_households_over_65": - return (ft_hh("SINGLE") & (children_per_hh == 0) & (age_hh_head >= 65)).astype( - float - ) + return ( + ft_hh("SINGLE") & (children_per_hh == 0) & (age_hh_head >= 65) + ).astype(float) if name == "unrelated_adult_households": people_per_hh = ctx.household_from_person(np.ones_like(is_child)) - return (ft_hh("SINGLE") & (children_per_hh == 0) & (people_per_hh > 1)).astype( - float - ) + return ( + ft_hh("SINGLE") & (children_per_hh == 0) & (people_per_hh > 1) + ).astype(float) if name == "couple_no_children_households": return ft_hh("COUPLE_NO_CHILDREN").astype(float) if name == "couple_under_3_children_households": @@ -503,10 +519,14 @@ def ft_hh(value): & (children_per_hh <= 2) ).astype(float) if name == "couple_3_plus_children_households": - return (ft_hh("COUPLE_WITH_CHILDREN") & (children_per_hh >= 3)).astype(float) + return (ft_hh("COUPLE_WITH_CHILDREN") & 
(children_per_hh >= 3)).astype( + float + ) if name == "couple_non_dependent_children_only_households": people_per_hh = ctx.household_from_person(np.ones_like(is_child)) - return (ft_hh("COUPLE_NO_CHILDREN") & (people_per_hh > 2)).astype(float) + return (ft_hh("COUPLE_NO_CHILDREN") & (people_per_hh > 2)).astype( + float + ) if name == "lone_parent_dependent_children_households": return (ft_hh("LONE_PARENT") & (children_per_hh > 0)).astype(float) if name == "lone_parent_non_dependent_children_households": @@ -560,7 +580,9 @@ def _compute_income_band(target: Target, ctx: _SimContext) -> np.ndarray: upper = target.upper_bound income_df = ctx.sim.calculate_dataframe(["total_income", variable]) - in_band = (income_df.total_income >= lower) & (income_df.total_income < upper) + in_band = (income_df.total_income >= lower) & ( + income_df.total_income < upper + ) if target.is_count: return ctx.household_from_person((income_df[variable] > 0) * in_band) @@ -605,7 +627,9 @@ def _compute_uc_jobseeker(target: Target, ctx: _SimContext) -> np.ndarray: family = ctx.sim.populations["benunit"] uc = ctx.sim.calculate("universal_credit") on_uc = uc > 0 - unemployed = family.any(ctx.sim.calculate("employment_status") == "UNEMPLOYED") + unemployed = family.any( + ctx.sim.calculate("employment_status") == "UNEMPLOYED" + ) if "non_jobseekers" in target.name: mask = on_uc * ~unemployed @@ -628,11 +652,15 @@ def _compute_uc_payment_dist(target: Target, ctx: _SimContext) -> np.ndarray: lower = target.lower_bound upper = target.upper_bound - uc_payments = ctx.sim.calculate("universal_credit", map_to="benunit").values + uc_payments = ctx.sim.calculate( + "universal_credit", map_to="benunit" + ).values uc_family_type = ctx.sim.calculate("family_type", map_to="benunit").values in_band = ( - (uc_payments >= lower) & (uc_payments < upper) & (uc_family_type == family_type) + (uc_payments >= lower) + & (uc_payments < upper) + & (uc_family_type == family_type) ) return 
ctx.household_from_family(in_band) @@ -654,7 +682,9 @@ def _compute_ss_it_relief(target: Target, ctx: _SimContext) -> np.ndarray: name = target.name if "basic" in name: - mask = (adj_net_income_cf > basic_thresh) & (adj_net_income_cf <= higher_thresh) + mask = (adj_net_income_cf > basic_thresh) & ( + adj_net_income_cf <= higher_thresh + ) elif "higher" in name: mask = (adj_net_income_cf > higher_thresh) & ( adj_net_income_cf <= additional_thresh @@ -668,7 +698,9 @@ def _compute_ss_it_relief(target: Target, ctx: _SimContext) -> np.ndarray: return ctx.household_from_person(it_relief * mask) -def _compute_two_child_limit(target: Target, ctx: _SimContext) -> np.ndarray | None: +def _compute_two_child_limit( + target: Target, ctx: _SimContext +) -> np.ndarray | None: """Compute two-child limit targets. These involve cross-tabulations of UC eligibility, child count, @@ -699,14 +731,18 @@ def _compute_two_child_limit(target: Target, ctx: _SimContext) -> np.ndarray | N return children_in_capped if name == "dwp/uc/two_child_limit/children_in_affected_households": # Total children (not just affected ones) in capped households - total_children = sim.map_result(is_child * child_in_uc, "person", "household") + total_children = sim.map_result( + is_child * child_in_uc, "person", "household" + ) return total_children * capped_hh # By number of children: "dwp/uc/two_child_limit/{n}_children_households" if "_children_households_total_children" in name: n = int(name.split("/")[-1].split("_")[0]) children_count = sim.map_result(is_child, "person", "household") - return (capped_hh * (children_count == n) * children_count).astype(float) + return (capped_hh * (children_count == n) * children_count).astype( + float + ) if "_children_households" in name and "total" not in name: n = int(name.split("/")[-1].split("_")[0]) children_count = sim.map_result(is_child, "person", "household") @@ -778,7 +814,9 @@ def ft_hh(value): if ft_str == "single_no_children": match = ft_hh("SINGLE") & 
(children_per_hh == 0) elif ft_str == "single_with_children": - match = (ft_hh("SINGLE") | ft_hh("LONE_PARENT")) & (children_per_hh > 0) + match = (ft_hh("SINGLE") | ft_hh("LONE_PARENT")) & ( + children_per_hh > 0 + ) elif ft_str == "couple_no_children": match = ft_hh("COUPLE_NO_CHILDREN") elif ft_str == "couple_with_children": diff --git a/policyengine_uk_data/targets/registry.py b/policyengine_uk_data/targets/registry.py index b4c97108..ebd0af4f 100644 --- a/policyengine_uk_data/targets/registry.py +++ b/policyengine_uk_data/targets/registry.py @@ -25,7 +25,9 @@ def discover_source_modules() -> list: modules = [] package_path = Path(sources_pkg.__file__).parent for importer, modname, ispkg in pkgutil.iter_modules([str(package_path)]): - mod = importlib.import_module(f"policyengine_uk_data.targets.sources.{modname}") + mod = importlib.import_module( + f"policyengine_uk_data.targets.sources.{modname}" + ) if hasattr(mod, "get_targets"): modules.append(mod) return modules diff --git a/policyengine_uk_data/targets/sources/hmrc_salary_sacrifice.py b/policyengine_uk_data/targets/sources/hmrc_salary_sacrifice.py index 1ff2ac82..4df4e48d 100644 --- a/policyengine_uk_data/targets/sources/hmrc_salary_sacrifice.py +++ b/policyengine_uk_data/targets/sources/hmrc_salary_sacrifice.py @@ -51,7 +51,9 @@ def get_targets() -> list[Target]: targets = [] try: - r = requests.get(ref, headers=_HEADERS, allow_redirects=True, timeout=30) + r = requests.get( + ref, headers=_HEADERS, allow_redirects=True, timeout=30 + ) r.raise_for_status() df = pd.read_csv(io.StringIO(r.content.decode("utf-8-sig"))) @@ -125,6 +127,8 @@ def get_targets() -> list[Target]: ) except Exception as e: - logger.error("Failed to download/parse HMRC salary sacrifice CSV: %s", e) + logger.error( + "Failed to download/parse HMRC salary sacrifice CSV: %s", e + ) return targets diff --git a/policyengine_uk_data/targets/sources/hmrc_spi.py b/policyengine_uk_data/targets/sources/hmrc_spi.py index 296965dc..a976c668 
100644 --- a/policyengine_uk_data/targets/sources/hmrc_spi.py +++ b/policyengine_uk_data/targets/sources/hmrc_spi.py @@ -170,7 +170,9 @@ def get_targets() -> list[Target]: for idx, row in merged.iterrows(): lower = int(row["lower_bound"]) - upper = _BAND_UPPER[idx] if idx < len(_BAND_UPPER) else float("inf") + upper = ( + _BAND_UPPER[idx] if idx < len(_BAND_UPPER) else float("inf") + ) band_label = f"{lower:_}_to_{upper:_}" for variable in INCOME_VARIABLES: @@ -196,7 +198,9 @@ def get_targets() -> list[Target]: if count_col in row.index and row[count_col] > 0: targets.append( Target( - name=(f"hmrc/{variable}_count_income_band_{band_label}"), + name=( + f"hmrc/{variable}_count_income_band_{band_label}" + ), variable=variable, source="hmrc_spi", unit=Unit.COUNT, diff --git a/policyengine_uk_data/targets/sources/local_la_extras.py b/policyengine_uk_data/targets/sources/local_la_extras.py index 193daa53..fea211ca 100644 --- a/policyengine_uk_data/targets/sources/local_la_extras.py +++ b/policyengine_uk_data/targets/sources/local_la_extras.py @@ -29,7 +29,9 @@ "earningsandworkinghours/datasets/" "smallareaincomeestimatesformiddlelayersuperoutputareasenglandandwales" ) -_REF_TENURE = "https://www.gov.uk/government/statistics/english-housing-survey-2023" +_REF_TENURE = ( + "https://www.gov.uk/government/statistics/english-housing-survey-2023" +) _REF_RENT = ( "https://www.ons.gov.uk/peoplepopulationandcommunity/housing/datasets/" "privaterentalmarketsummarystatisticsinengland" diff --git a/policyengine_uk_data/targets/sources/obr.py b/policyengine_uk_data/targets/sources/obr.py index 5a5bccc3..6a95e504 100644 --- a/policyengine_uk_data/targets/sources/obr.py +++ b/policyengine_uk_data/targets/sources/obr.py @@ -56,7 +56,9 @@ def _download_workbook(url: str) -> openpyxl.Workbook: return openpyxl.load_workbook(io.BytesIO(r.content), data_only=False) -def _read_row_values(ws, row_num: int, col_letters: list[str]) -> dict[int, float]: +def _read_row_values( + ws, row_num: 
int, col_letters: list[str] +) -> dict[int, float]: """Read numeric values from a row, mapped to calendar years.""" result = {} for col in col_letters: @@ -362,7 +364,9 @@ def read_49(row_num: int) -> dict[int, float]: # Find the second UC row (outside cap section) for row in range(uc_outside_row + 1, 55): cell_val = ws[f"B{row}"].value - if cell_val and str(cell_val).strip().startswith("Universal credit"): + if cell_val and str(cell_val).strip().startswith( + "Universal credit" + ): values = read_49(row) if values: targets.append( @@ -434,8 +438,12 @@ def _parse_tv_licence(wb: openpyxl.Workbook) -> list[Target]: _PRIVATE_SCHOOL = {y: 557_000 for y in range(2018, 2032)} # SPP Review: salary sacrifice NI relief (uprated 3% pa from 2024 base) -_SS_EMPLOYEE_NI = {y: 1.2e9 * 1.03 ** max(0, y - 2024) for y in range(2024, 2032)} -_SS_EMPLOYER_NI = {y: 2.9e9 * 1.03 ** max(0, y - 2024) for y in range(2024, 2032)} +_SS_EMPLOYEE_NI = { + y: 1.2e9 * 1.03 ** max(0, y - 2024) for y in range(2024, 2032) +} +_SS_EMPLOYER_NI = { + y: 2.9e9 * 1.03 ** max(0, y - 2024) for y in range(2024, 2032) +} def get_targets() -> list[Target]: diff --git a/policyengine_uk_data/targets/sources/ons_demographics.py b/policyengine_uk_data/targets/sources/ons_demographics.py index 0a88d54b..3c48c38a 100644 --- a/policyengine_uk_data/targets/sources/ons_demographics.py +++ b/policyengine_uk_data/targets/sources/ons_demographics.py @@ -87,7 +87,9 @@ @lru_cache(maxsize=1) def _download_uk_projection() -> pd.DataFrame: """Download and parse the UK principal population projection.""" - r = requests.get(_UK_ZIP_URL, headers=_HEADERS, allow_redirects=True, timeout=120) + r = requests.get( + _UK_ZIP_URL, headers=_HEADERS, allow_redirects=True, timeout=120 + ) r.raise_for_status() z = zipfile.ZipFile(io.BytesIO(r.content)) with z.open("uk/uk_ppp_machine_readable.xlsx") as f: @@ -189,7 +191,9 @@ def _parse_regional_from_csv() -> list[Target]: for _, row in demographics.iterrows(): name = row["name"] - if name 
in _SKIP_NAMES or any(name.startswith(p) for p in _SKIP_PREFIXES): + if name in _SKIP_NAMES or any( + name.startswith(p) for p in _SKIP_PREFIXES + ): continue values = {} for y in _YEARS: diff --git a/policyengine_uk_data/targets/sources/ons_savings.py b/policyengine_uk_data/targets/sources/ons_savings.py index a2984713..21edb0c0 100644 --- a/policyengine_uk_data/targets/sources/ons_savings.py +++ b/policyengine_uk_data/targets/sources/ons_savings.py @@ -17,9 +17,7 @@ logger = logging.getLogger(__name__) -_API_URL = ( - "https://www.ons.gov.uk/economy/grossdomesticproductgdp/timeseries/haxv/ukea/data" -) +_API_URL = "https://www.ons.gov.uk/economy/grossdomesticproductgdp/timeseries/haxv/ukea/data" _REF = "https://www.ons.gov.uk/economy/grossdomesticproductgdp/timeseries/haxv/ukea" _HEADERS = { "User-Agent": ( @@ -30,7 +28,9 @@ def get_targets() -> list[Target]: try: - r = requests.get(_API_URL, headers=_HEADERS, allow_redirects=True, timeout=30) + r = requests.get( + _API_URL, headers=_HEADERS, allow_redirects=True, timeout=30 + ) r.raise_for_status() data = r.json() diff --git a/policyengine_uk_data/tests/test_target_registry.py b/policyengine_uk_data/tests/test_target_registry.py index c6f78bdd..ccc49e00 100644 --- a/policyengine_uk_data/tests/test_target_registry.py +++ b/policyengine_uk_data/tests/test_target_registry.py @@ -52,9 +52,9 @@ def test_hmrc_spi_targets_exist(): targets = get_all_targets(year=2025) spi_targets = [t for t in targets if t.source == "hmrc_spi"] # 13 bands × 6 income types × 2 (count + amount) = 156 per year - assert len(spi_targets) >= 100, ( - f"Expected 100+ SPI targets, got {len(spi_targets)}" - ) + assert ( + len(spi_targets) >= 100 + ), f"Expected 100+ SPI targets, got {len(spi_targets)}" def test_dwp_pip_targets(): @@ -76,9 +76,9 @@ def test_voa_council_tax_targets(): def test_core_target_count(): """Total target count should be substantial.""" targets = get_all_targets(year=2025) - assert len(targets) >= 200, ( - f"Expected 200+ 
targets for 2025, got {len(targets)}" - ) + assert ( + len(targets) >= 200 + ), f"Expected 200+ targets for 2025, got {len(targets)}" def test_two_child_limit_targets(): diff --git a/policyengine_uk_data/utils/loss.py b/policyengine_uk_data/utils/loss.py index 27eb919f..18d30bed 100644 --- a/policyengine_uk_data/utils/loss.py +++ b/policyengine_uk_data/utils/loss.py @@ -11,7 +11,9 @@ ) -def get_loss_results(dataset, time_period, reform=None, household_weights=None): +def get_loss_results( + dataset, time_period, reform=None, household_weights=None +): """Calculate loss metrics comparing model outputs to targets. Args: From 4bff224526eb42303a0997770d8db45049c6de98 Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Sun, 15 Feb 2026 13:48:04 +0000 Subject: [PATCH 4/6] Remove unused pkg_resources import (broken on Python 3.13) Co-Authored-By: Claude Opus 4 --- policyengine_uk_data/utils/huggingface.py | 1 - 1 file changed, 1 deletion(-) diff --git a/policyengine_uk_data/utils/huggingface.py b/policyengine_uk_data/utils/huggingface.py index d2fa27e6..1ed8de25 100644 --- a/policyengine_uk_data/utils/huggingface.py +++ b/policyengine_uk_data/utils/huggingface.py @@ -1,6 +1,5 @@ from huggingface_hub import hf_hub_download, login, HfApi import os -import pkg_resources def download( From 07e345272fd072140f4a7be2f8e1d7db2410dc22 Mon Sep 17 00:00:00 2001 From: Vahid Ahmadi Date: Mon, 16 Feb 2026 10:39:45 +0000 Subject: [PATCH 5/6] Fix PR review issues: restore dropped targets, deduplicate, decompose MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Restore hmrc/salary_sacrifice_contributions target (24bn base, 3%/yr) - Fix obr/esa to combine esa_income + esa_contrib - Restore VOA council tax population uprating for non-base years - Extract shared HEADERS/STORAGE/load_config/to_float into _common.py - Decompose build_loss_matrix.py (828→402 lines) into targets/compute/ subpackage with domain modules: demographics, households, income, 
benefits, council_tax, other Co-Authored-By: Claude Opus 4.6 --- .../targets/build_loss_matrix.py | 681 ++++-------------- .../targets/compute/__init__.py | 69 ++ .../targets/compute/benefits.py | 205 ++++++ .../targets/compute/council_tax.py | 34 + .../targets/compute/demographics.py | 79 ++ .../targets/compute/households.py | 91 +++ .../targets/compute/income.py | 81 +++ policyengine_uk_data/targets/compute/other.py | 36 + .../targets/sources/_common.py | 29 + policyengine_uk_data/targets/sources/dwp.py | 4 - .../targets/sources/hmrc_salary_sacrifice.py | 56 +- .../targets/sources/hmrc_spi.py | 62 +- .../targets/sources/local_age.py | 13 +- .../targets/sources/local_income.py | 13 +- .../targets/sources/local_la_extras.py | 13 +- policyengine_uk_data/targets/sources/obr.py | 33 +- .../targets/sources/ons_demographics.py | 15 +- .../targets/sources/ons_households.py | 8 +- .../targets/sources/ons_savings.py | 8 +- .../targets/sources/ons_tenure.py | 8 +- .../targets/sources/voa_council_tax.py | 5 +- 21 files changed, 841 insertions(+), 702 deletions(-) create mode 100644 policyengine_uk_data/targets/compute/__init__.py create mode 100644 policyengine_uk_data/targets/compute/benefits.py create mode 100644 policyengine_uk_data/targets/compute/council_tax.py create mode 100644 policyengine_uk_data/targets/compute/demographics.py create mode 100644 policyengine_uk_data/targets/compute/households.py create mode 100644 policyengine_uk_data/targets/compute/income.py create mode 100644 policyengine_uk_data/targets/compute/other.py create mode 100644 policyengine_uk_data/targets/sources/_common.py diff --git a/policyengine_uk_data/targets/build_loss_matrix.py b/policyengine_uk_data/targets/build_loss_matrix.py index ddb71593..8f95a15a 100644 --- a/policyengine_uk_data/targets/build_loss_matrix.py +++ b/policyengine_uk_data/targets/build_loss_matrix.py @@ -17,7 +17,39 @@ import pandas as pd from policyengine_uk_data.targets import get_all_targets -from 
policyengine_uk_data.targets.schema import GeographicLevel, Target, Unit +from policyengine_uk_data.targets.schema import ( + GeographicLevel, + Target, + Unit, +) +from policyengine_uk_data.targets.compute import ( + compute_benefit_cap, + compute_council_tax_band, + compute_esa, + compute_gender_age, + compute_household_type, + compute_housing, + compute_income_band, + compute_obr_council_tax, + compute_pip_claimants, + compute_regional_age, + compute_savings_interest, + compute_scotland_demographics, + compute_scotland_uc_child, + compute_scottish_child_payment, + compute_ss_contributions, + compute_ss_it_relief, + compute_ss_ni_relief, + compute_tenure, + compute_two_child_limit, + compute_uc_by_children, + compute_uc_by_family_type, + compute_uc_jobseeker, + compute_uc_outside_cap, + compute_uc_payment_dist, + compute_uk_population, + compute_vehicles, +) logger = logging.getLogger(__name__) @@ -48,10 +80,8 @@ def create_target_matrix( sim = Microsimulation(dataset=dataset, reform=reform) sim.default_calculation_period = time_period - # Helper closures for the simulation ctx = _SimContext(sim, time_period, dataset, reform) - # Fetch all targets (no year filter — we resolve values below) all_targets = [] seen = set() for level in ( @@ -86,18 +116,31 @@ def create_target_matrix( def _resolve_value(target: Target, year: int) -> float | None: - """Get the target value for a year, falling back to nearest year.""" + """Get the target value for a year, falling back to nearest year. + + VOA council tax targets are population-uprated when extrapolating + from their base year (2024). 
+ """ if year in target.values: return target.values[year] - # Use nearest available year available = sorted(target.values.keys()) if not available: return None closest = min(available, key=lambda y: abs(y - year)) - # Only allow ±3 years of extrapolation if abs(closest - year) > 3: return None - return target.values[closest] + base_value = target.values[closest] + # VOA council tax counts scale with population + if target.source == "voa" and year != closest: + from policyengine_uk_data.targets.sources.local_age import ( + get_uk_total_population, + ) + + pop_target = get_uk_total_population(year) + pop_base = get_uk_total_population(closest) + if pop_base > 0: + base_value *= pop_target / pop_base + return base_value class _SimContext: @@ -174,7 +217,7 @@ def country(self): @property def counterfactual_sim(self): - """Lazily create the salary sacrifice counterfactual simulation.""" + """Lazily create the salary sacrifice counterfactual.""" if "counterfactual_sim" not in self._cache: from policyengine_uk import Microsimulation @@ -197,26 +240,6 @@ def counterfactual_sim(self): return self._cache["counterfactual_sim"] -# ── Region name mapping ────────────────────────────────────────────── - -_REGION_MAP = { - "NORTH_EAST": "north_east", - "SOUTH_EAST": "south_east", - "EAST_MIDLANDS": "east_midlands", - "WEST_MIDLANDS": "west_midlands", - "YORKSHIRE": "yorkshire_and_the_humber", - "EAST_OF_ENGLAND": "east", - "LONDON": "london", - "SOUTH_WEST": "south_west", - "NORTH_WEST": "north_west", - "WALES": "wales", - "SCOTLAND": "scotland", - "NORTHERN_IRELAND": "northern_ireland", -} - -_REGION_INV = {v: k for k, v in _REGION_MAP.items()} - - # ── Column computation dispatch ────────────────────────────────────── @@ -225,190 +248,132 @@ def _compute_column( ) -> np.ndarray | None: """Compute the household-level column for a target. - Returns None if the target can't be computed (e.g. missing - custom_compute for a complex target). 
+ Dispatches to domain-specific compute modules. """ - # If the target has a custom compute function, use it if target.custom_compute is not None: return target.custom_compute(ctx, target, year) - # Dispatch by target name patterns and metadata name = target.name - # ── Regional age bands ──────────────────────────────────────── - # Names like "ons/north_east_age_0_9" + # Demographics if name.startswith("ons/") and "_age_" in name: - return _compute_regional_age(target, ctx) - - # ── Gender × age bands ──────────────────────────────────────── - # Names like "ons/female_0_14" - if name.startswith("ons/") and ( - name.startswith("ons/female_") or name.startswith("ons/male_") - ): - return _compute_gender_age(target, ctx) - - # ── UK total population ─────────────────────────────────────── + return compute_regional_age(target, ctx) + if name.startswith("ons/female_") or name.startswith("ons/male_"): + return compute_gender_age(target, ctx) if name == "ons/uk_population": - return ctx.household_from_person(ctx.age >= 0) + return compute_uk_population(target, ctx) + if name in ( + "ons/scotland_children_under_16", + "ons/scotland_babies_under_1", + "ons/scotland_households_3plus_children", + ): + return compute_scotland_demographics(target, ctx) - # ── Scotland-specific demographics ──────────────────────────── - if name == "ons/scotland_children_under_16": - return ctx.household_from_person( - (ctx.region.values == "SCOTLAND") & (ctx.age < 16) - ) - if name == "ons/scotland_babies_under_1": - return ctx.household_from_person( - (ctx.region.values == "SCOTLAND") & (ctx.age < 1) - ) - if name == "ons/scotland_households_3plus_children": - is_child = ctx.pe_person("is_child") - children_per_hh = ctx.household_from_person(is_child) - return ( - (ctx.household_region == "SCOTLAND") & (children_per_hh >= 3) - ).astype(float) - - # ── Household type targets ──────────────────────────────────── + # Households and tenure if target.variable == "family_type" and target.is_count: 
- return _compute_household_type(target, ctx) - - # ── Tenure targets ──────────────────────────────────────────── + return compute_household_type(target, ctx) if target.variable == "tenure_type" and target.is_count: - return _compute_tenure(target, ctx) + return compute_tenure(target, ctx) - # ── Income band breakdowns (HMRC SPI) ───────────────────────── + # Income bands (HMRC SPI) if target.breakdown_variable == "total_income": - return _compute_income_band(target, ctx) + return compute_income_band(target, ctx) - # ── Council tax bands by region (VOA) ───────────────────────── + # Council tax if name.startswith("voa/council_tax/"): - return _compute_council_tax_band(target, ctx) - - # ── Vehicle ownership (NTS) ─────────────────────────────────── - if name == "nts/households_no_vehicle": - return (ctx.pe("num_vehicles") == 0).astype(float) - if name == "nts/households_one_vehicle": - return (ctx.pe("num_vehicles") == 1).astype(float) - if name == "nts/households_two_plus_vehicles": - return (ctx.pe("num_vehicles") >= 2).astype(float) - - # ── Housing targets ─────────────────────────────────────────── - if name == "housing/total_mortgage": - return ctx.pe("mortgage_capital_repayment") + ctx.pe( - "mortgage_interest_repayment" - ) - if name == "housing/rent_private": - tenure = ctx.sim.calculate("tenure_type", map_to="household").values - return ctx.pe("rent") * (tenure == "RENT_PRIVATELY") + return compute_council_tax_band(target, ctx) + if name.startswith("obr/council_tax"): + return compute_obr_council_tax(target, ctx) - # ── Savings interest (ONS) ──────────────────────────────────── + # Vehicles + if name.startswith("nts/households_"): + return compute_vehicles(target, ctx) + + # Housing + if name in ("housing/total_mortgage", "housing/rent_private"): + return compute_housing(target, ctx) + + # Savings if name == "ons/savings_interest_income": - savings = ctx.sim.calculate("savings_interest_income") - return ctx.household_from_person(savings) + return 
compute_savings_interest(target, ctx) - # ── Scottish child payment ──────────────────────────────────── + # Scottish child payment if name == "sss/scottish_child_payment": - scp = ctx.sim.calculate("scottish_child_payment") - return ctx.household_from_person(scp) - - # ── DWP PIP claimant splits ─────────────────────────────────── - if name == "dwp/pip_dl_standard_claimants": - pip_dl = ctx.sim.calculate("pip_dl_category") - return ctx.sim.map_result(pip_dl == "STANDARD", "person", "household") - if name == "dwp/pip_dl_enhanced_claimants": - pip_dl = ctx.sim.calculate("pip_dl_category") - return ctx.sim.map_result(pip_dl == "ENHANCED", "person", "household") - - # ── DWP benefit cap ─────────────────────────────────────────── - if name == "dwp/benefit_capped_households": - reduction = ctx.sim.calculate( - "benefit_cap_reduction", map_to="household" - ).values - return (reduction > 0).astype(float) - if name == "dwp/benefit_cap_total_reduction": - return ctx.sim.calculate( - "benefit_cap_reduction", map_to="household" - ).values.astype(float) - - # ── DWP Scotland UC + child under 1 ────────────────────────── + return compute_scottish_child_payment(target, ctx) + + # PIP claimants + if name in ( + "dwp/pip_dl_standard_claimants", + "dwp/pip_dl_enhanced_claimants", + ): + return compute_pip_claimants(target, ctx) + + # Benefit cap + if name in ( + "dwp/benefit_capped_households", + "dwp/benefit_cap_total_reduction", + ): + return compute_benefit_cap(target, ctx) + + # Scotland UC + child under 1 if name == "dwp/scotland_uc_households_child_under_1": - uc = ctx.sim.calculate("universal_credit") - on_uc = ctx.household_from_family(uc > 0) > 0 - child_u1 = ctx.pe_person("is_child") & (ctx.age < 1) - has_child_u1 = ctx.household_from_person(child_u1) > 0 - return ( - (ctx.household_region == "SCOTLAND") & on_uc & has_child_u1 - ).astype(float) - - # ── UC claimants by number of children ───────────────────────── + return compute_scotland_uc_child(target, ctx) + + # UC 
claimants by children if name.startswith("dwp/uc/claimants_with_") and "_children" in name: - return _compute_uc_by_children(target, ctx) + return compute_uc_by_children(target, ctx) - # ── UC claimants by family type ────────────────────────────── + # UC claimants by family type if name.startswith("dwp/uc/claimants_") and not name.startswith( "dwp/uc/claimants_with_" ): - return _compute_uc_by_family_type(target, ctx) + return compute_uc_by_family_type(target, ctx) - # ── UC payment distribution ─────────────────────────────────── + # UC payment distribution if name.startswith("dwp/uc_payment_dist/"): - return _compute_uc_payment_dist(target, ctx) + return compute_uc_payment_dist(target, ctx) - # ── Salary sacrifice IT relief by tax band ──────────────────── + # Salary sacrifice IT relief if name.startswith("hmrc/salary_sacrifice_it_relief_"): - return _compute_ss_it_relief(target, ctx) + return compute_ss_it_relief(target, ctx) + + # Salary sacrifice contributions + if name == "hmrc/salary_sacrifice_contributions": + return compute_ss_contributions(target, ctx) - # ── Salary sacrifice NI relief ──────────────────────────────── + # Salary sacrifice NI relief if name in ( "hmrc/salary_sacrifice_employee_nics_relief", "obr/salary_sacrifice_employee_ni_relief", - ): - ni_base = ctx.sim.calculate("ni_employee") - ni_cf = ctx.counterfactual_sim.calculate( - "ni_employee", ctx.time_period - ) - return ctx.household_from_person(ni_cf - ni_base) - if name in ( "hmrc/salary_sacrifice_employer_nics_relief", "obr/salary_sacrifice_employer_ni_relief", ): - ni_base = ctx.sim.calculate("ni_employer") - ni_cf = ctx.counterfactual_sim.calculate( - "ni_employer", ctx.time_period - ) - return ctx.household_from_person(ni_cf - ni_base) + return compute_ss_ni_relief(target, ctx) - # ── UC jobseeker / non-jobseeker splits ─────────────────────── + # UC jobseeker splits if name in ( "obr/universal_credit_jobseekers", "obr/universal_credit_non_jobseekers", 
"obr/universal_credit_jobseekers_count", "obr/universal_credit_non_jobseekers_count", ): - return _compute_uc_jobseeker(target, ctx) + return compute_uc_jobseeker(target, ctx) - # ── OBR UC outside benefit cap ──────────────────────────────── + # UC outside benefit cap if name == "obr/universal_credit_outside_cap": - uc = ctx.sim.calculate("universal_credit") - uc_hh = ctx.household_from_family(uc) - cap_reduction = ctx.sim.calculate( - "benefit_cap_reduction", map_to="household" - ).values - not_capped = cap_reduction == 0 - return uc_hh * not_capped - - # ── Two-child limit targets ─────────────────────────────────── + return compute_uc_outside_cap(target, ctx) + + # Two-child limit if "two_child_limit" in name: - return _compute_two_child_limit(target, ctx) + return compute_two_child_limit(target, ctx) - # ── OBR council tax by country ──────────────────────────────── - if name.startswith("obr/council_tax"): - return _compute_obr_council_tax(target, ctx) + # ESA (combined income + contributory) + if name == "obr/esa": + return compute_esa(target, ctx) - # ── Simple GBP sum targets ──────────────────────────────────── + # Fallbacks: simple GBP sum / simple count if target.unit == Unit.GBP and not target.is_count: return _compute_simple_gbp(target, ctx) - - # ── Simple count targets ────────────────────────────────────── if target.is_count and target.unit == Unit.COUNT: return _compute_simple_count(target, ctx) @@ -416,10 +381,7 @@ def _compute_column( return None -# ── Compute implementations ────────────────────────────────────────── - - -def _compute_simple_gbp(target: Target, ctx: _SimContext) -> np.ndarray: +def _compute_simple_gbp(target: Target, ctx: _SimContext) -> np.ndarray | None: """Sum a variable at household level.""" variable = target.variable try: @@ -438,390 +400,3 @@ def _compute_simple_gbp(target: Target, ctx: _SimContext) -> np.ndarray: def _compute_simple_count(target: Target, ctx: _SimContext) -> np.ndarray: """Count recipients of a variable, 
mapped to household.""" return ctx.pe_count(target.variable) - - -def _compute_regional_age(target: Target, ctx: _SimContext) -> np.ndarray: - """Compute person count in a region × age band.""" - # Parse "ons/{region_name}_age_{lower}_{upper}" from the name - name = target.name.removeprefix("ons/") - # Find the _age_ part - idx = name.index("_age_") - region_name = name[:idx] - age_part = name[idx + 5 :] # e.g. "0_9" - lower, upper = age_part.split("_") - lower, upper = int(lower), int(upper) - - pe_region = _REGION_INV.get(region_name) - if pe_region is None: - return None - - person_match = ( - (ctx.region.values == pe_region) - & (ctx.age >= lower) - & (ctx.age <= upper) - ) - return ctx.household_from_person(person_match) - - -def _compute_gender_age(target: Target, ctx: _SimContext) -> np.ndarray: - """Compute person count in a gender × age band.""" - name = target.name.removeprefix("ons/") - # "female_0_14" or "male_75_90" - parts = name.split("_") - sex = parts[0] - lower = int(parts[1]) - upper = int(parts[2]) - - gender = ctx.sim.calculate("gender").values - sex_match = gender == ("FEMALE" if sex == "female" else "MALE") - age_match = (ctx.age >= lower) & (ctx.age <= upper) - return ctx.household_from_person(sex_match & age_match) - - -def _compute_household_type( - target: Target, ctx: _SimContext -) -> np.ndarray | None: - """Compute household type count from ONS families & households categories. - - Maps ONS household categories to PE family_type enum values and - household composition conditions. family_type is a benunit variable - so we map boolean comparisons to household level. 
- """ - name = target.name.removeprefix("ons/") - ft = ctx.sim.calculate("family_type").values # benunit level - is_child = ctx.pe_person("is_child") - children_per_hh = ctx.household_from_person(is_child) - age_hh_head = ctx.pe("age") # head of household age - - def ft_hh(value): - """Map family_type == value from benunit to household (any).""" - return ctx.household_from_family(ft == value) > 0 - - if name == "lone_households_under_65": - return ( - ft_hh("SINGLE") & (children_per_hh == 0) & (age_hh_head < 65) - ).astype(float) - if name == "lone_households_over_65": - return ( - ft_hh("SINGLE") & (children_per_hh == 0) & (age_hh_head >= 65) - ).astype(float) - if name == "unrelated_adult_households": - people_per_hh = ctx.household_from_person(np.ones_like(is_child)) - return ( - ft_hh("SINGLE") & (children_per_hh == 0) & (people_per_hh > 1) - ).astype(float) - if name == "couple_no_children_households": - return ft_hh("COUPLE_NO_CHILDREN").astype(float) - if name == "couple_under_3_children_households": - return ( - ft_hh("COUPLE_WITH_CHILDREN") - & (children_per_hh >= 1) - & (children_per_hh <= 2) - ).astype(float) - if name == "couple_3_plus_children_households": - return (ft_hh("COUPLE_WITH_CHILDREN") & (children_per_hh >= 3)).astype( - float - ) - if name == "couple_non_dependent_children_only_households": - people_per_hh = ctx.household_from_person(np.ones_like(is_child)) - return (ft_hh("COUPLE_NO_CHILDREN") & (people_per_hh > 2)).astype( - float - ) - if name == "lone_parent_dependent_children_households": - return (ft_hh("LONE_PARENT") & (children_per_hh > 0)).astype(float) - if name == "lone_parent_non_dependent_children_households": - people_per_hh = ctx.household_from_person(np.ones_like(is_child)) - return ( - ft_hh("SINGLE") - & (children_per_hh == 0) - & (people_per_hh > 1) - & (age_hh_head >= 40) - ).astype(float) - if name == "multi_family_households": - n_benunits = ctx.pe("household_num_benunits") - return (n_benunits > 1).astype(float) - - 
return None - - -def _compute_tenure(target: Target, ctx: _SimContext) -> np.ndarray | None: - """Compute dwelling count by tenure type.""" - # Map ONS target name suffixes to PE tenure_type enum values - _TENURE_MAP = { - "tenure_england_owned_outright": "OWNED_OUTRIGHT", - "tenure_england_owned_with_mortgage": "OWNED_WITH_MORTGAGE", - "tenure_england_rented_privately": "RENT_PRIVATELY", - "tenure_england_social_rent": ["RENT_FROM_COUNCIL", "RENT_FROM_HA"], - "tenure_england_total": None, # all tenures - } - suffix = target.name.removeprefix("ons/") - pe_values = _TENURE_MAP.get(suffix) - if pe_values is None and suffix == "tenure_england_total": - # Total dwellings in England - return (ctx.country == "ENGLAND").astype(float) - if pe_values is None: - return None - - tenure = ctx.sim.calculate("tenure_type", map_to="household").values - in_england = ctx.country == "ENGLAND" - if isinstance(pe_values, list): - match = np.zeros_like(tenure, dtype=bool) - for v in pe_values: - match = match | (tenure == v) - else: - match = tenure == pe_values - return (match & in_england).astype(float) - - -def _compute_income_band(target: Target, ctx: _SimContext) -> np.ndarray: - """Compute income variable within a total income band.""" - variable = target.variable - lower = target.lower_bound - upper = target.upper_bound - - income_df = ctx.sim.calculate_dataframe(["total_income", variable]) - in_band = (income_df.total_income >= lower) & ( - income_df.total_income < upper - ) - - if target.is_count: - return ctx.household_from_person((income_df[variable] > 0) * in_band) - else: - return ctx.household_from_person(income_df[variable] * in_band) - - -def _compute_council_tax_band(target: Target, ctx: _SimContext) -> np.ndarray: - """Compute council tax band count for a region.""" - # "voa/council_tax/{REGION}/{band}" - parts = target.name.split("/") - region = parts[2] - band = parts[3] - - in_region = ctx.sim.calculate("region").values == region - - if band == "total": - return 
in_region.astype(float) - - in_band = ctx.sim.calculate("council_tax_band") == band - return (in_band * in_region).astype(float) - - -def _compute_obr_council_tax(target: Target, ctx: _SimContext) -> np.ndarray: - """Compute OBR council tax receipts, optionally by country.""" - name = target.name - ct = ctx.pe("council_tax") - - if name == "obr/council_tax": - return ct - if name == "obr/council_tax_england": - return ct * (ctx.country == "ENGLAND") - if name == "obr/council_tax_scotland": - return ct * (ctx.country == "SCOTLAND") - if name == "obr/council_tax_wales": - return ct * (ctx.country == "WALES") - return ct - - -def _compute_uc_jobseeker(target: Target, ctx: _SimContext) -> np.ndarray: - """Compute UC jobseeker / non-jobseeker splits.""" - family = ctx.sim.populations["benunit"] - uc = ctx.sim.calculate("universal_credit") - on_uc = uc > 0 - unemployed = family.any( - ctx.sim.calculate("employment_status") == "UNEMPLOYED" - ) - - if "non_jobseekers" in target.name: - mask = on_uc * ~unemployed - else: - mask = on_uc * unemployed - - if "_count" in target.name: - return ctx.household_from_family(mask) - else: - return ctx.household_from_family(uc * mask) - - -def _compute_uc_payment_dist(target: Target, ctx: _SimContext) -> np.ndarray: - """Compute UC payment distribution band × family type.""" - # Parse from name: "dwp/uc_payment_dist/{family_type}_annual_payment_{lower}_to_{upper}" - name = target.name.removeprefix("dwp/uc_payment_dist/") - # Find the _annual_payment_ separator - idx = name.index("_annual_payment_") - family_type = name[:idx] - lower = target.lower_bound - upper = target.upper_bound - - uc_payments = ctx.sim.calculate( - "universal_credit", map_to="benunit" - ).values - uc_family_type = ctx.sim.calculate("family_type", map_to="benunit").values - - in_band = ( - (uc_payments >= lower) - & (uc_payments < upper) - & (uc_family_type == family_type) - ) - return ctx.household_from_family(in_band) - - -def _compute_ss_it_relief(target: 
Target, ctx: _SimContext) -> np.ndarray: - """Compute salary sacrifice IT relief by tax band.""" - it_base = ctx.sim.calculate("income_tax") - it_cf = ctx.counterfactual_sim.calculate("income_tax", ctx.time_period) - it_relief = it_cf - it_base - - adj_net_income_cf = ctx.counterfactual_sim.calculate( - "adjusted_net_income", ctx.time_period - ) - - params = ctx.sim.tax_benefit_system.parameters.gov.hmrc.income_tax.rates.uk - basic_thresh = params[0].threshold(ctx.time_period) - higher_thresh = params[1].threshold(ctx.time_period) - additional_thresh = params[2].threshold(ctx.time_period) - - name = target.name - if "basic" in name: - mask = (adj_net_income_cf > basic_thresh) & ( - adj_net_income_cf <= higher_thresh - ) - elif "higher" in name: - mask = (adj_net_income_cf > higher_thresh) & ( - adj_net_income_cf <= additional_thresh - ) - elif "additional" in name: - mask = adj_net_income_cf > additional_thresh - else: - # Total — no mask - mask = np.ones_like(it_relief, dtype=bool) - - return ctx.household_from_person(it_relief * mask) - - -def _compute_two_child_limit( - target: Target, ctx: _SimContext -) -> np.ndarray | None: - """Compute two-child limit targets. - - These involve cross-tabulations of UC eligibility, child count, - disability status, etc. Complex enough to need specific logic - per target name. 
- """ - name = target.name - sim = ctx.sim - - is_child = sim.calculate("is_child").values - child_is_affected = ( - sim.map_result( - sim.calculate("uc_is_child_limit_affected", map_to="household"), - "household", - "person", - ) - > 0 - ) * is_child - child_in_uc = sim.calculate("universal_credit", map_to="person").values > 0 - children_in_capped = sim.map_result( - child_is_affected * child_in_uc, "person", "household" - ) - capped_hh = (children_in_capped > 0) * 1.0 - - if name == "dwp/uc/two_child_limit/households_affected": - return capped_hh - if name == "dwp/uc/two_child_limit/children_affected": - return children_in_capped - if name == "dwp/uc/two_child_limit/children_in_affected_households": - # Total children (not just affected ones) in capped households - total_children = sim.map_result( - is_child * child_in_uc, "person", "household" - ) - return total_children * capped_hh - - # By number of children: "dwp/uc/two_child_limit/{n}_children_households" - if "_children_households_total_children" in name: - n = int(name.split("/")[-1].split("_")[0]) - children_count = sim.map_result(is_child, "person", "household") - return (capped_hh * (children_count == n) * children_count).astype( - float - ) - if "_children_households" in name and "total" not in name: - n = int(name.split("/")[-1].split("_")[0]) - children_count = sim.map_result(is_child, "person", "household") - match = n if n < 6 else slice(6, None) - if isinstance(match, int): - return (capped_hh * (children_count == n)).astype(float) - else: - return (capped_hh * (children_count >= 6)).astype(float) - - # Disability cross-tabs - if "adult_pip_households" in name: - pip = sim.calculate("pip", map_to="household").values - return (capped_hh * (pip > 0)).astype(float) - if "adult_pip_children" in name: - pip = sim.calculate("pip", map_to="household").values - return (children_in_capped * (pip > 0)).astype(float) - if "disabled_child_element_households" in name: - dce = sim.calculate( - 
"uc_individual_disabled_child_element", map_to="household" - ).values - return (capped_hh * (dce > 0)).astype(float) - if "disabled_child_element_children" in name: - dce = sim.calculate( - "uc_individual_disabled_child_element", map_to="household" - ).values - return (children_in_capped * (dce > 0)).astype(float) - - return None - - -def _compute_uc_by_children(target: Target, ctx: _SimContext) -> np.ndarray: - """Compute UC claimant households filtered by number of dependent children.""" - # Parse "dwp/uc/claimants_with_{n}_children" - name = target.name - n_str = name.split("claimants_with_")[1].split("_children")[0] - - uc = ctx.sim.calculate("universal_credit") - on_uc = ctx.household_from_family(uc > 0) > 0 - - is_child = ctx.pe_person("is_child") - children_per_hh = ctx.household_from_person(is_child) - - if n_str.endswith("+"): - n = int(n_str[:-1]) - match = children_per_hh >= n - else: - n = int(n_str) - match = children_per_hh == n - - return (on_uc & match).astype(float) - - -def _compute_uc_by_family_type(target: Target, ctx: _SimContext) -> np.ndarray: - """Compute UC claimant households filtered by family type.""" - name = target.name - ft_str = name.split("dwp/uc/claimants_")[1] - - uc = ctx.sim.calculate("universal_credit") - on_uc = ctx.household_from_family(uc > 0) > 0 - - ft = ctx.sim.calculate("family_type").values # benunit level - - def ft_hh(value): - return ctx.household_from_family(ft == value) > 0 - - is_child = ctx.pe_person("is_child") - children_per_hh = ctx.household_from_person(is_child) - - if ft_str == "single_no_children": - match = ft_hh("SINGLE") & (children_per_hh == 0) - elif ft_str == "single_with_children": - match = (ft_hh("SINGLE") | ft_hh("LONE_PARENT")) & ( - children_per_hh > 0 - ) - elif ft_str == "couple_no_children": - match = ft_hh("COUPLE_NO_CHILDREN") - elif ft_str == "couple_with_children": - match = ft_hh("COUPLE_WITH_CHILDREN") - else: - return None - - return (on_uc & match).astype(float) diff --git 
"""Compute subpackage: domain-specific column computation for targets."""

from policyengine_uk_data.targets.compute.benefits import (
    compute_benefit_cap,
    compute_pip_claimants,
    compute_scotland_uc_child,
    compute_two_child_limit,
    compute_uc_by_children,
    compute_uc_by_family_type,
    compute_uc_jobseeker,
    compute_uc_outside_cap,
    compute_uc_payment_dist,
)
from policyengine_uk_data.targets.compute.council_tax import (
    compute_council_tax_band,
    compute_obr_council_tax,
)
from policyengine_uk_data.targets.compute.demographics import (
    compute_gender_age,
    compute_regional_age,
    compute_scotland_demographics,
    compute_uk_population,
)
from policyengine_uk_data.targets.compute.households import (
    compute_household_type,
    compute_tenure,
)
from policyengine_uk_data.targets.compute.income import (
    compute_esa,
    compute_income_band,
    compute_ss_contributions,
    compute_ss_it_relief,
    compute_ss_ni_relief,
)
from policyengine_uk_data.targets.compute.other import (
    compute_housing,
    compute_savings_interest,
    compute_scottish_child_payment,
    compute_vehicles,
)

__all__ = [
    "compute_benefit_cap",
    "compute_council_tax_band",
    "compute_esa",
    "compute_gender_age",
    "compute_household_type",
    "compute_housing",
    "compute_income_band",
    "compute_obr_council_tax",
    "compute_pip_claimants",
    "compute_regional_age",
    "compute_savings_interest",
    "compute_scotland_demographics",
    "compute_scotland_uc_child",
    "compute_scottish_child_payment",
    "compute_ss_contributions",
    "compute_ss_it_relief",
    "compute_ss_ni_relief",
    "compute_tenure",
    "compute_two_child_limit",
    "compute_uc_by_children",
    "compute_uc_by_family_type",
    "compute_uc_jobseeker",
    "compute_uc_outside_cap",
    "compute_uc_payment_dist",
    "compute_uk_population",
    "compute_vehicles",
]


# --- policyengine_uk_data/targets/compute/benefits.py ---
"""Benefit-related compute functions (UC, PIP, benefit cap, etc)."""

import numpy as np


def compute_pip_claimants(target, ctx) -> np.ndarray:
    """Count PIP daily-living claimants at the standard or enhanced rate."""
    wanted = "STANDARD" if "standard" in target.name else "ENHANCED"
    category = ctx.sim.calculate("pip_dl_category")
    return ctx.sim.map_result(category == wanted, "person", "household")


def compute_benefit_cap(target, ctx) -> np.ndarray:
    """Benefit-cap targets: total reduction in GBP, or capped-household count."""
    reduction = ctx.sim.calculate(
        "benefit_cap_reduction", map_to="household"
    ).values
    if "total_reduction" in target.name:
        return reduction.astype(float)
    return (reduction > 0).astype(float)


def compute_scotland_uc_child(target, ctx) -> np.ndarray:
    """Scottish UC households containing a child under one year old."""
    on_uc = (
        ctx.household_from_family(
            ctx.sim.calculate("universal_credit") > 0
        )
        > 0
    )
    babies = ctx.pe_person("is_child") & (ctx.age < 1)
    has_baby = ctx.household_from_person(babies) > 0
    in_scotland = ctx.household_region == "SCOTLAND"
    return (in_scotland & on_uc & has_baby).astype(float)


def compute_uc_by_children(target, ctx) -> np.ndarray:
    """UC claimant households filtered by number of dependent children.

    Target names look like "dwp/uc/claimants_with_{n}_children" where
    {n} may carry a trailing "+" for an open upper bound.
    """
    count_spec = target.name.split("claimants_with_")[1].split("_children")[0]

    on_uc = (
        ctx.household_from_family(
            ctx.sim.calculate("universal_credit") > 0
        )
        > 0
    )
    children_per_hh = ctx.household_from_person(ctx.pe_person("is_child"))

    if count_spec.endswith("+"):
        wanted = children_per_hh >= int(count_spec[:-1])
    else:
        wanted = children_per_hh == int(count_spec)

    return (on_uc & wanted).astype(float)


def compute_uc_by_family_type(target, ctx) -> np.ndarray | None:
    """UC claimant households filtered by benefit-unit family type.

    Returns None for target names not covered by the four published
    family-type categories.
    """
    ft_str = target.name.split("dwp/uc/claimants_")[1]

    on_uc = (
        ctx.household_from_family(
            ctx.sim.calculate("universal_credit") > 0
        )
        > 0
    )
    family_type = ctx.sim.calculate("family_type").values  # benunit level
    children_per_hh = ctx.household_from_person(ctx.pe_person("is_child"))

    def hh_has(ft):
        return ctx.household_from_family(family_type == ft) > 0

    if ft_str == "single_no_children":
        selected = hh_has("SINGLE") & (children_per_hh == 0)
    elif ft_str == "single_with_children":
        selected = (hh_has("SINGLE") | hh_has("LONE_PARENT")) & (
            children_per_hh > 0
        )
    elif ft_str == "couple_no_children":
        selected = hh_has("COUPLE_NO_CHILDREN")
    elif ft_str == "couple_with_children":
        selected = hh_has("COUPLE_WITH_CHILDREN")
    else:
        return None

    return (on_uc & selected).astype(float)


def compute_uc_payment_dist(target, ctx) -> np.ndarray:
    """UC payment-distribution cell: annual payment band x family type.

    Names look like
    "dwp/uc_payment_dist/{family_type}_annual_payment_{lower}_to_{upper}";
    the numeric band comes from the target's lower/upper bounds.
    """
    stripped = target.name.removeprefix("dwp/uc_payment_dist/")
    family_type = stripped[: stripped.index("_annual_payment_")]

    payments = ctx.sim.calculate("universal_credit", map_to="benunit").values
    fam_types = ctx.sim.calculate("family_type", map_to="benunit").values

    in_cell = (
        (payments >= target.lower_bound)
        & (payments < target.upper_bound)
        & (fam_types == family_type)
    )
    return ctx.household_from_family(in_cell)


def compute_uc_jobseeker(target, ctx) -> np.ndarray:
    """UC jobseeker / non-jobseeker splits (household counts or GBP)."""
    family = ctx.sim.populations["benunit"]
    uc = ctx.sim.calculate("universal_credit")
    on_uc = uc > 0
    any_unemployed = family.any(
        ctx.sim.calculate("employment_status") == "UNEMPLOYED"
    )

    if "non_jobseekers" in target.name:
        selected = on_uc * ~any_unemployed
    else:
        selected = on_uc * any_unemployed

    if "_count" in target.name:
        return ctx.household_from_family(selected)
    return ctx.household_from_family(uc * selected)


def compute_uc_outside_cap(target, ctx) -> np.ndarray:
    """OBR UC spend restricted to households not hit by the benefit cap."""
    uc_per_hh = ctx.household_from_family(
        ctx.sim.calculate("universal_credit")
    )
    cap_reduction = ctx.sim.calculate(
        "benefit_cap_reduction", map_to="household"
    ).values
    return uc_per_hh * (cap_reduction == 0)


def compute_two_child_limit(target, ctx) -> np.ndarray | None:
    """Two-child-limit cross-tabulations.

    Covers affected households/children, splits by household child count,
    and disability (adult PIP / disabled child element) cross-tabs.
    Returns None for unrecognised target names.
    """
    name = target.name
    sim = ctx.sim

    is_child = sim.calculate("is_child").values
    affected_child = (
        sim.map_result(
            sim.calculate("uc_is_child_limit_affected", map_to="household"),
            "household",
            "person",
        )
        > 0
    ) * is_child
    child_on_uc = (
        sim.calculate("universal_credit", map_to="person").values > 0
    )
    capped_children_per_hh = sim.map_result(
        affected_child * child_on_uc, "person", "household"
    )
    capped_hh = (capped_children_per_hh > 0) * 1.0

    if name == "dwp/uc/two_child_limit/households_affected":
        return capped_hh
    if name == "dwp/uc/two_child_limit/children_affected":
        return capped_children_per_hh
    if name == "dwp/uc/two_child_limit/children_in_affected_households":
        # All children on UC (not only the limit-affected ones) in
        # capped households.
        all_uc_children = sim.map_result(
            is_child * child_on_uc, "person", "household"
        )
        return all_uc_children * capped_hh

    if "_children_households_total_children" in name:
        n = int(name.split("/")[-1].split("_")[0])
        children_count = sim.map_result(is_child, "person", "household")
        return (capped_hh * (children_count == n) * children_count).astype(
            float
        )
    if "_children_households" in name and "total" not in name:
        n = int(name.split("/")[-1].split("_")[0])
        children_count = sim.map_result(is_child, "person", "household")
        # Published tables top-code household size at "6 or more" children.
        size_match = (
            children_count >= 6 if n >= 6 else children_count == n
        )
        return (capped_hh * size_match).astype(float)

    if "adult_pip_households" in name:
        pip = sim.calculate("pip", map_to="household").values
        return (capped_hh * (pip > 0)).astype(float)
    if "adult_pip_children" in name:
        pip = sim.calculate("pip", map_to="household").values
        return (capped_children_per_hh * (pip > 0)).astype(float)
    if "disabled_child_element_households" in name:
        dce = sim.calculate(
            "uc_individual_disabled_child_element",
            map_to="household",
        ).values
        return (capped_hh * (dce > 0)).astype(float)
    if "disabled_child_element_children" in name:
        dce = sim.calculate(
            "uc_individual_disabled_child_element",
            map_to="household",
        ).values
        return (capped_children_per_hh * (dce > 0)).astype(float)

    return None


# --- policyengine_uk_data/targets/compute/council_tax.py ---
"""Council tax compute functions."""

import numpy as np


def compute_council_tax_band(target, ctx) -> np.ndarray:
    """Dwelling count in a region, optionally restricted to one CT band.

    Target names carry the region at position 2 and the band (or "total")
    at position 3 of the slash-separated name.
    """
    parts = target.name.split("/")
    region, band = parts[2], parts[3]

    in_region = ctx.sim.calculate("region").values == region

    if band == "total":
        return in_region.astype(float)

    in_band = ctx.sim.calculate("council_tax_band") == band
    return (in_band * in_region).astype(float)


def compute_obr_council_tax(target, ctx) -> np.ndarray:
    """OBR council tax receipts, optionally restricted to one country."""
    receipts = ctx.pe("council_tax")
    country = {
        "obr/council_tax_england": "ENGLAND",
        "obr/council_tax_scotland": "SCOTLAND",
        "obr/council_tax_wales": "WALES",
    }.get(target.name)
    if country is None:
        # "obr/council_tax" (and any unrecognised name): GB-wide receipts.
        return receipts
    return receipts * (ctx.country == country)
a/policyengine_uk_data/targets/compute/demographics.py b/policyengine_uk_data/targets/compute/demographics.py new file mode 100644 index 00000000..670b8072 --- /dev/null +++ b/policyengine_uk_data/targets/compute/demographics.py @@ -0,0 +1,79 @@ +"""Demographic target compute functions.""" + +import numpy as np + +_REGION_MAP = { + "NORTH_EAST": "north_east", + "SOUTH_EAST": "south_east", + "EAST_MIDLANDS": "east_midlands", + "WEST_MIDLANDS": "west_midlands", + "YORKSHIRE": "yorkshire_and_the_humber", + "EAST_OF_ENGLAND": "east", + "LONDON": "london", + "SOUTH_WEST": "south_west", + "NORTH_WEST": "north_west", + "WALES": "wales", + "SCOTLAND": "scotland", + "NORTHERN_IRELAND": "northern_ireland", +} +_REGION_INV = {v: k for k, v in _REGION_MAP.items()} + + +def compute_regional_age(target, ctx) -> np.ndarray | None: + """Compute person count in a region x age band.""" + name = target.name.removeprefix("ons/") + idx = name.index("_age_") + region_name = name[:idx] + age_part = name[idx + 5 :] + lower, upper = age_part.split("_") + lower, upper = int(lower), int(upper) + + pe_region = _REGION_INV.get(region_name) + if pe_region is None: + return None + + person_match = ( + (ctx.region.values == pe_region) + & (ctx.age >= lower) + & (ctx.age <= upper) + ) + return ctx.household_from_person(person_match) + + +def compute_gender_age(target, ctx) -> np.ndarray: + """Compute person count in a gender x age band.""" + name = target.name.removeprefix("ons/") + parts = name.split("_") + sex = parts[0] + lower = int(parts[1]) + upper = int(parts[2]) + + gender = ctx.sim.calculate("gender").values + sex_match = gender == ("FEMALE" if sex == "female" else "MALE") + age_match = (ctx.age >= lower) & (ctx.age <= upper) + return ctx.household_from_person(sex_match & age_match) + + +def compute_uk_population(target, ctx) -> np.ndarray: + """Compute UK total population column.""" + return ctx.household_from_person(ctx.age >= 0) + + +def compute_scotland_demographics(target, ctx) -> 
np.ndarray | None: + """Compute Scotland-specific demographic targets.""" + name = target.name + if name == "ons/scotland_children_under_16": + return ctx.household_from_person( + (ctx.region.values == "SCOTLAND") & (ctx.age < 16) + ) + if name == "ons/scotland_babies_under_1": + return ctx.household_from_person( + (ctx.region.values == "SCOTLAND") & (ctx.age < 1) + ) + if name == "ons/scotland_households_3plus_children": + is_child = ctx.pe_person("is_child") + children_per_hh = ctx.household_from_person(is_child) + return ( + (ctx.household_region == "SCOTLAND") & (children_per_hh >= 3) + ).astype(float) + return None diff --git a/policyengine_uk_data/targets/compute/households.py b/policyengine_uk_data/targets/compute/households.py new file mode 100644 index 00000000..be7686ac --- /dev/null +++ b/policyengine_uk_data/targets/compute/households.py @@ -0,0 +1,91 @@ +"""Household type and tenure compute functions.""" + +import numpy as np + + +def compute_household_type(target, ctx) -> np.ndarray | None: + """Compute household type count from ONS categories.""" + name = target.name.removeprefix("ons/") + ft = ctx.sim.calculate("family_type").values + is_child = ctx.pe_person("is_child") + children_per_hh = ctx.household_from_person(is_child) + age_hh_head = ctx.pe("age") + + def ft_hh(value): + return ctx.household_from_family(ft == value) > 0 + + if name == "lone_households_under_65": + return ( + ft_hh("SINGLE") & (children_per_hh == 0) & (age_hh_head < 65) + ).astype(float) + if name == "lone_households_over_65": + return ( + ft_hh("SINGLE") & (children_per_hh == 0) & (age_hh_head >= 65) + ).astype(float) + if name == "unrelated_adult_households": + people_per_hh = ctx.household_from_person(np.ones_like(is_child)) + return ( + ft_hh("SINGLE") & (children_per_hh == 0) & (people_per_hh > 1) + ).astype(float) + if name == "couple_no_children_households": + return ft_hh("COUPLE_NO_CHILDREN").astype(float) + if name == "couple_under_3_children_households": + return ( 
+ ft_hh("COUPLE_WITH_CHILDREN") + & (children_per_hh >= 1) + & (children_per_hh <= 2) + ).astype(float) + if name == "couple_3_plus_children_households": + return (ft_hh("COUPLE_WITH_CHILDREN") & (children_per_hh >= 3)).astype( + float + ) + if name == "couple_non_dependent_children_only_households": + people_per_hh = ctx.household_from_person(np.ones_like(is_child)) + return (ft_hh("COUPLE_NO_CHILDREN") & (people_per_hh > 2)).astype( + float + ) + if name == "lone_parent_dependent_children_households": + return (ft_hh("LONE_PARENT") & (children_per_hh > 0)).astype(float) + if name == "lone_parent_non_dependent_children_households": + people_per_hh = ctx.household_from_person(np.ones_like(is_child)) + return ( + ft_hh("SINGLE") + & (children_per_hh == 0) + & (people_per_hh > 1) + & (age_hh_head >= 40) + ).astype(float) + if name == "multi_family_households": + n_benunits = ctx.pe("household_num_benunits") + return (n_benunits > 1).astype(float) + + return None + + +def compute_tenure(target, ctx) -> np.ndarray | None: + """Compute dwelling count by tenure type.""" + _TENURE_MAP = { + "tenure_england_owned_outright": "OWNED_OUTRIGHT", + "tenure_england_owned_with_mortgage": "OWNED_WITH_MORTGAGE", + "tenure_england_rented_privately": "RENT_PRIVATELY", + "tenure_england_social_rent": [ + "RENT_FROM_COUNCIL", + "RENT_FROM_HA", + ], + "tenure_england_total": None, + } + suffix = target.name.removeprefix("ons/") + pe_values = _TENURE_MAP.get(suffix) + if pe_values is None and suffix == "tenure_england_total": + return (ctx.country == "ENGLAND").astype(float) + if pe_values is None: + return None + + tenure = ctx.sim.calculate("tenure_type", map_to="household").values + in_england = ctx.country == "ENGLAND" + if isinstance(pe_values, list): + match = np.zeros_like(tenure, dtype=bool) + for v in pe_values: + match = match | (tenure == v) + else: + match = tenure == pe_values + return (match & in_england).astype(float) diff --git 
# --- policyengine_uk_data/targets/compute/income.py ---
"""Income and salary sacrifice compute functions."""

import numpy as np


def compute_income_band(target, ctx) -> np.ndarray:
    """Income variable (taxpayer count or GBP amount) within a total-income band.

    The band [lower_bound, upper_bound) is applied on total_income;
    target.is_count selects counts of people with the income vs amounts.
    """
    variable = target.variable
    income_df = ctx.sim.calculate_dataframe(["total_income", variable])
    in_band = (income_df.total_income >= target.lower_bound) & (
        income_df.total_income < target.upper_bound
    )

    if target.is_count:
        return ctx.household_from_person((income_df[variable] > 0) * in_band)
    return ctx.household_from_person(income_df[variable] * in_band)


def compute_ss_it_relief(target, ctx) -> np.ndarray:
    """Salary-sacrifice income tax relief, optionally by tax band.

    Relief is counterfactual (no salary sacrifice) income tax minus the
    baseline; the band is assigned on counterfactual adjusted net income.
    A name without a band keyword means the total (no mask).
    """
    it_base = ctx.sim.calculate("income_tax")
    it_cf = ctx.counterfactual_sim.calculate("income_tax", ctx.time_period)
    it_relief = it_cf - it_base

    ani_cf = ctx.counterfactual_sim.calculate(
        "adjusted_net_income", ctx.time_period
    )

    rates = ctx.sim.tax_benefit_system.parameters.gov.hmrc.income_tax.rates.uk
    basic_thresh = rates[0].threshold(ctx.time_period)
    higher_thresh = rates[1].threshold(ctx.time_period)
    additional_thresh = rates[2].threshold(ctx.time_period)

    name = target.name
    if "basic" in name:
        mask = (ani_cf > basic_thresh) & (ani_cf <= higher_thresh)
    elif "higher" in name:
        mask = (ani_cf > higher_thresh) & (ani_cf <= additional_thresh)
    elif "additional" in name:
        mask = ani_cf > additional_thresh
    else:
        mask = np.ones_like(it_relief, dtype=bool)

    return ctx.household_from_person(it_relief * mask)


def compute_ss_contributions(target, ctx) -> np.ndarray:
    """Total pension contributions made via salary sacrifice."""
    ss = ctx.sim.calculate("pension_contributions_via_salary_sacrifice")
    return ctx.household_from_person(ss)


def compute_ss_ni_relief(target, ctx) -> np.ndarray:
    """Salary-sacrifice NI relief (employee or employer side).

    The two sides differ only in the NI variable queried, so the branch
    selects the variable name rather than duplicating the calculation.
    """
    variable = "ni_employee" if "employee" in target.name else "ni_employer"
    ni_base = ctx.sim.calculate(variable)
    ni_cf = ctx.counterfactual_sim.calculate(variable, ctx.time_period)
    return ctx.household_from_person(ni_cf - ni_base)


def compute_esa(target, ctx) -> np.ndarray:
    """ESA spend: income-related plus contributory components."""
    return ctx.household_from_person(
        ctx.sim.calculate("esa_income")
    ) + ctx.household_from_person(ctx.sim.calculate("esa_contrib"))


# --- policyengine_uk_data/targets/compute/other.py ---
"""Miscellaneous compute functions (vehicles, housing, savings, SCP)."""

import numpy as np


def compute_vehicles(target, ctx) -> np.ndarray:
    """Household counts by number of vehicles owned (0 / 1 / 2+)."""
    vehicles = ctx.pe("num_vehicles")
    if target.name == "nts/households_no_vehicle":
        return (vehicles == 0).astype(float)
    if target.name == "nts/households_one_vehicle":
        return (vehicles == 1).astype(float)
    # Any other name (e.g. "nts/households_two_plus_vehicles"): 2+ vehicles.
    return (vehicles >= 2).astype(float)


def compute_housing(target, ctx) -> np.ndarray:
    """Housing targets: total mortgage repayments, or private rent paid."""
    if target.name == "housing/total_mortgage":
        return ctx.pe("mortgage_capital_repayment") + ctx.pe(
            "mortgage_interest_repayment"
        )
    tenure = ctx.sim.calculate("tenure_type", map_to="household").values
    return ctx.pe("rent") * (tenure == "RENT_PRIVATELY")


def compute_savings_interest(target, ctx) -> np.ndarray:
    """ONS savings interest income."""
    savings = ctx.sim.calculate("savings_interest_income")
    return ctx.household_from_person(savings)


def compute_scottish_child_payment(target, ctx) -> np.ndarray:
    """Scottish Child Payment spend."""
    scp = ctx.sim.calculate("scottish_child_payment")
    return ctx.household_from_person(scp)


# --- policyengine_uk_data/targets/sources/_common.py ---
"""Shared utilities for target source modules."""

from pathlib import Path

SOURCES_YAML = Path(__file__).parent.parent / "sources.yaml"
STORAGE = Path(__file__).parents[2] / "storage"

HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)" " AppleWebKit/537.36"
    ),
}


def load_config() -> dict:
    """Parse sources.yaml, the registry's index of source URLs."""
    # Imported lazily so modules that only need STORAGE/HEADERS/to_float
    # do not require PyYAML at import time.
    import yaml

    with open(SOURCES_YAML) as f:
        return yaml.safe_load(f)


def to_float(val) -> float:
    """Convert a cell value to float, handling suppressed markers.

    Non-numeric values (e.g. "[z]", "[Not available]", None) map to 0.0.
    """
    if isinstance(val, (int, float)):
        return float(val)
    try:
        return float(val)
    except (ValueError, TypeError):
        return 0.0
b/policyengine_uk_data/targets/sources/hmrc_salary_sacrifice.py index 4df4e48d..97ec499a 100644 --- a/policyengine_uk_data/targets/sources/hmrc_salary_sacrifice.py +++ b/policyengine_uk_data/targets/sources/hmrc_salary_sacrifice.py @@ -8,51 +8,32 @@ import io import logging -from pathlib import Path import pandas as pd import requests -import yaml from policyengine_uk_data.targets.schema import Target, Unit +from policyengine_uk_data.targets.sources._common import ( + HEADERS, + load_config, + to_float, +) logger = logging.getLogger(__name__) -_SOURCES_YAML = Path(__file__).parent.parent / "sources.yaml" -_HEADERS = { - "User-Agent": ( - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36" - ), -} - # Uprate 3% pa for wage growth from the base year _GROWTH = 1.03 _BASE_YEAR = 2024 # 2023-24 tax year → calendar 2024 -def _load_config(): - with open(_SOURCES_YAML) as f: - return yaml.safe_load(f) - - -def _to_float(val) -> float: - """Convert CSV value to float, handling suppressed '[z]' etc.""" - if isinstance(val, (int, float)): - return float(val) - try: - return float(val) - except (ValueError, TypeError): - return 0.0 - - def get_targets() -> list[Target]: - config = _load_config() + config = load_config() ref = config["hmrc"]["salary_sacrifice_table_6"] targets = [] try: r = requests.get( - ref, headers=_HEADERS, allow_redirects=True, timeout=30 + ref, headers=HEADERS, allow_redirects=True, timeout=30 ) r.raise_for_status() df = pd.read_csv(io.StringIO(r.content.decode("utf-8-sig"))) @@ -67,7 +48,7 @@ def get_targets() -> list[Target]: ] for _, row in ss_it.iterrows(): rate = row["tax_rate"] - val = _to_float(row["value_of_relief"]) + val = to_float(row["value_of_relief"]) if val <= 0: continue rate_key = rate.lower().replace(" ", "_") @@ -94,7 +75,7 @@ def get_targets() -> list[Target]: ] for _, row in ss_nics.iterrows(): nics_class = row["nics_relief_class"] - val = _to_float(row["value_of_relief"]) + val = to_float(row["value_of_relief"]) if 
val <= 0: continue if "employee" in str(nics_class).lower(): @@ -131,4 +112,23 @@ def get_targets() -> list[Target]: "Failed to download/parse HMRC salary sacrifice CSV: %s", e ) + # Total salary sacrifice contributions (SPP Review 2025: £24bn base) + _SS_CONTRIBUTIONS = { + y: 24e9 * _GROWTH ** max(0, y - _BASE_YEAR) + for y in range(_BASE_YEAR, 2030) + } + targets.append( + Target( + name="hmrc/salary_sacrifice_contributions", + variable="pension_contributions_via_salary_sacrifice", + source="hmrc", + unit=Unit.GBP, + values=_SS_CONTRIBUTIONS, + reference_url=( + "https://assets.publishing.service.gov.uk/media/" + "67ce0e7c08e764d17a5d3c21/2025_SPP_Review.pdf" + ), + ) + ) + return targets diff --git a/policyengine_uk_data/targets/sources/hmrc_spi.py b/policyengine_uk_data/targets/sources/hmrc_spi.py index a976c668..de993d49 100644 --- a/policyengine_uk_data/targets/sources/hmrc_spi.py +++ b/policyengine_uk_data/targets/sources/hmrc_spi.py @@ -18,21 +18,17 @@ import pandas as pd import requests -import yaml from policyengine_uk_data.targets.schema import Target, Unit +from policyengine_uk_data.targets.sources._common import ( + HEADERS, + STORAGE, + load_config, + to_float, +) logger = logging.getLogger(__name__) -_SOURCES_YAML = Path(__file__).parent.parent / "sources.yaml" -_STORAGE = Path(__file__).parents[2] / "storage" - -_HEADERS = { - "User-Agent": ( - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36" - ), -} - # Income bands in the SPI tables (lower bounds) _BAND_LOWER = [ 12_570, @@ -55,15 +51,10 @@ _SPI_YEAR = 2023 -def _load_config(): - with open(_SOURCES_YAML) as f: - return yaml.safe_load(f) - - @lru_cache(maxsize=1) def _download_ods(url: str) -> bytes: """Download an ODS file.""" - r = requests.get(url, headers=_HEADERS, allow_redirects=True, timeout=60) + r = requests.get(url, headers=HEADERS, allow_redirects=True, timeout=60) r.raise_for_status() return r.content @@ -90,14 +81,14 @@ def _parse_table_36(ods_bytes: bytes) -> 
pd.DataFrame: data_rows.append( { "lower_bound": int(lower), - "self_employment_income_count": _to_float(df.iloc[i, 1]), - "self_employment_income_amount": _to_float(df.iloc[i, 2]), - "employment_income_count": _to_float(df.iloc[i, 4]), - "employment_income_amount": _to_float(df.iloc[i, 5]), - "state_pension_count": _to_float(df.iloc[i, 7]), - "state_pension_amount": _to_float(df.iloc[i, 8]), - "private_pension_income_count": _to_float(df.iloc[i, 10]), - "private_pension_income_amount": _to_float(df.iloc[i, 11]), + "self_employment_income_count": to_float(df.iloc[i, 1]), + "self_employment_income_amount": to_float(df.iloc[i, 2]), + "employment_income_count": to_float(df.iloc[i, 4]), + "employment_income_amount": to_float(df.iloc[i, 5]), + "state_pension_count": to_float(df.iloc[i, 7]), + "state_pension_amount": to_float(df.iloc[i, 8]), + "private_pension_income_count": to_float(df.iloc[i, 10]), + "private_pension_income_amount": to_float(df.iloc[i, 11]), } ) return pd.DataFrame(data_rows) @@ -123,24 +114,17 @@ def _parse_table_37(ods_bytes: bytes) -> pd.DataFrame: data_rows.append( { "lower_bound": int(lower), - "property_income_count": _to_float(df.iloc[i, 1]), - "property_income_amount": _to_float(df.iloc[i, 2]), - "savings_interest_income_count": _to_float(df.iloc[i, 4]), - "savings_interest_income_amount": _to_float(df.iloc[i, 5]), - "dividend_income_count": _to_float(df.iloc[i, 7]), - "dividend_income_amount": _to_float(df.iloc[i, 8]), + "property_income_count": to_float(df.iloc[i, 1]), + "property_income_amount": to_float(df.iloc[i, 2]), + "savings_interest_income_count": to_float(df.iloc[i, 4]), + "savings_interest_income_amount": to_float(df.iloc[i, 5]), + "dividend_income_count": to_float(df.iloc[i, 7]), + "dividend_income_amount": to_float(df.iloc[i, 8]), } ) return pd.DataFrame(data_rows) -def _to_float(val) -> float: - """Convert cell value to float, handling '[Not available]' etc.""" - if isinstance(val, (int, float)): - return float(val) - return 0.0 
- - INCOME_VARIABLES = [ "employment_income", "self_employment_income", @@ -157,7 +141,7 @@ def get_targets() -> list[Target]: Also reads incomes_projection.csv if available, which contains projected future year data generated by the microsimulation. """ - config = _load_config() + config = load_config() ref = config["hmrc"]["spi_collated"] targets = [] @@ -217,7 +201,7 @@ def get_targets() -> list[Target]: # Also read projected future years from incomes_projection.csv # if it exists (generated by utils/incomes_projection.py) - proj_path = _STORAGE / "incomes_projection.csv" + proj_path = STORAGE / "incomes_projection.csv" if proj_path.exists(): targets.extend(_read_projection_csv(proj_path, ref)) diff --git a/policyengine_uk_data/targets/sources/local_age.py b/policyengine_uk_data/targets/sources/local_age.py index 5cd7f744..0f74bc44 100644 --- a/policyengine_uk_data/targets/sources/local_age.py +++ b/policyengine_uk_data/targets/sources/local_age.py @@ -13,23 +13,20 @@ import pandas as pd +from policyengine_uk_data.targets.sources._common import STORAGE + logger = logging.getLogger(__name__) _CONST_DIR = ( - Path(__file__).parents[2] - / "datasets" - / "local_areas" - / "constituencies" - / "targets" + STORAGE.parent / "datasets" / "local_areas" / "constituencies" / "targets" ) _LA_DIR = ( - Path(__file__).parents[2] + STORAGE.parent / "datasets" / "local_areas" / "local_authorities" / "targets" ) -_STORAGE = Path(__file__).parents[2] / "storage" _REF = ( "https://www.ons.gov.uk/peoplepopulationandcommunity/" @@ -85,7 +82,7 @@ def get_la_age_targets() -> pd.DataFrame: def get_uk_total_population(year: int) -> float: """UK total population from demographics.csv (in persons, not thousands).""" - csv_path = _STORAGE / "demographics.csv" + csv_path = STORAGE / "demographics.csv" if not csv_path.exists(): return 69.9e6 # fallback demographics = pd.read_csv(csv_path) diff --git a/policyengine_uk_data/targets/sources/local_income.py 
b/policyengine_uk_data/targets/sources/local_income.py index 1e418313..695f790c 100644 --- a/policyengine_uk_data/targets/sources/local_income.py +++ b/policyengine_uk_data/targets/sources/local_income.py @@ -15,23 +15,20 @@ import pandas as pd +from policyengine_uk_data.targets.sources._common import STORAGE + logger = logging.getLogger(__name__) _CONST_DIR = ( - Path(__file__).parents[2] - / "datasets" - / "local_areas" - / "constituencies" - / "targets" + STORAGE.parent / "datasets" / "local_areas" / "constituencies" / "targets" ) _LA_DIR = ( - Path(__file__).parents[2] + STORAGE.parent / "datasets" / "local_areas" / "local_authorities" / "targets" ) -_STORAGE = Path(__file__).parents[2] / "storage" _REF = ( "https://www.gov.uk/government/statistics/" @@ -84,7 +81,7 @@ def get_national_income_projections(year: int) -> pd.DataFrame: Returns the incomes_projection.csv rows for the requested year, filtered to the above-personal-allowance band (12570+). """ - path = _STORAGE / "incomes_projection.csv" + path = STORAGE / "incomes_projection.csv" if not path.exists(): return pd.DataFrame() df = pd.read_csv(path) diff --git a/policyengine_uk_data/targets/sources/local_la_extras.py b/policyengine_uk_data/targets/sources/local_la_extras.py index fea211ca..76bcf06d 100644 --- a/policyengine_uk_data/targets/sources/local_la_extras.py +++ b/policyengine_uk_data/targets/sources/local_la_extras.py @@ -12,13 +12,12 @@ """ import logging -from pathlib import Path import pandas as pd -logger = logging.getLogger(__name__) +from policyengine_uk_data.targets.sources._common import STORAGE -_STORAGE = Path(__file__).parents[2] / "storage" +logger = logging.getLogger(__name__) # Uprating factors from FYE 2020 to 2025 (OBR Nov 2025 EFO) UPRATING_NET_INCOME_BHC_2020_TO_2025 = 1985.1 / 1467.6 @@ -44,7 +43,7 @@ def load_ons_la_income() -> pd.DataFrame: Returns DataFrame with columns: la_code, total_income, net_income_bhc, net_income_ahc (mean income per household, FYE 2020). 
""" - xlsx_path = _STORAGE / "local_authority_ons_income.xlsx" + xlsx_path = STORAGE / "local_authority_ons_income.xlsx" if not xlsx_path.exists(): logger.warning("ONS LA income file not found: %s", xlsx_path) return pd.DataFrame() @@ -85,7 +84,7 @@ def load_household_counts() -> pd.DataFrame: Returns DataFrame with columns: la_code, households. """ - path = _STORAGE / "la_count_households.xlsx" + path = STORAGE / "la_count_households.xlsx" if not path.exists(): logger.warning("LA household count file not found: %s", path) return pd.DataFrame() @@ -100,7 +99,7 @@ def load_tenure_data() -> pd.DataFrame: Returns DataFrame with columns: la_code, owned_outright_pct, owned_mortgage_pct, private_rent_pct, social_rent_pct. """ - path = _STORAGE / "la_tenure.xlsx" + path = STORAGE / "la_tenure.xlsx" if not path.exists(): logger.warning("LA tenure file not found: %s", path) return pd.DataFrame() @@ -131,7 +130,7 @@ def load_private_rents() -> pd.DataFrame: Returns DataFrame with columns: area_code, median_annual_rent. 
""" - path = _STORAGE / "la_private_rents_median.xlsx" + path = STORAGE / "la_private_rents_median.xlsx" if not path.exists(): logger.warning("LA private rent file not found: %s", path) return pd.DataFrame() diff --git a/policyengine_uk_data/targets/sources/obr.py b/policyengine_uk_data/targets/sources/obr.py index 6a95e504..45513cff 100644 --- a/policyengine_uk_data/targets/sources/obr.py +++ b/policyengine_uk_data/targets/sources/obr.py @@ -12,18 +12,18 @@ import io import logging from functools import lru_cache -from pathlib import Path import openpyxl import requests -import yaml from policyengine_uk_data.targets.schema import Target, Unit +from policyengine_uk_data.targets.sources._common import ( + HEADERS, + load_config, +) logger = logging.getLogger(__name__) -_SOURCES_YAML = Path(__file__).parent.parent / "sources.yaml" - # Financial year columns in OBR tables: C=2024-25, D=2025-26, ..., I=2030-31 # PolicyEngine convention: FY 2025-26 → calendar year 2025 (first year) _FY_COL_TO_YEAR = { @@ -36,22 +36,11 @@ "I": 2030, } -_HEADERS = { - "User-Agent": ( - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36" - ), -} - - -def _load_config(): - with open(_SOURCES_YAML) as f: - return yaml.safe_load(f) - @lru_cache(maxsize=1) def _download_workbook(url: str) -> openpyxl.Workbook: """Download an xlsx from OBR and return an openpyxl workbook.""" - r = requests.get(url, headers=_HEADERS, allow_redirects=True, timeout=60) + r = requests.get(url, headers=HEADERS, allow_redirects=True, timeout=60) r.raise_for_status() return openpyxl.load_workbook(io.BytesIO(r.content), data_only=False) @@ -85,7 +74,7 @@ def _parse_receipts(wb: openpyxl.Workbook) -> list[Target]: the standard fiscal forecasting convention. Other receipts use Table 3.9 (cash basis) since they only appear there. 
""" - config = _load_config() + config = load_config() vintage = config["obr"]["vintage"] ref = config["obr"]["efo_receipts"] cols_34 = list(_FY_COL_TO_YEAR.keys()) @@ -169,7 +158,7 @@ def read_39(ws, row_num: int) -> dict[int, float]: def _parse_council_tax(wb: openpyxl.Workbook) -> list[Target]: """Parse Table 4.1 (council tax receipts) from expenditure xlsx.""" - config = _load_config() + config = load_config() vintage = config["obr"]["vintage"] ref = config["obr"]["efo_expenditure"] ws = wb["4.1"] @@ -233,7 +222,7 @@ def read_41(row_num: int) -> dict[int, float]: def _parse_nics(wb: openpyxl.Workbook) -> list[Target]: """Parse Table 3.4 (income tax and NICs detail) for employee/employer.""" - config = _load_config() + config = load_config() vintage = config["obr"]["vintage"] ref = config["obr"]["efo_receipts"] ws = wb["3.4"] @@ -275,7 +264,7 @@ def _parse_nics(wb: openpyxl.Workbook) -> list[Target]: def _parse_welfare(wb: openpyxl.Workbook) -> list[Target]: """Parse Table 4.9 (welfare spending) from expenditure xlsx.""" - config = _load_config() + config = load_config() vintage = config["obr"]["vintage"] ref = config["obr"]["efo_expenditure"] ws = wb["4.9"] @@ -389,7 +378,7 @@ def read_49(row_num: int) -> dict[int, float]: def _parse_tv_licence(wb: openpyxl.Workbook) -> list[Target]: """Parse Table 4.19 (BBC) from expenditure xlsx.""" - config = _load_config() + config = load_config() vintage = config["obr"]["vintage"] ref = config["obr"]["efo_expenditure"] @@ -447,7 +436,7 @@ def _parse_tv_licence(wb: openpyxl.Workbook) -> list[Target]: def get_targets() -> list[Target]: - config = _load_config() + config = load_config() targets = [] try: diff --git a/policyengine_uk_data/targets/sources/ons_demographics.py b/policyengine_uk_data/targets/sources/ons_demographics.py index 3c48c38a..a51adf39 100644 --- a/policyengine_uk_data/targets/sources/ons_demographics.py +++ b/policyengine_uk_data/targets/sources/ons_demographics.py @@ -20,7 +20,6 @@ import logging import 
zipfile from functools import lru_cache -from pathlib import Path import pandas as pd import requests @@ -30,18 +29,10 @@ Target, Unit, ) +from policyengine_uk_data.targets.sources._common import HEADERS, STORAGE logger = logging.getLogger(__name__) -_SOURCES_YAML = Path(__file__).parent.parent / "sources.yaml" -_STORAGE = Path(__file__).parents[2] / "storage" - -_HEADERS = { - "User-Agent": ( - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36" - ), -} - _UK_ZIP_URL = ( "https://www.ons.gov.uk/file?uri=/peoplepopulationandcommunity/" "populationandmigration/populationprojections/datasets/" @@ -88,7 +79,7 @@ def _download_uk_projection() -> pd.DataFrame: """Download and parse the UK principal population projection.""" r = requests.get( - _UK_ZIP_URL, headers=_HEADERS, allow_redirects=True, timeout=120 + _UK_ZIP_URL, headers=HEADERS, allow_redirects=True, timeout=120 ) r.raise_for_status() z = zipfile.ZipFile(io.BytesIO(r.content)) @@ -165,7 +156,7 @@ def _parse_regional_from_csv() -> list[Target]: This CSV was extracted from ONS subnational projections which lack a stable machine-readable download URL. 
""" - csv_path = _STORAGE / "demographics.csv" + csv_path = STORAGE / "demographics.csv" if not csv_path.exists(): logger.warning("demographics.csv not found, skipping regional") return [] diff --git a/policyengine_uk_data/targets/sources/ons_households.py b/policyengine_uk_data/targets/sources/ons_households.py index 88d51cb8..9fd4b49f 100644 --- a/policyengine_uk_data/targets/sources/ons_households.py +++ b/policyengine_uk_data/targets/sources/ons_households.py @@ -14,6 +14,7 @@ import requests from policyengine_uk_data.targets.schema import Target, Unit +from policyengine_uk_data.targets.sources._common import HEADERS logger = logging.getLogger(__name__) @@ -28,11 +29,6 @@ "birthsdeathsandmarriages/families/datasets/" "familiesandhouseholdsfamiliesandhouseholds" ) -_HEADERS = { - "User-Agent": ( - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36" - ), -} # Table 7 rows: (row_number, target_name) # Row numbers are 1-indexed in the xlsx @@ -56,7 +52,7 @@ @lru_cache(maxsize=1) def _download_workbook() -> openpyxl.Workbook: - r = requests.get(_URL, headers=_HEADERS, allow_redirects=True, timeout=60) + r = requests.get(_URL, headers=HEADERS, allow_redirects=True, timeout=60) r.raise_for_status() return openpyxl.load_workbook(io.BytesIO(r.content), data_only=True) diff --git a/policyengine_uk_data/targets/sources/ons_savings.py b/policyengine_uk_data/targets/sources/ons_savings.py index 21edb0c0..3764f22e 100644 --- a/policyengine_uk_data/targets/sources/ons_savings.py +++ b/policyengine_uk_data/targets/sources/ons_savings.py @@ -14,22 +14,18 @@ import requests from policyengine_uk_data.targets.schema import Target, Unit +from policyengine_uk_data.targets.sources._common import HEADERS logger = logging.getLogger(__name__) _API_URL = "https://www.ons.gov.uk/economy/grossdomesticproductgdp/timeseries/haxv/ukea/data" _REF = "https://www.ons.gov.uk/economy/grossdomesticproductgdp/timeseries/haxv/ukea" -_HEADERS = { - "User-Agent": ( - "Mozilla/5.0 
(Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36" - ), -} def get_targets() -> list[Target]: try: r = requests.get( - _API_URL, headers=_HEADERS, allow_redirects=True, timeout=30 + _API_URL, headers=HEADERS, allow_redirects=True, timeout=30 ) r.raise_for_status() data = r.json() diff --git a/policyengine_uk_data/targets/sources/ons_tenure.py b/policyengine_uk_data/targets/sources/ons_tenure.py index 0ae4ccdd..1da49b95 100644 --- a/policyengine_uk_data/targets/sources/ons_tenure.py +++ b/policyengine_uk_data/targets/sources/ons_tenure.py @@ -19,6 +19,7 @@ Target, Unit, ) +from policyengine_uk_data.targets.sources._common import HEADERS logger = logging.getLogger(__name__) @@ -31,11 +32,6 @@ "https://www.ons.gov.uk/peoplepopulationandcommunity/" "housing/datasets/subnationaldwellingstockbytenureestimates" ) -_HEADERS = { - "User-Agent": ( - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36" - ), -} # Tenure categories in the xlsx header → target name suffix _TENURE_COLS = { @@ -49,7 +45,7 @@ @lru_cache(maxsize=1) def _download_workbook() -> openpyxl.Workbook: - r = requests.get(_URL, headers=_HEADERS, allow_redirects=True, timeout=60) + r = requests.get(_URL, headers=HEADERS, allow_redirects=True, timeout=60) r.raise_for_status() return openpyxl.load_workbook(io.BytesIO(r.content), data_only=True) diff --git a/policyengine_uk_data/targets/sources/voa_council_tax.py b/policyengine_uk_data/targets/sources/voa_council_tax.py index 19c3d92a..0a0af5d1 100644 --- a/policyengine_uk_data/targets/sources/voa_council_tax.py +++ b/policyengine_uk_data/targets/sources/voa_council_tax.py @@ -8,21 +8,20 @@ """ import pandas as pd -from pathlib import Path from policyengine_uk_data.targets.schema import ( GeographicLevel, Target, Unit, ) +from policyengine_uk_data.targets.sources._common import STORAGE -_STORAGE = Path(__file__).parents[2] / "storage" _REF = "https://www.gov.uk/government/statistics/council-tax-stock-of-properties-2024" def get_targets() -> 
list[Target]: """Build council tax band targets from the CSV.""" - csv_path = _STORAGE / "council_tax_bands_2024.csv" + csv_path = STORAGE / "council_tax_bands_2024.csv" if not csv_path.exists(): return [] From 7d5ada9a2aaa13dcb62e9098d72ee599519caee1 Mon Sep 17 00:00:00 2001 From: Vahid Ahmadi Date: Wed, 18 Feb 2026 10:39:29 +0000 Subject: [PATCH 6/6] Format income.py with black Co-Authored-By: Claude Opus 4.6 --- policyengine_uk_data/targets/compute/income.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/policyengine_uk_data/targets/compute/income.py b/policyengine_uk_data/targets/compute/income.py index ea4c3613..c2f286c4 100644 --- a/policyengine_uk_data/targets/compute/income.py +++ b/policyengine_uk_data/targets/compute/income.py @@ -86,13 +86,12 @@ def compute_ss_headcount(target, ctx) -> np.ndarray: prices before applying the threshold. """ ss = ctx.sim.calculate("pension_contributions_via_salary_sacrifice") - uprating = pd.read_csv( - STORAGE_FOLDER / "uprating_factors.csv" - ).set_index("Variable") + uprating = pd.read_csv(STORAGE_FOLDER / "uprating_factors.csv").set_index( + "Variable" + ) row = "pension_contributions_via_salary_sacrifice" price_adj = ( - uprating.loc[row, "2023"] - / uprating.loc[row, str(ctx.time_period)] + uprating.loc[row, "2023"] / uprating.loc[row, str(ctx.time_period)] ) ss_base = ss * price_adj