From 1ef54142fca144def00fc8a299d4ad2766e89dca Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Fri, 12 Dec 2025 17:49:31 +0000 Subject: [PATCH 1/5] Adjust economic impact function to take Simulation kwargs --- src/policyengine/outputs/decile_impact.py | 24 +++++++++++++++++-- .../tax_benefit_models/uk/analysis.py | 19 ++++++++++++--- .../tax_benefit_models/us/analysis.py | 19 ++++++++++++--- 3 files changed, 54 insertions(+), 8 deletions(-) diff --git a/src/policyengine/outputs/decile_impact.py b/src/policyengine/outputs/decile_impact.py index 8fcc8579..f58c631b 100644 --- a/src/policyengine/outputs/decile_impact.py +++ b/src/policyengine/outputs/decile_impact.py @@ -2,6 +2,10 @@ from pydantic import ConfigDict from policyengine.core import Output, OutputCollection, Simulation +from policyengine.core.dataset import Dataset +from policyengine.core.dynamic import Dynamic +from policyengine.core.policy import Policy +from policyengine.core.tax_benefit_model_version import TaxBenefitModelVersion class DecileImpact(Output): @@ -93,8 +97,11 @@ def run(self): def calculate_decile_impacts( - baseline_simulation: Simulation, - reform_simulation: Simulation, + dataset: Dataset, + tax_benefit_model_version: TaxBenefitModelVersion, + baseline_policy: Policy | None = None, + reform_policy: Policy | None = None, + dynamic: Dynamic | None = None, income_variable: str = "equiv_hbai_household_net_income", entity: str | None = None, quantiles: int = 10, @@ -104,6 +111,19 @@ def calculate_decile_impacts( Returns: OutputCollection containing list of DecileImpact objects and DataFrame """ + baseline_simulation = Simulation( + dataset=dataset, + tax_benefit_model_version=tax_benefit_model_version, + policy=baseline_policy, + dynamic=dynamic, + ) + reform_simulation = Simulation( + dataset=dataset, + tax_benefit_model_version=tax_benefit_model_version, + policy=reform_policy, + dynamic=dynamic, + ) + results = [] for decile in range(1, quantiles + 1): impact = DecileImpact( diff --git a/src/policyengine/tax_benefit_models/uk/analysis.py b/src/policyengine/tax_benefit_models/uk/analysis.py index 40805bf2..e8069455 100644 --- a/src/policyengine/tax_benefit_models/uk/analysis.py +++ b/src/policyengine/tax_benefit_models/uk/analysis.py @@ -19,7 +19,7 @@ class PolicyReformAnalysis(BaseModel): programme_statistics: OutputCollection[ProgrammeStatistics] -def general_policy_reform_analysis( +def economic_impact_analysis( baseline_simulation: Simulation, reform_simulation: Simulation, ) -> PolicyReformAnalysis: @@ -28,10 +28,23 @@ def general_policy_reform_analysis( Returns: PolicyReformAnalysis containing decile impacts and programme statistics """ + baseline_simulation.ensure() + reform_simulation.ensure() + + assert ( + len(baseline_simulation.dataset.data.household) > 100 + ), "Baseline simulation must have more than 100 households" + assert ( + len(reform_simulation.dataset.data.household) > 100 + ), "Reform simulation must have more than 100 households" + # Decile impact decile_impacts = calculate_decile_impacts( - baseline_simulation=baseline_simulation, - reform_simulation=reform_simulation, + dataset=baseline_simulation.dataset, + tax_benefit_model_version=baseline_simulation.tax_benefit_model_version, + baseline_policy=baseline_simulation.policy, + reform_policy=reform_simulation.policy, + dynamic=baseline_simulation.dynamic, ) # Major programmes to analyse diff --git a/src/policyengine/tax_benefit_models/us/analysis.py b/src/policyengine/tax_benefit_models/us/analysis.py index c3098d45..c56f2bef 100644 --- a/src/policyengine/tax_benefit_models/us/analysis.py +++ b/src/policyengine/tax_benefit_models/us/analysis.py @@ -19,7 +19,7 @@ class PolicyReformAnalysis(BaseModel): program_statistics: OutputCollection[ProgramStatistics] -def general_policy_reform_analysis( +def economic_impact_analysis( baseline_simulation: Simulation, reform_simulation: Simulation, ) -> PolicyReformAnalysis: @@ -28,10 +28,23 @@ def general_policy_reform_analysis( Returns: PolicyReformAnalysis containing decile impacts and program statistics """ + baseline_simulation.ensure() + reform_simulation.ensure() + + assert ( + len(baseline_simulation.dataset.data.household) > 100 + ), "Baseline simulation must have more than 100 households" + assert ( + len(reform_simulation.dataset.data.household) > 100 + ), "Reform simulation must have more than 100 households" + # Decile impact (using household_net_income for US) decile_impacts = calculate_decile_impacts( - baseline_simulation=baseline_simulation, - reform_simulation=reform_simulation, + dataset=baseline_simulation.dataset, + tax_benefit_model_version=baseline_simulation.tax_benefit_model_version, + baseline_policy=baseline_simulation.policy, + reform_policy=reform_simulation.policy, + dynamic=baseline_simulation.dynamic, income_variable="household_net_income", ) From 0b1afd0dfd8e3e6f77a412bbb40b33ed971ebbb1 Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Fri, 12 Dec 2025 18:04:04 +0000 Subject: [PATCH 2/5] Add household impacts --- .../tax_benefit_models/uk/__init__.py | 12 +- .../tax_benefit_models/uk/analysis.py | 144 +++++++++++++- .../tax_benefit_models/us/__init__.py | 12 +- .../tax_benefit_models/us/analysis.py | 160 +++++++++++++++- tests/test_household_impact.py | 176 ++++++++++++++++++ uv.lock | 2 +- 6 files changed, 499 insertions(+), 7 deletions(-) create mode 100644 tests/test_household_impact.py diff --git a/src/policyengine/tax_benefit_models/uk/__init__.py b/src/policyengine/tax_benefit_models/uk/__init__.py index d933589d..09e697b7 100644 --- a/src/policyengine/tax_benefit_models/uk/__init__.py +++ b/src/policyengine/tax_benefit_models/uk/__init__.py @@ -5,7 +5,12 @@ if find_spec("policyengine_uk") is not None: from policyengine.core import Dataset - from .analysis import general_policy_reform_analysis + from .analysis import ( + UKHouseholdInput, + UKHouseholdOutput, + calculate_household_impact, + economic_impact_analysis, + ) from .datasets import ( PolicyEngineUKDataset, UKYearData, @@ -37,7 +42,10 @@ "PolicyEngineUKLatest", "uk_model", "uk_latest", - "general_policy_reform_analysis", + "economic_impact_analysis", + "calculate_household_impact", + "UKHouseholdInput", + "UKHouseholdOutput", "ProgrammeStatistics", ] else: diff --git a/src/policyengine/tax_benefit_models/uk/analysis.py b/src/policyengine/tax_benefit_models/uk/analysis.py index e8069455..607e25aa 100644 --- a/src/policyengine/tax_benefit_models/uk/analysis.py +++ b/src/policyengine/tax_benefit_models/uk/analysis.py @@ -1,17 +1,159 @@ """General utility functions for UK policy reform analysis.""" +import tempfile +from pathlib import Path +from typing import Any + import pandas as pd -from pydantic import BaseModel +from microdf import MicroDataFrame +from pydantic import BaseModel, Field, create_model from policyengine.core import OutputCollection, Simulation +from policyengine.core.policy import Policy from policyengine.outputs.decile_impact import ( DecileImpact, calculate_decile_impacts, ) +from .datasets import PolicyEngineUKDataset, UKYearData +from .model import uk_latest from .outputs import ProgrammeStatistics +def _create_entity_output_model(entity: str, variables: list[str]) -> type[BaseModel]: + """Create a dynamic Pydantic model for entity output variables.""" + fields = {var: (float, ...) for var in variables} + return create_model(f"{entity.title()}Output", **fields) + + +# Create output models dynamically from uk_latest.entity_variables +PersonOutput = _create_entity_output_model("person", uk_latest.entity_variables["person"]) +BenunitOutput = _create_entity_output_model("benunit", uk_latest.entity_variables["benunit"]) +HouseholdEntityOutput = _create_entity_output_model("household", uk_latest.entity_variables["household"]) + + +class UKHouseholdOutput(BaseModel): + """Output from a UK household calculation with all entity data.""" + + person: list[dict[str, Any]] + benunit: list[dict[str, Any]] + household: dict[str, Any] + + +class UKHouseholdInput(BaseModel): + """Input for a UK household calculation.""" + + people: list[dict[str, Any]] + benunit: dict[str, Any] = Field(default_factory=dict) + household: dict[str, Any] = Field(default_factory=dict) + year: int = 2026 + + +def calculate_household_impact( + household_input: UKHouseholdInput, + policy: Policy | None = None, +) -> UKHouseholdOutput: + """Calculate tax and benefit impacts for a single UK household.""" + n_people = len(household_input.people) + + # Build person data with defaults + person_data = { + "person_id": list(range(n_people)), + "person_benunit_id": [0] * n_people, + "person_household_id": [0] * n_people, + "person_weight": [1.0] * n_people, + } + # Add user-provided person fields + for i, person in enumerate(household_input.people): + for key, value in person.items(): + if key not in person_data: + person_data[key] = [0.0] * n_people # Default to 0 for numeric fields + person_data[key][i] = value + + # Build benunit data with defaults + benunit_data = { + "benunit_id": [0], + "benunit_weight": [1.0], + } + for key, value in household_input.benunit.items(): + benunit_data[key] = [value] + + # Build household data with defaults (required for uprating) + household_data = { + "household_id": [0], + "household_weight": [1.0], + "region": ["LONDON"], + "tenure_type": ["RENT_PRIVATELY"], + "council_tax": [0.0], + "rent": [0.0], + } + for key, value in household_input.household.items(): + household_data[key] = [value] + + # Create MicroDataFrames + person_df = MicroDataFrame(pd.DataFrame(person_data), weights="person_weight") + benunit_df = MicroDataFrame(pd.DataFrame(benunit_data), weights="benunit_weight") + household_df = MicroDataFrame(pd.DataFrame(household_data), weights="household_weight") + + # Create temporary dataset + tmpdir = tempfile.mkdtemp() + filepath = str(Path(tmpdir) / "household_impact.h5") + + dataset = PolicyEngineUKDataset( + name="Household impact calculation", + description="Single household for impact calculation", + filepath=filepath, + year=household_input.year, + data=UKYearData( + person=person_df, + benunit=benunit_df, + household=household_df, + ), + ) + + # Run simulation + simulation = Simulation( + dataset=dataset, + tax_benefit_model_version=uk_latest, + policy=policy, + ) + simulation.run() + + # Extract all output variables defined in entity_variables + output_data = simulation.output_dataset.data + + def safe_convert(value): + """Convert value to float if numeric, otherwise return as string.""" + try: + return float(value) + except (ValueError, TypeError): + return str(value) + + person_outputs = [] + for i in range(n_people): + person_dict = {} + for var in uk_latest.entity_variables["person"]: + person_dict[var] = safe_convert(output_data.person[var].iloc[i]) + person_outputs.append(person_dict) + + benunit_outputs = [] + for i in range(len(output_data.benunit)): + benunit_dict = {} + for var in uk_latest.entity_variables["benunit"]: + benunit_dict[var] = safe_convert(output_data.benunit[var].iloc[i]) + benunit_outputs.append(benunit_dict) + + household_dict = {} + for var in uk_latest.entity_variables["household"]: + household_dict[var] = safe_convert(output_data.household[var].iloc[0]) + + return UKHouseholdOutput( + person=person_outputs, + benunit=benunit_outputs, + household=household_dict, + ) + + class PolicyReformAnalysis(BaseModel): """Complete policy reform analysis result.""" diff --git a/src/policyengine/tax_benefit_models/us/__init__.py b/src/policyengine/tax_benefit_models/us/__init__.py index 26d9da96..b5a95b3f 100644 --- a/src/policyengine/tax_benefit_models/us/__init__.py +++ b/src/policyengine/tax_benefit_models/us/__init__.py @@ -5,7 +5,12 @@ if find_spec("policyengine_us") is not None: from policyengine.core import Dataset - from .analysis import general_policy_reform_analysis + from .analysis import ( + USHouseholdInput, + USHouseholdOutput, + calculate_household_impact, + economic_impact_analysis, + ) from .datasets import ( PolicyEngineUSDataset, USYearData, @@ -37,7 +42,10 @@ "PolicyEngineUSLatest", "us_model", "us_latest", - "general_policy_reform_analysis", + "economic_impact_analysis", + "calculate_household_impact", + "USHouseholdInput", + "USHouseholdOutput", "ProgramStatistics", ] else: diff --git a/src/policyengine/tax_benefit_models/us/analysis.py b/src/policyengine/tax_benefit_models/us/analysis.py index c56f2bef..3c77e6fa 100644 --- a/src/policyengine/tax_benefit_models/us/analysis.py +++ b/src/policyengine/tax_benefit_models/us/analysis.py @@ -1,17 +1,175 @@ """General utility functions for US policy reform analysis.""" +import tempfile +from pathlib import Path +from typing import Any + import pandas as pd -from pydantic import BaseModel +from microdf import MicroDataFrame +from pydantic import BaseModel, Field from policyengine.core import OutputCollection, Simulation +from policyengine.core.policy import Policy from policyengine.outputs.decile_impact import ( DecileImpact, calculate_decile_impacts, ) +from .datasets import PolicyEngineUSDataset, USYearData +from .model import us_latest from .outputs import ProgramStatistics +class USHouseholdOutput(BaseModel): + """Output from a US household calculation with all entity data.""" + + person: list[dict[str, Any]] + marital_unit: list[dict[str, Any]] + family: list[dict[str, Any]] + spm_unit: list[dict[str, Any]] + tax_unit: list[dict[str, Any]] + household: dict[str, Any] + + +class USHouseholdInput(BaseModel): + """Input for a US household calculation.""" + + people: list[dict[str, Any]] + marital_unit: dict[str, Any] = Field(default_factory=dict) + family: dict[str, Any] = Field(default_factory=dict) + spm_unit: dict[str, Any] = Field(default_factory=dict) + tax_unit: dict[str, Any] = Field(default_factory=dict) + household: dict[str, Any] = Field(default_factory=dict) + year: int = 2024 + + +def calculate_household_impact( + household_input: USHouseholdInput, + policy: Policy | None = None, +) -> USHouseholdOutput: + """Calculate tax and benefit impacts for a single US household.""" + n_people = len(household_input.people) + + # Build person data with defaults + person_data = { + "person_id": list(range(n_people)), + "person_household_id": [0] * n_people, + "person_marital_unit_id": [0] * n_people, + "person_family_id": [0] * n_people, + "person_spm_unit_id": [0] * n_people, + "person_tax_unit_id": [0] * n_people, + "person_weight": [1.0] * n_people, + } + # Add user-provided person fields + for i, person in enumerate(household_input.people): + for key, value in person.items(): + if key not in person_data: + person_data[key] = [0.0] * n_people # Default to 0 for numeric fields + person_data[key][i] = value + + # Build entity data with defaults + household_data = { + "household_id": [0], + "household_weight": [1.0], + } + for key, value in household_input.household.items(): + household_data[key] = [value] + + marital_unit_data = { + "marital_unit_id": [0], + "marital_unit_weight": [1.0], + } + for key, value in household_input.marital_unit.items(): + marital_unit_data[key] = [value] + + family_data = { + "family_id": [0], + "family_weight": [1.0], + } + for key, value in household_input.family.items(): + family_data[key] = [value] + + spm_unit_data = { + "spm_unit_id": [0], + "spm_unit_weight": [1.0], + } + for key, value in household_input.spm_unit.items(): + spm_unit_data[key] = [value] + + tax_unit_data = { + "tax_unit_id": [0], + "tax_unit_weight": [1.0], + } + for key, value in household_input.tax_unit.items(): + tax_unit_data[key] = [value] + + # Create MicroDataFrames + person_df = MicroDataFrame(pd.DataFrame(person_data), weights="person_weight") + household_df = MicroDataFrame(pd.DataFrame(household_data), weights="household_weight") + marital_unit_df = MicroDataFrame(pd.DataFrame(marital_unit_data), weights="marital_unit_weight") + family_df = MicroDataFrame(pd.DataFrame(family_data), weights="family_weight") + spm_unit_df = MicroDataFrame(pd.DataFrame(spm_unit_data), weights="spm_unit_weight") + tax_unit_df = MicroDataFrame(pd.DataFrame(tax_unit_data), weights="tax_unit_weight") + + # Create temporary dataset + tmpdir = tempfile.mkdtemp() + filepath = str(Path(tmpdir) / "household_impact.h5") + + dataset = PolicyEngineUSDataset( + name="Household impact calculation", + description="Single household for impact calculation", + filepath=filepath, + year=household_input.year, + data=USYearData( + person=person_df, + household=household_df, + marital_unit=marital_unit_df, + family=family_df, + spm_unit=spm_unit_df, + tax_unit=tax_unit_df, + ), + ) + + # Run simulation + simulation = Simulation( + dataset=dataset, + tax_benefit_model_version=us_latest, + policy=policy, + ) + simulation.run() + + # Extract all output variables defined in entity_variables + output_data = simulation.output_dataset.data + + def safe_convert(value): + """Convert value to float if numeric, otherwise return as string.""" + try: + return float(value) + except (ValueError, TypeError): + return str(value) + + def extract_entity_outputs(entity_name: str, entity_data, n_rows: int) -> list[dict[str, Any]]: + outputs = [] + for i in range(n_rows): + row_dict = {} + for var in us_latest.entity_variables[entity_name]: + row_dict[var] = safe_convert(entity_data[var].iloc[i]) + outputs.append(row_dict) + return outputs + + return USHouseholdOutput( + person=extract_entity_outputs("person", output_data.person, n_people), + marital_unit=extract_entity_outputs("marital_unit", output_data.marital_unit, 1), + family=extract_entity_outputs("family", output_data.family, 1), + spm_unit=extract_entity_outputs("spm_unit", output_data.spm_unit, 1), + tax_unit=extract_entity_outputs("tax_unit", output_data.tax_unit, 1), + household={ + var: safe_convert(output_data.household[var].iloc[0]) + for var in us_latest.entity_variables["household"] + }, + ) + + class PolicyReformAnalysis(BaseModel): """Complete policy reform analysis result.""" diff --git a/tests/test_household_impact.py b/tests/test_household_impact.py new file mode 100644 index 00000000..0ddf70f7 --- /dev/null +++ b/tests/test_household_impact.py @@ -0,0 +1,176 @@ +"""Tests for calculate_household_impact functions.""" + +import pytest + +from policyengine.tax_benefit_models.uk import ( + UKHouseholdInput, + UKHouseholdOutput, + calculate_household_impact as calculate_uk_household_impact, + uk_latest, +) +from policyengine.tax_benefit_models.us import ( + USHouseholdInput, + USHouseholdOutput, + calculate_household_impact as calculate_us_household_impact, + us_latest, +) + + +class TestUKHouseholdImpact: + """Tests for UK calculate_household_impact.""" + + def test_single_adult_no_income(self): + """Single adult with no income should have output for all entity variables.""" + household = UKHouseholdInput( + people=[{"age": 30}], + year=2026, + ) + result = calculate_uk_household_impact(household) + + assert isinstance(result, UKHouseholdOutput) + assert len(result.person) == 1 + assert len(result.benunit) == 1 + assert "hbai_household_net_income" in result.household + + def test_single_adult_with_employment_income(self): + """Single adult with employment income should pay tax.""" + household = UKHouseholdInput( + people=[{"age": 30, "employment_income": 50000}], + year=2026, + ) + result = calculate_uk_household_impact(household) + + assert isinstance(result, UKHouseholdOutput) + assert result.person[0]["income_tax"] > 0 + assert result.person[0]["national_insurance"] > 0 + assert result.household["hbai_household_net_income"] > 0 + + def test_family_with_children(self): + """Family with children should receive child benefit.""" + household = UKHouseholdInput( + people=[ + {"age": 35, "employment_income": 30000}, + {"age": 8}, + {"age": 5}, + ], + benunit={"would_claim_child_benefit": True}, + year=2026, + ) + result = calculate_uk_household_impact(household) + + assert isinstance(result, UKHouseholdOutput) + assert len(result.person) == 3 + assert result.benunit[0]["child_benefit"] > 0 + + def test_output_contains_all_entity_variables(self): + """Output should contain all variables from entity_variables.""" + household = UKHouseholdInput( + people=[{"age": 30, "employment_income": 25000}], + year=2026, + ) + result = calculate_uk_household_impact(household) + + # Check all household variables are present + for var in uk_latest.entity_variables["household"]: + assert var in result.household, f"Missing household variable: {var}" + + # Check all person variables are present + for var in uk_latest.entity_variables["person"]: + assert var in result.person[0], f"Missing person variable: {var}" + + # Check all benunit variables are present + for var in uk_latest.entity_variables["benunit"]: + assert var in result.benunit[0], f"Missing benunit variable: {var}" + + def test_output_is_json_serializable(self): + """Output should be JSON serializable.""" + household = UKHouseholdInput( + people=[{"age": 30, "employment_income": 25000}], + year=2026, + ) + result = calculate_uk_household_impact(household) + + json_dict = result.model_dump() + assert isinstance(json_dict, dict) + assert "household" in json_dict + assert "person" in json_dict + + def test_input_is_json_serializable(self): + """Input should be JSON serializable.""" + household = UKHouseholdInput( + people=[{"age": 30, "employment_income": 25000}], + year=2026, + ) + + json_dict = household.model_dump() + assert isinstance(json_dict, dict) + assert "people" in json_dict + + +class TestUSHouseholdImpact: + """Tests for US calculate_household_impact.""" + + def test_single_adult_no_income(self): + """Single adult with no income.""" + household = USHouseholdInput( + people=[{"age": 30, "is_tax_unit_head": True}], + year=2024, + ) + result = calculate_us_household_impact(household) + + assert isinstance(result, USHouseholdOutput) + assert len(result.person) == 1 + assert "household_net_income" in result.household + + def test_single_adult_with_employment_income(self): + """Single adult with employment income should pay tax.""" + household = USHouseholdInput( + people=[{"age": 30, "employment_income": 50000, "is_tax_unit_head": True}], + tax_unit={"filing_status": "SINGLE"}, + year=2024, + ) + result = calculate_us_household_impact(household) + + assert isinstance(result, USHouseholdOutput) + assert result.tax_unit[0]["income_tax"] > 0 + assert result.household["household_net_income"] > 0 + + def test_output_contains_all_entity_variables(self): + """Output should contain all variables from entity_variables.""" + household = USHouseholdInput( + people=[{"age": 30, "employment_income": 25000, "is_tax_unit_head": True}], + year=2024, + ) + result = calculate_us_household_impact(household) + + # Check all household variables are present + for var in us_latest.entity_variables["household"]: + assert var in result.household, f"Missing household variable: {var}" + + # Check all person variables are present + for var in us_latest.entity_variables["person"]: + assert var in result.person[0], f"Missing person variable: {var}" + + def test_output_is_json_serializable(self): + """Output should be JSON serializable.""" + household = USHouseholdInput( + people=[{"age": 30, "employment_income": 25000, "is_tax_unit_head": True}], + year=2024, + ) + result = calculate_us_household_impact(household) + + json_dict = result.model_dump() + assert isinstance(json_dict, dict) + assert "household" in json_dict + assert "person" in json_dict + + def test_input_is_json_serializable(self): + """Input should be JSON serializable.""" + household = USHouseholdInput( + people=[{"age": 30, "employment_income": 25000, "is_tax_unit_head": True}], + year=2024, + ) + + json_dict = household.model_dump() + assert isinstance(json_dict, dict) + assert "people" in json_dict diff --git a/uv.lock b/uv.lock index 8b519db7..55fe0b9c 100644 --- a/uv.lock +++ b/uv.lock @@ -1080,7 +1080,7 @@ wheels = [ [[package]] name = "policyengine" -version = "3.1.13" +version = "3.1.14" source = { editable = "." } dependencies = [ { name = "microdf-python" }, From 3b008ed100c5351174975bde67b19f0ddc20de23 Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Sun, 14 Dec 2025 23:43:00 +0000 Subject: [PATCH 3/5] Add household impact example --- examples/household_impact_example.py | 124 ++++++++++++++++++ .../core/tax_benefit_model_version.py | 2 +- .../tax_benefit_models/uk/analysis.py | 44 +++++-- .../tax_benefit_models/uk/model.py | 5 + .../tax_benefit_models/us/analysis.py | 48 ++++--- tests/test_household_impact.py | 40 +++++- 6 files changed, 227 insertions(+), 36 deletions(-) create mode 100644 examples/household_impact_example.py diff --git a/examples/household_impact_example.py b/examples/household_impact_example.py new file mode 100644 index 00000000..f4f63236 --- /dev/null +++ b/examples/household_impact_example.py @@ -0,0 +1,124 @@ +"""Example: Calculate household tax and benefit impacts. + +This script demonstrates using calculate_household_impact for both UK and US +to compute taxes and benefits for custom households. + +Run: python examples/household_impact_example.py +""" + +from policyengine.tax_benefit_models.uk import ( + UKHouseholdInput, + calculate_household_impact as calculate_uk_impact, +) +from policyengine.tax_benefit_models.us import ( + USHouseholdInput, + calculate_household_impact as calculate_us_impact, +) + + +def uk_example(): + """UK household impact example.""" + print("=" * 60) + print("UK HOUSEHOLD IMPACT") + print("=" * 60) + + # Single adult earning £50,000 + household = UKHouseholdInput( + people=[{"age": 35, "employment_income": 50_000}], + year=2026, + ) + result = calculate_uk_impact(household) + + print("\nSingle adult, £50k income:") + print( + f" Net income: £{result.household['hbai_household_net_income']:,.0f}" + ) + print(f" Income tax: £{result.person[0]['income_tax']:,.0f}") + print( + f" National Insurance: £{result.person[0]['national_insurance']:,.0f}" + ) + print(f" Total tax: £{result.household['household_tax']:,.0f}") + + # Family with two children, £30k income, renting + household = UKHouseholdInput( + people=[ + {"age": 35, "employment_income": 30_000}, + {"age": 33}, + {"age": 8}, + {"age": 5}, + ], + benunit={ + "would_claim_uc": True, + "would_claim_child_benefit": True, + }, + household={ + "rent": 12_000, # £1k/month + "region": "NORTH_WEST", + }, + year=2026, + ) + result = calculate_uk_impact(household) + + print("\nFamily (2 adults, 2 children), £30k income, renting:") + print( + f" Net income: £{result.household['hbai_household_net_income']:,.0f}" + ) + print(f" Income tax: £{result.person[0]['income_tax']:,.0f}") + print(f" Child benefit: £{result.benunit[0]['child_benefit']:,.0f}") + print(f" Universal credit: £{result.benunit[0]['universal_credit']:,.0f}") + print(f" Total benefits: £{result.household['household_benefits']:,.0f}") + + +def us_example(): + """US household impact example.""" + print("\n" + "=" * 60) + print("US HOUSEHOLD IMPACT") + print("=" * 60) + + # Single adult earning $50,000 + household = USHouseholdInput( + people=[ + {"age": 35, "employment_income": 50_000, "is_tax_unit_head": True} + ], + tax_unit={"filing_status": "SINGLE"}, + household={"state_code_str": "CA"}, + year=2024, + ) + result = calculate_us_impact(household) + + print("\nSingle adult, $50k income (California):") + print(f" Net income: ${result.household['household_net_income']:,.0f}") + print(f" Income tax: ${result.tax_unit[0]['income_tax']:,.0f}") + print(f" Payroll tax: ${result.tax_unit[0]['employee_payroll_tax']:,.0f}") + + # Married couple with children, lower income + household = USHouseholdInput( + people=[ + {"age": 35, "employment_income": 40_000, "is_tax_unit_head": True}, + {"age": 33, "is_tax_unit_spouse": True}, + {"age": 8, "is_tax_unit_dependent": True}, + {"age": 5, "is_tax_unit_dependent": True}, + ], + tax_unit={"filing_status": "JOINT"}, + household={"state_code_str": "TX"}, + year=2024, + ) + result = calculate_us_impact(household) + + print("\nMarried couple with 2 children, $40k income (Texas):") + print(f" Net income: ${result.household['household_net_income']:,.0f}") + print(f" Federal income tax: ${result.tax_unit[0]['income_tax']:,.0f}") + print(f" EITC: ${result.tax_unit[0]['eitc']:,.0f}") + print(f" Child tax credit: ${result.tax_unit[0]['ctc']:,.0f}") + print(f" SNAP: ${result.spm_unit[0]['snap']:,.0f}") + + +def main(): + uk_example() + us_example() + print("\n" + "=" * 60) + print("Done!") + + +if __name__ == "__main__": + main() diff --git a/src/policyengine/core/tax_benefit_model_version.py b/src/policyengine/core/tax_benefit_model_version.py index bd172c47..dd294388 100644 --- a/src/policyengine/core/tax_benefit_model_version.py +++ b/src/policyengine/core/tax_benefit_model_version.py @@ -32,7 +32,7 @@ def parameter_values(self) -> list["ParameterValue"]: pv for parameter in self.parameters for pv in parameter.parameter_values - ) + ) # Lookup dicts for O(1) access (excluded from serialization) variables_by_name: dict[str, "Variable"] = Field( diff --git a/src/policyengine/tax_benefit_models/uk/analysis.py b/src/policyengine/tax_benefit_models/uk/analysis.py index 607e25aa..5d633619 100644 --- a/src/policyengine/tax_benefit_models/uk/analysis.py +++ b/src/policyengine/tax_benefit_models/uk/analysis.py @@ -20,16 +20,24 @@ from .outputs import ProgrammeStatistics -def _create_entity_output_model(entity: str, variables: list[str]) -> type[BaseModel]: +def _create_entity_output_model( + entity: str, variables: list[str] +) -> type[BaseModel]: """Create a dynamic Pydantic model for entity output variables.""" fields = {var: (float, ...) for var in variables} return create_model(f"{entity.title()}Output", **fields) # Create output models dynamically from uk_latest.entity_variables -PersonOutput = _create_entity_output_model("person", uk_latest.entity_variables["person"]) -BenunitOutput = _create_entity_output_model("benunit", uk_latest.entity_variables["benunit"]) -HouseholdEntityOutput = _create_entity_output_model("household", uk_latest.entity_variables["household"]) +PersonOutput = _create_entity_output_model( + "person", uk_latest.entity_variables["person"] +) +BenunitOutput = _create_entity_output_model( + "benunit", uk_latest.entity_variables["benunit"] +) +HouseholdEntityOutput = _create_entity_output_model( + "household", uk_latest.entity_variables["household"] +) class UKHouseholdOutput(BaseModel): @@ -67,7 +75,9 @@ def calculate_household_impact( for i, person in enumerate(household_input.people): for key, value in person.items(): if key not in person_data: - person_data[key] = [0.0] * n_people # Default to 0 for numeric fields + person_data[key] = [ + 0.0 + ] * n_people # Default to 0 for numeric fields person_data[key][i] = value # Build benunit data with defaults @@ -91,9 +101,15 @@ def calculate_household_impact( household_data[key] = [value] # Create MicroDataFrames - person_df = MicroDataFrame(pd.DataFrame(person_data), weights="person_weight") - benunit_df = MicroDataFrame(pd.DataFrame(benunit_data), weights="benunit_weight") - household_df = MicroDataFrame(pd.DataFrame(household_data), weights="household_weight") + person_df = MicroDataFrame( + pd.DataFrame(person_data), weights="person_weight" + ) + benunit_df = MicroDataFrame( + pd.DataFrame(benunit_data), weights="benunit_weight" + ) + household_df = MicroDataFrame( + pd.DataFrame(household_data), weights="household_weight" + ) # Create temporary dataset tmpdir = tempfile.mkdtemp() @@ -173,12 +189,12 @@ def economic_impact_analysis( baseline_simulation.ensure() reform_simulation.ensure() - assert ( - len(baseline_simulation.dataset.data.household) > 100 - ), "Baseline simulation must have more than 100 households" - assert ( - len(reform_simulation.dataset.data.household) > 100 - ), "Reform simulation must have more than 100 households" + assert len(baseline_simulation.dataset.data.household) > 100, ( + "Baseline simulation must have more than 100 households" + ) + assert len(reform_simulation.dataset.data.household) > 100, ( + "Reform simulation must have more than 100 households" + ) # Decile impact decile_impacts = calculate_decile_impacts( diff --git a/src/policyengine/tax_benefit_models/uk/model.py b/src/policyengine/tax_benefit_models/uk/model.py index 27f6a5f6..4d7f3ac8 100644 --- a/src/policyengine/tax_benefit_models/uk/model.py +++ b/src/policyengine/tax_benefit_models/uk/model.py @@ -108,6 +108,11 @@ class PolicyEngineUKLatest(TaxBenefitModelVersion): "rent", "council_tax", "tenure_type", + # Poverty measures + "in_poverty_bhc", + "in_poverty_ahc", + "in_relative_poverty_bhc", + "in_relative_poverty_ahc", ], } diff --git a/src/policyengine/tax_benefit_models/us/analysis.py b/src/policyengine/tax_benefit_models/us/analysis.py index 3c77e6fa..f626a8c6 100644 --- a/src/policyengine/tax_benefit_models/us/analysis.py +++ b/src/policyengine/tax_benefit_models/us/analysis.py @@ -64,7 +64,9 @@ def calculate_household_impact( for i, person in enumerate(household_input.people): for key, value in person.items(): if key not in person_data: - person_data[key] = [0.0] * n_people # Default to 0 for numeric fields + person_data[key] = [ + 0.0 + ] * n_people # Default to 0 for numeric fields person_data[key][i] = value # Build entity data with defaults @@ -104,12 +106,24 @@ def calculate_household_impact( tax_unit_data[key] = [value] # Create MicroDataFrames - person_df = MicroDataFrame(pd.DataFrame(person_data), weights="person_weight") - household_df = MicroDataFrame(pd.DataFrame(household_data), weights="household_weight") - marital_unit_df = MicroDataFrame(pd.DataFrame(marital_unit_data), weights="marital_unit_weight") - family_df = MicroDataFrame(pd.DataFrame(family_data), weights="family_weight") - spm_unit_df = MicroDataFrame(pd.DataFrame(spm_unit_data), weights="spm_unit_weight") - tax_unit_df = MicroDataFrame(pd.DataFrame(tax_unit_data), weights="tax_unit_weight") + person_df = MicroDataFrame( + pd.DataFrame(person_data), weights="person_weight" + ) + household_df = MicroDataFrame( + pd.DataFrame(household_data), weights="household_weight" + ) + marital_unit_df = MicroDataFrame( + pd.DataFrame(marital_unit_data), weights="marital_unit_weight" + ) + family_df = MicroDataFrame( + pd.DataFrame(family_data), weights="family_weight" + ) + spm_unit_df = MicroDataFrame( + pd.DataFrame(spm_unit_data), weights="spm_unit_weight" + ) + tax_unit_df = MicroDataFrame( + pd.DataFrame(tax_unit_data), weights="tax_unit_weight" + ) # Create temporary dataset tmpdir = tempfile.mkdtemp() @@ -148,7 +162,9 @@ def safe_convert(value): except (ValueError, TypeError): return str(value) - def extract_entity_outputs(entity_name: str, entity_data, n_rows: int) -> list[dict[str, Any]]: + def extract_entity_outputs( + entity_name: str, entity_data, n_rows: int + ) -> list[dict[str, Any]]: outputs = [] for i in range(n_rows): row_dict = {} @@ -159,7 +175,9 @@ def extract_entity_outputs(entity_name: str, entity_data, n_rows: int) -> list[d return USHouseholdOutput( person=extract_entity_outputs("person", output_data.person, n_people), - marital_unit=extract_entity_outputs("marital_unit", output_data.marital_unit, 1), + marital_unit=extract_entity_outputs( + "marital_unit", output_data.marital_unit, 1 + ), family=extract_entity_outputs("family", output_data.family, 1), spm_unit=extract_entity_outputs("spm_unit", output_data.spm_unit, 1), tax_unit=extract_entity_outputs("tax_unit", output_data.tax_unit, 1), @@ -189,12 +207,12 @@ def economic_impact_analysis( baseline_simulation.ensure() reform_simulation.ensure() - assert ( - len(baseline_simulation.dataset.data.household) > 100 - ), "Baseline simulation must have more than 100 households" - assert ( - len(reform_simulation.dataset.data.household) > 100 - ), "Reform simulation must have more than 100 households" + assert len(baseline_simulation.dataset.data.household) > 100, ( + "Baseline simulation must have more than 100 households" + ) + assert len(reform_simulation.dataset.data.household) > 100, ( + "Reform simulation must have more than 100 households" + ) # Decile impact (using household_net_income for US) decile_impacts = calculate_decile_impacts( diff --git a/tests/test_household_impact.py b/tests/test_household_impact.py index 0ddf70f7..92238ed1 100644 --- a/tests/test_household_impact.py +++ b/tests/test_household_impact.py @@ -72,7 +72,9 @@ def test_output_contains_all_entity_variables(self): # Check all household variables are present for var in uk_latest.entity_variables["household"]: - assert var in result.household, f"Missing household variable: {var}" + assert var in result.household, ( + f"Missing household variable: {var}" + ) # Check all person variables are present for var in uk_latest.entity_variables["person"]: @@ -125,7 +127,13 @@ def test_single_adult_no_income(self): def test_single_adult_with_employment_income(self): """Single adult with employment income should pay tax.""" household = USHouseholdInput( - people=[{"age": 30, "employment_income": 50000, "is_tax_unit_head": True}], + people=[ + { + "age": 30, + "employment_income": 50000, + "is_tax_unit_head": True, + } + ], tax_unit={"filing_status": "SINGLE"}, year=2024, ) @@ -138,14 +146,22 @@ def test_single_adult_with_employment_income(self): def test_output_contains_all_entity_variables(self): """Output should contain all variables from entity_variables.""" household = USHouseholdInput( - people=[{"age": 30, "employment_income": 25000, "is_tax_unit_head": True}], + people=[ + { + "age": 30, + "employment_income": 25000, + "is_tax_unit_head": True, + } + ], year=2024, ) result = calculate_us_household_impact(household) # Check all household variables are present for var in us_latest.entity_variables["household"]: - assert var in result.household, f"Missing household variable: {var}" + assert var in result.household, ( + f"Missing household variable: {var}" + ) # Check all person variables are present for var in us_latest.entity_variables["person"]: @@ -154,7 +170,13 @@ def test_output_contains_all_entity_variables(self): def test_output_is_json_serializable(self): """Output should be JSON serializable.""" household = USHouseholdInput( - people=[{"age": 30, "employment_income": 25000, "is_tax_unit_head": True}], + people=[ + { + "age": 30, + "employment_income": 25000, + "is_tax_unit_head": True, + } + ], year=2024, ) result = calculate_us_household_impact(household) @@ -167,7 +189,13 @@ def test_output_is_json_serializable(self): def test_input_is_json_serializable(self): """Input should be JSON serializable.""" household = USHouseholdInput( - people=[{"age": 30, "employment_income": 25000, "is_tax_unit_head": True}], + people=[ + { + "age": 30, + "employment_income": 25000, + "is_tax_unit_head": True, + } + ], year=2024, ) From 12f58d92a3056f600eefdd946ab89c52289a6d85 Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Sun, 14 Dec 2025 23:49:00 +0000 Subject: [PATCH 4/5] Pass tests --- examples/household_impact_example.py | 4 ++++ src/policyengine/core/tax_benefit_model_version.py | 4 ++-- src/policyengine/tax_benefit_models/uk/model.py | 2 -- src/policyengine/tax_benefit_models/us/model.py | 2 -- tests/test_household_impact.py | 9 ++++++--- tests/test_models.py | 1 - 6 files changed, 12 insertions(+), 10 deletions(-) diff --git a/examples/household_impact_example.py b/examples/household_impact_example.py index f4f63236..3474447b 100644 --- a/examples/household_impact_example.py +++ b/examples/household_impact_example.py @@ -8,10 +8,14 @@ from policyengine.tax_benefit_models.uk import ( UKHouseholdInput, +) +from policyengine.tax_benefit_models.uk import ( calculate_household_impact as calculate_uk_impact, ) from policyengine.tax_benefit_models.us import ( USHouseholdInput, +) +from policyengine.tax_benefit_models.us import ( calculate_household_impact as calculate_us_impact, ) diff --git a/src/policyengine/core/tax_benefit_model_version.py b/src/policyengine/core/tax_benefit_model_version.py index dd294388..be9d5af3 100644 --- a/src/policyengine/core/tax_benefit_model_version.py +++ b/src/policyengine/core/tax_benefit_model_version.py @@ -28,11 +28,11 @@ class TaxBenefitModelVersion(BaseModel): @property def parameter_values(self) -> list["ParameterValue"]: """Aggregate all parameter values from all parameters.""" - yield from ( + return [ pv for parameter in self.parameters for pv in parameter.parameter_values - ) + ] # Lookup dicts for O(1) access (excluded from serialization) variables_by_name: dict[str, "Variable"] = Field( diff --git a/src/policyengine/tax_benefit_models/uk/model.py b/src/policyengine/tax_benefit_models/uk/model.py index 4d7f3ac8..f547ac59 100644 --- a/src/policyengine/tax_benefit_models/uk/model.py +++ b/src/policyengine/tax_benefit_models/uk/model.py @@ -9,12 +9,10 @@ from policyengine.core import ( Parameter, - ParameterValue, TaxBenefitModel, TaxBenefitModelVersion, Variable, ) -from policyengine.utils import parse_safe_date from .datasets import PolicyEngineUKDataset, UKYearData diff --git a/src/policyengine/tax_benefit_models/us/model.py b/src/policyengine/tax_benefit_models/us/model.py index 9180444a..17844feb 100644 --- a/src/policyengine/tax_benefit_models/us/model.py +++ b/src/policyengine/tax_benefit_models/us/model.py @@ -9,12 +9,10 @@ from policyengine.core import ( Parameter, - ParameterValue, TaxBenefitModel, TaxBenefitModelVersion, Variable, ) -from policyengine.utils import parse_safe_date from .datasets import PolicyEngineUSDataset, USYearData diff --git a/tests/test_household_impact.py b/tests/test_household_impact.py index 92238ed1..a6616219 100644 --- a/tests/test_household_impact.py +++ b/tests/test_household_impact.py @@ -1,19 +1,22 @@ """Tests for calculate_household_impact functions.""" -import pytest from policyengine.tax_benefit_models.uk import ( UKHouseholdInput, UKHouseholdOutput, - calculate_household_impact as calculate_uk_household_impact, uk_latest, ) +from policyengine.tax_benefit_models.uk import ( + calculate_household_impact as calculate_uk_household_impact, +) from policyengine.tax_benefit_models.us import ( USHouseholdInput, USHouseholdOutput, - calculate_household_impact as calculate_us_household_impact, us_latest, ) +from policyengine.tax_benefit_models.us import ( + calculate_household_impact as calculate_us_household_impact, +) class TestUKHouseholdImpact: diff --git a/tests/test_models.py b/tests/test_models.py index 686dc7c3..02936f62 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -1,6 +1,5 @@ """Tests for UK and US tax-benefit model versions.""" -import pytest from policyengine.tax_benefit_models.uk import uk_latest from policyengine.tax_benefit_models.us import us_latest From 57fa570ccde2b1bdd46f7da0b2a81196d809743e Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Sun, 14 Dec 2025 23:51:01 +0000 Subject: [PATCH 5/5] Add household impacts with schema --- changelog_entry.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/changelog_entry.yaml b/changelog_entry.yaml index e69de29b..c33e2c5d 100644 --- a/changelog_entry.yaml +++ b/changelog_entry.yaml @@ -0,0 +1,4 @@ +- bump: patch + changes: + added: + - Household impacts