From 0b00952b0b04df3bc00b382645fc768d78387930 Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Sat, 31 Jan 2026 11:51:47 -0500 Subject: [PATCH] Fix federal income tax calibration to use income_tax_positive CBO reports income tax receipts where refundable credit payments in excess of liability are outlays, not negative receipts. Changes the calibration target from income_tax to income_tax_positive to match. Recreated from PR #495 rebased onto main. Closes #494 Co-Authored-By: Claude Opus 4.5 --- .beads/issues.jsonl | 4 +++ changelog_entry.yaml | 4 +++ .../db/etl_national_targets.py | 28 +++++++++++++++---- 3 files changed, 30 insertions(+), 6 deletions(-) create mode 100644 .beads/issues.jsonl diff --git a/.beads/issues.jsonl b/.beads/issues.jsonl new file mode 100644 index 000000000..a9dbe60f7 --- /dev/null +++ b/.beads/issues.jsonl @@ -0,0 +1,4 @@ +{"id":"policyengine-us-data-apq","title":"Add age and demographics to pre-tax contribution QRF imputation","description":"The QRF in puf.py that imputes pre_tax_contributions from CPS to PUF uses only employment_income as a predictor. Age, filing status, and number of dependents are strong predictors of 401(k) participation and contribution rates. Adding these should improve the distributional accuracy.","status":"closed","priority":2,"issue_type":"feature","created_at":"2026-01-31T08:01:22.72749-05:00","updated_at":"2026-01-31T08:08:02.675063-05:00","closed_at":"2026-01-31T08:08:02.675063-05:00"} +{"id":"policyengine-us-data-jhh","title":"Parameterize retirement contribution limits by year","description":"The contribution waterfall in cps.py hardcodes 2022 limits ($20,500 401k, $6,500 catch-up, $6,000 IRA, $1,000 IRA catch-up). These should be pulled from PolicyEngine parameters or a year-indexed lookup so the dataset builds correctly for any year.","status":"closed","priority":2,"issue_type":"bug","created_at":"2026-01-31T08:01:18.941246-05:00","updated_at":"2026-01-31T08:08:02.614396-05:00","closed_at":"2026-01-31T08:08:02.614396-05:00"} +{"id":"policyengine-us-data-mnw","title":"Use SS_SC source code for Social Security retirement/disability split","description":"Currently cps.py uses a hard age-62 cutoff to split SS into retirement vs disability. The CPS ASEC has SS_SC (Social Security source codes) that distinguish retirement, disability, and survivor benefits. Use these codes instead of the age heuristic.","status":"closed","priority":2,"issue_type":"bug","created_at":"2026-01-31T08:01:21.01419-05:00","updated_at":"2026-01-31T08:08:02.644611-05:00","closed_at":"2026-01-31T08:08:02.644611-05:00"} +{"id":"policyengine-us-data-x4q","title":"Calibrate taxable pension fraction from SOI data","description":"imputation_parameters.yaml sets taxable_pension_fraction to 1.0 with the comment 'no SOI data, so arbitrary assumption.' But the SOI targets CSV includes both total_pension_income and taxable_pension_income by AGI bracket. Use the ratio of these to set a data-driven fraction instead of assuming 100% taxable.","status":"closed","priority":2,"issue_type":"bug","created_at":"2026-01-31T08:01:24.590331-05:00","updated_at":"2026-01-31T08:08:02.70425-05:00","closed_at":"2026-01-31T08:08:02.70425-05:00"} diff --git a/changelog_entry.yaml b/changelog_entry.yaml index e69de29bb..18321ccc5 100644 --- a/changelog_entry.yaml +++ b/changelog_entry.yaml @@ -0,0 +1,4 @@ +- bump: minor + changes: + changed: + - Use income_tax_positive instead of income_tax for CBO calibration target diff --git a/policyengine_us_data/db/etl_national_targets.py b/policyengine_us_data/db/etl_national_targets.py index 5cb910d5b..b25e2c2df 100644 --- a/policyengine_us_data/db/etl_national_targets.py +++ b/policyengine_us_data/db/etl_national_targets.py @@ -260,19 +260,29 @@ def extract_national_targets(): # CBO projection targets - get for a specific year CBO_YEAR = 2023 # Year the CBO projections are for cbo_vars = [ - "income_tax", + # Note: income_tax_positive matches CBO's receipts definition + # where refundable credit payments in excess of liability are + # classified as outlays, not negative receipts. See: + # https://www.cbo.gov/publication/43767 + "income_tax_positive", "snap", "social_security", "ssi", "unemployment_compensation", ] + # Mapping from target variable to CBO parameter name (when different) + cbo_param_name_map = { + "income_tax_positive": "income_tax", # CBO param is income_tax + } + cbo_targets = [] for variable_name in cbo_vars: + param_name = cbo_param_name_map.get(variable_name, variable_name) try: value = sim.tax_benefit_system.parameters( CBO_YEAR - ).calibration.gov.cbo._children[variable_name] + ).calibration.gov.cbo._children[param_name] cbo_targets.append( { "variable": variable_name, @@ -284,7 +294,8 @@ def extract_national_targets(): ) except (KeyError, AttributeError) as e: print( - f"Warning: Could not extract CBO parameter for {variable_name}: {e}" + f"Warning: Could not extract CBO parameter for " + f"{variable_name} (param: {param_name}): {e}" ) # Treasury/JCT targets (EITC) - get for a specific year @@ -334,12 +345,17 @@ def transform_national_targets(raw_targets): """ # Process direct sum targets (non-tax items and some CBO items) - # Note: income_tax from CBO and eitc from Treasury need filer constraint + # Note: income_tax_positive from CBO and eitc from Treasury need + # filer constraint cbo_non_tax = [ - t for t in raw_targets["cbo_targets"] if t["variable"] != "income_tax" + t + for t in raw_targets["cbo_targets"] + if t["variable"] != "income_tax_positive" ] cbo_tax = [ - t for t in raw_targets["cbo_targets"] if t["variable"] == "income_tax" + t + for t in raw_targets["cbo_targets"] + if t["variable"] == "income_tax_positive" ] all_direct_targets = raw_targets["direct_sum_targets"] + cbo_non_tax