From 0b00952b0b04df3bc00b382645fc768d78387930 Mon Sep 17 00:00:00 2001
From: Max Ghenis <mghenis@gmail.com>
Date: Sat, 31 Jan 2026 11:51:47 -0500
Subject: [PATCH] Fix federal income tax calibration to use income_tax_positive

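CBO's income tax receipts count refundable credit payments in excess of
liability as outlays, not as negative receipts. Change the calibration
target variable from income_tax to income_tax_positive to match that
definition. The target value is still read from the CBO income_tax
parameter; only the variable it is calibrated against changes.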

Recreated from PR #495 rebased onto main.

Closes #494

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 .beads/issues.jsonl                           |  4 +++
 changelog_entry.yaml                          |  4 +++
 .../db/etl_national_targets.py                | 28 +++++++++++++++----
 3 files changed, 30 insertions(+), 6 deletions(-)
 create mode 100644 .beads/issues.jsonl

diff --git a/.beads/issues.jsonl b/.beads/issues.jsonl
new file mode 100644
index 000000000..a9dbe60f7
--- /dev/null
+++ b/.beads/issues.jsonl
@@ -0,0 +1,4 @@
+{"id":"policyengine-us-data-apq","title":"Add age and demographics to pre-tax contribution QRF imputation","description":"The QRF in puf.py that imputes pre_tax_contributions from CPS to PUF uses only employment_income as a predictor. Age, filing status, and number of dependents are strong predictors of 401(k) participation and contribution rates. Adding these should improve the distributional accuracy.","status":"closed","priority":2,"issue_type":"feature","created_at":"2026-01-31T08:01:22.72749-05:00","updated_at":"2026-01-31T08:08:02.675063-05:00","closed_at":"2026-01-31T08:08:02.675063-05:00"}
+{"id":"policyengine-us-data-jhh","title":"Parameterize retirement contribution limits by year","description":"The contribution waterfall in cps.py hardcodes 2022 limits ($20,500 401k, $6,500 catch-up, $6,000 IRA, $1,000 IRA catch-up). These should be pulled from PolicyEngine parameters or a year-indexed lookup so the dataset builds correctly for any year.","status":"closed","priority":2,"issue_type":"bug","created_at":"2026-01-31T08:01:18.941246-05:00","updated_at":"2026-01-31T08:08:02.614396-05:00","closed_at":"2026-01-31T08:08:02.614396-05:00"}
+{"id":"policyengine-us-data-mnw","title":"Use SS_SC source code for Social Security retirement/disability split","description":"Currently cps.py uses a hard age-62 cutoff to split SS into retirement vs disability. The CPS ASEC has SS_SC (Social Security source codes) that distinguish retirement, disability, and survivor benefits. Use these codes instead of the age heuristic.","status":"closed","priority":2,"issue_type":"bug","created_at":"2026-01-31T08:01:21.01419-05:00","updated_at":"2026-01-31T08:08:02.644611-05:00","closed_at":"2026-01-31T08:08:02.644611-05:00"}
+{"id":"policyengine-us-data-x4q","title":"Calibrate taxable pension fraction from SOI data","description":"imputation_parameters.yaml sets taxable_pension_fraction to 1.0 with the comment 'no SOI data, so arbitrary assumption.' But the SOI targets CSV includes both total_pension_income and taxable_pension_income by AGI bracket. Use the ratio of these to set a data-driven fraction instead of assuming 100% taxable.","status":"closed","priority":2,"issue_type":"bug","created_at":"2026-01-31T08:01:24.590331-05:00","updated_at":"2026-01-31T08:08:02.70425-05:00","closed_at":"2026-01-31T08:08:02.70425-05:00"}
diff --git a/changelog_entry.yaml b/changelog_entry.yaml
index e69de29bb..18321ccc5 100644
--- a/changelog_entry.yaml
+++ b/changelog_entry.yaml
@@ -0,0 +1,4 @@
+- bump: minor
+  changes:
+    changed:
+    - Use income_tax_positive instead of income_tax for CBO calibration target
diff --git a/policyengine_us_data/db/etl_national_targets.py b/policyengine_us_data/db/etl_national_targets.py
index 5cb910d5b..b25e2c2df 100644
--- a/policyengine_us_data/db/etl_national_targets.py
+++ b/policyengine_us_data/db/etl_national_targets.py
@@ -260,19 +260,29 @@ def extract_national_targets():
     # CBO projection targets - get for a specific year
     CBO_YEAR = 2023  # Year the CBO projections are for
     cbo_vars = [
-        "income_tax",
+        # Note: income_tax_positive matches CBO's receipts definition
+        # where refundable credit payments in excess of liability are
+        # classified as outlays, not negative receipts. See:
+        # https://www.cbo.gov/publication/43767
+        "income_tax_positive",
         "snap",
         "social_security",
         "ssi",
         "unemployment_compensation",
     ]
 
+    # Mapping from target variable to CBO parameter name (when different)
+    cbo_param_name_map = {
+        "income_tax_positive": "income_tax",  # CBO param is income_tax
+    }
+
     cbo_targets = []
     for variable_name in cbo_vars:
+        param_name = cbo_param_name_map.get(variable_name, variable_name)
         try:
             value = sim.tax_benefit_system.parameters(
                 CBO_YEAR
-            ).calibration.gov.cbo._children[variable_name]
+            ).calibration.gov.cbo._children[param_name]
             cbo_targets.append(
                 {
                     "variable": variable_name,
@@ -284,7 +294,8 @@ def extract_national_targets():
             )
         except (KeyError, AttributeError) as e:
             print(
-                f"Warning: Could not extract CBO parameter for {variable_name}: {e}"
+                f"Warning: Could not extract CBO parameter for "
+                f"{variable_name} (param: {param_name}): {e}"
             )
 
     # Treasury/JCT targets (EITC) - get for a specific year
@@ -334,12 +345,17 @@ def transform_national_targets(raw_targets):
     """
 
     # Process direct sum targets (non-tax items and some CBO items)
-    # Note: income_tax from CBO and eitc from Treasury need filer constraint
+    # Note: income_tax_positive from CBO and eitc from Treasury need
+    # filer constraint
     cbo_non_tax = [
-        t for t in raw_targets["cbo_targets"] if t["variable"] != "income_tax"
+        t
+        for t in raw_targets["cbo_targets"]
+        if t["variable"] != "income_tax_positive"
     ]
     cbo_tax = [
-        t for t in raw_targets["cbo_targets"] if t["variable"] == "income_tax"
+        t
+        for t in raw_targets["cbo_targets"]
+        if t["variable"] == "income_tax_positive"
     ]
 
     all_direct_targets = raw_targets["direct_sum_targets"] + cbo_non_tax