changelog_entry.yaml (+5 −0)
@@ -0,0 +1,5 @@
+- bump: minor
+  changes:
+    added:
+      - Deduplication logic in SparseMatrixBuilder (option to remove duplicate targets or select the most specific geographic level).
+      - Entity-aware target calculations for correct entity counts.
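Neither changelog item's implementation is visible in the script diff below (the SparseMatrixBuilder changes live elsewhere in the PR). As a purely illustrative sketch of the first item, selecting the most specific geographic level among duplicate targets might look like the following; the column names, the GEO_RANK ordering, and the duplicate key are all assumptions, not the actual SparseMatrixBuilder API.

import pandas as pd

# Hypothetical specificity ranking (assumed; higher = more specific)
GEO_RANK = {"national": 0, "state": 1, "county": 2, "tract": 3}

def dedupe_most_specific(targets_df: pd.DataFrame) -> pd.DataFrame:
    """Keep one row per duplicated target, preferring the most specific level."""
    ranked = targets_df.assign(_rank=targets_df["geography_level"].map(GEO_RANK))
    ranked = ranked.sort_values("_rank", ascending=False)
    # Assumed duplicate key: the same variable reported at several geographic levels
    deduped = ranked.drop_duplicates(subset="variable", keep="first")
    return deduped.drop(columns="_rank").reset_index(drop=True)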
(second changed file; filename not shown in this view)
@@ -114,9 +114,14 @@
    },
)

-print(f"Matrix shape: {X_sparse.shape}")
+builder.print_uprating_summary(targets_df)

+print(f"\nMatrix shape: {X_sparse.shape}")
+print(f"Targets: {len(targets_df)}")

+# ============================================================================
+# STEP 2: FILTER TO ACHIEVABLE TARGETS
+# ============================================================================
# Filter to achievable targets (rows with non-zero data)
row_sums = np.array(X_sparse.sum(axis=1)).flatten()
achievable_mask = row_sums > 0
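To make the filtering step concrete outside the full script, here is a minimal self-contained version of it. The toy matrix and targets frame are invented; the masking lines mirror the diff. A target whose matrix row is all zeros has no contributing households, so no reweighting can ever match it.

import numpy as np
import pandas as pd
from scipy import sparse

# Invented toy data: 2 targets x 3 households
X_sparse = sparse.csr_matrix([[1.0, 0.0, 2.0],   # some household contributes
                              [0.0, 0.0, 0.0]])  # no household contributes
targets_df = pd.DataFrame({"name": ["t1", "t2"], "value": [10.0, 5.0]})

# A target whose matrix row sums to zero cannot be hit by any reweighting
row_sums = np.array(X_sparse.sum(axis=1)).flatten()
achievable_mask = row_sums > 0

targets_df = targets_df[achievable_mask].reset_index(drop=True)
X_sparse = X_sparse[achievable_mask, :]
print(X_sparse.shape)  # (1, 3): the all-zero target row is dropped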
@@ -129,7 +134,7 @@
targets_df = targets_df[achievable_mask].reset_index(drop=True)
X_sparse = X_sparse[achievable_mask, :]

-print(f"Filtered matrix shape: {X_sparse.shape}")
+print(f"Final matrix shape: {X_sparse.shape}")

# Extract target vector and names
targets = targets_df["value"].values
@@ -139,14 +144,14 @@
]

# ============================================================================
-# STEP 2: INITIALIZE WEIGHTS
+# STEP 3: INITIALIZE WEIGHTS
# ============================================================================
initial_weights = np.ones(X_sparse.shape[1]) * 100
print(f"\nInitial weights shape: {initial_weights.shape}")
print(f"Initial weights sum: {initial_weights.sum():,.0f}")

# ============================================================================
-# STEP 3: CREATE MODEL
+# STEP 4: CREATE MODEL
# ============================================================================
print("\nCreating SparseCalibrationWeights model...")
model = SparseCalibrationWeights(
@@ -162,7 +167,7 @@
)

# ============================================================================
-# STEP 4: TRAIN IN CHUNKS
+# STEP 5: TRAIN IN CHUNKS
# ============================================================================
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
calibration_log = pd.DataFrame()
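The loop body between this hunk and the next is collapsed in the diff view, so the following is only an assumed reconstruction of the chunked-training pattern that the step header and the surviving context line (the pd.concat call in the next hunk) suggest. The train_chunk helper and both loop constants are hypothetical.

import numpy as np
import pandas as pd

def train_chunk(model, epochs):
    """Hypothetical stand-in: run `epochs` optimization steps, return losses."""
    return np.linspace(1.0, 0.5, epochs)  # placeholder loss curve

n_chunks, epochs_per_chunk = 10, 100     # hypothetical constants
for chunk in range(n_chunks):
    losses = train_chunk(model, epochs_per_chunk)
    chunk_df = pd.DataFrame({
        "chunk": chunk,
        "epoch": np.arange(epochs_per_chunk) + chunk * epochs_per_chunk,
        "loss": losses,
    })
    # This concat matches the context line visible in the next hunk
    calibration_log = pd.concat([calibration_log, chunk_df], ignore_index=True)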
@@ -205,7 +210,7 @@
    calibration_log = pd.concat([calibration_log, chunk_df], ignore_index=True)

# ============================================================================
-# STEP 5: EXTRACT AND SAVE WEIGHTS
+# STEP 6: EXTRACT AND SAVE WEIGHTS
# ============================================================================
with torch.no_grad():
    w = model.get_weights(deterministic=True).cpu().numpy()
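The actual path definitions and save calls are collapsed here; a hedged sketch of what the LOG_PATH print below implies, with both filenames hypothetical:

import numpy as np

weights_path = f"weights_{timestamp}.npy"        # hypothetical filename
log_path = f"calibration_log_{timestamp}.csv"    # hypothetical filename
np.save(weights_path, w)
calibration_log.to_csv(log_path, index=False)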
@@ -225,7 +230,7 @@
print(f"LOG_PATH:{log_path}")

# ============================================================================
-# STEP 6: VERIFY PREDICTIONS
+# STEP 7: VERIFY PREDICTIONS
# ============================================================================
print("\n" + "=" * 60)
print("PREDICTION VERIFICATION")