changelog_entry.yaml (+5 −0)
@@ -0,0 +1,5 @@
+- bump: minor
+  changes:
+    added:
+      - Deduplication logic in SparseMatrixBuilder (option to remove duplicate targets or select the most specific geographic level).
+      - Entity-aware target calculations for correct entity counts.
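Neither changelog item's implementation is visible in the script diff below (the SparseMatrixBuilder changes live elsewhere in the PR). As a purely illustrative sketch of the first item, selecting the most specific geographic level among duplicate targets might look like the following; the column names, the GEO_RANK ordering, and the duplicate key are all assumptions, not the actual SparseMatrixBuilder API.

import pandas as pd

# Hypothetical specificity ranking (assumed; higher = more specific)
GEO_RANK = {"national": 0, "state": 1, "county": 2, "tract": 3}

def dedupe_most_specific(targets_df: pd.DataFrame) -> pd.DataFrame:
    """Keep one row per duplicated target, preferring the most specific level."""
    ranked = targets_df.assign(_rank=targets_df["geography_level"].map(GEO_RANK))
    ranked = ranked.sort_values("_rank", ascending=False)
    # Assumed duplicate key: the same variable reported at several geographic levels
    deduped = ranked.drop_duplicates(subset="variable", keep="first")
    return deduped.drop(columns="_rank").reset_index(drop=True)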
(second changed file; filename not shown in this view)
@@ -114,9 +114,14 @@
    },
)

-print(f"Matrix shape: {X_sparse.shape}")
+builder.print_uprating_summary(targets_df)

+print(f"\nMatrix shape: {X_sparse.shape}")
+print(f"Targets: {len(targets_df)}")

+# ============================================================================
+# STEP 2: FILTER TO ACHIEVABLE TARGETS
+# ============================================================================
# Filter to achievable targets (rows with non-zero data)
row_sums = np.array(X_sparse.sum(axis=1)).flatten()
achievable_mask = row_sums > 0
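To make the filtering step concrete outside the full script, here is a minimal self-contained version of it. The toy matrix and targets frame are invented; the masking lines mirror the diff. A target whose matrix row is all zeros has no contributing households, so no reweighting can ever match it.

import numpy as np
import pandas as pd
from scipy import sparse

# Invented toy data: 2 targets x 3 households
X_sparse = sparse.csr_matrix([[1.0, 0.0, 2.0],   # some household contributes
                              [0.0, 0.0, 0.0]])  # no household contributes
targets_df = pd.DataFrame({"name": ["t1", "t2"], "value": [10.0, 5.0]})

# A target whose matrix row sums to zero cannot be hit by any reweighting
row_sums = np.array(X_sparse.sum(axis=1)).flatten()
achievable_mask = row_sums > 0

targets_df = targets_df[achievable_mask].reset_index(drop=True)
X_sparse = X_sparse[achievable_mask, :]
print(X_sparse.shape)  # (1, 3): the all-zero target row is dropped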
@@ -129,7 +134,7 @@
targets_df = targets_df[achievable_mask].reset_index(drop=True)
X_sparse = X_sparse[achievable_mask, :]

-print(f"Filtered matrix shape: {X_sparse.shape}")
+print(f"Final matrix shape: {X_sparse.shape}")

# Extract target vector and names
targets = targets_df["value"].values
@@ -139,14 +144,14 @@
]

# ============================================================================
-# STEP 2: INITIALIZE WEIGHTS
+# STEP 3: INITIALIZE WEIGHTS
# ============================================================================
initial_weights = np.ones(X_sparse.shape[1]) * 100
print(f"\nInitial weights shape: {initial_weights.shape}")
print(f"Initial weights sum: {initial_weights.sum():,.0f}")

# ============================================================================
-# STEP 3: CREATE MODEL
+# STEP 4: CREATE MODEL
# ============================================================================
print("\nCreating SparseCalibrationWeights model...")
model = SparseCalibrationWeights(
@@ -162,7 +167,7 @@
)

# ============================================================================
-# STEP 4: TRAIN IN CHUNKS
+# STEP 5: TRAIN IN CHUNKS
# ============================================================================
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
calibration_log = pd.DataFrame()
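The loop body between this hunk and the next is collapsed in the diff view, so the following is only an assumed reconstruction of the chunked-training pattern that the step header and the surviving context line (the pd.concat call in the next hunk) suggest. The train_chunk helper and both loop constants are hypothetical.

import numpy as np
import pandas as pd

def train_chunk(model, epochs):
    """Hypothetical stand-in: run `epochs` optimization steps, return losses."""
    return np.linspace(1.0, 0.5, epochs)  # placeholder loss curve

n_chunks, epochs_per_chunk = 10, 100     # hypothetical constants
for chunk in range(n_chunks):
    losses = train_chunk(model, epochs_per_chunk)
    chunk_df = pd.DataFrame({
        "chunk": chunk,
        "epoch": np.arange(epochs_per_chunk) + chunk * epochs_per_chunk,
        "loss": losses,
    })
    # This concat matches the context line visible in the next hunk
    calibration_log = pd.concat([calibration_log, chunk_df], ignore_index=True)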
@@ -205,7 +210,7 @@
    calibration_log = pd.concat([calibration_log, chunk_df], ignore_index=True)

# ============================================================================
-# STEP 5: EXTRACT AND SAVE WEIGHTS
+# STEP 6: EXTRACT AND SAVE WEIGHTS
# ============================================================================
with torch.no_grad():
    w = model.get_weights(deterministic=True).cpu().numpy()
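The actual path definitions and save calls are collapsed here; a hedged sketch of what the LOG_PATH print below implies, with both filenames hypothetical:

import numpy as np

weights_path = f"weights_{timestamp}.npy"        # hypothetical filename
log_path = f"calibration_log_{timestamp}.csv"    # hypothetical filename
np.save(weights_path, w)
calibration_log.to_csv(log_path, index=False)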
@@ -225,7 +230,7 @@
print(f"LOG_PATH:{log_path}")

# ============================================================================
-# STEP 6: VERIFY PREDICTIONS
+# STEP 7: VERIFY PREDICTIONS
# ============================================================================
print("\n" + "=" * 60)
print("PREDICTION VERIFICATION")