From 9a2b6f1e6c13a88669d5fb20e83e17f6bfeb5627 Mon Sep 17 00:00:00 2001 From: asiripanich <17020181+asiripanich@users.noreply.github.com> Date: Mon, 11 May 2026 10:02:57 +0700 Subject: [PATCH] Remove SIMULATE_CHOOSER_COLUMNS memory hacks and expose global constants Manual chooser-column filtering (SIMULATE/LOGSUM_CHOOSER_COLUMNS) was a memory workaround that is now redundant: drop_unused_columns in ComputeSettings (default True) prunes DataFrame columns automatically one layer beneath each model function. Remove all the FIXME blocks and the household_id patch-ups they contained. Deprecate SIMULATE_CHOOSER_COLUMNS and LOGSUM_CHOOSER_COLUMNS in all settings classes so existing YAML configs emit DeprecationWarning instead of failing. Also wire state.get_global_constants() into locals_d at every utility- evaluation site (_location_sample, run_location_simulate, _destination_sample, run_destination_simulate, _od_sample, run_od_simulate, _schedule_tours) so global constants are available in @ expressions in CSV specs. Closes #1017 Co-Authored-By: Claude Opus 4.5 --- activitysim/abm/models/location_choice.py | 37 +++------------- activitysim/abm/models/school_escorting.py | 34 +++++++++------ activitysim/abm/models/util/logsums.py | 36 ---------------- .../abm/models/util/tour_destination.py | 35 +-------------- activitysim/abm/models/util/tour_od.py | 20 +-------- .../abm/models/util/tour_scheduling.py | 24 ----------- .../models/util/vectorize_tour_scheduling.py | 24 ++++++++++- activitysim/core/configuration/logit.py | 43 +++++++++++++++++-- activitysim/core/expressions.py | 12 ------ 9 files changed, 94 insertions(+), 171 deletions(-) diff --git a/activitysim/abm/models/location_choice.py b/activitysim/abm/models/location_choice.py index 2d629a404c..c02f701a3a 100644 --- a/activitysim/abm/models/location_choice.py +++ b/activitysim/abm/models/location_choice.py @@ -161,6 +161,7 @@ def _location_sample( "reindex": reindex, "land_use": state.get_dataframe("land_use"), } + locals_d.update(state.get_global_constants()) locals_d.update(model_settings.CONSTANTS or {}) # preprocess choosers table @@ -232,14 +233,7 @@ def location_sample( chunk_tag, trace_label, ): - # FIXME - MEMORY HACK - only include columns actually used in spec - chooser_columns = model_settings.SIMULATE_CHOOSER_COLUMNS - # Drop this when PR #1017 is merged - if ("household_id" not in chooser_columns) and ( - "household_id" in persons_merged.columns - ): - chooser_columns = chooser_columns + ["household_id"] - choosers = persons_merged[chooser_columns] + choosers = persons_merged # create wrapper with keys for this lookup - in this case there is a home_zone_id in the choosers # and a zone_id in the alternatives which get merged during interaction @@ -390,17 +384,7 @@ def location_presample( HOME_TAZ in persons_merged ) # 'TAZ' should already be in persons_merged from land_use - # FIXME - MEMORY HACK - only include columns actually used in spec - # FIXME we don't actually require that land_use provide a TAZ crosswalk - # FIXME maybe we should add it for multi-zone (from maz_taz) if missing? - chooser_columns = model_settings.SIMULATE_CHOOSER_COLUMNS - chooser_columns = [HOME_TAZ if c == HOME_MAZ else c for c in chooser_columns] - # Drop this when PR #1017 is merged - if ("household_id" not in chooser_columns) and ( - "household_id" in persons_merged.columns - ): - chooser_columns = chooser_columns + ["household_id"] - choosers = persons_merged[chooser_columns] + choosers = persons_merged # create wrapper with keys for this lookup - in this case there is a HOME_TAZ in the choosers # and a DEST_TAZ in the alternatives which get merged during interaction @@ -565,11 +549,6 @@ def run_location_logsums( mandatory=False, ) - # FIXME - MEMORY HACK - only include columns actually used in spec - persons_merged_df = logsum.filter_chooser_columns( - persons_merged_df, logsum_settings, model_settings - ) - logger.info(f"Running {trace_label} with {len(location_sample_df.index)} rows") choosers = location_sample_df.join(persons_merged_df, how="left") @@ -628,14 +607,7 @@ def run_location_simulate( """ assert not persons_merged.empty - # FIXME - MEMORY HACK - only include columns actually used in spec - chooser_columns = model_settings.SIMULATE_CHOOSER_COLUMNS - # Drop this when PR #1017 is merged - if ("household_id" not in chooser_columns) and ( - "household_id" in persons_merged.columns - ): - chooser_columns = chooser_columns + ["household_id"] - choosers = persons_merged[chooser_columns] + choosers = persons_merged alt_dest_col_name = model_settings.ALT_DEST_COL_NAME @@ -666,6 +638,7 @@ def run_location_simulate( "reindex": reindex, "land_use": state.get_dataframe("land_use"), } + locals_d.update(state.get_global_constants()) locals_d.update(model_settings.CONSTANTS or {}) # preprocess choosers table diff --git a/activitysim/abm/models/school_escorting.py b/activitysim/abm/models/school_escorting.py index d24d52a41f..d643232210 100644 --- a/activitysim/abm/models/school_escorting.py +++ b/activitysim/abm/models/school_escorting.py @@ -3,6 +3,7 @@ from __future__ import annotations import logging +import warnings from typing import Any, Literal import numpy as np @@ -17,6 +18,8 @@ tracing, workflow, ) +from pydantic import field_validator + from activitysim.core.configuration.base import PreprocessorSettings from activitysim.core.configuration.logit import BaseLogitComponentSettings from activitysim.core.interaction_simulate import interaction_simulate @@ -335,7 +338,25 @@ class SchoolEscortSettings(BaseLogitComponentSettings, extra="forbid"): GENDER_WEIGHT: float = 10.0 AGE_WEIGHT: float = 1.0 - SIMULATE_CHOOSER_COLUMNS: list[str] | None = None + SIMULATE_CHOOSER_COLUMNS: Any | None = None + """Was used to help reduce the memory needed for the model. + Setting is now obsolete and doesn't do anything. + Functionality was replaced by util.drop_unused_columns + + .. deprecated:: 1.4 + """ + + @field_validator("SIMULATE_CHOOSER_COLUMNS", mode="before") + @classmethod + def _warn_simulate_chooser_columns(cls, v): + if v is not None: + warnings.warn( + "SIMULATE_CHOOSER_COLUMNS is deprecated and replaced by " + "util.drop_unused_columns; value will be ignored.", + DeprecationWarning, + stacklevel=2, + ) + return None SPEC: None = None """The school escort model does not use this setting.""" @@ -465,17 +486,6 @@ def school_escorting( # else: # locals_dict.pop("_sharrow_skip", None) - # reduce memory by limiting columns if selected columns are supplied - chooser_columns = model_settings.SIMULATE_CHOOSER_COLUMNS - if chooser_columns is not None: - # Drop this when PR #1017 is merged - if ("household_id" not in chooser_columns) and ( - "household_id" in choosers.columns - ): - chooser_columns = chooser_columns + ["household_id"] - chooser_columns = chooser_columns + participant_columns - choosers = choosers[chooser_columns] - # add previous data to stage if stage_num >= 1: choosers = add_prev_choices_to_choosers( diff --git a/activitysim/abm/models/util/logsums.py b/activitysim/abm/models/util/logsums.py index 346f5ffa09..44b3dcc36c 100644 --- a/activitysim/abm/models/util/logsums.py +++ b/activitysim/abm/models/util/logsums.py @@ -7,7 +7,6 @@ import pandas as pd from activitysim.core import config, expressions, los, simulate, tracing, workflow -from activitysim.core.configuration import PydanticBase from activitysim.core.configuration.logit import ( TourLocationComponentSettings, TourModeComponentSettings, @@ -16,41 +15,6 @@ logger = logging.getLogger(__name__) -def filter_chooser_columns( - choosers, logsum_settings: dict | PydanticBase, model_settings: dict | PydanticBase -): - try: - chooser_columns = logsum_settings.LOGSUM_CHOOSER_COLUMNS - except AttributeError: - chooser_columns = logsum_settings.get("LOGSUM_CHOOSER_COLUMNS", []) - - if ( - isinstance(model_settings, dict) - and "CHOOSER_ORIG_COL_NAME" in model_settings - and model_settings["CHOOSER_ORIG_COL_NAME"] not in chooser_columns - ): - chooser_columns.append(model_settings["CHOOSER_ORIG_COL_NAME"]) - if ( - isinstance(model_settings, PydanticBase) - and hasattr(model_settings, "CHOOSER_ORIG_COL_NAME") - and model_settings.CHOOSER_ORIG_COL_NAME - and model_settings.CHOOSER_ORIG_COL_NAME not in chooser_columns - ): - chooser_columns.append(model_settings.CHOOSER_ORIG_COL_NAME) - - missing_columns = [c for c in chooser_columns if c not in choosers] - if missing_columns: - logger.debug( - "logsum.filter_chooser_columns missing_columns %s" % missing_columns - ) - - # ignore any columns not appearing in choosers df - chooser_columns = [c for c in chooser_columns if c in choosers] - - choosers = choosers[chooser_columns] - return choosers - - def compute_location_choice_logsums( state: workflow.State, choosers: pd.DataFrame, diff --git a/activitysim/abm/models/util/tour_destination.py b/activitysim/abm/models/util/tour_destination.py index 2cac47ee1a..5742ff0c8f 100644 --- a/activitysim/abm/models/util/tour_destination.py +++ b/activitysim/abm/models/util/tour_destination.py @@ -113,6 +113,7 @@ def _destination_sample( "dest_col_name": skims.dest_key, # added for sharrow flows "timeframe": "timeless", } + locals_d.update(state.get_global_constants()) constants = model_settings.CONSTANTS if constants is not None: locals_d.update(constants) @@ -619,23 +620,9 @@ def run_destination_sample( chunk_size, trace_label, ): - # FIXME - MEMORY HACK - only include columns actually used in spec (omit them pre-merge) - chooser_columns = model_settings.SIMULATE_CHOOSER_COLUMNS - # if special person id is passed chooser_id_column = model_settings.CHOOSER_ID_COLUMN - # Drop this when PR #1017 is merged - if ("household_id" not in chooser_columns) and ( - "household_id" in persons_merged.columns - ): - chooser_columns = chooser_columns + ["household_id"] - persons_merged = persons_merged[ - [c for c in persons_merged.columns if c in chooser_columns] - ] - tours = tours[ - [c for c in tours.columns if c in chooser_columns or c == chooser_id_column] - ] choosers = pd.merge( tours, persons_merged, left_on=chooser_id_column, right_index=True, how="left" ) @@ -731,11 +718,6 @@ def run_destination_logsums( chunk_tag = "tour_destination.logsums" - # FIXME - MEMORY HACK - only include columns actually used in spec - persons_merged = logsum.filter_chooser_columns( - persons_merged, logsum_settings, model_settings - ) - # merge persons into tours choosers = pd.merge( destination_sample, @@ -798,23 +780,9 @@ def run_destination_simulate( coefficients_file_name=model_settings.COEFFICIENTS, ) - # FIXME - MEMORY HACK - only include columns actually used in spec (omit them pre-merge) - chooser_columns = model_settings.SIMULATE_CHOOSER_COLUMNS - # if special person id is passed chooser_id_column = model_settings.CHOOSER_ID_COLUMN - # Drop this when PR #1017 is merged - if ("household_id" not in chooser_columns) and ( - "household_id" in persons_merged.columns - ): - chooser_columns = chooser_columns + ["household_id"] - persons_merged = persons_merged[ - [c for c in persons_merged.columns if c in chooser_columns] - ] - tours = tours[ - [c for c in tours.columns if c in chooser_columns or c == chooser_id_column] - ] choosers = pd.merge( tours, persons_merged, left_on=chooser_id_column, right_index=True, how="left" ) @@ -856,6 +824,7 @@ def run_destination_simulate( "dest_col_name": skims.dest_key, # added for sharrow flows "timeframe": "timeless", } + locals_d.update(state.get_global_constants()) if constants is not None: locals_d.update(constants) diff --git a/activitysim/abm/models/util/tour_od.py b/activitysim/abm/models/util/tour_od.py index c2548cbd60..734967c7d0 100644 --- a/activitysim/abm/models/util/tour_od.py +++ b/activitysim/abm/models/util/tour_od.py @@ -178,6 +178,7 @@ def _od_sample( "orig_col_name": ORIG_TAZ, "dest_col_name": DEST_TAZ, } + locals_d.update(state.get_global_constants()) constants = model_settings.CONSTANTS if constants is not None: locals_d.update(constants) @@ -690,12 +691,6 @@ def run_od_sample( ) choosers = tours - # FIXME - MEMORY HACK - only include columns actually used in spec - chooser_columns = model_settings.SIMULATE_CHOOSER_COLUMNS - # Drop this when PR #1017 is merged - if ("household_id" not in chooser_columns) and ("household_id" in choosers.columns): - chooser_columns = chooser_columns + ["household_id"] - choosers = choosers[chooser_columns] # interaction_sample requires that choosers.index.is_monotonic_increasing if not choosers.index.is_monotonic_increasing: @@ -772,11 +767,6 @@ def run_od_logsums( dest_id_col = model_settings.DEST_COL_NAME tour_od_id_col = get_od_id_col(origin_id_col, dest_id_col) - # FIXME - MEMORY HACK - only include columns actually used in spec - tours_merged_df = logsum.filter_chooser_columns( - tours_merged_df, logsum_settings, model_settings - ) - # merge ods into choosers table choosers = od_sample.join(tours_merged_df, how="left") choosers[tour_od_id_col] = ( @@ -952,13 +942,6 @@ def run_od_simulate( # merge persons into tours choosers = tours - # FIXME - MEMORY HACK - only include columns actually used in spec - chooser_columns = model_settings.SIMULATE_CHOOSER_COLUMNS - # Drop this when PR #1017 is merged - if ("household_id" not in chooser_columns) and ("household_id" in choosers.columns): - chooser_columns = chooser_columns + ["household_id"] - choosers = choosers[chooser_columns] - # interaction_sample requires that choosers.index.is_monotonic_increasing if not choosers.index.is_monotonic_increasing: logger.debug( @@ -1009,6 +992,7 @@ def run_od_simulate( "orig_col_name": origin_col_name, "dest_col_name": dest_col_name, } + locals_d.update(state.get_global_constants()) if constants is not None: locals_d.update(constants) diff --git a/activitysim/abm/models/util/tour_scheduling.py b/activitysim/abm/models/util/tour_scheduling.py index e591c29db1..b9e2f4c051 100644 --- a/activitysim/abm/models/util/tour_scheduling.py +++ b/activitysim/abm/models/util/tour_scheduling.py @@ -24,30 +24,6 @@ def run_tour_scheduling( trace_label: str, ): - if model_settings.LOGSUM_SETTINGS: - logsum_settings = TourModeComponentSettings.read_settings_file( - state.filesystem, - str(model_settings.LOGSUM_SETTINGS), - mandatory=False, - ) - logsum_columns = logsum_settings.LOGSUM_CHOOSER_COLUMNS - else: - logsum_columns = [] - - # - filter chooser columns for both logsums and simulate - model_columns = model_settings.SIMULATE_CHOOSER_COLUMNS - chooser_columns = logsum_columns + [ - c for c in model_columns if c not in logsum_columns - ] - - # Drop this when PR #1017 is merged - if ("household_id" not in chooser_columns) and ( - "household_id" in persons_merged.columns - ): - chooser_columns = chooser_columns + ["household_id"] - - persons_merged = expressions.filter_chooser_columns(persons_merged, chooser_columns) - timetable = state.get_injectable("timetable") # - run preprocessor to annotate choosers diff --git a/activitysim/abm/models/util/vectorize_tour_scheduling.py b/activitysim/abm/models/util/vectorize_tour_scheduling.py index 2148b220e3..dead59d90e 100644 --- a/activitysim/abm/models/util/vectorize_tour_scheduling.py +++ b/activitysim/abm/models/util/vectorize_tour_scheduling.py @@ -3,6 +3,7 @@ from __future__ import annotations import logging +import warnings from collections import OrderedDict from pathlib import Path from typing import Any @@ -14,6 +15,8 @@ from activitysim.core import chunk, config, expressions, los, simulate from activitysim.core import timetable as tt from activitysim.core import tracing, workflow +from pydantic import field_validator + from activitysim.core.configuration.base import ComputeSettings, PreprocessorSettings from activitysim.core.configuration.logit import LogitComponentSettings from activitysim.core.interaction_sample_simulate import interaction_sample_simulate @@ -42,7 +45,25 @@ class TourSchedulingSettings(LogitComponentSettings, extra="forbid"): it is assumed to be an unsegmented preprocessor. Otherwise, the dict keys give the segements. """ - SIMULATE_CHOOSER_COLUMNS: list[str] | None = None + SIMULATE_CHOOSER_COLUMNS: Any | None = None + """Was used to help reduce the memory needed for the model. + Setting is now obsolete and doesn't do anything. + Functionality was replaced by util.drop_unused_columns + + .. deprecated:: 1.4 + """ + + @field_validator("SIMULATE_CHOOSER_COLUMNS", mode="before") + @classmethod + def _warn_simulate_chooser_columns(cls, v): + if v is not None: + warnings.warn( + "SIMULATE_CHOOSER_COLUMNS is deprecated and replaced by " + "util.drop_unused_columns; value will be ignored.", + DeprecationWarning, + stacklevel=2, + ) + return None SPEC_SEGMENTS: dict[str, LogitComponentSettings] = {} @@ -791,6 +812,7 @@ def _schedule_tours( # - make choices locals_d = {"tt": timetable.attach_state(state)} + locals_d.update(state.get_global_constants()) constants = config.get_model_constants(model_settings) if constants is not None: locals_d.update(constants) diff --git a/activitysim/core/configuration/logit.py b/activitysim/core/configuration/logit.py index a1c305e532..58c52c900c 100644 --- a/activitysim/core/configuration/logit.py +++ b/activitysim/core/configuration/logit.py @@ -6,7 +6,7 @@ import pydantic from pydantic import BaseModel as PydanticBase -from pydantic import model_validator, validator +from pydantic import field_validator, model_validator, validator from activitysim.core.configuration.base import PreprocessorSettings, PydanticCompute @@ -261,7 +261,26 @@ class TourLocationComponentSettings(LocationComponentSettings, extra="forbid"): SEGMENT_IDS: dict[str, int] | dict[str, str] | dict[str, bool] | None = None SHADOW_PRICE_TABLE: str | None = None MODELED_SIZE_TABLE: str | None = None - SIMULATE_CHOOSER_COLUMNS: list[str] | None = None + SIMULATE_CHOOSER_COLUMNS: Any | None = None + """Was used to help reduce the memory needed for the model. + Setting is now obsolete and doesn't do anything. + Functionality was replaced by util.drop_unused_columns + + .. deprecated:: 1.4 + """ + + @field_validator("SIMULATE_CHOOSER_COLUMNS", mode="before") + @classmethod + def _warn_simulate_chooser_columns(cls, v): + if v is not None: + warnings.warn( + "SIMULATE_CHOOSER_COLUMNS is deprecated and replaced by " + "util.drop_unused_columns; value will be ignored.", + DeprecationWarning, + stacklevel=2, + ) + return None + ALT_DEST_COL_NAME: str LOGSUM_TOUR_PURPOSE: str | dict[str, str] | None = None MODEL_SELECTOR: str | None = None @@ -279,4 +298,22 @@ class TourModeComponentSettings(TemplatedLogitComponentSettings, extra="forbid") nontour_preprocessor: PreprocessorSettings | list[ PreprocessorSettings ] | None = None - LOGSUM_CHOOSER_COLUMNS: list[str] = [] + LOGSUM_CHOOSER_COLUMNS: Any | None = None + """Was used to help reduce the memory needed for the model. + Setting is now obsolete and doesn't do anything. + Functionality was replaced by util.drop_unused_columns + + .. deprecated:: 1.4 + """ + + @field_validator("LOGSUM_CHOOSER_COLUMNS", mode="before") + @classmethod + def _warn_logsum_chooser_columns(cls, v): + if v is not None: + warnings.warn( + "LOGSUM_CHOOSER_COLUMNS is deprecated and replaced by " + "util.drop_unused_columns; value will be ignored.", + DeprecationWarning, + stacklevel=2, + ) + return None diff --git a/activitysim/core/expressions.py b/activitysim/core/expressions.py index f8a255fb12..e05df7eda3 100644 --- a/activitysim/core/expressions.py +++ b/activitysim/core/expressions.py @@ -338,15 +338,3 @@ def annotate_tables( # write table with new columns back to state state.add_table(table_name, df) - - -def filter_chooser_columns(choosers, chooser_columns): - missing_columns = [c for c in chooser_columns if c not in choosers] - if missing_columns: - logger.debug("filter_chooser_columns missing_columns %s" % missing_columns) - - # ignore any columns not appearing in choosers df - chooser_columns = [c for c in chooser_columns if c in choosers] - - choosers = choosers[chooser_columns] - return choosers