diff --git a/.github/workflows/core_tests.yml b/.github/workflows/core_tests.yml index f3fe380ae2..17b3cbc2ae 100644 --- a/.github/workflows/core_tests.yml +++ b/.github/workflows/core_tests.yml @@ -275,11 +275,11 @@ jobs: - region: Standard 1-Zone Example (MTC) region-org: ActivitySim region-repo: activitysim-prototype-mtc - region-branch: extended + region-branch: pandas2 - region: Standard 2-Zone Example (SANDAG) region-org: ActivitySim region-repo: sandag-abm3-example - region-branch: main + region-branch: pandas2 fail-fast: false defaults: run: diff --git a/activitysim/abm/models/disaggregate_accessibility.py b/activitysim/abm/models/disaggregate_accessibility.py index e4d1ab9acb..bd1b291e68 100644 --- a/activitysim/abm/models/disaggregate_accessibility.py +++ b/activitysim/abm/models/disaggregate_accessibility.py @@ -158,7 +158,7 @@ class DisaggregateAccessibilitySettings(PydanticReadable, extra="forbid"): """ Disaggreate accessibility table is grouped by the "by" cols above and the KEEP_COLS are averaged across the group. Initializing the below as NA if not in the auto ownership level, they are skipped - in the groupby mean and the values are correct. + in the groupby mean and the values are correct. (It's a way to avoid having to update code to reshape the table and introduce new functionality there.) If none, will keep all of the columns with "accessibility" in the name. 
""" @@ -581,7 +581,7 @@ def expand_template_zones(self, tables): _expanded = pd.DataFrame(util.named_product(**index_params)).set_index("index") # Use result to join template onto expanded table of zones - ex_table = _expanded.join(master_template).reset_index() + ex_table = _expanded.join(master_template).sort_index().reset_index() # Concatenate a new unique set of ids cols = ["home_zone_id", "proto_household_id", "proto_person_id"] @@ -654,7 +654,9 @@ def create_proto_pop(self): .set_index("index") .rename(columns={"hhid": hhid}) ) - persons = rep.join(persons).sort_values(hhid).reset_index(drop=True) + persons = ( + rep.join(persons, sort=True).sort_values(hhid).reset_index(drop=True) + ) persons[perid] = persons.index + 1 # Assign persons to tours @@ -730,6 +732,7 @@ def merge_persons(self): perid = self.params["proto_persons"]["index_col"] persons_merged.set_index(perid, inplace=True, drop=True) + persons_merged = persons_merged.sort_index() self.proto_pop["proto_persons_merged"] = persons_merged # Store in pipeline diff --git a/activitysim/abm/models/input_checker.py b/activitysim/abm/models/input_checker.py index d1c8284f88..568da851e1 100644 --- a/activitysim/abm/models/input_checker.py +++ b/activitysim/abm/models/input_checker.py @@ -301,34 +301,42 @@ def report_errors(state, input_checker_settings, v_warnings, v_errors): for warn in warns: if "dataframe validator" in str(warn.message): - file_logger.warning( - "Failed dataframe validator: " - + str(warn.message).split("\n")[-1] - ) - elif "element-wise validator" in str(warn.message): - if "DataFrameSchema" in str(warn.message): - file_logger.warning( - "Failed element-wise validator: <" - + str(warn.message).split("\n")[0].split(" ")[1] - + table_name - + ")>\n\t" - + str(warn.message) - .split("failure cases:\n")[0] - .split("\n")[-2] - + "\n\tfailure cases:\n\t" - + "\n\t".join( - str(warn.message) - .split("failure cases:\n")[1] - .split("\n") - ) - ) - else: + try: file_logger.warning( - "Failed 
element-wise validator: <" - + " ".join(str(warn.message).split("\n")[0].split(" ")[1:3]) - + "\n\t" - + "\n\t".join(str(warn.message).split("\n")[1:]) + "Failed dataframe validator: " + + str(warn.message).split("\n")[-1] ) + except Exception: + file_logger.warning(warn) + elif "element-wise validator" in str(warn.message): + try: + if "DataFrameSchema" in str(warn.message): + file_logger.warning( + "Failed element-wise validator: <" + + str(warn.message).split("\n")[0].split(" ")[1] + + table_name + + ")>\n\t" + + str(warn.message) + .split("failure cases:\n")[0] + .split("\n")[-2] + + "\n\tfailure cases:\n\t" + + "\n\t".join( + str(warn.message) + .split("failure cases:\n")[1] + .split("\n") + ) + ) + else: + file_logger.warning( + "Failed element-wise validator: <" + + " ".join( + str(warn.message).split("\n")[0].split(" ")[1:3] + ) + + "\n\t" + + "\n\t".join(str(warn.message).split("\n")[1:]) + ) + except Exception: + file_logger.warning(warn) else: file_logger.warning(warn) file_logger.warning("\n") diff --git a/activitysim/abm/models/school_escorting.py b/activitysim/abm/models/school_escorting.py index 22fea9e520..a0cf6a3312 100644 --- a/activitysim/abm/models/school_escorting.py +++ b/activitysim/abm/models/school_escorting.py @@ -634,7 +634,7 @@ def school_escorting( state.add_table("tours", tours) state.get_rn_generator().drop_channel("tours") state.get_rn_generator().add_channel("tours", tours) - state.add_table("escort_bundles", escort_bundles) + state.add_table("escort_bundles", escort_bundles.reset_index(drop=True)) # save school escorting tours and trips in pipeline so we can overwrite results from downstream models state.add_table("school_escort_tours", school_escort_tours) state.add_table("school_escort_trips", school_escort_trips) diff --git a/activitysim/abm/models/trip_departure_choice.py b/activitysim/abm/models/trip_departure_choice.py index a0ddb363d8..236a755dec 100644 --- a/activitysim/abm/models/trip_departure_choice.py +++ 
b/activitysim/abm/models/trip_departure_choice.py @@ -404,7 +404,7 @@ def apply_stage_two_model( trace_label: str, compute_settings: ComputeSettings | None = None, ): - if not trips.index.is_monotonic: + if not trips.index.is_monotonic_increasing: trips = trips.sort_index() # Assign the duration of the appropriate leg to the trip diff --git a/activitysim/abm/models/util/school_escort_tours_trips.py b/activitysim/abm/models/util/school_escort_tours_trips.py index 35a0e84b5c..4a9f2ef170 100644 --- a/activitysim/abm/models/util/school_escort_tours_trips.py +++ b/activitysim/abm/models/util/school_escort_tours_trips.py @@ -353,7 +353,7 @@ def create_chauf_escort_trips(bundles): "outbound", "purpose", ] - ).reset_index() + ).reset_index(drop=True) # numbering trips such that outbound escorting trips must come first and inbound trips must come last outbound_trip_num = -1 * ( @@ -539,7 +539,7 @@ def create_escortee_trips(bundles): # create a new trip for each escortee destination escortee_trips = escortee_trips.explode( ["destination", "escort_participants", "school_escort_trip_num", "purpose"] - ).reset_index() + ).reset_index(drop=True) # numbering trips such that outbound escorting trips must come first and inbound trips must come last # this comes in handy when merging trips to others in the tour decided downstream diff --git a/activitysim/abm/models/vehicle_allocation.py b/activitysim/abm/models/vehicle_allocation.py index 632839b116..8dfb35bfd4 100644 --- a/activitysim/abm/models/vehicle_allocation.py +++ b/activitysim/abm/models/vehicle_allocation.py @@ -261,6 +261,7 @@ def vehicle_allocation( ] # set choice for non-household vehicle option + choices["choice"] = choices["choice"].astype(veh_choice_dtype) choices.loc[ choices["alt_choice"] == alts_from_spec[-1], "choice" ] = alts_from_spec[-1] diff --git a/activitysim/cli/create.py b/activitysim/cli/create.py index b810cdcd2d..4275ad3a1d 100644 --- a/activitysim/cli/create.py +++ b/activitysim/cli/create.py @@ -2,6 
+2,7 @@ import glob import hashlib +import importlib.resources import logging import os import shutil @@ -21,14 +22,15 @@ def _example_path(resource): resource = os.path.join(EXAMPLES_DIR, resource) - path = pkg_resources.resource_filename(PACKAGE, resource) - - return path + return importlib.resources.as_file( + importlib.resources.files(PACKAGE).joinpath(resource) + ) def _load_manifest(): - with open(_example_path(MANIFEST), "r") as f: - manifest = yaml.safe_load(f.read()) + with _example_path(MANIFEST) as f_pth: + with open(f_pth, "r") as f: + manifest = yaml.safe_load(f.read()) assert manifest, f"error: could not load {MANIFEST}" return {example["name"]: example for example in manifest} @@ -177,8 +179,9 @@ def get_example( ) else: - for asset_path in glob.glob(_example_path(assets)): - copy_asset(asset_path, target_path, dirs_exist_ok=True) + with _example_path(assets) as pth: + for asset_path in glob.glob(str(pth)): + copy_asset(asset_path, target_path, dirs_exist_ok=True) print(f"copied! new project files are in {os.path.abspath(dest_path)}") diff --git a/activitysim/core/assign.py b/activitysim/core/assign.py index 4054b2aec9..c5f69d1d12 100644 --- a/activitysim/core/assign.py +++ b/activitysim/core/assign.py @@ -96,7 +96,36 @@ def read_assignment_spec( """ try: - cfg = pd.read_csv(file_name, comment="#") + # we use an explicit list of na_values, these are the values that + # Pandas version 1.5 recognized as NaN by default. Notably absent is + # 'None' which is used in some spec files to be the object `None` not + # the float value NaN. 
+ cfg = pd.read_csv( + file_name, + comment="#", + na_values=[ + "", + "#N/A", + "#N/A N/A", + "#NA", + "-1.#IND", + "-1.#QNAN", + "-NaN", + "-nan", + "1.#IND", + "1.#QNAN", + "<NA>", + "N/A", + "NA", + "NULL", + "NaN", + "n/a", + "nan", + "null", + ], + keep_default_na=False, + ) + except Exception as e: logger.error(f"Error reading spec file: {file_name}") logger.error(str(e)) diff --git a/activitysim/core/fast_eval.py b/activitysim/core/fast_eval.py new file mode 100644 index 0000000000..189db086c1 --- /dev/null +++ b/activitysim/core/fast_eval.py @@ -0,0 +1,106 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Any + +import pandas as pd +from pandas import eval as _eval + +if TYPE_CHECKING: + from collections.abc import Hashable, Iterator, Mapping, Sequence + + from pandas._typing import ArrayLike + + +def _get_cleaned_column_resolvers( + df: pd.DataFrame, raw: bool = True +) -> dict[Hashable, ArrayLike | pd.Series]: + """ + Return the special character free column resolvers of a dataframe. + + Column names with special characters are 'cleaned up' so that they can + be referred to by backtick quoting. + Used in :meth:`DataFrame.eval`. + """ + from pandas import Series + from pandas.core.computation.parsing import clean_column_name + + if isinstance(df, pd.Series): + return {clean_column_name(df.name): df} + + # CHANGED FROM PANDAS: do not even convert the arrays to pd.Series, just + # give the raw arrays to the compute engine. This is potentially a breaking + # change if any of the operations in the eval string require a pd.Series. + if raw: + # Performance tradeoff: in the dict below, we iterate over `df.items`, + # which yields tuples of (column_name, data as pd.Series). This is marginally + # slower than iterating over `df.columns` and `df._iter_column_arrays()`, + # but the latter is not in Pandas' public API, and may be removed in the future. 
+ return { + clean_column_name(k): v for k, v in df.items() if not isinstance(k, int) + } + + # CHANGED FROM PANDAS: do not call df.dtype inside the dict comprehension loop + # This update has been made in https://github.com/pandas-dev/pandas/pull/59573, + # but appears not to have been released yet as of pandas 2.2.3 + dtypes = df.dtypes + + return { + clean_column_name(k): Series( + v, copy=False, index=df.index, name=k, dtype=dtypes[k] + ).__finalize__(df) + for k, v in zip(df.columns, df._iter_column_arrays()) + if not isinstance(k, int) + } + + +def fast_eval(df: pd.DataFrame, expr: str, **kwargs) -> Any | None: + """ + Evaluate a string describing operations on DataFrame columns. + + Operates on columns only, not specific rows or elements. This allows + `eval` to run arbitrary code, which can make you vulnerable to code + injection if you pass user input to this function. + + This function is a wrapper that replaces :meth:`~pandas.DataFrame.eval` + with a more efficient version than in the default pandas library (as + of pandas 2.2.3). It is recommended to use this function instead of + :meth:`~pandas.DataFrame.eval` for better performance. However, if you + encounter issues with this function, you can switch back to the default + pandas eval by changing the function call from `fast_eval(df, ...)` to + `df.eval(...)`. + + Parameters + ---------- + expr : str + The expression string to evaluate. + **kwargs + See the documentation for :meth:`~pandas.DataFrame.eval` for complete + details on the keyword arguments accepted. + + Returns + ------- + ndarray, scalar, or pandas object + The result of the evaluation. 
+ """ + + inplace = False + kwargs["level"] = kwargs.pop("level", 0) + 1 + index_resolvers = df._get_index_resolvers() + column_resolvers = _get_cleaned_column_resolvers(df) + resolvers = column_resolvers, index_resolvers + if "target" not in kwargs: + kwargs["target"] = df + kwargs["resolvers"] = tuple(kwargs.get("resolvers", ())) + resolvers + + try: + return pd.Series( + _eval(expr, inplace=inplace, **kwargs), index=df.index, name=expr + ).__finalize__(df) + except Exception as e: + # Initially assume that the exception is caused by the potentially + # breaking change in _get_cleaned_column_resolvers, and try again + # TODO: what kind of exception should be caught here so it is less broad + column_resolvers = _get_cleaned_column_resolvers(df, raw=False) + resolvers = column_resolvers, index_resolvers + kwargs["resolvers"] = kwargs["resolvers"][:-2] + resolvers + return _eval(expr, inplace=inplace, **kwargs) diff --git a/activitysim/core/interaction_simulate.py b/activitysim/core/interaction_simulate.py index 1089de3edc..f14f0201d9 100644 --- a/activitysim/core/interaction_simulate.py +++ b/activitysim/core/interaction_simulate.py @@ -14,6 +14,7 @@ from activitysim.core import chunk, logit, simulate, tracing, util, workflow from activitysim.core.configuration.base import ComputeSettings +from activitysim.core.fast_eval import fast_eval logger = logging.getLogger(__name__) @@ -287,7 +288,7 @@ def to_series(x): if expr.startswith("@"): v = to_series(eval(expr[1:], globals(), locals_d)) else: - v = df.eval(expr, resolvers=[locals_d]) + v = fast_eval(df, expr, resolvers=[locals_d]) if check_for_variability and v.std() == 0: logger.info( @@ -556,7 +557,7 @@ def to_series(x): if expr.startswith("@"): v = to_series(eval(expr[1:], globals(), locals_d)) else: - v = df.eval(expr, resolvers=[locals_d]) + v = fast_eval(df, expr, resolvers=[locals_d]) if check_for_variability and v.std() == 0: logger.info( "%s: no variability (%s) in: %s" diff --git a/activitysim/core/los.py 
b/activitysim/core/los.py index 8586a3018c..5ac90f930e 100644 --- a/activitysim/core/los.py +++ b/activitysim/core/los.py @@ -780,7 +780,15 @@ def get_mazpairs(self, omaz, dmaz, attribute): self.maz_ceiling ) + np.asanyarray(dmaz, dtype=np.int64) else: - i = np.asanyarray(omaz) * self.maz_ceiling + np.asanyarray(dmaz) + # if we have less than a 32-bit index, it will + # overflow so we need to upgrade to at least 32 bit + omaz_as_array = np.asanyarray(omaz) + if omaz_as_array.dtype not in (np.int32, np.int64): + omaz_as_array = omaz_as_array.astype(np.int32) + dmaz_as_array = np.asanyarray(dmaz) + if dmaz_as_array.dtype not in (np.int32, np.int64): + dmaz_as_array = dmaz_as_array.astype(np.int32) + i = omaz_as_array * self.maz_ceiling + dmaz_as_array s = util.quick_loc_df(i, self.maz_to_maz_df, attribute) # FIXME - no point in returning series? diff --git a/activitysim/core/simulate.py b/activitysim/core/simulate.py index fcffcf6066..2afed7f014 100644 --- a/activitysim/core/simulate.py +++ b/activitysim/core/simulate.py @@ -32,6 +32,7 @@ TemplatedLogitComponentSettings, ) from activitysim.core.estimation import Estimator +from activitysim.core.fast_eval import fast_eval from activitysim.core.simulate_consts import ( ALT_LOSER_UTIL, SPEC_DESCRIPTION_NAME, @@ -656,7 +657,7 @@ def eval_utilities( if expr.startswith("@"): expression_value = eval(expr[1:], globals_dict, locals_dict) else: - expression_value = choosers.eval(expr) + expression_value = fast_eval(choosers, expr) if len(w) > 0: for wrn in w: @@ -913,7 +914,7 @@ def to_array(x): if expr.startswith("@"): expr_values = to_array(eval(expr[1:], globals_dict, locals_dict)) else: - expr_values = to_array(df.eval(expr)) + expr_values = to_array(fast_eval(df, expr)) # read model spec should ensure uniqueness, otherwise we should uniquify assert expr not in values values[expr] = expr_values diff --git a/activitysim/core/test/_tools.py b/activitysim/core/test/_tools.py index b8f338ce9f..618b467bb9 100644 --- 
a/activitysim/core/test/_tools.py +++ b/activitysim/core/test/_tools.py @@ -165,8 +165,8 @@ def progressive_checkpoint_test( if ref_target.exists(): try: state.checkpoint.check_against(ref_target, checkpoint_name=step_name) - except Exception: - print(f"> {name} {step_name}: ERROR") + except Exception as e: + print(f"> {name} {step_name}: ERROR {e}") raise else: print(f"> {name} {step_name}: ok") diff --git a/activitysim/core/util.py b/activitysim/core/util.py index eb874f2841..940796e4bd 100644 --- a/activitysim/core/util.py +++ b/activitysim/core/util.py @@ -289,7 +289,7 @@ def quick_loc_series(loc_list, target_series): left_on = "left" - if isinstance(loc_list, pd.Int64Index): + if isinstance(loc_list, pd.Index): left_df = pd.DataFrame({left_on: loc_list.values}) elif isinstance(loc_list, pd.Series): left_df = loc_list.to_frame(name=left_on) diff --git a/activitysim/core/workflow/state.py b/activitysim/core/workflow/state.py index dd81534605..45aa7a1f74 100644 --- a/activitysim/core/workflow/state.py +++ b/activitysim/core/workflow/state.py @@ -711,7 +711,18 @@ def get_pyarrow( if t is None: raise KeyError(tablename) if isinstance(t, pd.DataFrame): - t = pa.Table.from_pandas(t, preserve_index=True, columns=columns) + df = t + try: + t = pa.Table.from_pandas(df, preserve_index=True, columns=columns) + except (pa.ArrowTypeError, pa.ArrowInvalid): + # if there are object columns, try to convert them to categories + df = df.copy() + for k, dtype in df.dtypes.items(): + if dtype.kind == "O": + df[k] = df[k].astype("str") + elif dtype == "boolean": + df[k] = df[k].astype("str") + t = pa.Table.from_pandas(df, preserve_index=True, columns=columns) if isinstance(t, pa.Table): if columns is not None: t = t.select(columns) diff --git a/activitysim/estimation/larch/scheduling.py b/activitysim/estimation/larch/scheduling.py index c8600f8136..7d9461b5ca 100644 --- a/activitysim/estimation/larch/scheduling.py +++ b/activitysim/estimation/larch/scheduling.py @@ -8,6 +8,8 @@ 
import pandas as pd import yaml +from activitysim.core.fast_eval import fast_eval + from .general import ( apply_coefficients, construct_nesting_tree, @@ -210,7 +212,7 @@ def construct_availability_ca(model, chooser_data, alt_codes_to_names): ( chooser_data[i.data] if i.data in chooser_data - else chooser_data.eval(i.data) + else fast_eval(chooser_data, i.data) ) for i in model.utility_co[acode] if (i.param == "-999" or i.param == "-999.0") diff --git a/activitysim/estimation/larch/simple_simulate.py b/activitysim/estimation/larch/simple_simulate.py index 2c585506a4..e608a50084 100644 --- a/activitysim/estimation/larch/simple_simulate.py +++ b/activitysim/estimation/larch/simple_simulate.py @@ -7,6 +7,8 @@ import pandas as pd import yaml +from activitysim.core.fast_eval import fast_eval + from .general import ( apply_coefficients, construct_nesting_tree, @@ -43,7 +45,7 @@ def construct_availability(model, chooser_data, alt_codes_to_names): ( chooser_data[i.data] if i.data in chooser_data - else chooser_data.eval(i.data) + else fast_eval(chooser_data, i.data) ) for i in model.utility_co[acode] if i.param == "-999" diff --git a/activitysim/examples/placeholder_sandag/test/test_sandag.py b/activitysim/examples/placeholder_sandag/test/test_sandag.py index a907333ee0..cda0d08725 100644 --- a/activitysim/examples/placeholder_sandag/test/test_sandag.py +++ b/activitysim/examples/placeholder_sandag/test/test_sandag.py @@ -195,6 +195,57 @@ def test_2_zone(data): run_test(zone="2", multiprocess=False) +def test_2_zone_local_compute(data): + def _test_path(dirname): + return os.path.join(os.path.dirname(__file__), dirname) + + import activitysim.abm # register components # noqa: F401 + + state = workflow.State.make_default( + data_dir=example_path("data_2"), + configs_dir=( + _test_path("configs_2_zone"), + example_path("configs_2_zone"), + psrc_example_path("configs"), + ), + output_dir=_test_path("output_2"), + ) + state.run.all(resume_after=None) + # ## regress tours + 
regress_tours_df = pd.read_csv(_test_path(f"regress/final_2_zone_tours.csv")) + tours_df = pd.read_csv(_test_path(f"output_2/final_2_zone_tours.csv")) + tours_df.to_csv( + _test_path(f"regress/final_2_zone_tours_last_run_localcompute.csv"), index=False + ) + test.assert_frame_substantively_equal( + tours_df, regress_tours_df, rtol=1e-03, check_dtype=False + ) + + # ## regress trips + regress_trips_df = pd.read_csv(_test_path(f"regress/final_2_zone_trips.csv")) + trips_df = pd.read_csv(_test_path(f"output_2/final_2_zone_trips.csv")) + trips_df.to_csv(_test_path(f"regress/final_2_zone_trips_last_run.csv"), index=False) + test.assert_frame_substantively_equal( + trips_df, regress_trips_df, rtol=1e-03, check_dtype=False + ) + + # also test accessibility for the 2-zone system + regress_accessibility_df = pd.read_csv( + _test_path(f"regress/final_2_zone_proto_disaggregate_accessibility.csv") + ) + final_accessibility_df = pd.read_csv( + _test_path(f"output_2/final_2_zone_proto_disaggregate_accessibility.csv") + ) + final_accessibility_df = final_accessibility_df[ + [c for c in final_accessibility_df.columns if not c.startswith("_original_")] + ] + test.assert_frame_substantively_equal( + final_accessibility_df, + regress_accessibility_df, + check_dtype=False, + ) + + def test_2_zone_norecode(data): run_test(zone="2", multiprocess=False, recode=False) diff --git a/activitysim/examples/prototype_mtc_extended/configs/trip_mode_choice_annotate_trips_preprocessor.csv b/activitysim/examples/prototype_mtc_extended/configs/trip_mode_choice_annotate_trips_preprocessor.csv index fefe54a13d..df7059cd85 100644 --- a/activitysim/examples/prototype_mtc_extended/configs/trip_mode_choice_annotate_trips_preprocessor.csv +++ b/activitysim/examples/prototype_mtc_extended/configs/trip_mode_choice_annotate_trips_preprocessor.csv @@ -19,7 +19,7 @@ Description,Target,Expression ,tour_mode_is_drive_transit,i_tour_mode.isin(I_DRIVE_TRANSIT_MODES) 
,tour_mode_is_ride_hail,i_tour_mode.isin(I_RIDE_HAIL_MODES) #,, -,selected_tour_vehicle,"reindex(tours.selected_vehicle, df.tour_id)" +,selected_tour_vehicle,"reindex(tours.selected_vehicle, df.tour_id).astype(vehicles.vehicle_type.dtype)" ,auto_op_cost,"reindex(vehicles.groupby('vehicle_type')['auto_operating_cost'].mean(), pd.Series(selected_tour_vehicle, df.index))" ,auto_op_cost,"np.where(pd.isna(auto_op_cost), costPerMile, auto_op_cost)" ,inbound,~df.outbound @@ -92,4 +92,4 @@ dest terminal time not counted at home,_dest_terminal_time,"np.where(inbound & l #,dist_bike,od_skims['DISTBIKE'] #,dist_only,od_skims['DIST'] # added for school escorting model,, -Number of school children in vehicle on trip,num_escortees,df.escort_participants.fillna('').apply(lambda x: len(x.split('_'))) \ No newline at end of file +Number of school children in vehicle on trip,num_escortees,df.escort_participants.fillna('').apply(lambda x: len(x.split('_'))) diff --git a/activitysim/examples/prototype_mtc_extended/test/prototype_mtc_extended_reference_pipeline.zip b/activitysim/examples/prototype_mtc_extended/test/prototype_mtc_extended_reference_pipeline.zip index 62fa47d659..41ed28ba26 100644 Binary files a/activitysim/examples/prototype_mtc_extended/test/prototype_mtc_extended_reference_pipeline.zip and b/activitysim/examples/prototype_mtc_extended/test/prototype_mtc_extended_reference_pipeline.zip differ diff --git a/activitysim/workflows/steps/contrast/transform_data.py b/activitysim/workflows/steps/contrast/transform_data.py index 05b777ea21..a3c8b11c2c 100644 --- a/activitysim/workflows/steps/contrast/transform_data.py +++ b/activitysim/workflows/steps/contrast/transform_data.py @@ -6,6 +6,8 @@ import pandas as pd from pypyr.context import Context +from activitysim.core.fast_eval import fast_eval + from ..progression import reset_progress_step from ..wrapping import workstep @@ -32,7 +34,7 @@ def transform_data( if eval is not None: for key, tableset in tablesets.items(): for 
target, expr in eval.items(): - tableset[tablename][target] = tableset[tablename].eval(expr) + tableset[tablename][target] = fast_eval(tableset[tablename], expr) return dict(tablesets=tablesets) # collect all series into a common vector, so bins are common diff --git a/conda-environments/activitysim-dev-base.yml b/conda-environments/activitysim-dev-base.yml index 489de345b3..74df60783d 100644 --- a/conda-environments/activitysim-dev-base.yml +++ b/conda-environments/activitysim-dev-base.yml @@ -21,7 +21,7 @@ dependencies: - bump2version # for making a release - coveralls - cytoolz = 0.12.* -- dask = 2023.3.* +- dask = 2023.11.* - descartes - filelock - fsspec @@ -40,11 +40,11 @@ dependencies: - nbmake - numba = 0.57.* - numexpr -- numpy = 1.23.* +- numpy = 1.24.* - numpydoc - openmatrix = 0.3.* - orca = 1.8 -- pandas = 1.4.* +- pandas = 2.2.* - pandera >= 0.15, <0.18.1 - platformdirs = 3.2.* - pre-commit @@ -55,7 +55,7 @@ dependencies: - pydata-sphinx-theme - pyinstrument = 4.4 - pypyr = 5.8.* -- pytables >=3.7 +- pytables >=3.9 - pytest = 7.2.* - pytest-cov - pytest-regressions @@ -72,9 +72,9 @@ dependencies: - sphinx = 6.1.* - sphinx_rtd_theme = 1.2.* - sphinx-argparse = 0.4.* -- xarray = 2023.2.* +- xarray = 2025.01.* - xmle -- zarr = 2.14.* +- zarr>=2,<3 - zstandard - pip: diff --git a/conda-environments/activitysim-dev.yml b/conda-environments/activitysim-dev.yml index b69a69f239..50d67cfc2a 100644 --- a/conda-environments/activitysim-dev.yml +++ b/conda-environments/activitysim-dev.yml @@ -17,7 +17,7 @@ dependencies: - bump2version # for making a release - coveralls - cytoolz = 0.12.* -- dask = 2023.3.* +- dask = 2023.11.* - descartes - filelock - fsspec @@ -36,11 +36,11 @@ dependencies: - nbmake - numba = 0.57.* - numexpr -- numpy = 1.23.* +- numpy = 1.24.* - numpydoc - openmatrix = 0.3.* - orca = 1.8 -- pandas = 1.4.* +- pandas = 2.2.* - pandera >= 0.15, <0.18.1 - platformdirs = 3.2.* - pre-commit @@ -51,7 +51,7 @@ dependencies: - pydata-sphinx-theme - 
pyinstrument = 4.4 - pypyr = 5.8.* -- pytables >=3.7 +- pytables >=3.9 - pytest = 7.2.* - pytest-cov - pytest-regressions @@ -69,9 +69,9 @@ dependencies: - sphinx = 6.1.* - sphinx_rtd_theme = 1.2.* - sphinx-argparse = 0.4.* -- xarray = 2023.2.* +- xarray = 2025.01.* - xmle -- zarr = 2.14.* +- zarr>=2,<3 - zstandard - pip: diff --git a/conda-environments/docbuild.yml b/conda-environments/docbuild.yml index df20b77291..275709c4e3 100644 --- a/conda-environments/docbuild.yml +++ b/conda-environments/docbuild.yml @@ -39,7 +39,7 @@ dependencies: - pyarrow >= 2.0,<19 - pydantic = 2.6.* - pypyr >= 5.3 -- pytables >=3.7 +- pytables >=3.9 - pytest - pytest-cov - pytest-regressions @@ -54,8 +54,8 @@ dependencies: - sphinx-copybutton - sphinx-remove-toctrees - sphinx_rtd_theme -- xarray >= 0.21 -- zarr +- xarray = 2025.01.* +- zarr>=2,<3 - pip: - autodoc_pydantic diff --git a/conda-environments/github-actions-tests.yml b/conda-environments/github-actions-tests.yml index 4489cba886..019bcccda3 100644 --- a/conda-environments/github-actions-tests.yml +++ b/conda-environments/github-actions-tests.yml @@ -10,22 +10,22 @@ dependencies: - black = 22.12.0 - coveralls = 3.3.1 - cytoolz = 0.12.2 -- dask = 2023.3.2 +- dask = 2023.11.* - isort = 5.12.0 - multimethod <2.0 - nbmake = 1.4.6 - numba = 0.57.* -- numpy = 1.23.5 +- numpy = 1.24.* - openmatrix = 0.3.5.0 - orca = 1.8 - pandera >= 0.15, <0.18.1 -- pandas = 1.4.* +- pandas = 2.2.* - platformdirs = 3.2.* - psutil = 5.9.* - pyarrow = 11.* - pydantic = 2.6.* - pypyr = 5.8.* -- pytables >= 3.7 +- pytables >= 3.9 - pytest = 7.2.* - pytest-cov - pytest-regressions @@ -36,6 +36,6 @@ dependencies: - sharrow >= 2.9.1 - simwrapper > 1.7 - sparse -- xarray = 2023.2.* -- zarr = 2.14.* +- xarray = 2025.01.* +- zarr>=2,<3 - zstandard diff --git a/pyproject.toml b/pyproject.toml index 693a2ac03d..6a76cacb26 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,7 +15,7 @@ dependencies = [ "numba >= 0.57", "numpy >= 1.16.1, <2", "openmatrix >= 
0.3.4.1", - "pandas >= 1.4, <2", + "pandas >= 2.2", "pandera >=0.15, <0.18.1", "platformdirs", "psutil >= 4.1", @@ -29,7 +29,7 @@ dependencies = [ "simwrapper > 1.7", "sparse", "tables >= 3.9", - "xarray >= 0.21" + "xarray >= 2024.05", ] readme = "README.md" requires-python = ">=3.10"