Expression profiling (legacy mode) #936

Open
wants to merge 28 commits into main
Commits (28)
c6e2194
expression profiling
jpn-- Apr 19, 2025
f1e1971
two reports
jpn-- Apr 19, 2025
770b336
make simple report sortable
jpn-- Apr 19, 2025
136a12c
change default to grid
jpn-- Apr 19, 2025
642b486
configurable expression_profile_cutoff
jpn-- Apr 19, 2025
877f279
works with MP
jpn-- Apr 19, 2025
25564f0
documentation
jpn-- Apr 20, 2025
219541d
Merge branch 'main' into expression-profiling
jpn-- May 20, 2025
d9d11ae
Merge branch 'main' into expression-profiling
jpn-- May 31, 2025
a7761a1
Merge branch 'main' into expression-profiling
jpn-- Jun 27, 2025
4f8a54a
Merge branch 'main' into expression-profiling
jpn-- Jul 17, 2025
ea7914b
Merge branch 'main' into expression-profiling
jpn-- Jul 21, 2025
0dad230
expression_profile_style
jpn-- Jul 21, 2025
e2b1330
performance_log can be bool
jpn-- Jul 21, 2025
97cc401
fix for pandas 2
jpn-- Jul 21, 2025
a482807
test expr profiling
jpn-- Jul 21, 2025
803f04d
test expression profiling on mtc and semcog examples
jpn-- Jul 25, 2025
fd440bd
set encoding to utf8 because windows
jpn-- Jul 25, 2025
6aaed14
add missing import
jpn-- Jul 25, 2025
77af2b3
add warning about path length in error message
jpn-- Jul 25, 2025
b8602b0
oops, .log not .csv
jpn-- Jul 25, 2025
29d535a
Merge branch 'main' into expression-profiling
jpn-- Jul 29, 2025
1ed0396
add timestamping to expression profile logs
jpn-- Aug 4, 2025
afa1f64
Merge branch 'main' into expression-profiling
jpn-- Aug 4, 2025
e9feab5
fix file naming, update tests
jpn-- Aug 4, 2025
db7b767
all subprocesses to same timestamp
jpn-- Aug 13, 2025
f2dd341
Merge branch 'main' into expression-profiling
jpn-- Aug 13, 2025
cd7eaac
Merge branch 'main' into expression-profiling
jpn-- Aug 16, 2025
33 changes: 33 additions & 0 deletions .github/workflows/core_tests.yml
@@ -321,6 +321,39 @@ jobs:
        run: |
          uv run pytest activitysim/estimation/test/test_larch_estimation.py --durations=0

+  expression-profiling:
+    needs: foundation
+    env:
+      python-version: "3.10"
+      label: win-64
+    defaults:
+      run:
+        shell: pwsh
+    name: Expression Profiling Tests
+    runs-on: windows-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v5
+        with:
+          version: "0.7.12"
+          enable-cache: true
+          cache-dependency-glob: "uv.lock"
+
+      - name: "Set up Python"
+        uses: actions/setup-python@v5
+        with:
+          python-version-file: ".python-version"
+
+      - name: Install activitysim
+        run: |
+          uv sync --locked --only-group github-action
+
+      - name: Test Expression Profiling
+        run: |
+          uv run pytest test/test_expression_profiling.py
+
  develop-docbuild:
    needs: foundation
    if: github.ref_name == 'main' || github.ref_name == 'docs-fix'
17 changes: 14 additions & 3 deletions activitysim/abm/models/summarize.py
@@ -4,11 +4,12 @@

import logging
import os
+from pathlib import Path

import numpy as np
import pandas as pd

-from activitysim.core import expressions, workflow
+from activitysim.core import expressions, timing, workflow
from activitysim.core.configuration.base import PreprocessorSettings, PydanticReadable
from activitysim.core.los import Network_LOS

@@ -353,6 +354,12 @@ def summarize(
        }
    )

+    if state.settings.expression_profile:
+        perf_log_file = Path(trace_label + ".log")
+    else:
+        perf_log_file = None
+    performance_timer = timing.EvalTiming(perf_log_file)
+
    for i, row in spec.iterrows():
        out_file = row["Output"]
        expr = row["Expression"]
@@ -361,15 +368,19 @@
        if out_file.startswith("_"):
            logger.debug(f"Temp Variable: {expr} -> {out_file}")

-            locals_d[out_file] = eval(expr, globals(), locals_d)
+            with performance_timer.time_expression(expr):
+                locals_d[out_file] = eval(expr, globals(), locals_d)
            continue

        logger.debug(f"Summary: {expr} -> {out_file}.csv")

-        resultset = eval(expr, globals(), locals_d)
+        with performance_timer.time_expression(expr):
+            resultset = eval(expr, globals(), locals_d)
        resultset.to_csv(
            state.get_output_file_path(
                os.path.join(output_location, f"{out_file}.csv")
            ),
            index=False,
        )
+
+    performance_timer.write_log(state)
19 changes: 18 additions & 1 deletion activitysim/cli/run.py
@@ -8,10 +8,11 @@
import os
import sys
import warnings
+from datetime import datetime

import numpy as np

-from activitysim.core import chunk, config, mem, tracing, workflow
+from activitysim.core import chunk, config, mem, timing, tracing, workflow
from activitysim.core.configuration import FileSystem, Settings

from activitysim.abm.models.settings_checker import check_model_settings
@@ -27,6 +28,7 @@
    "cache_dir",
    "settings_file_name",
    "imported_extensions",
+    "run_timestamp",
]


@@ -281,6 +283,14 @@ def run(args):
    if state.settings.rotate_logs:
        state.logging.rotate_log_directory()

+    # set a run timestamp
+    timestamp = state.get("run_timestamp", None)
+    if timestamp is None:
+        # if no run timestamp, use current time, and store it so
+        # it can be used later in the same run
+        timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
+        state.set("run_timestamp", timestamp)
+
    if state.settings.memory_profile and not state.settings.multiprocess:
        # Memory sidecar is only useful for single process runs
        # multiprocess runs log memory usage without blocking in the controlling process.
@@ -450,6 +460,13 @@ def run(args):
    if memory_sidecar_process:
        memory_sidecar_process.stop()

+    if state.settings.expression_profile:
+        # generate a summary of slower expression evaluation times
+        # across all models and write to a file
+        analyze = timing.AnalyzeEvalTiming(state)
+        analyze.component_report(style=state.settings.expression_profile_style)
+        analyze.subcomponent_report(style=state.settings.expression_profile_style)
+
    return 0


49 changes: 30 additions & 19 deletions activitysim/core/assign.py
@@ -5,11 +5,12 @@
import logging
from builtins import object, zip
from collections import OrderedDict
+from pathlib import Path

import numpy as np
import pandas as pd

-from activitysim.core import chunk, util, workflow
+from activitysim.core import chunk, timing, util, workflow

logger = logging.getLogger(__name__)

@@ -275,6 +276,12 @@ def to_series(x):

    assert assignment_expressions.shape[0] > 0

+    if state.settings.expression_profile:
+        perf_log_file = Path(trace_label + ".log")
+    else:
+        perf_log_file = None
+    performance_timer = timing.EvalTiming(perf_log_file)
+
    trace_assigned_locals = trace_results = None
    if trace_rows is not None:
        # convert to numpy array so we can slice ndarrays as well as series
@@ -311,24 +318,25 @@
            n_randoms += 1
            assignment_expressions.loc[expression_idx, "expression"] = expression
    if n_randoms:
-        try:
-            random_draws = state.get_rn_generator().normal_for_df(
-                df, broadcast=True, size=n_randoms
-            )
-        except RuntimeError:
-            pass
-        else:
-            _locals_dict["random_draws"] = random_draws
+        with performance_timer.time_expression("<RANDOM DRAWS>"):
+            try:
+                random_draws = state.get_rn_generator().normal_for_df(
+                    df, broadcast=True, size=n_randoms
+                )
+            except RuntimeError:
+                pass
+            else:
+                _locals_dict["random_draws"] = random_draws

-            def rng_lognormal(random_draws, mu, sigma, broadcast=True, scale=False):
-                if scale:
-                    x = 1 + ((sigma * sigma) / (mu * mu))
-                    mu = np.log(mu / (np.sqrt(x)))
-                    sigma = np.sqrt(np.log(x))
-                assert broadcast
-                return np.exp(random_draws * sigma + mu)
+                def rng_lognormal(random_draws, mu, sigma, broadcast=True, scale=False):
+                    if scale:
+                        x = 1 + ((sigma * sigma) / (mu * mu))
+                        mu = np.log(mu / (np.sqrt(x)))
+                        sigma = np.sqrt(np.log(x))
+                    assert broadcast
+                    return np.exp(random_draws * sigma + mu)

-            _locals_dict["rng_lognormal"] = rng_lognormal
+                _locals_dict["rng_lognormal"] = rng_lognormal

    sharrow_enabled = state.settings.sharrow

@@ -356,7 +364,8 @@ def rng_lognormal(random_draws, mu, sigma, broadcast=True, scale=False):

        if is_temp_singular(target) or is_throwaway(target):
            try:
-                x = eval(expression, globals(), _locals_dict)
+                with performance_timer.time_expression(expression):
+                    x = eval(expression, globals(), _locals_dict)
            except Exception as err:
                logger.error(
                    "assign_variables error: %s: %s", type(err).__name__, str(err)
@@ -384,7 +393,8 @@ def rng_lognormal(random_draws, mu, sigma, broadcast=True, scale=False):

            # FIXME should whitelist globals for security?
            globals_dict = {}
-            expr_values = to_series(eval(expression, globals_dict, _locals_dict))
+            with performance_timer.time_expression(expression):
+                expr_values = to_series(eval(expression, globals_dict, _locals_dict))

            if sharrow_enabled:
                if isinstance(expr_values.dtype, pd.api.types.CategoricalDtype):
@@ -459,4 +469,5 @@ def rng_lognormal(random_draws, mu, sigma, broadcast=True, scale=False):
        inplace=True,
    )

+    performance_timer.write_log(state)
    return variables, trace_results, trace_assigned_locals
22 changes: 22 additions & 0 deletions activitysim/core/configuration/base.py
@@ -234,6 +234,28 @@ def pandas_option_context(self):
        else:
            yield

+    performance_log: Path | bool | None = None
+    """Log runtime performance to this file.
+
+    The runtime performance log shows the time taken to evaluate each
+    expression in the specification files. It is useful for debugging
+    performance issues with complex expressions.
+
+    Giving a filename here will override the global performance logging
+    setting, and will log performance to the specified file, unless the
+    global performance logging setting is set to `False`, in which case
+    no performance logging will be done for any component.
+
+    If set to `True`, performance logging will be activated, and the filename
+    will be chosen based on the trace label for this component.
+
+    If this path is set to None (the default), performance logging will be
+    activated based on the global performance logging setting, and the filename
+    will be chosen based on the trace label for this component. This is strongly
+    recommended for most use cases, unless the trace label causes the filename
+    to be too long or otherwise unsuitable for the filesystem.
+    """
+
    def subcomponent_settings(self, subcomponent: str) -> ComputeSettings:
        """Get the sharrow settings for a particular subcomponent."""
        return ComputeSettings(
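For reference, a minimal sketch of how this per-component option might be set, assuming the component's model settings YAML accepts a `compute_settings` block (as referenced in the `expression_profile` docstring below); the file name and log name here are illustrative only:

```yaml
# hypothetical component settings file, e.g. configs/tour_mode_choice.yaml
compute_settings:
  # write this component's expression timings to a named log file ...
  performance_log: tour_mode_choice_timing.log
  # ... or set `performance_log: true` to name the file from the trace label,
  # or `performance_log: false` to opt this component out of profiling
```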
54 changes: 54 additions & 0 deletions activitysim/core/configuration/top.py
@@ -537,6 +537,60 @@ def _check_store_skims_in_shm(self):
    own that pollutes the collected data.
    """

+    expression_profile: bool | None = None
+    """
+    Track the runtime of each individual expression in each spec file.
+
+    .. versionadded:: 1.4
+
+    The default value of this setting is `None`, which sets no global control
+    of expression profiling, and allows this feature to be turned on or off
+    for individual components. If set to `True`, all components will have
+    expression profiling enabled, and the outputs will be written to files named
+    based on the trace label unless explicitly set in the `compute_settings` for
+    individual components. If set to `False`, all components will have expression
+    profiling disabled.
+
+    This is generally a developer-only feature and not needed for regular usage.
+    It will add some overhead to the model run, which is only worthwhile if you
+    expect to review the expression runtimes with an eye towards improving them.
+    Production model users should typically have this set explicitly to `False`.
+    """
+
+    expression_profile_style: Literal["simple", "grid"] = "grid"
+    """
+    The style of the expression profiling report.
+
+    .. versionadded:: 1.4
+
+    This setting controls the style of the expression profiling report that is
+    generated at the end of a model run when :py:attr:`expression_profile` is
+    `True`. The report summarizes the runtime of each expression in each spec
+    file, and can be used to identify slow or inefficient expressions.
+
+    The "simple" style generates a simple HTML table with the expression names,
+    runtimes, and other relevant information. This may be easier to import into
+    other tools (e.g. Excel) for further analysis if desired. The "grid" style
+    generates a JavaScript data grid that allows for sorting and filtering of the
+    expression runtimes, making it easier to analyze large sets of expressions
+    directly in a web browser, with no outside analysis tools needed.
+    """
+
+    expression_profile_cutoff: float = 0.1
+    """
+    Minimum runtime for an expression to be included in the expression profile.
+
+    .. versionadded:: 1.4
+
+    Expressions that take less than this amount of time to evaluate will not be
+    included in the summary report of expression profiling generated at the end
+    of a model run. For large scale models, this value can be increased to make
+    the report file smaller, as only the largest values will typically be of
+    interest.
+
+    This setting has no effect if :py:attr:`expression_profile` is not `True`.
+    """
+
    benchmarking: bool = False
    """
    Flag this model run as a benchmarking run.
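For orientation, a minimal sketch of how these three settings could appear together in a run's top-level settings file, assuming the YAML keys match the field names added above; the values are illustrative, not recommendations:

```yaml
# settings.yaml (illustrative values)
expression_profile: true          # time every expression in every spec file
expression_profile_style: grid    # or "simple" for a plain, easily exported HTML table
expression_profile_cutoff: 0.5    # omit faster expressions from the summary report
```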
1 change: 1 addition & 0 deletions activitysim/core/expressions.py
@@ -139,6 +139,7 @@ def compute_columns(
        df,
        _locals_dict,
        trace_rows=state.tracing.trace_targets(df),
+        trace_label=trace_label,
    )

    if trace_results is not None: