Expression profiling (legacy mode) #936

Open
wants to merge 28 commits into main
Commits (28)
c6e2194
expression profiling
jpn-- Apr 19, 2025
f1e1971
two reports
jpn-- Apr 19, 2025
770b336
make simple report sortable
jpn-- Apr 19, 2025
136a12c
change default to grid
jpn-- Apr 19, 2025
642b486
configurable expression_profile_cutoff
jpn-- Apr 19, 2025
877f279
works with MP
jpn-- Apr 19, 2025
25564f0
documentation
jpn-- Apr 20, 2025
219541d
Merge branch 'main' into expression-profiling
jpn-- May 20, 2025
d9d11ae
Merge branch 'main' into expression-profiling
jpn-- May 31, 2025
a7761a1
Merge branch 'main' into expression-profiling
jpn-- Jun 27, 2025
4f8a54a
Merge branch 'main' into expression-profiling
jpn-- Jul 17, 2025
ea7914b
Merge branch 'main' into expression-profiling
jpn-- Jul 21, 2025
0dad230
expression_profile_style
jpn-- Jul 21, 2025
e2b1330
performance_log can be bool
jpn-- Jul 21, 2025
97cc401
fix for pandas 2
jpn-- Jul 21, 2025
a482807
test expr profiling
jpn-- Jul 21, 2025
803f04d
test expression profiling on mtc and semcog examples
jpn-- Jul 25, 2025
fd440bd
set encoding to utf8 because windows
jpn-- Jul 25, 2025
6aaed14
add missing import
jpn-- Jul 25, 2025
77af2b3
add warning about path length in error message
jpn-- Jul 25, 2025
b8602b0
oops, .log not .csv
jpn-- Jul 25, 2025
29d535a
Merge branch 'main' into expression-profiling
jpn-- Jul 29, 2025
1ed0396
add timestamping to expression profile logs
jpn-- Aug 4, 2025
afa1f64
Merge branch 'main' into expression-profiling
jpn-- Aug 4, 2025
e9feab5
fix file naming, update tests
jpn-- Aug 4, 2025
db7b767
all subprocesses to same timestamp
jpn-- Aug 13, 2025
f2dd341
Merge branch 'main' into expression-profiling
jpn-- Aug 13, 2025
cd7eaac
Merge branch 'main' into expression-profiling
jpn-- Aug 16, 2025
33 changes: 33 additions & 0 deletions .github/workflows/core_tests.yml
@@ -321,6 +321,39 @@ jobs:
        run: |
          uv run pytest activitysim/estimation/test/test_larch_estimation.py --durations=0

+  expression-profiling:
+    needs: foundation
+    env:
+      python-version: "3.10"
+      label: win-64
+    defaults:
+      run:
+        shell: pwsh
+    name: Expression Profiling Tests
+    runs-on: windows-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v5
+        with:
+          version: "0.7.12"
+          enable-cache: true
+          cache-dependency-glob: "uv.lock"
+
+      - name: "Set up Python"
+        uses: actions/setup-python@v5
+        with:
+          python-version-file: ".python-version"
+
+      - name: Install activitysim
+        run: |
+          uv sync --locked --only-group github-action
+
+      - name: Test Expression Profiling
+        run: |
+          uv run pytest test/test_expression_profiling.py
+
  develop-docbuild:
    needs: foundation
    if: github.ref_name == 'main' || github.ref_name == 'docs-fix'
17 changes: 14 additions & 3 deletions activitysim/abm/models/summarize.py
@@ -4,11 +4,12 @@

import logging
import os
+from pathlib import Path

import numpy as np
import pandas as pd

-from activitysim.core import expressions, workflow
+from activitysim.core import expressions, timing, workflow
from activitysim.core.configuration.base import PreprocessorSettings, PydanticReadable
from activitysim.core.los import Network_LOS

@@ -353,6 +354,12 @@ def summarize(
        }
    )

+    if state.settings.expression_profile:
+        perf_log_file = Path(trace_label + ".log")
+    else:
+        perf_log_file = None
+    performance_timer = timing.EvalTiming(perf_log_file)
+
    for i, row in spec.iterrows():
        out_file = row["Output"]
        expr = row["Expression"]
@@ -361,15 +368,19 @@
        if out_file.startswith("_"):
            logger.debug(f"Temp Variable: {expr} -> {out_file}")

-            locals_d[out_file] = eval(expr, globals(), locals_d)
+            with performance_timer.time_expression(expr):
+                locals_d[out_file] = eval(expr, globals(), locals_d)
            continue

        logger.debug(f"Summary: {expr} -> {out_file}.csv")

-        resultset = eval(expr, globals(), locals_d)
+        with performance_timer.time_expression(expr):
+            resultset = eval(expr, globals(), locals_d)
        resultset.to_csv(
            state.get_output_file_path(
                os.path.join(output_location, f"{out_file}.csv")
            ),
            index=False,
        )
+
+    performance_timer.write_log(state)
19 changes: 18 additions & 1 deletion activitysim/cli/run.py
@@ -8,10 +8,11 @@
import os
import sys
import warnings
+from datetime import datetime

import numpy as np

-from activitysim.core import chunk, config, mem, tracing, workflow
+from activitysim.core import chunk, config, mem, timing, tracing, workflow
from activitysim.core.configuration import FileSystem, Settings

from activitysim.abm.models.settings_checker import check_model_settings
@@ -27,6 +28,7 @@
    "cache_dir",
    "settings_file_name",
    "imported_extensions",
+    "run_timestamp",
]


@@ -281,6 +283,14 @@ def run(args):
    if state.settings.rotate_logs:
        state.logging.rotate_log_directory()

+    # set a run timestamp
+    timestamp = state.get("run_timestamp", None)
+    if timestamp is None:
+        # if no run timestamp, use current time, and store it so
+        # it can be used later in the same run
+        timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
+        state.set("run_timestamp", timestamp)
+
    if state.settings.memory_profile and not state.settings.multiprocess:
        # Memory sidecar is only useful for single process runs
        # multiprocess runs log memory usage without blocking in the controlling process.
@@ -450,6 +460,13 @@ def run(args):
    if memory_sidecar_process:
        memory_sidecar_process.stop()

+    if state.settings.expression_profile:
+        # generate a summary of slower expression evaluation times
+        # across all models and write to a file
+        analyze = timing.AnalyzeEvalTiming(state)
+        analyze.component_report(style=state.settings.expression_profile_style)
+        analyze.subcomponent_report(style=state.settings.expression_profile_style)
+
    return 0


49 changes: 30 additions & 19 deletions activitysim/core/assign.py
@@ -5,11 +5,12 @@
import logging
from builtins import object, zip
from collections import OrderedDict
+from pathlib import Path

import numpy as np
import pandas as pd

-from activitysim.core import chunk, util, workflow
+from activitysim.core import chunk, timing, util, workflow

logger = logging.getLogger(__name__)

@@ -275,6 +276,12 @@ def to_series(x):

    assert assignment_expressions.shape[0] > 0

+    if state.settings.expression_profile:
+        perf_log_file = Path(trace_label + ".log")
+    else:
+        perf_log_file = None
+    performance_timer = timing.EvalTiming(perf_log_file)
+
    trace_assigned_locals = trace_results = None
    if trace_rows is not None:
        # convert to numpy array so we can slice ndarrays as well as series
@@ -311,24 +318,25 @@
            n_randoms += 1
            assignment_expressions.loc[expression_idx, "expression"] = expression
    if n_randoms:
-        try:
-            random_draws = state.get_rn_generator().normal_for_df(
-                df, broadcast=True, size=n_randoms
-            )
-        except RuntimeError:
-            pass
-        else:
-            _locals_dict["random_draws"] = random_draws
+        with performance_timer.time_expression("<RANDOM DRAWS>"):
+            try:
+                random_draws = state.get_rn_generator().normal_for_df(
+                    df, broadcast=True, size=n_randoms
+                )
+            except RuntimeError:
+                pass
+            else:
+                _locals_dict["random_draws"] = random_draws

-            def rng_lognormal(random_draws, mu, sigma, broadcast=True, scale=False):
-                if scale:
-                    x = 1 + ((sigma * sigma) / (mu * mu))
-                    mu = np.log(mu / (np.sqrt(x)))
-                    sigma = np.sqrt(np.log(x))
-                assert broadcast
-                return np.exp(random_draws * sigma + mu)
+                def rng_lognormal(random_draws, mu, sigma, broadcast=True, scale=False):
+                    if scale:
+                        x = 1 + ((sigma * sigma) / (mu * mu))
+                        mu = np.log(mu / (np.sqrt(x)))
+                        sigma = np.sqrt(np.log(x))
+                    assert broadcast
+                    return np.exp(random_draws * sigma + mu)

-            _locals_dict["rng_lognormal"] = rng_lognormal
+                _locals_dict["rng_lognormal"] = rng_lognormal

    sharrow_enabled = state.settings.sharrow

@@ -356,7 +364,8 @@ def rng_lognormal(random_draws, mu, sigma, broadcast=True, scale=False):

        if is_temp_singular(target) or is_throwaway(target):
            try:
-                x = eval(expression, globals(), _locals_dict)
+                with performance_timer.time_expression(expression):
+                    x = eval(expression, globals(), _locals_dict)
            except Exception as err:
                logger.error(
                    "assign_variables error: %s: %s", type(err).__name__, str(err)
@@ -384,7 +393,8 @@ def rng_lognormal(random_draws, mu, sigma, broadcast=True, scale=False):

            # FIXME should whitelist globals for security?
            globals_dict = {}
-            expr_values = to_series(eval(expression, globals_dict, _locals_dict))
+            with performance_timer.time_expression(expression):
+                expr_values = to_series(eval(expression, globals_dict, _locals_dict))

            if sharrow_enabled:
                if isinstance(expr_values.dtype, pd.api.types.CategoricalDtype):
@@ -459,4 +469,5 @@ def rng_lognormal(random_draws, mu, sigma, broadcast=True, scale=False):
        inplace=True,
    )

+    performance_timer.write_log(state)
    return variables, trace_results, trace_assigned_locals
22 changes: 22 additions & 0 deletions activitysim/core/configuration/base.py
@@ -234,6 +234,28 @@ def pandas_option_context(self):
        else:
            yield

+    performance_log: Path | bool | None = None
+    """Log runtime performance to this file.
+
+    The runtime performance log shows the time taken to evaluate each
+    expression in the specification files. It is useful for debugging
+    performance issues with complex expressions.
+
+    Giving a filename here will override the global performance logging
+    setting, and will log performance to the specified file, unless the
+    global performance logging setting is set to `False`, in which case
+    no performance logging will be done for any component.
+
+    If set to `True`, performance logging will be activated, and the filename
+    will be chosen based on the trace label for this component.
+
+    If this path is set to None (the default), performance logging will be
+    activated based on the global performance logging setting, and the filename
+    will be chosen based on the trace label for this component. This is strongly
+    recommended for most use cases, unless the trace label causes the filename
+    to be too long or otherwise unsuitable for the filesystem.
+    """
+
    def subcomponent_settings(self, subcomponent: str) -> ComputeSettings:
        """Get the sharrow settings for a particular subcomponent."""
        return ComputeSettings(
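For reference, a minimal sketch of how this per-component option might be set, assuming the component's model settings YAML accepts a `compute_settings` block (as referenced in the `expression_profile` docstring below); the file name and log name here are illustrative only:

```yaml
# hypothetical component settings file, e.g. configs/tour_mode_choice.yaml
compute_settings:
  # write this component's expression timings to a named log file ...
  performance_log: tour_mode_choice_timing.log
  # ... or set `performance_log: true` to name the file from the trace label,
  # or `performance_log: false` to opt this component out of profiling
```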
54 changes: 54 additions & 0 deletions activitysim/core/configuration/top.py
@@ -537,6 +537,60 @@ def _check_store_skims_in_shm(self):
    own that pollutes the collected data.
    """

+    expression_profile: bool | None = None
+    """
+    Track the runtime of each individual expression in each spec file.
+
+    .. versionadded:: 1.4
+
+    The default value of this setting is `None`, which sets no global control
+    of expression profiling, and allows this feature to be turned on or off
+    for individual components. If set to `True`, all components will have
+    expression profiling enabled, and the outputs will be written to files named
+    based on the trace label unless explicitly set in the `compute_settings` for
+    individual components. If set to `False`, all components will have expression
+    profiling disabled.
+
+    This is generally a developer-only feature and not needed for regular usage.
+    It will add some overhead to the model run, which is only worthwhile if you
+    expect to review the expression runtimes with an eye towards improving them.
+    Production model users should typically have this set explicitly to `False`.
+    """
+
+    expression_profile_style: Literal["simple", "grid"] = "grid"
+    """
+    The style of the expression profiling report.
+
+    .. versionadded:: 1.4
+
+    This setting controls the style of the expression profiling report that is
+    generated at the end of a model run when :py:attr:`expression_profile` is
+    `True`. The report summarizes the runtime of each expression in each spec
+    file, and can be used to identify slow or inefficient expressions.
+
+    The "simple" style generates a simple HTML table with the expression names,
+    runtimes, and other relevant information. This may be easier to import into
+    other tools (e.g. Excel) for further analysis if desired. The "grid" style
+    generates a JavaScript data grid that allows for sorting and filtering of the
+    expression runtimes, making it easier to analyze large sets of expressions
+    directly in a web browser, with no outside analysis tools needed.
+    """
+
+    expression_profile_cutoff: float = 0.1
+    """
+    Minimum runtime for an expression to be included in the expression profile.
+
+    .. versionadded:: 1.4
+
+    Expressions that take less than this amount of time to evaluate will not be
+    included in the summary report of expression profiling generated at the end
+    of a model run. For large scale models, this value can be increased to make
+    the report file smaller, as only the largest values will typically be of
+    interest.
+
+    This setting has no effect if :py:attr:`expression_profile` is not `True`.
+    """
+
    benchmarking: bool = False
    """
    Flag this model run as a benchmarking run.
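For orientation, a minimal sketch of how these three settings could appear together in a run's top-level settings file, assuming the YAML keys match the field names added above; the values are illustrative, not recommendations:

```yaml
# settings.yaml (illustrative values)
expression_profile: true          # time every expression in every spec file
expression_profile_style: grid    # or "simple" for a plain, easily exported HTML table
expression_profile_cutoff: 0.5    # omit faster expressions from the summary report
```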
1 change: 1 addition & 0 deletions activitysim/core/expressions.py
@@ -139,6 +139,7 @@ def compute_columns(
        df,
        _locals_dict,
        trace_rows=state.tracing.trace_targets(df),
+        trace_label=trace_label,
    )

    if trace_results is not None: