diff --git a/doc/cli.rst b/doc/cli.rst index 7c4229e5..3cac1f4a 100644 --- a/doc/cli.rst +++ b/doc/cli.rst @@ -491,6 +491,7 @@ Usage [--duplicate DUPLICATE] [-s SETUP] [--teardown TEARDOWN] + [--profile PROFILE] stmt [stmt ...] Options: @@ -512,6 +513,7 @@ Options: * ``--duplicate=DUPLICATE``: Duplicate statements (``stmt`` statements, not ``SETUP``) to reduce the overhead of the outer loop and multiply inner loops by DUPLICATE (see ``--inner-loops`` option). +* ``--profile=PROFILE``: Run the benchmark inside the cProfile profiler and output to the given file. This is a convenient way to profile a specific benchmark, but it will make the actual benchmark timings much less accurate. .. note:: timeit ``-n`` (number) and ``-r`` (repeat) options become ``-l`` (loops) and diff --git a/pyperf/_manager.py b/pyperf/_manager.py index f29af1fa..d478708d 100644 --- a/pyperf/_manager.py +++ b/pyperf/_manager.py @@ -66,6 +66,9 @@ def worker_cmd(self, calibrate_loops, calibrate_warmups, wpipe): if args.track_memory: cmd.append('--track-memory') + if args.profile: + cmd.extend(['--profile', args.profile]) + if self.runner._add_cmdline_args: self.runner._add_cmdline_args(cmd, args) diff --git a/pyperf/_process_time.py b/pyperf/_process_time.py index d3589b28..7010195d 100644 --- a/pyperf/_process_time.py +++ b/pyperf/_process_time.py @@ -17,6 +17,7 @@ import os import subprocess import sys +import tempfile import time try: @@ -35,11 +36,30 @@ def get_max_rss(): return 0 -def bench_process(loops, args, kw): +def merge_profile_stats_files(src, dst): + """ + Merging one existing pstats file into another. + """ + import pstats + if os.path.isfile(dst): + src_stats = pstats.Stats(src) + dst_stats = pstats.Stats(dst) + dst_stats.add(src_stats) + dst_stats.dump_stats(dst) + os.unlink(src) + else: + os.rename(src, dst) + + +def bench_process(loops, args, kw, profile_filename=None): max_rss = 0 range_it = range(loops) start_time = time.perf_counter() + if profile_filename: + temp_profile_filename = tempfile.mktemp() + args = [args[0], "-m", "cProfile", "-o", temp_profile_filename] + args[1:] + for loop in range_it: start_rss = get_max_rss() @@ -51,6 +71,8 @@ def bench_process(loops, args, kw): if exitcode != 0: print("Command failed with exit code %s" % exitcode, file=sys.stderr) + if profile_filename: + os.unlink(temp_profile_filename) sys.exit(exitcode) proc = None @@ -58,6 +80,11 @@ def bench_process(loops, args, kw): rss = get_max_rss() - start_rss max_rss = max(max_rss, rss) + if profile_filename: + merge_profile_stats_files( + temp_profile_filename, profile_filename + ) + dt = time.perf_counter() - start_time return (dt, max_rss) @@ -70,10 +97,18 @@ def main(): sys.exit(1) if len(sys.argv) < 3: - print("Usage: %s %s loops program [arg1 arg2 ...]" + print("Usage: %s %s loops program [arg1 arg2 ...] [--profile profile]" % (os.path.basename(sys.executable), __file__)) sys.exit(1) + if "--profile" in sys.argv: + profile_idx = sys.argv.index("--profile") + profile_filename = sys.argv[profile_idx + 1] + del sys.argv[profile_idx] + del sys.argv[profile_idx] + else: + profile_filename = None + loops = int(sys.argv[1]) args = sys.argv[2:] @@ -88,7 +123,7 @@ def main(): kw['stdout'] = devnull kw['stderr'] = subprocess.STDOUT - dt, max_rss = bench_process(loops, args, kw) + dt, max_rss = bench_process(loops, args, kw, profile_filename) if devnull is not None: devnull.close() diff --git a/pyperf/_runner.py b/pyperf/_runner.py index b70e3e52..6c0d9e2b 100644 --- a/pyperf/_runner.py +++ b/pyperf/_runner.py @@ -12,7 +12,8 @@ set_highest_priority) from pyperf._formatter import format_timedelta from pyperf._utils import (MS_WINDOWS, MAC_OS, abs_executable, - WritePipe, get_python_names) + WritePipe, get_python_names, + merge_profile_stats) from pyperf._worker import WorkerProcessTask @@ -48,6 +49,19 @@ def parse_python_names(names): return parts +def profiling_wrapper(func): + """ + Wrap a function to collect profiling. + """ + import cProfile + profiler = cProfile.Profile() + + def profiling_func(*args): + return profiler.runcall(func, *args) + + return profiler, profiling_func + + class CLIError(Exception): pass @@ -222,6 +236,11 @@ def __init__(self, values=None, warmups=None, processes=None, 'PYTHON as CHANGED_NAME ' 'and REF_PYTHON as REF_NAME in results') + parser.add_argument('--profile', + type=str, + help='Collect profile data using cProfile ' + 'and output to the given file.') + memory = parser.add_mutually_exclusive_group() memory.add_argument('--tracemalloc', action="store_true", help='Trace memory allocations using tracemalloc') @@ -454,12 +473,21 @@ def bench_time_func(self, name, time_func, *args, **kwargs): if not self._check_worker_task(): return None + if self.args.profile: + profiler, time_func = profiling_wrapper(time_func) + def task_func(task, loops): return time_func(loops, *args) task = WorkerProcessTask(self, name, task_func, metadata) + task.inner_loops = inner_loops - return self._main(task) + result = self._main(task) + + if self.args.profile: + merge_profile_stats(profiler, self.args.profile) + + return result def bench_func(self, name, func, *args, **kwargs): """"Benchmark func(*args).""" @@ -474,6 +502,9 @@ def bench_func(self, name, func, *args, **kwargs): if args: func = functools.partial(func, *args) + if self.args.profile: + profiler, func = profiling_wrapper(func) + def task_func(task, loops): # use fast local variables local_timer = time.perf_counter @@ -494,7 +525,12 @@ def task_func(task, loops): task = WorkerProcessTask(self, name, task_func, metadata) task.inner_loops = inner_loops - return self._main(task) + result = self._main(task) + + if self.args.profile: + merge_profile_stats(profiler, self.args.profile) + + return result def bench_async_func(self, name, func, *args, **kwargs): """Benchmark await func(*args)""" @@ -509,6 +545,9 @@ def bench_async_func(self, name, func, *args, **kwargs): if args: func = functools.partial(func, *args) + if self.args.profile: + profiler, func = profiling_wrapper(func) + def task_func(task, loops): if loops != 1: async def main(): @@ -549,7 +588,12 @@ async def main(): task = WorkerProcessTask(self, name, task_func, metadata) task.inner_loops = inner_loops - return self._main(task) + result = self._main(task) + + if self.args.profile: + merge_profile_stats(profiler, self.args.profile) + + return result def timeit(self, name, stmt=None, setup="pass", teardown="pass", inner_loops=None, duplicate=None, metadata=None, globals=None): @@ -671,6 +715,9 @@ def bench_command(self, name, command): if not self._check_worker_task(): return None + if self.args.profile: + command.extend(["--profile", self.args.profile]) + # Use lazy import to limit imports on 'import pyperf' from pyperf._command import BenchCommandTask task = BenchCommandTask(self, name, command) diff --git a/pyperf/_utils.py b/pyperf/_utils.py index 05dc54a6..99225466 100644 --- a/pyperf/_utils.py +++ b/pyperf/_utils.py @@ -405,3 +405,24 @@ def geometric_mean(data): if not data: raise ValueError("empty data") return _geometric_mean(data) + + +def merge_profile_stats(profiler, dst): + """ + Save pstats by merging into an existing file. + """ + import pstats + if os.path.isfile(dst): + try: + src_stats = pstats.Stats(profiler) + except TypeError: + # If no actual stats were collected, a TypeError is raised + # and we don't need to merge anything into the output. + return + + dst_stats = pstats.Stats(dst) + dst_stats.add(src_stats) + dst_stats.dump_stats(dst) + else: + profiler.dump_stats(dst) + diff --git a/pyperf/tests/test_runner.py b/pyperf/tests/test_runner.py index a61053ba..c456a9ae 100644 --- a/pyperf/tests/test_runner.py +++ b/pyperf/tests/test_runner.py @@ -1,5 +1,6 @@ import collections import os.path +import pstats import sys import tempfile import textwrap @@ -73,7 +74,6 @@ class ClockInfo: self.assertEqual(bench.get_nrun(), 1) return Result(runner, bench, stdout) - def test_worker(self): result = self.exec_runner('--worker', '-l1', '-w1') self.assertRegex(result.stdout, @@ -83,6 +83,53 @@ def test_debug_single_value(self): result = self.exec_runner('--debug-single-value', '--worker') self.assertEqual(result.bench.get_nvalue(), 1) + def test_profile_time_func(self): + with tempfile.NamedTemporaryFile('wb+') as tmp: + name = tmp.name + args = ['--worker', '-l1', '-w1', '--profile', name] + runner = self.create_runner(args) + + def time_func(loops): + return 1.0 + + runner.bench_time_func('bench1', time_func) + + try: + s = pstats.Stats(name) + if sys.version_info < (3, 9): + assert len(s.stats) + else: + assert len(s.get_stats_profile().func_profiles) + finally: + if os.path.isfile(name): + os.unlink(name) + + def test_profile_func(self): + with tempfile.NamedTemporaryFile('wb+') as tmp: + name = tmp.name + args = ['--worker', '-l1', '-w1', '--profile', name] + runner = self.create_runner(args) + + def external(): + return [1] * 1000 + + def func(): + external() + return 1.0 + + runner.bench_func('bench1', func) + + try: + import pstats + s = pstats.Stats(name) + if sys.version_info < (3, 9): + assert len(s.stats) + else: + assert len(s.get_stats_profile().func_profiles) + finally: + if os.path.isfile(name): + os.unlink(name) + def test_pipe(self): rpipe, wpipe = create_pipe() with rpipe: