Skip to content

Commit a685a14

Browse files
committed
Address comments in the PR
1 parent 7c47af1 commit a685a14

File tree

5 files changed

+47
-23
lines changed

5 files changed

+47
-23
lines changed

doc/api.rst

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -345,6 +345,19 @@ Benchmark class
345345

346346
Raise an exception if the benchmark has no values.
347347

348+
.. method:: required_nprocesses()
349+
350+
Determines the number of separate process runs that would be required
351+
achieve stable results. Specifically, the target is to have 95% certainty
352+
that there is a variance of less than 1%. If the result is greater than
353+
the number of processes recorded in the input data, the value is
354+
meaningless and only means "more samples are required".
355+
356+
The method used is described in this Wikipedia article about estimating
357+
the sampling of a mean:
358+
359+
https://en.wikipedia.org/wiki/Sample_size_determination#Estimation_of_a_mean
360+
348361
.. method:: update_metadata(metadata: dict)
349362

350363
Update metadata of all runs of the benchmark.

pyperf/__main__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -455,7 +455,8 @@ def display_benchmarks(args, show_metadata=False, hist=False, stats=False,
455455
dump=dump,
456456
checks=checks,
457457
result=result,
458-
display_runs_args=display_runs_args)
458+
display_runs_args=display_runs_args,
459+
only_checks=only_checks)
459460

460461
if bench_lines:
461462
empty_line(lines)

pyperf/_bench.py

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -424,17 +424,23 @@ def median_abs_dev(self):
424424
raise ValueError("MAD must be >= 0")
425425
return value
426426

427-
def required_nsamples(self):
427+
def required_nprocesses(self):
428428
"""
429-
Determines the number of samples that would be required to have 95%
430-
certainty that the samples have a variance of less than 1%.
429+
Determines the number of separate process runs that would be required
430+
achieve stable results. Specifically, the target is to have 95%
431+
certainty that there is a variance of less than 1%. If the result is
432+
greater than the number of processes recorded in the input data, the
433+
value is meaningless and only means "more samples are required".
431434
432-
This is described in this Wikipedia article about estimating the sampling of
433-
a mean:
435+
The method used is described in this Wikipedia article about estimating
436+
the sampling of a mean:
434437
435438
https://en.wikipedia.org/wiki/Sample_size_determination#Estimation_of_a_mean
436439
"""
437-
# Get the means of the values per run
440+
# Get the means of the values per process. The values within the process
441+
# often vary considerably (e.g. due to cache effects), but the variances
442+
# between processes should be fairly consistent. Additionally, this
443+
# value is intended to be advice for the number of processes to run.
438444
values = []
439445
for run in self._runs:
440446
if len(run.values):
@@ -446,6 +452,7 @@ def required_nsamples(self):
446452
total = math.fsum(values)
447453
mean = total / len(values)
448454
stddev = statistics.stdev(values)
455+
449456
# Normalize the stddev so we can target "percentage changed" rather than
450457
# absolute time
451458
sigma = stddev / mean
@@ -455,6 +462,7 @@ def required_nsamples(self):
455462
# 1% variation
456463
W = 0.01
457464

465+
# (4Z²σ²)/(W²)
458466
return int(math.ceil((4 * Z ** 2 * sigma ** 2) / (W ** 2)))
459467

460468
def percentile(self, p):

pyperf/_cli.py

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -400,7 +400,7 @@ def value_bucket(value):
400400
return lines
401401

402402

403-
def format_checks(bench, lines=None):
403+
def format_checks(bench, lines=None, check_too_many_processes=False):
404404
if lines is None:
405405
lines = []
406406

@@ -413,7 +413,7 @@ def format_checks(bench, lines=None):
413413
warnings = []
414414
warn = warnings.append
415415

416-
required_nsamples = bench.required_nsamples()
416+
required_nprocesses = bench.required_nprocesses()
417417

418418
# Display a warning if the standard deviation is greater than 10%
419419
# of the mean
@@ -426,8 +426,8 @@ def format_checks(bench, lines=None):
426426
else:
427427
# display a warning if the number of samples isn't enough to get a stable result
428428
if (
429-
required_nsamples is not None and
430-
required_nsamples > len(bench._runs)
429+
required_nprocesses is not None and
430+
required_nprocesses > len(bench._runs)
431431
):
432432
warn("Not enough samples to get a stable result (95% certainly of less than 1% variation)")
433433

@@ -467,13 +467,14 @@ def format_checks(bench, lines=None):
467467
lines.append("Use --quiet option to hide these warnings.")
468468

469469
if (
470-
required_nsamples is not None and
471-
required_nsamples < len(bench._runs) * 0.75
470+
check_too_many_processes and
471+
required_nprocesses is not None and
472+
required_nprocesses < len(bench._runs) * 0.75
472473
):
473474
lines.append("Benchmark was run more times than necessary to get a stable result.")
474475
lines.append(
475476
"Consider passing processes=%d to the Runner constructor to save time." %
476-
required_nsamples
477+
required_nprocesses
477478
)
478479

479480
# Warn if nohz_full+intel_pstate combo if found in cpu_config metadata
@@ -568,7 +569,7 @@ def format_result(bench):
568569

569570
def format_benchmark(bench, checks=True, metadata=False,
570571
dump=False, stats=False, hist=False, show_name=False,
571-
result=True, display_runs_args=None):
572+
result=True, display_runs_args=None, only_checks=False):
572573
lines = []
573574

574575
if metadata:
@@ -587,7 +588,7 @@ def format_benchmark(bench, checks=True, metadata=False,
587588
format_stats(bench, lines=lines)
588589

589590
if checks:
590-
format_checks(bench, lines=lines)
591+
format_checks(bench, lines=lines, check_too_many_processes=only_checks)
591592

592593
if result:
593594
empty_line(lines)

pyperf/tests/test_perf_cli.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -478,16 +478,11 @@ def test_hist(self):
478478
22.8 ms: 3 ##############
479479
22.9 ms: 4 ###################
480480
22.9 ms: 4 ###################
481-
Benchmark was run more times than necessary to get a stable result.
482-
Consider passing processes=7 to the Runner constructor to save time.
483481
""")
484482
self.check_command(expected, 'hist', TELCO, env=env)
485483

486484
def test_show(self):
487485
expected = ("""
488-
Benchmark was run more times than necessary to get a stable result.
489-
Consider passing processes=7 to the Runner constructor to save time.
490-
491486
Mean +- std dev: 22.5 ms +- 0.2 ms
492487
""")
493488
self.check_command(expected, 'show', TELCO)
@@ -523,8 +518,6 @@ def test_stats(self):
523518
100th percentile: 22.9 ms (+2% of the mean) -- maximum
524519
525520
Number of outlier (out of 22.0 ms..23.0 ms): 0
526-
Benchmark was run more times than necessary to get a stable result.
527-
Consider passing processes=7 to the Runner constructor to save time.
528521
""")
529522
self.check_command(expected, 'stats', TELCO)
530523

@@ -635,6 +628,14 @@ def test_slowest(self):
635628

636629
def test_check_stable(self):
637630
stdout = self.run_command('check', TELCO)
631+
self.assertTrue(
632+
textwrap.dedent(
633+
"""
634+
Benchmark was run more times than necessary to get a stable result.
635+
Consider passing processes=7 to the Runner constructor to save time.
636+
"""
637+
).strip() in stdout.rstrip()
638+
)
638639
self.assertTrue(
639640
'The benchmark seems to be stable' in
640641
stdout.rstrip()

0 commit comments

Comments
 (0)