 benchmarks:
   llama-3-8b:
     name: Llama 3.0 8B
-    step_time_lower_bound: 0.894678
-    step_time_upper_bound: 4.54563437
-    confidence_interval: 1.82548
-    average: 2.7202
-    sample_size: 114
+    step_time_lower_bound: 2.68109009
+    step_time_upper_bound: 2.789223
+    confidence_interval: 0.05407
+    average: 2.7352
+    sample_size: 427
+  llama-3-8b-pure-mlp:
+    name: Llama 3.0 8B (@assume_pure)
+    # Bounds are copied from `llama-3-8b`. They will be overwritten the next time
+    # somebody runs `e2e_testing/update_step_time.py`.
+    step_time_lower_bound: 2.68109009
+    step_time_upper_bound: 2.789223
+    confidence_interval: 0.05407
+    average: 2.7352
+    sample_size: 1
   llama-3_1-8b-sa:
     name: Llama 3.1 8B (Splash Attention)
-    step_time_lower_bound: 2.35428493
-    step_time_upper_bound: 2.470571
-    confidence_interval: 0.05814
-    average: 2.4124
-    sample_size: 112
+    step_time_lower_bound: 2.34653077
+    step_time_upper_bound: 2.467111
+    confidence_interval: 0.06029
+    average: 2.4068
+    sample_size: 428
   llama-3_1-8b-scan-offload:
     name: Llama 3.1 8B (Scan + Offload)
-    step_time_lower_bound: 2.74872464
-    step_time_upper_bound: 2.871284
-    confidence_interval: 0.06128
-    average: 2.81
-    sample_size: 94
+    step_time_lower_bound: 2.74099553
+    step_time_upper_bound: 2.860302
+    confidence_interval: 0.05965
+    average: 2.8006
+    sample_size: 428
   llama-3-8b-2d:
     name: Llama 3.0 8B (2D sharding)
-    step_time_lower_bound: 3.31281298
-    step_time_upper_bound: 3.41371084
-    confidence_interval: 0.05045
-    average: 3.3633
-    sample_size: 114
+    step_time_lower_bound: 3.28827914
+    step_time_upper_bound: 3.38842977
+    confidence_interval: 0.05008
+    average: 3.3384
+    sample_size: 428
   mixtral-8x7b:
     name: Mixtral 8x7B
-    step_time_lower_bound: 3.12225098
-    step_time_upper_bound: 3.21734492
-    confidence_interval: 0.04755
-    average: 3.1698
-    sample_size: 114
+    step_time_lower_bound: 3.09900735
+    step_time_upper_bound: 3.19339336
+    confidence_interval: 0.04719
+    average: 3.1462
+    sample_size: 427
   llama-3-8b-2-slice:
     name: Llama 3.0 8B (2 Slice)
-    step_time_lower_bound: 3.47510115
-    step_time_upper_bound: 4.505638
-    confidence_interval: 0.51527
-    average: 3.9904
-    sample_size: 110
+    step_time_lower_bound: 3.82985294
+    step_time_upper_bound: 4.087614
+    confidence_interval: 0.12888
+    average: 3.9587
+    sample_size: 416
   llama-3-8b-ddp-fsdp:
     name: Llama 3.0 8B (ddp + fsdp)
-    step_time_lower_bound: 3.2263914
-    step_time_upper_bound: 3.341676
-    confidence_interval: 0.05764
-    average: 3.284
-    sample_size: 110
+    step_time_lower_bound: 3.22420277
+    step_time_upper_bound: 3.351676
+    confidence_interval: 0.06374
+    average: 3.2879
+    sample_size: 47
 metadata:
-  query_start: '2025-06-12T22:37:43+00:00'
-  query_end: '2025-06-17T22:37:43+00:00'
+  query_start: '2025-05-26T18:37:58.674556-07:00'
+  query_end: '2025-06-13T13:20:09-07:00'
   confidence_level: 0.999
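In every entry the numbers are consistent with `average` being the midpoint of the two bounds and `confidence_interval` their half-width, presumably a confidence interval on the mean step time at the `confidence_level` (0.999) listed under `metadata`. The following is a minimal sketch of how such bounds could be derived from raw per-step timings, assuming a normal-approximation interval; the function name and formula are assumptions for illustration, not the actual contents of `e2e_testing/update_step_time.py`.

import math
import statistics

def step_time_bounds(step_times, confidence_level=0.999):
    # Hypothetical sketch, not the real e2e_testing/update_step_time.py:
    # build a two-sided confidence interval on the mean step time using a
    # normal approximation.
    n = len(step_times)
    mean = statistics.mean(step_times)
    sem = statistics.stdev(step_times) / math.sqrt(n)  # standard error of the mean
    z = statistics.NormalDist().inv_cdf(0.5 + confidence_level / 2)
    half_width = z * sem
    return {
        "step_time_lower_bound": mean - half_width,
        "step_time_upper_bound": mean + half_width,
        "confidence_interval": round(half_width, 5),  # half the bound width
        "average": round(mean, 4),                    # midpoint of the bounds
        "sample_size": n,
    }

Under this reading, a wider bound such as the old `llama-3-8b` entry (confidence_interval 1.82548) simply reflects noisier or fewer samples, and rerunning the update script with more samples tightens it, as the new values in this diff show.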