Skip to content

Commit 33ea8ce

Browse files
Gate P-value calculations behind --pvalue
1 parent 7bd2f2d commit 33ea8ce

File tree

5 files changed

+69
-16
lines changed

5 files changed

+69
-16
lines changed

lib/argument_parser.rb

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ class ArgumentParser
2323
:skip_yjit,
2424
:skip_zjit,
2525
:with_pre_init,
26+
:pvalue,
2627
keyword_init: true
2728
)
2829

@@ -144,6 +145,10 @@ def parse(argv)
144145
args.rss = true
145146
end
146147

148+
opts.on("--pvalue", "show p-value and significance columns for each comparison") do
149+
args.pvalue = true
150+
end
151+
147152
opts.on("--graph", "generate a graph image of benchmark results") do
148153
args.graph = true
149154
end
@@ -224,6 +229,7 @@ def default_args
224229
name_filters: [],
225230
excludes: [],
226231
rss: false,
232+
pvalue: false,
227233
graph: false,
228234
no_pinning: false,
229235
force_pinning: false,

lib/benchmark_runner/cli.rb

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,8 @@ def run
6969
builder = ResultsTableBuilder.new(
7070
executable_names: ruby_descriptions.keys,
7171
bench_data: bench_data,
72-
include_rss: args.rss
72+
include_rss: args.rss,
73+
include_pvalue: args.pvalue
7374
)
7475
table, format = builder.build
7576

lib/results_table_builder.rb

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,11 @@ class ResultsTableBuilder
55
SECONDS_TO_MS = 1000.0
66
BYTES_TO_MIB = 1024.0 * 1024.0
77

8-
def initialize(executable_names:, bench_data:, include_rss: false)
8+
def initialize(executable_names:, bench_data:, include_rss: false, include_pvalue: false)
99
@executable_names = executable_names
1010
@bench_data = bench_data
1111
@include_rss = include_rss
12+
@include_pvalue = include_pvalue
1213
@base_name = executable_names.first
1314
@other_names = executable_names[1..]
1415
@bench_names = compute_bench_names
@@ -47,7 +48,10 @@ def build_header
4748
end
4849

4950
@other_names.each do |name|
50-
header << "#{@base_name}/#{name}" << "p-value" << "sig"
51+
header << "#{@base_name}/#{name}"
52+
if @include_pvalue
53+
header << "p-value" << "sig"
54+
end
5155
end
5256

5357
header
@@ -66,7 +70,10 @@ def build_format
6670
end
6771

6872
@other_names.each do |_name|
69-
format << "%.3f" << "%s" << "%s"
73+
format << "%.3f"
74+
if @include_pvalue
75+
format << "%s" << "%s"
76+
end
7077
end
7178

7279
format
@@ -108,10 +115,12 @@ def build_ratio_columns(row, base_t0, other_t0s, base_t, other_ts)
108115
row.concat(ratio_1sts)
109116

110117
other_ts.each do |other_t|
111-
pval = Stats.welch_p_value(base_t, other_t)
112118
row << mean(base_t) / mean(other_t)
113-
row << format_p_value(pval)
114-
row << significance_level(pval)
119+
if @include_pvalue
120+
pval = Stats.welch_p_value(base_t, other_t)
121+
row << format_p_value(pval)
122+
row << significance_level(pval)
123+
end
115124
end
116125
end
117126

test/argument_parser_test.rb

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ def setup_mock_ruby(path)
4949
assert_equal [], args.categories
5050
assert_equal [], args.name_filters
5151
assert_equal false, args.rss
52+
assert_equal false, args.pvalue
5253
assert_equal false, args.graph
5354
assert_equal false, args.no_pinning
5455
assert_equal false, args.turbo
@@ -428,6 +429,15 @@ def setup_mock_ruby(path)
428429
end
429430
end
430431

432+
describe '--pvalue option' do
433+
it 'sets pvalue flag' do
434+
parser = ArgumentParser.new
435+
args = parser.parse(['--pvalue'])
436+
437+
assert_equal true, args.pvalue
438+
end
439+
end
440+
431441
describe '--graph option' do
432442
it 'sets graph flag' do
433443
parser = ArgumentParser.new

test/results_table_builder_test.rb

Lines changed: 36 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -56,17 +56,15 @@
5656

5757
table, format = builder.build
5858

59-
assert_equal ['bench', 'ruby (ms)', 'stddev (%)', 'ruby-yjit (ms)', 'stddev (%)', 'ruby-yjit 1st itr', 'ruby/ruby-yjit', 'p-value', 'sig'], table[0]
59+
assert_equal ['bench', 'ruby (ms)', 'stddev (%)', 'ruby-yjit (ms)', 'stddev (%)', 'ruby-yjit 1st itr', 'ruby/ruby-yjit'], table[0]
6060

61-
assert_equal ['%s', '%.1f', '%.1f', '%.1f', '%.1f', '%.3f', '%.3f', '%s', '%s'], format
61+
assert_equal ['%s', '%.1f', '%.1f', '%.1f', '%.1f', '%.3f', '%.3f'], format
6262

6363
assert_equal 'fib', table[1][0]
6464
assert_in_delta 100.0, table[1][1], 1.0
6565
assert_in_delta 50.0, table[1][3], 1.0
6666
assert_in_delta 2.0, table[1][5], 0.1
6767
assert_in_delta 2.0, table[1][6], 0.1
68-
assert_instance_of String, table[1][7]
69-
assert_instance_of String, table[1][8]
7068
end
7169

7270
it 'includes RSS columns when include_rss is true' do
@@ -173,12 +171,12 @@
173171
'ruby-rjit (ms)', 'stddev (%)',
174172
'ruby-yjit 1st itr',
175173
'ruby-rjit 1st itr',
176-
'ruby/ruby-yjit', 'p-value', 'sig',
177-
'ruby/ruby-rjit', 'p-value', 'sig'
174+
'ruby/ruby-yjit',
175+
'ruby/ruby-rjit'
178176
]
179177
assert_equal expected_header, table[0]
180178

181-
expected_format = ['%s', '%.1f', '%.1f', '%.1f', '%.1f', '%.1f', '%.1f', '%.3f', '%.3f', '%.3f', '%s', '%s', '%.3f', '%s', '%s']
179+
expected_format = ['%s', '%.1f', '%.1f', '%.1f', '%.1f', '%.1f', '%.1f', '%.3f', '%.3f', '%.3f', '%.3f']
182180
assert_equal expected_format, format
183181
end
184182

@@ -332,7 +330,7 @@
332330
builder = ResultsTableBuilder.new(
333331
executable_names: executable_names,
334332
bench_data: bench_data,
335-
include_rss: false
333+
include_pvalue: true
336334
)
337335

338336
table, _format = builder.build
@@ -364,14 +362,43 @@
364362
builder = ResultsTableBuilder.new(
365363
executable_names: executable_names,
366364
bench_data: bench_data,
367-
include_rss: false
365+
include_pvalue: true
368366
)
369367

370368
table, _format = builder.build
371369
assert_equal 'N/A', table[1][-2]
372370
assert_equal '', table[1].last
373371
end
374372

373+
it 'omits p-value columns when include_pvalue is false' do
374+
executable_names = ['ruby', 'ruby-yjit']
375+
bench_data = {
376+
'ruby' => {
377+
'fib' => {
378+
'warmup' => [0.1],
379+
'bench' => [0.100, 0.101, 0.099],
380+
'rss' => 1024 * 1024 * 10
381+
}
382+
},
383+
'ruby-yjit' => {
384+
'fib' => {
385+
'warmup' => [0.05],
386+
'bench' => [0.050, 0.051, 0.049],
387+
'rss' => 1024 * 1024 * 12
388+
}
389+
}
390+
}
391+
392+
builder = ResultsTableBuilder.new(
393+
executable_names: executable_names,
394+
bench_data: bench_data
395+
)
396+
397+
table, _format = builder.build
398+
refute_includes table[0], 'p-value'
399+
refute_includes table[0], 'sig'
400+
end
401+
375402
it 'handles only headline benchmarks' do
376403
executable_names = ['ruby']
377404
bench_data = {

0 commit comments

Comments
 (0)