Skip to content

Commit 0c63ef7

Browse files
Merge pull request #515 from ruby/mvh-improve-rss
Improve RSS measurement
2 parents fa46c95 + 55c1d3a commit 0c63ef7

7 files changed

Lines changed: 206 additions & 9 deletions

File tree

README.md

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -284,12 +284,22 @@ after each iteration with the default harness.
284284

285285
## Measuring memory usage
286286

287-
`--rss` option of `run_benchmarks.rb` allows you to measure RSS after benchmark iterations.
287+
The `--rss` option of `run_benchmarks.rb` allows you to measure RSS (resident set size).
288288

289289
```
290290
./run_benchmarks.rb --rss
291291
```
292292

293+
The harness samples RSS once per iteration across the benchmarking window (after
294+
warmup), so the `RSS (MiB)` column reports the mean working set during measurement
295+
along with its iteration-to-iteration variability (`mean ± stddev%`), and the `RSS` ratio is
296+
computed from those means. The raw per-iteration samples are stored in the JSON
297+
output under `rss_samples` (bytes).
298+
299+
For reference, the JSON output also keeps `rss`, a single snapshot taken after a
300+
full GC at the end of the run (the retained set, a lower bound), and `maxrss`, the
301+
process's lifetime peak from `getrusage`.
302+
293303
## Rendering a graph
294304

295305
`--graph` option of `run_benchmarks.rb` allows you to render benchmark results as a graph.

harness-gc/harness.rb

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ def gc_stat_heap_delta(before, after)
3333

3434
def run_benchmark(_num_itrs_hint, **, &block)
3535
times = []
36+
rss_samples = []
3637
marking_times = []
3738
sweeping_times = []
3839
gc_counts = []
@@ -82,6 +83,7 @@ def run_benchmark(_num_itrs_hint, **, &block)
8283
puts itr_str
8384

8485
times << time
86+
rss_samples << get_rss
8587
marking_times << mark_delta
8688
sweeping_times << sweep_delta
8789
gc_counts << count_delta
@@ -95,6 +97,8 @@ def run_benchmark(_num_itrs_hint, **, &block)
9597
bench_range = WARMUP_ITRS..-1
9698

9799
extra = {}
100+
rss_bench = rss_samples[bench_range] || []
101+
extra["rss_samples"] = rss_bench unless rss_bench.empty?
98102
extra["gc_marking_time_warmup"] = marking_times[warmup_range]
99103
extra["gc_marking_time_bench"] = marking_times[bench_range]
100104
extra["gc_sweeping_time_warmup"] = sweeping_times[warmup_range]

harness-warmup/harness.rb

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,10 +36,12 @@ def print_stats(bench, elapsed)
3636
def run_benchmark(num_itrs_hint, **)
3737
start = monotonic_time
3838
times = []
39+
rss_samples = []
3940

4041
begin
4142
time = Benchmark.realtime { yield }
4243
times << time
44+
rss_samples << get_rss
4345

4446
stats = Stats.new(times)
4547
median = stats.median
@@ -63,7 +65,9 @@ def run_benchmark(num_itrs_hint, **)
6365
end until times.size >= MIN_ITERS and elapsed >= MIN_TIME and mad <= threshold
6466

6567
warmup, bench = times[0...times.size/2], times[times.size/2..-1]
66-
return_results(warmup, bench)
68+
rss_bench = rss_samples[times.size/2..-1] || []
69+
extra = rss_bench.empty? ? {} : { "rss_samples" => rss_bench }
70+
return_results(warmup, bench, **extra)
6771

6872
print_stats(bench, elapsed)
6973
end

harness/harness-common.rb

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
require 'rbconfig'
2+
require_relative '../misc/stats'
23

34
# Ensure the ruby in PATH is the ruby running this, so we can safely shell out to other commands
45
ruby_in_path = `ruby -e 'print RbConfig.ruby'`
@@ -214,6 +215,17 @@ def return_results(warmup_iterations, bench_iterations, **extra)
214215
puts "MAXRSS: %.1fMiB" % (maxrss / 1024.0 / 1024.0)
215216
end
216217

218+
rss_samples = ruby_bench_results["rss_samples"]
219+
if rss_samples.is_a?(Array) && !rss_samples.empty?
220+
mib = rss_samples.map { |bytes| bytes / 1024.0 / 1024.0 }
221+
stats = Stats.new(mib)
222+
median = stats.median
223+
mad = stats.median_absolute_deviation(median)
224+
puts "RSS sampled (n=%d): median %.1fMiB \u00b1 %.1fMiB (MAD), range [%.1f, %.1f]MiB" % [
225+
mib.size, median, mad, stats.min, stats.max
226+
]
227+
end
228+
217229
write_json_file(ruby_bench_results)
218230
end
219231

harness/harness.rb

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ def realtime
3434
# Takes a block as input
3535
def run_benchmark(_num_itrs_hint, **, &block)
3636
times = []
37+
rss_samples = []
3738
total_time = 0
3839
num_itrs = 0
3940
header = "itr: time"
@@ -75,10 +76,15 @@ def run_benchmark(_num_itrs_hint, **, &block)
7576
# We internally save the time in seconds to avoid loss of precision
7677
times << time
7778
total_time += time
79+
# Sample current RSS between iterations (outside the timed block) so we can
80+
# report the working set across the window with variance.
81+
rss_samples << get_rss
7882
end until num_itrs >= WARMUP_ITRS + MIN_BENCH_ITRS and total_time >= MIN_BENCH_TIME
7983

8084
warmup, bench = times[0...WARMUP_ITRS], times[WARMUP_ITRS..-1]
81-
return_results(warmup, bench)
85+
rss_bench = rss_samples[WARMUP_ITRS..-1] || []
86+
extra = rss_bench.empty? ? {} : { "rss_samples" => rss_bench }
87+
return_results(warmup, bench, **extra)
8288

8389
non_warmups = times[WARMUP_ITRS..-1]
8490
if non_warmups.size > 1

lib/results_table_builder.rb

Lines changed: 46 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ def initialize(executable_names:, bench_data:, include_rss: false, include_pvalu
1212
@include_pvalue = include_pvalue
1313
@zjit_stats = zjit_stats || []
1414
@include_gc = detect_gc_data(bench_data)
15+
@rss_has_samples = @include_rss && detect_rss_samples(bench_data)
1516
@base_name = executable_names.first
1617
@other_names = executable_names[1..]
1718
@bench_names = compute_bench_names
@@ -86,7 +87,7 @@ def build_format
8687

8788
@executable_names.each do |_name|
8889
format << "%s"
89-
format << "%.1f" if @include_rss
90+
format << (@rss_has_samples ? "%s" : "%.1f") if @include_rss
9091
@zjit_stats.each { format << "%s" }
9192
if @include_gc
9293
format << "%s"
@@ -125,11 +126,15 @@ def build_row(bench_name)
125126
t0s = extract_first_iteration_times(bench_name)
126127
times_no_warmup = extract_benchmark_times(bench_name)
127128
rsss = extract_rss_values(bench_name)
129+
rss_series = @rss_has_samples ? extract_rss_series(bench_name) : nil
128130

129131
base_t0, *other_t0s = t0s
130132
base_t, *other_ts = times_no_warmup
131133
base_rss, *other_rsss = rsss
132134

135+
base_rss_cell = rss_cell(base_rss, rss_series && rss_series[0])
136+
other_rss_cells = other_rsss.each_index.map { |i| rss_cell(other_rsss[i], rss_series && rss_series[i + 1]) }
137+
133138
# Extract zjit stats: { stat_name => [base_val, other1_val, ...] }
134139
zjit_stat_values = @zjit_stats.map do |stat|
135140
[stat, extract_zjit_stat(bench_name, stat)]
@@ -143,8 +148,8 @@ def build_row(bench_name)
143148
end
144149

145150
row = [bench_name]
146-
build_base_columns(row, base_t, base_rss, zjit_stat_values, 0, base_mark, base_sweep)
147-
build_comparison_columns(row, other_ts, other_rsss, zjit_stat_values, other_marks, other_sweeps)
151+
build_base_columns(row, base_t, base_rss_cell, zjit_stat_values, 0, base_mark, base_sweep)
152+
build_comparison_columns(row, other_ts, other_rss_cells, zjit_stat_values, other_marks, other_sweeps)
148153
build_ratio_columns(row, base_t0, other_t0s, base_t, other_ts)
149154
build_rss_ratio_columns(row, base_rss, other_rsss)
150155
build_gc_ratio_columns(row, base_mark, other_marks, base_sweep, other_sweeps)
@@ -162,10 +167,10 @@ def build_base_columns(row, base_t, base_rss, zjit_stat_values, exe_index, base_
162167
end
163168
end
164169

165-
def build_comparison_columns(row, other_ts, other_rsss, zjit_stat_values, other_marks, other_sweeps)
170+
def build_comparison_columns(row, other_ts, other_rss_cells, zjit_stat_values, other_marks, other_sweeps)
166171
other_ts.each_with_index do |other_t, i|
167172
row << format_time_with_stddev(other_t)
168-
row << other_rsss[i] if @include_rss
173+
row << other_rss_cells[i] if @include_rss
169174
zjit_stat_values.each { |_stat, values| row << format_stat(values[i + 1]) }
170175
if @include_gc
171176
row << format_time_with_stddev(other_marks[i])
@@ -283,9 +288,38 @@ def extract_benchmark_times(bench_name)
283288
end
284289
end
285290

291+
# Numeric RSS (MiB) per executable, used for the RSS ratio. When per-iteration
292+
# samples are present we use their mean so the ratio matches the displayed value.
286293
def extract_rss_values(bench_name)
287294
@executable_names.map do |name|
288-
bench_data_for(name, bench_name)['rss'] / BYTES_TO_MIB
295+
data = bench_data_for(name, bench_name)
296+
samples = data['rss_samples']
297+
if samples.is_a?(Array) && !samples.empty?
298+
mean(samples) / BYTES_TO_MIB
299+
else
300+
data['rss'] / BYTES_TO_MIB
301+
end
302+
end
303+
end
304+
305+
# Per-iteration RSS samples (MiB), one entry per executable; an entry is nil when that run lacks them.
306+
def extract_rss_series(bench_name)
307+
@executable_names.map do |name|
308+
samples = bench_data_for(name, bench_name)['rss_samples']
309+
next nil unless samples.is_a?(Array) && !samples.empty?
310+
samples.map { |bytes| bytes / BYTES_TO_MIB }
311+
end
312+
end
313+
314+
# Display value for an RSS column: mean ± stddev% when samples exist (matching
315+
# the timing columns), otherwise a plain MiB value. Returns a Float when no run
316+
# in the suite has samples, preserving the legacy "%.1f" formatting.
317+
def rss_cell(mean_value, series)
318+
return mean_value unless @rss_has_samples
319+
if series && !series.empty?
320+
format_time_with_stddev(series)
321+
else
322+
"%.1f" % mean_value
289323
end
290324
end
291325

@@ -305,6 +339,12 @@ def detect_gc_data(bench_data)
305339
bench_data.values.any? { |benchmarks| benchmarks.values.any? { |d| d.is_a?(Hash) && d.key?('gc_marking_time_bench') } }
306340
end
307341

342+
def detect_rss_samples(bench_data)
343+
bench_data.values.any? do |benchmarks|
344+
benchmarks.values.any? { |d| d.is_a?(Hash) && d['rss_samples'].is_a?(Array) && !d['rss_samples'].empty? }
345+
end
346+
end
347+
308348
def bench_data_for(name, bench_name)
309349
@bench_data[name][bench_name]
310350
end

test/results_table_builder_test.rb

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -549,4 +549,125 @@
549549
assert_equal 'fib', bench_names[4]
550550
end
551551
end
552+
553+
describe 'RSS sampling (rss_samples)' do
554+
MIB = 1024 * 1024
555+
556+
it 'shows mean ± stddev% and uses %s format when samples are present' do
557+
bench_data = {
558+
'ruby' => {
559+
'fib' => {
560+
'warmup' => [0.1],
561+
'bench' => [0.1, 0.1, 0.1],
562+
'rss' => 10 * MIB,
563+
'rss_samples' => [9 * MIB, 10 * MIB, 11 * MIB]
564+
}
565+
}
566+
}
567+
568+
builder = ResultsTableBuilder.new(
569+
executable_names: ['ruby'],
570+
bench_data: bench_data,
571+
include_rss: true
572+
)
573+
574+
table, format = builder.build
575+
576+
assert_equal ['bench', 'ruby (ms)', 'RSS (MiB)'], table[0]
577+
assert_equal ['%s', '%s', '%s'], format
578+
579+
m = table[1][2].match(/\A(\d+\.\d) ± (\d+\.\d)%\z/)
580+
assert m, "expected mean ± stddev%, got #{table[1][2].inspect}"
581+
assert_in_delta 10.0, m[1].to_f, 0.1
582+
assert_operator m[2].to_f, :>, 0.0
583+
end
584+
585+
it 'computes the RSS ratio from the mean of samples' do
586+
bench_data = {
587+
'ruby' => {
588+
'fib' => {
589+
'warmup' => [0.1],
590+
'bench' => [0.1, 0.1, 0.1],
591+
'rss' => 99 * MIB, # should be ignored in favour of samples
592+
'rss_samples' => [10 * MIB, 10 * MIB, 10 * MIB]
593+
}
594+
},
595+
'ruby-yjit' => {
596+
'fib' => {
597+
'warmup' => [0.05],
598+
'bench' => [0.05, 0.05, 0.05],
599+
'rss' => 1 * MIB,
600+
'rss_samples' => [18 * MIB, 20 * MIB, 22 * MIB]
601+
}
602+
}
603+
}
604+
605+
builder = ResultsTableBuilder.new(
606+
executable_names: ['ruby', 'ruby-yjit'],
607+
bench_data: bench_data,
608+
include_rss: true
609+
)
610+
611+
table, _format = builder.build
612+
613+
# ratio = mean(ruby samples) / mean(yjit samples) = 10 / 20 = 0.5
614+
assert_in_delta 0.5, table[1].last, 0.001
615+
end
616+
617+
it 'falls back to a plain MiB value for runs without samples in a mixed suite' do
618+
bench_data = {
619+
'ruby' => {
620+
'fib' => {
621+
'warmup' => [0.1],
622+
'bench' => [0.1, 0.1],
623+
'rss' => 10 * MIB,
624+
'rss_samples' => [10 * MIB, 10 * MIB]
625+
},
626+
'loop' => {
627+
'warmup' => [0.2],
628+
'bench' => [0.2, 0.2],
629+
'rss' => 15 * MIB
630+
# no rss_samples for this benchmark
631+
}
632+
}
633+
}
634+
635+
builder = ResultsTableBuilder.new(
636+
executable_names: ['ruby'],
637+
bench_data: bench_data,
638+
include_rss: true
639+
)
640+
641+
table, format = builder.build
642+
643+
# Suite has samples somewhere, so the RSS column is string-formatted.
644+
assert_equal ['%s', '%s', '%s'], format
645+
646+
rows = table[1..].each_with_object({}) { |row, h| h[row[0]] = row }
647+
assert_match(/\A\d+\.\d ± \d+\.\d%\z/, rows['fib'][2])
648+
# The sample-less benchmark still renders as a bare MiB value.
649+
assert_equal '15.0', rows['loop'][2]
650+
end
651+
652+
it 'keeps %.1f formatting when no run in the suite has samples' do
653+
bench_data = {
654+
'ruby' => {
655+
'fib' => {
656+
'warmup' => [0.1],
657+
'bench' => [0.1],
658+
'rss' => 10 * MIB
659+
}
660+
}
661+
}
662+
663+
builder = ResultsTableBuilder.new(
664+
executable_names: ['ruby'],
665+
bench_data: bench_data,
666+
include_rss: true
667+
)
668+
669+
_table, format = builder.build
670+
assert_equal ['%s', '%s', '%.1f'], format
671+
end
672+
end
552673
end

0 commit comments

Comments
 (0)