I’m seeing a pretty consistent regression from Node.js `v22.22.3` to `v24.16.0` on a small stream benchmark that creates lots of short-lived `Readable`s and drains them immediately. The benchmark below (`benchmark/streams/readable-push-buffer-burst-minimal.js`) is deliberately not optimal; its only purpose is to demonstrate the issue.
'use strict';
const common = require('../common.js');
const assert = require('assert');
const { Readable } = require('stream');
// Parameter matrix handed to the benchmark harness:
//   n        - how many times `main` repeats the timed build-and-drain pass
//   bodySize - length passed to the largest (`<main>`) filler chunk
const benchOptions = {
  n: [2e4],
  bodySize: [120 * 1024],
};
const bench = common.createBenchmark(main, benchOptions);
/**
 * Builds a string consisting of `size` consecutive copies of `ch`.
 *
 * @param {number} size - Number of copies to concatenate.
 * @param {string} ch - String to repeat.
 * @returns {string} `ch` repeated `size` times.
 */
function repeat(size, ch) {
  const filler = ch.repeat(size);
  return filler;
}
/**
 * Produces the string pieces of a synthetic HTML page: fixed markup
 * fragments interleaved with filler runs of 'a', 'b', 'c' and 'd'.
 *
 * @param {number} bodySize - Length of the 'c' filler placed inside `<main>`.
 * @returns {string[]} Nine strings, in document order.
 */
function makeBurstChunks(bodySize) {
  // Fillers are created in document order, matching the order in which the
  // array literal would evaluate them.
  const styleFiller = repeat(1024, 'a');
  const headerFiller = repeat(2048, 'b');
  const mainFiller = repeat(bodySize, 'c');
  const scriptFiller = repeat(2048, 'd');

  const pieces = [];
  pieces.push('<!doctype html><html><head><title>burst</title><style>');
  pieces.push(styleFiller);
  pieces.push('</style></head><body><header>');
  pieces.push(headerFiller);
  pieces.push('</header><main>');
  pieces.push(mainFiller);
  pieces.push('</main><script>');
  pieces.push(scriptFiller);
  pieces.push('</script></body></html>');
  return pieces;
}
/**
 * Encodes every string in `chunks` as a UTF-8 `Buffer`.
 *
 * @param {string[]} chunks - Strings to encode.
 * @returns {Buffer[]} One buffer per input string, in the same order.
 */
function toBuffers(chunks) {
  const encode = (text) => Buffer.from(text, 'utf8');
  return chunks.map(encode);
}
/**
 * Pushes each input buffer through its own freshly created `Readable`,
 * reads everything back synchronously and concatenates the result.
 *
 * A separate stream per buffer is intentional: the many short-lived
 * Readables are what this benchmark exercises.
 *
 * @param {Buffer[]} buffersIn - Buffers to feed through the streams.
 * @returns {Buffer} Concatenation of every chunk read back.
 */
function buildAndDrain(buffersIn) {
  const drained = [];
  for (let index = 0; index < buffersIn.length; index++) {
    const stream = new Readable({ read() {} });
    stream.push(buffersIn[index]);
    // Signal end-of-stream so read() returns null once the data is consumed.
    stream.push(null);
    for (let piece = stream.read(); piece !== null; piece = stream.read()) {
      drained.push(piece);
    }
  }
  return Buffer.concat(drained);
}
/**
 * Benchmark entry point: prepares the input buffers once, runs one
 * unmeasured pass, then times `n` build-and-drain passes.
 *
 * @param {object} conf - Benchmark configuration.
 * @param {number} conf.n - Number of timed passes.
 * @param {number} conf.bodySize - Length of the `<main>` filler chunk.
 * @throws {Error} If a timed pass yields a buffer of unexpected length.
 */
function main({ n, bodySize }) {
  const chunks = makeBurstChunks(bodySize);
  const inputBuffers = toBuffers(chunks);

  let expectedLength = 0;
  for (const { length } of inputBuffers) {
    expectedLength += length;
  }

  // Unmeasured pass that also sanity-checks the output size.
  const warmup = buildAndDrain(inputBuffers);
  assert.strictEqual(warmup.length, expectedLength);

  bench.start();
  for (let pass = 0; pass < n; ++pass) {
    const drained = buildAndDrain(inputBuffers);
    // Plain comparison inside the timed loop; only throws on a mismatch.
    if (drained.length !== expectedLength) {
      throw new Error(`Unexpected buffer length: ${drained.length}`);
    }
  }
  bench.end(n);
}
So running the benchmark comparing v22.22.3 vs v24.16.0, I see the following results:
$ node benchmark/compare.js --old ~/.nvm/versions/node/v22.22.3/bin/node --new ~/.nvm/versions/node/v24.16.0/bin/node --filter readable-push-buffer-burst-minimal streams > readable-push-buffer-burst-minimal.csv
[00:00:58|% 100| 1/1 files | 60/60 runs | 1/1 configs]: Done
$ npx node-benchmark-compare readable-push-buffer-burst-minimal.csv
confidence improvement accuracy (*) (**) (***)
streams/readable-push-buffer-burst-minimal.js bodySize=122880 n=20000 *** -37.18 % ±4.12% ±5.55% ±7.36%
I also profiled both cases with perf and generated flamegraphs. The main difference seems to be GC behavior: v22 is mostly dominated by scavenges, while v24 is much more major-GC-heavy. See attached flamegraph-22.svg and flamegraph-24.svg.
One more datapoint that seems relevant: if I run v24 with --external-memory-accounted-in-global-limit, the regression drops a lot. Here are the results using a patched benchmark/compare.js that supports per-binary flags:
$ node benchmark/compare.js --old ~/.nvm/versions/node/v22.22.3/bin/node --new ~/.nvm/versions/node/v24.16.0/bin/node --new-flags "--external-memory-accounted-in-global-limit" --filter readable-push-buffer-burst-minimal streams > readable-push-buffer-burst-minimal_with_flag.csv
[00:00:50|% 100| 1/1 files | 60/60 runs | 1/1 configs]: Done
$ npx node-benchmark-compare readable-push-buffer-burst-minimal_with_flag.csv
confidence improvement accuracy (*) (**) (***)
streams/readable-push-buffer-burst-minimal.js bodySize=122880 n=20000 *** -11.18 % ±4.28% ±5.76% ±7.62%
Be aware that when doing many comparisons the risk of a false-positive result increases.
In this case, there are 1 comparisons, you can thus expect the following amount of false-positive results:
0.05 false positives, when considering a 5% risk acceptance (*, **, ***),
0.01 false positives, when considering a 1% risk acceptance (**, ***),
0.00 false positives, when considering a 0.1% risk acceptance (***)
So in summary, when running the benchmark, I see a 37% regression which drops to only 11% when using v24 with the --external-memory-accounted-in-global-limit flag.
Does this ring a bell as a known regression or is this to be expected in v24?
Flamegraphs
/cc @RafaelGSS
I’m seeing a pretty consistent regression from Node.js `v22.22.3` to `v24.16.0` on a small stream benchmark that creates lots of short-lived `Readable`s and drains them immediately. The code in the benchmark file `benchmark/streams/readable-push-buffer-burst-minimal.js` is not optimal, but that's not the point as it demonstrates the issue.

So running the benchmark comparing `v22.22.3` vs `v24.16.0`, I see the following results:

I also profiled both cases with perf and generated flamegraphs. The main difference seems to be GC behavior: `v22` is mostly dominated by scavenges, while `v24` is much more major-GC-heavy. See attached `flamegraph-22.svg` and `flamegraph-24.svg`.

One more datapoint that seems relevant: if I run `v24` with `--external-memory-accounted-in-global-limit`, the regression drops a lot. Here are the results using a patched benchmark/compare.js that supports per-binary flags:

So in summary, when running the benchmark, I see a 37% regression which drops to only 11% when using `v24` with the `--external-memory-accounted-in-global-limit` flag.

Does this ring a bell as a known regression or is this to be expected in `v24`?

Flamegraphs

- (`v22.22.3`)
- (`v24.16.0`)
- (`v24.16.0 --external-memory-accounted-in-global-limit`)

/cc @RafaelGSS