Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
81dc810
refactor: move spamoor benchmark into testify suite in test/e2e/bench…
chatton Feb 25, 2026
cc56590
merge main into cian/bench-refactor
chatton Feb 25, 2026
18fc15a
fix: correct BENCH_JSON_OUTPUT path for spamoor benchmark
chatton Feb 25, 2026
fccd9db
fix: place package pattern before test binary flags in benchmark CI
chatton Feb 25, 2026
ae525ca
fix: adjust evm-binary path for benchmark subpackage working directory
chatton Feb 25, 2026
039eaf7
wip: erc20 benchmark test
chatton Feb 25, 2026
85c9d2d
fix: exclude benchmark subpackage from make test-e2e
chatton Feb 25, 2026
fe8d166
fix: replace FilterLogs with header iteration and optimize spamoor co…
chatton Feb 26, 2026
99e3e42
fix: improve benchmark measurement window and reliability
chatton Feb 26, 2026
e4e06c5
fix: address PR review feedback for benchmark suite
chatton Mar 2, 2026
1c3b560
Merge branch 'main' into cian/bench-refactor
chatton Mar 2, 2026
03b9239
chore: specify http
chatton Mar 2, 2026
fe3ca23
chore: filter out benchmark tests from test-e2e
chatton Mar 2, 2026
8752fee
Merge branch 'main' into cian/bench-refactor
chatton Mar 2, 2026
06f532b
Merge branch 'cian/bench-refactor' into cian/erc20-benchmark
chatton Mar 2, 2026
560974a
Merge remote-tracking branch 'origin/main' into cian/erc20-benchmark
chatton Mar 2, 2026
054f2c6
refactor: centralize reth config and lower ERC20 spammer count
chatton Mar 2, 2026
26bb117
chore: collect all traces at once
chatton Mar 2, 2026
b88cae3
chore: self review
chatton Mar 2, 2026
272ab71
refactor: extract benchmark helpers to slim down ERC20 test body
chatton Mar 2, 2026
676e0d1
docs: add detailed documentation to benchmark helper methods
chatton Mar 2, 2026
f4949a1
ci: add ERC20 throughput benchmark job
chatton Mar 2, 2026
4c7b7e1
chore: remove span assertions
chatton Mar 2, 2026
6f56c80
chore: adding gas burner test
chatton Mar 2, 2026
3eb733a
chore: merge main
chatton Mar 5, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
121 changes: 121 additions & 0 deletions test/e2e/benchmark/gasburner_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
//go:build evm

package benchmark

import (
"context"
"fmt"
"time"

"github.com/celestiaorg/tastora/framework/docker/evstack/spamoor"
)

// TestGasBurner measures gas throughput using a deterministic gasburner
// workload. The result is tracked via BENCH_JSON_OUTPUT as seconds_per_gigagas
// (lower is better) on the benchmark dashboard.
func (s *SpamoorSuite) TestGasBurner() {
const (
numSpammers = 4
countPerSpammer = 2500
totalCount = numSpammers * countPerSpammer
warmupTxs = 50
serviceName = "ev-node-gasburner"
waitTimeout = 5 * time.Minute
)

t := s.T()
ctx := t.Context()
w := newResultWriter(t, "GasBurner")
defer w.flush()

e := s.setupEnv(config{
serviceName: serviceName,
})
api := e.spamoorAPI

s.Require().NoError(deleteAllSpammers(api), "failed to delete stale spammers")

gasburnerCfg := map[string]any{
"gas_units_to_burn": 5_000_000,
"total_count": countPerSpammer,
"throughput": 25,
"max_pending": 5000,
"max_wallets": 500,
"rebroadcast": 0,
"base_fee": 20,
"tip_fee": 5,
"refill_amount": "5000000000000000000",
"refill_balance": "2000000000000000000",
"refill_interval": 300,
}

for i := range numSpammers {
name := fmt.Sprintf("bench-gasburner-%d", i)
id, err := api.CreateSpammer(name, spamoor.ScenarioGasBurnerTX, gasburnerCfg, true)
s.Require().NoError(err, "failed to create spammer %s", name)
t.Cleanup(func() { _ = api.DeleteSpammer(id) })
}

// wait for wallet prep and contract deployment to finish before
// recording start block so warmup is excluded from the measurement.
pollSentTotal := func() (float64, error) {
metrics, mErr := api.GetMetrics()
if mErr != nil {
return 0, mErr
}
return sumCounter(metrics["spamoor_transactions_sent_total"]), nil
}
waitForMetricTarget(t, "spamoor_transactions_sent_total (warmup)", pollSentTotal, warmupTxs, waitTimeout)

startHeader, err := e.ethClient.HeaderByNumber(ctx, nil)
s.Require().NoError(err, "failed to get start block header")
startBlock := startHeader.Number.Uint64()
loadStart := time.Now()
t.Logf("start block: %d (after warmup)", startBlock)

// wait for all transactions to be sent
waitForMetricTarget(t, "spamoor_transactions_sent_total", pollSentTotal, float64(totalCount), waitTimeout)

// wait for pending txs to drain
drainCtx, drainCancel := context.WithTimeout(ctx, 30*time.Second)
defer drainCancel()
waitForDrain(drainCtx, t.Logf, e.ethClient, 10)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Handle waitForDrain errors instead of dropping them.

Line 82 ignores the returned error, so the test can proceed with a non-drained mempool and publish skewed throughput numbers.

✅ Proposed fix
-	waitForDrain(drainCtx, t.Logf, e.ethClient, 10)
+	s.Require().NoError(waitForDrain(drainCtx, t.Logf, e.ethClient, 10), "failed waiting for mempool drain")

As per coding guidelines, "Return errors early" and "Ensure tests are deterministic".

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@test/e2e/benchmark/gasburner_test.go` at line 82, The call to
waitForDrain(drainCtx, t.Logf, e.ethClient, 10) currently discards its error;
update the test to capture the returned error from waitForDrain and fail the
test on non-nil (e.g., if err != nil then t.Fatalf("waitForDrain failed: %v",
err)) so the test stops on drain failures and avoids running with an undrained
mempool. Ensure you reference the existing symbols waitForDrain, drainCtx,
t.Logf, and e.ethClient when making the change.

wallClock := time.Since(loadStart)

endHeader, err := e.ethClient.HeaderByNumber(ctx, nil)
s.Require().NoError(err, "failed to get end block header")
endBlock := endHeader.Number.Uint64()
t.Logf("end block: %d (range %d blocks)", endBlock, endBlock-startBlock)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Fix inclusive block-range count in log message.

Line 88 reports endBlock-startBlock as “range X blocks”, but metrics are collected over an inclusive range [startBlock, endBlock].

🛠️ Proposed fix
-	t.Logf("end block: %d (range %d blocks)", endBlock, endBlock-startBlock)
+	t.Logf("end block: %d (range %d blocks)", endBlock, endBlock-startBlock+1)
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
t.Logf("end block: %d (range %d blocks)", endBlock, endBlock-startBlock)
t.Logf("end block: %d (range %d blocks)", endBlock, endBlock-startBlock+1)
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@test/e2e/benchmark/gasburner_test.go` at line 88, The log message reports the
number of blocks as endBlock-startBlock but the range is inclusive [startBlock,
endBlock]; update the t.Logf call that currently prints "end block: %d (range %d
blocks)" to compute the range as endBlock - startBlock + 1 so the logged block
count matches the inclusive range used for metrics collection in
gasburner_test.go.


// collect block-level gas/tx metrics
bm, err := collectBlockMetrics(ctx, e.ethClient, startBlock, endBlock)
s.Require().NoError(err, "failed to collect block metrics")

summary := bm.summarize()
s.Require().Greater(summary.SteadyState, time.Duration(0), "expected non-zero steady-state duration")
summary.log(t, startBlock, endBlock, bm.TotalBlockCount, bm.BlockCount, wallClock)

// derive seconds_per_gigagas from the summary's MGas/s
var secsPerGigagas float64
if summary.AchievedMGas > 0 {
// MGas/s -> Ggas/s = MGas/s / 1000, then invert
secsPerGigagas = 1000.0 / summary.AchievedMGas
}
t.Logf("seconds_per_gigagas: %.4f", secsPerGigagas)

// collect and report traces
traces := s.collectTraces(e, serviceName)

if overhead, ok := evNodeOverhead(traces.evNode); ok {
t.Logf("ev-node overhead: %.1f%%", overhead)
w.addEntry(entry{Name: "GasBurner - ev-node overhead", Unit: "%", Value: overhead})
}

w.addEntries(summary.entries("GasBurner"))
w.addSpans(traces.allSpans())
w.addEntry(entry{
Name: fmt.Sprintf("%s - seconds_per_gigagas", w.label),
Unit: "s/Ggas",
Value: secsPerGigagas,
})
}
37 changes: 33 additions & 4 deletions test/e2e/benchmark/helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,8 @@ type blockMetricsSummary struct {
AvgTx float64
// BlocksPerSec is non-empty blocks / steady-state seconds.
BlocksPerSec float64
// AvgBlockInterval is the mean time between all consecutive blocks.
AvgBlockInterval time.Duration
// NonEmptyRatio is (non-empty blocks / total blocks) * 100.
NonEmptyRatio float64
}
Expand All @@ -287,6 +289,15 @@ func (m *blockMetrics) summarize() *blockMetricsSummary {
achievedTPS = float64(m.TotalTxCount) / ss.Seconds()
}

var avgBlockInterval time.Duration
if len(m.BlockIntervals) > 0 {
var total time.Duration
for _, d := range m.BlockIntervals {
total += d
}
avgBlockInterval = total / time.Duration(len(m.BlockIntervals))
}

return &blockMetricsSummary{
SteadyState: ss,
AchievedMGas: mgasPerSec(m.TotalGasUsed, ss),
Expand All @@ -300,8 +311,9 @@ func (m *blockMetrics) summarize() *blockMetricsSummary {
TxP99: txP99,
AvgGas: m.avgGasPerBlock(),
AvgTx: m.avgTxPerBlock(),
BlocksPerSec: blocksPerSec,
NonEmptyRatio: m.nonEmptyRatio(),
BlocksPerSec: blocksPerSec,
AvgBlockInterval: avgBlockInterval,
NonEmptyRatio: m.nonEmptyRatio(),
}
}

Expand All @@ -312,8 +324,8 @@ func (m *blockMetrics) summarize() *blockMetricsSummary {
func (s *blockMetricsSummary) log(t testing.TB, startBlock, endBlock uint64, totalBlocks, nonEmptyBlocks int, wallClock time.Duration) {
t.Logf("block range: %d-%d (%d total, %d non-empty, %.1f%% non-empty)",
startBlock, endBlock, totalBlocks, nonEmptyBlocks, s.NonEmptyRatio)
t.Logf("block intervals: p50=%s, p99=%s, max=%s",
s.IntervalP50.Round(time.Millisecond), s.IntervalP99.Round(time.Millisecond), s.IntervalMax.Round(time.Millisecond))
t.Logf("block intervals: avg=%s, p50=%s, p99=%s, max=%s",
s.AvgBlockInterval.Round(time.Millisecond), s.IntervalP50.Round(time.Millisecond), s.IntervalP99.Round(time.Millisecond), s.IntervalMax.Round(time.Millisecond))
t.Logf("gas/block (non-empty): avg=%.0f, p50=%.0f, p99=%.0f", s.AvgGas, s.GasP50, s.GasP99)
t.Logf("tx/block (non-empty): avg=%.1f, p50=%.0f, p99=%.0f", s.AvgTx, s.TxP50, s.TxP99)
t.Logf("throughput: %.2f MGas/s, %.1f TPS over %s steady-state (%s wall clock)",
Expand All @@ -332,6 +344,7 @@ func (s *blockMetricsSummary) entries(prefix string) []entry {
{Name: prefix + " - avg tx/block", Unit: "count", Value: s.AvgTx},
{Name: prefix + " - blocks/s", Unit: "blocks/s", Value: s.BlocksPerSec},
{Name: prefix + " - non-empty block ratio", Unit: "%", Value: s.NonEmptyRatio},
{Name: prefix + " - avg block interval", Unit: "ms", Value: float64(s.AvgBlockInterval.Milliseconds())},
{Name: prefix + " - block interval p50", Unit: "ms", Value: float64(s.IntervalP50.Milliseconds())},
{Name: prefix + " - block interval p99", Unit: "ms", Value: float64(s.IntervalP99.Milliseconds())},
{Name: prefix + " - gas/block p50", Unit: "gas", Value: s.GasP50},
Expand Down Expand Up @@ -368,6 +381,22 @@ func evNodeOverhead(spans []e2e.TraceSpan) (float64, bool) {
return (produceAvg - executeAvg) / produceAvg * 100, true
}

// waitForMetricTarget polls a metric getter function every 2s until the
// returned value >= target, or fails the test on timeout. The most recent
// poll error (if any) is included in the failure message so a persistently
// failing metrics endpoint is not silently reported as a plain timeout.
func waitForMetricTarget(t testing.TB, name string, poll func() (float64, error), target float64, timeout time.Duration) {
	t.Helper()
	deadline := time.Now().Add(timeout)
	var lastErr error
	for time.Now().Before(deadline) {
		v, err := poll()
		if err == nil && v >= target {
			t.Logf("metric %s reached %.0f (target %.0f)", name, v, target)
			return
		}
		// remember the latest failure so the timeout message can explain it
		lastErr = err
		time.Sleep(2 * time.Second)
	}
	if lastErr != nil {
		t.Fatalf("metric %s did not reach target %.0f within %v (last poll error: %v)", name, target, timeout, lastErr)
	}
	t.Fatalf("metric %s did not reach target %.0f within %v", name, target, timeout)
}

// collectBlockMetrics iterates all headers in [startBlock, endBlock] to collect
// gas and transaction metrics. Empty blocks are skipped for gas/tx aggregation
// but included in block interval tracking.
Expand Down
Loading