Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 11 additions & 4 deletions .github/workflows/benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,12 @@ permissions: {}
pull_request:
branches:
- main
paths:
Copy link
Contributor Author

@chatton chatton Feb 25, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Only run benchmark tests for PRs when relevant files are changed.

- 'test/e2e/benchmark/**'
- 'test/e2e/evm_contract_bench_test.go'
- 'test/e2e/evm_test_common.go'
- 'test/e2e/sut_helper.go'
- '.github/workflows/benchmark.yml'
Comment on lines +11 to +16
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

PR path filter misses Block Executor benchmark sources.

This workflow runs go test ./block/internal/executing/... (Line 40), but PR paths do not include that tree. Changes to those benchmarks/sources can skip this workflow unexpectedly.

Suggested fix
   pull_request:
     branches:
       - main
     paths:
       - 'test/e2e/benchmark/**'
       - 'test/e2e/evm_contract_bench_test.go'
       - 'test/e2e/evm_test_common.go'
       - 'test/e2e/sut_helper.go'
+      - 'block/internal/executing/**'
       - '.github/workflows/benchmark.yml'
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In @.github/workflows/benchmark.yml around lines 11 - 16, Update the PR path
filter in the GitHub Actions workflow to include the block executor benchmark
sources so changes to that package trigger the workflow; specifically add the
path pattern 'block/internal/executing/**' (or more broadly 'block/**' if
preferred) to the existing paths array in .github/workflows/benchmark.yml so the
job that runs `go test ./block/internal/executing/...` will trigger on relevant
PRs.

workflow_dispatch:

jobs:
Expand Down Expand Up @@ -58,12 +64,13 @@ jobs:
- name: Run Spamoor smoke test
run: |
cd test/e2e && BENCH_JSON_OUTPUT=spamoor_bench.json go test -tags evm \
-run='^TestSpamoorSmoke$' -v -timeout=15m --evm-binary=../../build/evm
-run='^TestSpamoorSuite$/^TestSpamoorSmoke$' -v -timeout=15m \
./benchmark/ --evm-binary=../../../build/evm
- name: Upload benchmark results
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
with:
name: spamoor-benchmark-results
path: test/e2e/spamoor_bench.json
path: test/e2e/benchmark/spamoor_bench.json

# single job to push all results to gh-pages sequentially, avoiding race conditions
publish-benchmarks:
Expand All @@ -84,7 +91,7 @@ jobs:
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
with:
name: spamoor-benchmark-results
path: test/e2e/
path: test/e2e/benchmark/

# only update the benchmark baseline on push/dispatch, not on PRs
- name: Store EVM Contract Roundtrip result
Expand Down Expand Up @@ -131,7 +138,7 @@ jobs:
with:
name: Spamoor Trace Benchmarks
tool: 'customSmallerIsBetter'
output-file-path: test/e2e/spamoor_bench.json
output-file-path: test/e2e/benchmark/spamoor_bench.json
auto-push: ${{ github.event_name != 'pull_request' }}
save-data-file: ${{ github.event_name != 'pull_request' }}
github-token: ${{ secrets.GITHUB_TOKEN }}
Expand Down
2 changes: 1 addition & 1 deletion scripts/test.mk
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ test-integration:
## test-e2e: Running e2e tests
test-e2e: build build-da build-evm docker-build-if-local
@echo "--> Running e2e tests"
@cd test/e2e && go test -mod=readonly -failfast -timeout=15m -tags='e2e evm' ./... --binary=../../build/testapp --evm-binary=../../build/evm
@cd test/e2e && go test -mod=readonly -failfast -timeout=15m -tags='e2e evm' $$(go list -tags='e2e evm' ./... | grep -v /benchmark) --binary=../../build/testapp --evm-binary=../../build/evm
.PHONY: test-e2e

## test-integration-cover: generate code coverage report for integration tests.
Expand Down
48 changes: 48 additions & 0 deletions test/e2e/benchmark/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
//go:build evm

package benchmark

import (
"fmt"
"net/http"
"testing"
"time"

dto "github.com/prometheus/client_model/go"
)

// requireHTTP polls a URL until it returns a 2xx status code or the timeout expires.
func requireHTTP(t testing.TB, url string, timeout time.Duration) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: naming is hard. how about requireHostUp

t.Helper()
client := &http.Client{Timeout: 200 * time.Millisecond}
deadline := time.Now().Add(timeout)
var lastErr error
for time.Now().Before(deadline) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

personal preference: require.Eventually()

resp, err := client.Get(url)
if err == nil {
_ = resp.Body.Close()
if resp.StatusCode >= 200 && resp.StatusCode < 300 {
return
}
lastErr = fmt.Errorf("status %d", resp.StatusCode)
} else {
lastErr = err
}
time.Sleep(100 * time.Millisecond)
}
t.Fatalf("daemon not ready at %s: %v", url, lastErr)
}

// sumCounter sums all counter values in a prometheus MetricFamily.
// A nil family, or a family whose type is not COUNTER, contributes zero;
// metrics with a nil counter or nil value are skipped.
func sumCounter(f *dto.MetricFamily) float64 {
	if f == nil || f.GetType() != dto.MetricType_COUNTER {
		return 0
	}
	total := 0.0
	for _, metric := range f.GetMetric() {
		counter := metric.GetCounter()
		if counter != nil && counter.Value != nil {
			total += counter.GetValue()
		}
	}
	return total
}
77 changes: 77 additions & 0 deletions test/e2e/benchmark/output.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
//go:build evm

package benchmark

import (
"encoding/json"
"fmt"
"os"
"sort"
"testing"

e2e "github.com/evstack/ev-node/test/e2e"
"github.com/stretchr/testify/require"
)

// entry matches the customSmallerIsBetter format for github-action-benchmark.
type entry struct {
	Name  string  `json:"name"`  // benchmark series name shown in the chart
	Unit  string  `json:"unit"`  // unit label for the value, e.g. "us"
	Value float64 `json:"value"` // measured value; smaller is better
}

// resultWriter accumulates benchmark entries and writes them to a JSON file
// when flush is called. Create one early in a test and defer flush so results
// are written regardless of where the test exits.
type resultWriter struct {
	t       testing.TB // test context used for assertions and logging
	label   string     // prefix applied to every entry name
	entries []entry    // accumulated results, written by flush
}

// newResultWriter creates a resultWriter whose entries are all tagged with label.
func newResultWriter(t testing.TB, label string) *resultWriter {
	return &resultWriter{
		t:     t,
		label: label,
	}
}

// addSpans aggregates trace spans into per-operation average-duration
// entries (unit: microseconds), one per span name, sorted by name so the
// output order is deterministic. Empty input is a no-op.
func (w *resultWriter) addSpans(spans []e2e.TraceSpan) {
	stats := e2e.AggregateSpanStats(spans)
	if len(stats) == 0 {
		return
	}

	names := make([]string, 0, len(stats))
	for n := range stats {
		names = append(names, n)
	}
	sort.Strings(names)

	for _, n := range names {
		st := stats[n]
		w.entries = append(w.entries, entry{
			Name:  fmt.Sprintf("%s - %s (avg)", w.label, n),
			Unit:  "us",
			Value: float64(st.Total.Microseconds()) / float64(st.Count),
		})
	}
}

// addEntry appends a custom entry to the results.
func (w *resultWriter) addEntry(item entry) {
	w.entries = append(w.entries, item)
}

// flush writes accumulated entries as indented JSON to the path named by the
// BENCH_JSON_OUTPUT environment variable. It is a no-op when the env var is
// unset or no entries were added, so it is safe to defer unconditionally.
func (w *resultWriter) flush() {
	path := os.Getenv("BENCH_JSON_OUTPUT")
	if len(w.entries) == 0 || path == "" {
		return
	}

	payload, err := json.MarshalIndent(w.entries, "", " ")
	require.NoError(w.t, err, "failed to marshal benchmark JSON")
	require.NoError(w.t, os.WriteFile(path, payload, 0644), "failed to write benchmark JSON to %s", path)
	w.t.Logf("wrote %d benchmark entries to %s", len(w.entries), path)
}
114 changes: 114 additions & 0 deletions test/e2e/benchmark/spamoor_smoke_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
//go:build evm

package benchmark

import (
"time"

"github.com/celestiaorg/tastora/framework/docker/evstack/spamoor"
e2e "github.com/evstack/ev-node/test/e2e"
)

// TestSpamoorSmoke spins up reth + sequencer and a Spamoor node, starts a few
// basic spammers, waits briefly, then validates trace spans and prints a concise
// metrics summary.
func (s *SpamoorSuite) TestSpamoorSmoke() {
t := s.T()
w := newResultWriter(t, "SpamoorSmoke")
defer w.flush()

e := s.setupEnv(config{
rethTag: "pr-140",
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the PR the traces were added, we can change this to a proper tag in a follow up.

serviceName: "ev-node-smoke",
})
api := e.spamoorAPI

eoatx := map[string]any{
"throughput": 100,
"total_count": 3000,
"max_pending": 4000,
"max_wallets": 300,
"amount": 100,
"random_amount": true,
"random_target": true,
"base_fee": 20,
"tip_fee": 2,
"refill_amount": "1000000000000000000",
"refill_balance": "500000000000000000",
"refill_interval": 600,
}

gasburner := map[string]any{
"throughput": 25,
"total_count": 2000,
"max_pending": 8000,
"max_wallets": 500,
"gas_units_to_burn": 3000000,
"base_fee": 20,
"tip_fee": 5,
"rebroadcast": 5,
"refill_amount": "5000000000000000000",
"refill_balance": "2000000000000000000",
"refill_interval": 300,
}

var ids []int
id, err := api.CreateSpammer("smoke-eoatx", spamoor.ScenarioEOATX, eoatx, true)
s.Require().NoError(err, "failed to create eoatx spammer")
ids = append(ids, id)
id, err = api.CreateSpammer("smoke-gasburner", spamoor.ScenarioGasBurnerTX, gasburner, true)
s.Require().NoError(err, "failed to create gasburner spammer")
ids = append(ids, id)

for _, id := range ids {
idToDelete := id
t.Cleanup(func() { _ = api.DeleteSpammer(idToDelete) })
}

// allow spamoor enough time to generate transaction throughput
// so that the expected tracing spans appear in Jaeger.
time.Sleep(60 * time.Second)

// fetch parsed metrics and print a concise summary.
metrics, err := api.GetMetrics()
s.Require().NoError(err, "failed to get metrics")
sent := sumCounter(metrics["spamoor_transactions_sent_total"])
fail := sumCounter(metrics["spamoor_transactions_failed_total"])

// collect traces
evNodeSpans := s.collectServiceTraces(e, "ev-node-smoke")
evRethSpans := s.collectServiceTraces(e, "ev-reth")
e2e.PrintTraceReport(t, "ev-node-smoke", evNodeSpans)
e2e.PrintTraceReport(t, "ev-reth", evRethSpans)

w.addSpans(append(evNodeSpans, evRethSpans...))

// assert expected ev-node span names
assertSpanNames(t, evNodeSpans, []string{
"BlockExecutor.ProduceBlock",
"BlockExecutor.ApplyBlock",
"BlockExecutor.CreateBlock",
"BlockExecutor.RetrieveBatch",
"Executor.ExecuteTxs",
"Executor.SetFinal",
"Engine.ForkchoiceUpdated",
"Engine.NewPayload",
"Engine.GetPayload",
"Eth.GetBlockByNumber",
"Sequencer.GetNextBatch",
"DASubmitter.SubmitHeaders",
"DASubmitter.SubmitData",
"DA.Submit",
}, "ev-node-smoke")

// assert expected ev-reth span names
assertSpanNames(t, evRethSpans, []string{
"build_payload",
"execute_tx",
"try_build",
"validate_transaction",
}, "ev-reth")

s.Require().Greater(sent, float64(0), "at least one transaction should have been sent")
s.Require().Zero(fail, "no transactions should have failed")
}
Loading
Loading