Skip to content
Merged
37 changes: 37 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ A distributed system for processing Ethereum execution layer data with support f
## Features

- **Transaction Structlog Processing**: Extract and store detailed execution traces for every transaction
- **Structlog Aggregation**: Aggregate per-opcode gas data into call frame rows with resource gas decomposition
- **Distributed Processing**: Redis-backed task queues with priority-based processing
- **Leader Election**: Built-in leader election for coordinated block processing
- **Dual Processing Modes**: Forwards (real-time) and backwards (backfill) processing
Expand Down Expand Up @@ -214,6 +215,42 @@ curl -X POST http://localhost:8080/api/v1/queue/blocks/transaction_structlog \
- Allows reprocessing of already processed blocks
- Each API call creates new tasks (calling multiple times will create duplicates)

## Structlog Aggregation

The `structlog_agg` processor aggregates per-opcode structlog data into call frame rows suitable for ClickHouse storage. It produces two types of rows per call frame:

- **Summary row** (`operation=""`): Frame-level metadata including gas totals, call type, target address, intrinsic gas, and gas refund
- **Per-opcode rows** (`operation="SLOAD"` etc.): Gas and count aggregated by opcode within each frame

### Resource Gas Decomposition

The aggregator computes building-block columns that enable downstream SQL to decompose EVM gas into resource categories (compute, memory, storage access):

| Column | Description |
|--------|-------------|
| `memory_words_sum_before` | SUM(ceil(memory_bytes/32)) before each opcode |
| `memory_words_sum_after` | SUM(ceil(memory_bytes/32)) after each opcode |
| `memory_words_sq_sum_before` | SUM(words_before^2) for quadratic cost extraction |
| `memory_words_sq_sum_after` | SUM(words_after^2) for quadratic cost extraction |
| `cold_access_count` | Number of cold storage/account accesses (EIP-2929) |

These columns are computed by two functions in the `structlog` package:

- **`ComputeMemoryWords`**: Derives per-opcode memory size in 32-byte words using the pending-index technique. Handles depth transitions and RETURN/REVERT last-in-frame expansion via stack operands.
- **`ClassifyColdAccess`**: Classifies each opcode's storage/account access as cold or warm (EIP-2929) using gas values, memory expansion costs, and range-based detection. Supports both embedded mode (pre-computed tracer fields) and RPC mode (stack-based fallbacks).

### Gas Computation Pipeline

```
StructLogs -> ComputeGasUsed -> ComputeGasSelf -> ComputeMemoryWords -> ClassifyColdAccess
                                                                                |
                                                                                v
                                                                  ProcessStructlog (per opcode)
                                                                                |
                                                                                v
                                                                  Finalize -> CallFrameRows
```

## Architecture

### Leader Election
Expand Down
15 changes: 15 additions & 0 deletions pkg/ethereum/execution/structlog.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,4 +64,19 @@ type StructLog struct {
// In embedded mode: pre-extracted by tracer from stack[len-2].
// In RPC mode: nil, extracted post-hoc from Stack by extractCallAddress().
CallToAddress *string `json:"callToAddress,omitempty"`

// MemorySize is the EVM memory size in bytes at the time this opcode executes.
// Used to compute memory expansion gas between consecutive opcodes.
// In embedded mode: captured by tracer from scope.MemoryData().
// In RPC mode: 0 (not available).
MemorySize uint32 `json:"memSize,omitempty"`

// CallTransfersValue indicates whether a CALL/CALLCODE transfers non-zero ETH value.
// True only for CALL/CALLCODE with value > 0 on the stack.
// Used to normalize CALL gas for cold access detection.
CallTransfersValue bool `json:"callTransfersValue,omitempty"`

// ExtCodeCopySize is the size parameter for EXTCODECOPY opcodes.
// Used to compute the copy cost component for cold access detection.
ExtCodeCopySize uint32 `json:"extCodeCopySize,omitempty"`
}
16 changes: 8 additions & 8 deletions pkg/processor/transaction/structlog/call_tracker_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -313,8 +313,8 @@ func TestIsPrecompile(t *testing.T) {

for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
result := isPrecompile(tc.addr)
assert.Equal(t, tc.expected, result, "isPrecompile(%q) = %v, want %v", tc.addr, result, tc.expected)
result := IsPrecompile(tc.addr)
assert.Equal(t, tc.expected, result, "IsPrecompile(%q) = %v, want %v", tc.addr, result, tc.expected)
})
}
}
Expand Down Expand Up @@ -347,7 +347,7 @@ func TestIsPrecompile_HardcodedList(t *testing.T) {

// Verify all expected precompiles are detected
for _, addr := range expectedPrecompiles {
assert.True(t, isPrecompile(addr),
assert.True(t, IsPrecompile(addr),
"precompile %s should be detected", addr)
}

Expand Down Expand Up @@ -411,7 +411,7 @@ func TestEOADetectionLogic(t *testing.T) {
// Depth increase = entered contract code (not EOA)
// Depth decrease = call returned/failed (not EOA)
// Depth same = called EOA or precompile (immediate return)
if nextDepth == currentDepth && !isPrecompile(callToAddr) {
if nextDepth == currentDepth && !IsPrecompile(callToAddr) {
return true
}

Expand Down Expand Up @@ -589,11 +589,11 @@ func TestEOADetectionBugScenario_DepthDecrease(t *testing.T) {
callToAddr := "0xde9c774cde34f85ee69c22e9a1077a0c9091f09b"

// Old buggy logic: nextDepth <= currentDepth → 2 <= 3 → TRUE (wrong!)
buggyLogic := nextDepth <= currentDepth && !isPrecompile(callToAddr)
buggyLogic := nextDepth <= currentDepth && !IsPrecompile(callToAddr)
assert.True(t, buggyLogic, "Old buggy logic would have created synthetic frame")

// Fixed logic: nextDepth == currentDepth → 2 == 3 → FALSE (correct!)
fixedLogic := nextDepth == currentDepth && !isPrecompile(callToAddr)
fixedLogic := nextDepth == currentDepth && !IsPrecompile(callToAddr)
assert.False(t, fixedLogic, "Fixed logic should NOT create synthetic frame")
}

Expand All @@ -609,11 +609,11 @@ func TestEOADetectionBugScenario_OutOfGas(t *testing.T) {
hasNextOpcode := false

// Old buggy logic: "Last opcode is a CALL - if not precompile, must be EOA"
buggyLogic := !hasNextOpcode && !isPrecompile(callToAddr)
buggyLogic := !hasNextOpcode && !IsPrecompile(callToAddr)
assert.True(t, buggyLogic, "Old buggy logic would have created synthetic frame")

// Fixed logic: Don't assume last CALL is EOA - we can't determine without next opcode
fixedLogic := hasNextOpcode && !isPrecompile(callToAddr) // Always false when !hasNextOpcode
fixedLogic := hasNextOpcode && !IsPrecompile(callToAddr) // Always false when !hasNextOpcode
assert.False(t, fixedLogic, "Fixed logic should NOT create synthetic frame for last opcode")
}

Expand Down
Loading