Skip to content

Commit aa14324

Browse files
committed
chore: decompile avm bytecode into FuzzerData instructions
1 parent a9cc4f4 commit aa14324

23 files changed

+2775
-42
lines changed

barretenberg/cpp/src/barretenberg/avm_fuzzer/CMakeLists.txt

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,19 @@ if(FUZZING_AVM)
1111
file(GLOB_RECURSE BENCH_SOURCE_FILES "${CMAKE_CURRENT_SOURCE_DIR}/*.bench.cpp")
1212
file(GLOB_RECURSE FUZZERS_SOURCE_FILES "${CMAKE_CURRENT_SOURCE_DIR}/*.fuzzer.cpp")
1313

14-
# Exclude harness subdirectory files from this module
14+
# Exclude harness and tools subdirectory files from this module
1515
# Drop everything under harness/ from each collected file list; that
# subdirectory is added separately via add_subdirectory(harness).
foreach(avm_fuzzer_file_list
        SOURCE_FILES
        HEADER_FILES
        TEST_SOURCE_FILES
        BENCH_SOURCE_FILES
        FUZZERS_SOURCE_FILES)
    list(FILTER ${avm_fuzzer_file_list} EXCLUDE REGEX ".*/harness/.*")
endforeach()
2020

21+
# Drop everything under tools/ from each collected file list; that
# subdirectory is added separately via add_subdirectory(tools).
foreach(avm_fuzzer_file_list
        SOURCE_FILES
        HEADER_FILES
        TEST_SOURCE_FILES
        BENCH_SOURCE_FILES
        FUZZERS_SOURCE_FILES)
    list(FILTER ${avm_fuzzer_file_list} EXCLUDE REGEX ".*/tools/.*")
endforeach()
26+
2127
# Filter out test, bench, and fuzzer files from regular sources
2228
list(FILTER SOURCE_FILES EXCLUDE REGEX ".*\\.(fuzzer|test|bench)\\.cpp$")
2329

@@ -34,4 +40,7 @@ if(FUZZING_AVM)
3440

3541
# Add the harness subdirectory which will create the alu_fuzzer target
3642
add_subdirectory(harness)
43+
44+
# Add the tools subdirectory for CLI utilities
45+
add_subdirectory(tools)
3746
endif()

barretenberg/cpp/src/barretenberg/avm_fuzzer/fuzz_lib/bytecode_decompiler.cpp

Lines changed: 526 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
#pragma once
2+
3+
#include "barretenberg/avm_fuzzer/fuzz_lib/fuzzer_data.hpp"
4+
#include "barretenberg/avm_fuzzer/fuzz_lib/instruction.hpp"
5+
#include "barretenberg/vm2/common/field.hpp"
6+
#include "barretenberg/vm2/common/opcodes.hpp"
7+
8+
#include <cstdint>
9+
#include <vector>
10+
11+
namespace bb::avm_fuzzer {
12+
13+
/**
14+
* @brief Decompile raw bytecode into FuzzerData format
15+
*
16+
* Converts ALL instructions including control flow into FuzzInstruction variants.
17+
* The resulting FuzzerData will produce identical bytecode when rebuilt.
18+
*
19+
* This enables seeding the fuzzer corpus with real-world transactions and
20+
* bytecode generated by TypeScript tooling.
21+
*
22+
* @param bytecode Raw AVM bytecode
23+
* @param calldata Function calldata (field elements)
24+
* @return FuzzerData Structured representation for the fuzzer
25+
*
26+
* @note Assumes all bytecode uses direct addressing mode (indirect=0)
27+
* @note Control flow instructions (JUMP, JUMPI, RETURN, REVERT, INTERNALCALL, INTERNALRETURN)
28+
* are preserved with their raw operands for bytecode equivalence
29+
*/
30+
FuzzerData decompile_bytecode(const std::vector<uint8_t>& bytecode, const std::vector<bb::avm2::FF>& calldata);
31+
32+
} // namespace bb::avm_fuzzer
Lines changed: 299 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,299 @@
1+
#include <gtest/gtest.h>
2+
3+
#include "barretenberg/avm_fuzzer/fuzz_lib/bytecode_decompiler.hpp"
4+
#include "barretenberg/avm_fuzzer/fuzz_lib/control_flow.hpp"
5+
#include "barretenberg/avm_fuzzer/fuzz_lib/instruction.hpp"
6+
#include "barretenberg/vm2/common/field.hpp"
7+
#include "barretenberg/vm2/common/opcodes.hpp"
8+
#include "barretenberg/vm2/testing/instruction_builder.hpp"
9+
10+
using namespace bb::avm_fuzzer;
11+
using namespace bb::avm2;
12+
namespace avm2_testing = bb::avm2::testing;
13+
14+
namespace {
15+
16+
// Helper to build bytecode from raw simulation::Instruction
17+
std::vector<uint8_t> build_bytecode(const std::vector<simulation::Instruction>& instructions)
18+
{
19+
std::vector<uint8_t> bytecode;
20+
for (const auto& instr : instructions) {
21+
auto serialized = instr.serialize();
22+
bytecode.insert(bytecode.end(), serialized.begin(), serialized.end());
23+
}
24+
return bytecode;
25+
}
26+
27+
// Rebuild bytecode from FuzzerData to test round-trip
28+
std::vector<uint8_t> rebuild_bytecode(const FuzzerData& data)
29+
{
30+
// Make a mutable copy since ControlFlow modifies the blocks
31+
auto instruction_blocks = data.instruction_blocks;
32+
ControlFlow control_flow(instruction_blocks);
33+
for (const auto& cfg_instr : data.cfg_instructions) {
34+
control_flow.process_cfg_instruction(cfg_instr);
35+
}
36+
return control_flow.build_bytecode(data.return_options);
37+
}
38+
39+
} // namespace
40+
41+
// Test that decompiling and recompiling produces identical bytecode
42+
TEST(BytecodeDecompiler, RoundTripSimpleSetReturn)
43+
{
44+
// Build: SET_16 (value 42 at address 0) + RETURN
45+
auto set_instr = avm2_testing::InstructionBuilder(WireOpCode::SET_16)
46+
.operand(uint16_t{ 0 })
47+
.operand(MemoryTag::U32)
48+
.operand(uint16_t{ 42 })
49+
.build();
50+
auto return_instr =
51+
avm2_testing::InstructionBuilder(WireOpCode::RETURN).operand(uint16_t{ 0 }).operand(uint16_t{ 0 }).build();
52+
53+
auto original_bytecode = build_bytecode({ set_instr, return_instr });
54+
55+
// Decompile
56+
auto fuzzer_data = decompile_bytecode(original_bytecode, {});
57+
58+
// Rebuild
59+
auto rebuilt_bytecode = rebuild_bytecode(fuzzer_data);
60+
61+
EXPECT_EQ(original_bytecode.size(), rebuilt_bytecode.size());
62+
EXPECT_EQ(original_bytecode, rebuilt_bytecode);
63+
}
64+
65+
TEST(BytecodeDecompiler, RoundTripAdd16)
{
    // Program: SET_16 (value 5 at address 0), SET_16 (value 3 at address 1), ADD_16, RETURN.
    const auto store_lhs = avm2_testing::InstructionBuilder(WireOpCode::SET_16)
                               .operand(uint16_t{ 0 })
                               .operand(MemoryTag::U32)
                               .operand(uint16_t{ 5 })
                               .build();
    const auto store_rhs = avm2_testing::InstructionBuilder(WireOpCode::SET_16)
                               .operand(uint16_t{ 1 })
                               .operand(MemoryTag::U32)
                               .operand(uint16_t{ 3 })
                               .build();
    const auto sum = avm2_testing::InstructionBuilder(WireOpCode::ADD_16)
                         .operand(uint16_t{ 0 })
                         .operand(uint16_t{ 1 })
                         .operand(uint16_t{ 2 })
                         .build();
    const auto finish =
        avm2_testing::InstructionBuilder(WireOpCode::RETURN).operand(uint16_t{ 0 }).operand(uint16_t{ 2 }).build();
    const auto expected = build_bytecode({ store_lhs, store_rhs, sum, finish });

    // bytecode -> FuzzerData -> bytecode
    const auto decompiled = decompile_bytecode(expected, {});
    const auto actual = rebuild_bytecode(decompiled);

    EXPECT_EQ(expected, actual);
}
96+
97+
TEST(BytecodeDecompiler, RoundTripJump)
{
    // Program: JUMP_32 with operand 0, followed by RETURN.
    const auto jump = avm2_testing::InstructionBuilder(WireOpCode::JUMP_32).operand(uint32_t{ 0 }).build();
    const auto finish =
        avm2_testing::InstructionBuilder(WireOpCode::RETURN).operand(uint16_t{ 0 }).operand(uint16_t{ 0 }).build();
    const auto expected = build_bytecode({ jump, finish });

    // bytecode -> FuzzerData -> bytecode
    const auto decompiled = decompile_bytecode(expected, {});
    const auto actual = rebuild_bytecode(decompiled);

    EXPECT_EQ(expected, actual);
}
114+
115+
TEST(BytecodeDecompiler, RoundTripJumpI)
{
    // Program: SET_8 (condition at address 0), JUMPI_32, RETURN.
    const auto store_condition = avm2_testing::InstructionBuilder(WireOpCode::SET_8)
                                     .operand(uint8_t{ 0 })
                                     .operand(MemoryTag::U1)
                                     .operand(uint8_t{ 1 })
                                     .build();
    const auto conditional_jump =
        avm2_testing::InstructionBuilder(WireOpCode::JUMPI_32).operand(uint16_t{ 0 }).operand(uint32_t{ 5 }).build();
    const auto finish =
        avm2_testing::InstructionBuilder(WireOpCode::RETURN).operand(uint16_t{ 0 }).operand(uint16_t{ 0 }).build();
    const auto expected = build_bytecode({ store_condition, conditional_jump, finish });

    // bytecode -> FuzzerData -> bytecode
    const auto decompiled = decompile_bytecode(expected, {});
    const auto actual = rebuild_bytecode(decompiled);

    EXPECT_EQ(expected, actual);
}
138+
139+
TEST(BytecodeDecompiler, RoundTripRevert)
{
    // Program: SET_16 followed by REVERT_16.
    const auto store_value = avm2_testing::InstructionBuilder(WireOpCode::SET_16)
                                 .operand(uint16_t{ 0 })
                                 .operand(MemoryTag::U32)
                                 .operand(uint16_t{ 0 })
                                 .build();
    const auto revert =
        avm2_testing::InstructionBuilder(WireOpCode::REVERT_16).operand(uint16_t{ 0 }).operand(uint16_t{ 0 }).build();
    const auto expected = build_bytecode({ store_value, revert });

    // bytecode -> FuzzerData -> bytecode
    const auto decompiled = decompile_bytecode(expected, {});
    const auto actual = rebuild_bytecode(decompiled);

    EXPECT_EQ(expected, actual);
}
160+
161+
TEST(BytecodeDecompiler, RoundTripInternalCallReturn)
{
    // Program: INTERNALCALL (operand 5), INTERNALRETURN, RETURN.
    const auto call = avm2_testing::InstructionBuilder(WireOpCode::INTERNALCALL).operand(uint32_t{ 5 }).build();
    const auto call_return = avm2_testing::InstructionBuilder(WireOpCode::INTERNALRETURN).build();
    const auto finish =
        avm2_testing::InstructionBuilder(WireOpCode::RETURN).operand(uint16_t{ 0 }).operand(uint16_t{ 0 }).build();
    const auto expected = build_bytecode({ call, call_return, finish });

    // bytecode -> FuzzerData -> bytecode
    const auto decompiled = decompile_bytecode(expected, {});
    const auto actual = rebuild_bytecode(decompiled);

    EXPECT_EQ(expected, actual);
}
179+
180+
TEST(BytecodeDecompiler, RoundTripCast)
{
    // Program: SET_16, CAST_16, RETURN.
    const auto store_value = avm2_testing::InstructionBuilder(WireOpCode::SET_16)
                                 .operand(uint16_t{ 0 })
                                 .operand(MemoryTag::U32)
                                 .operand(uint16_t{ 255 })
                                 .build();
    const auto cast = avm2_testing::InstructionBuilder(WireOpCode::CAST_16)
                          .operand(uint16_t{ 0 })
                          .operand(uint16_t{ 1 })
                          .operand(MemoryTag::U64)
                          .build();
    const auto finish =
        avm2_testing::InstructionBuilder(WireOpCode::RETURN).operand(uint16_t{ 0 }).operand(uint16_t{ 1 }).build();
    const auto expected = build_bytecode({ store_value, cast, finish });

    // bytecode -> FuzzerData -> bytecode
    const auto decompiled = decompile_bytecode(expected, {});
    const auto actual = rebuild_bytecode(decompiled);

    EXPECT_EQ(expected, actual);
}
206+
207+
TEST(BytecodeDecompiler, RoundTripMov)
{
    // Program: SET_16, MOV_16, RETURN.
    const auto store_value = avm2_testing::InstructionBuilder(WireOpCode::SET_16)
                                 .operand(uint16_t{ 0 })
                                 .operand(MemoryTag::U32)
                                 .operand(uint16_t{ 42 })
                                 .build();
    const auto move =
        avm2_testing::InstructionBuilder(WireOpCode::MOV_16).operand(uint16_t{ 0 }).operand(uint16_t{ 1 }).build();
    const auto finish =
        avm2_testing::InstructionBuilder(WireOpCode::RETURN).operand(uint16_t{ 0 }).operand(uint16_t{ 1 }).build();
    const auto expected = build_bytecode({ store_value, move, finish });

    // bytecode -> FuzzerData -> bytecode
    const auto decompiled = decompile_bytecode(expected, {});
    const auto actual = rebuild_bytecode(decompiled);

    EXPECT_EQ(expected, actual);
}
230+
231+
TEST(BytecodeDecompiler, RoundTripNot)
{
    // Program: SET_16, NOT_16, RETURN.
    const auto store_value = avm2_testing::InstructionBuilder(WireOpCode::SET_16)
                                 .operand(uint16_t{ 0 })
                                 .operand(MemoryTag::U8)
                                 .operand(uint16_t{ 0x0F })
                                 .build();
    const auto negate =
        avm2_testing::InstructionBuilder(WireOpCode::NOT_16).operand(uint16_t{ 0 }).operand(uint16_t{ 1 }).build();
    const auto finish =
        avm2_testing::InstructionBuilder(WireOpCode::RETURN).operand(uint16_t{ 0 }).operand(uint16_t{ 1 }).build();
    const auto expected = build_bytecode({ store_value, negate, finish });

    // bytecode -> FuzzerData -> bytecode
    const auto decompiled = decompile_bytecode(expected, {});
    const auto actual = rebuild_bytecode(decompiled);

    EXPECT_EQ(expected, actual);
}
254+
255+
TEST(BytecodeDecompiler, RoundTripSetFF)
{
    // Program: SET_FF carrying a full field-element operand, followed by RETURN.
    // NOTE(review): the operand comes from FF::random_element(), so the input differs
    // between runs; confirm a failing value can be recovered from the test output.
    FF field_operand = FF::random_element();
    const auto store_field = avm2_testing::InstructionBuilder(WireOpCode::SET_FF)
                                 .operand(uint16_t{ 0 })
                                 .operand(MemoryTag::FF)
                                 .operand(field_operand)
                                 .build();
    const auto finish =
        avm2_testing::InstructionBuilder(WireOpCode::RETURN).operand(uint16_t{ 0 }).operand(uint16_t{ 0 }).build();
    const auto expected = build_bytecode({ store_field, finish });

    // bytecode -> FuzzerData -> bytecode
    const auto decompiled = decompile_bytecode(expected, {});
    const auto actual = rebuild_bytecode(decompiled);

    EXPECT_EQ(expected, actual);
}
277+
278+
TEST(BytecodeDecompiler, PreservesCalldata)
{
    // Calldata handed to the decompiler must appear unchanged in the resulting FuzzerData.
    const std::vector<FF> expected_calldata = { FF(1), FF(2), FF(42) };

    // A lone RETURN is enough of a program for this check.
    const auto finish =
        avm2_testing::InstructionBuilder(WireOpCode::RETURN).operand(uint16_t{ 0 }).operand(uint16_t{ 0 }).build();
    const auto program = build_bytecode({ finish });

    const auto decompiled = decompile_bytecode(program, expected_calldata);

    EXPECT_EQ(decompiled.calldata, expected_calldata);
}
290+
291+
TEST(BytecodeDecompiler, EmptyBytecode)
{
    // Decompiling zero bytes with no calldata should yield a first instruction block
    // that contains no instructions.
    const std::vector<uint8_t> empty_bytecode;
    const std::vector<FF> calldata;

    const auto fuzzer_data = decompile_bytecode(empty_bytecode, calldata);

    // Guard the index below: without this, a result with no blocks at all would be
    // undefined behaviour (out-of-bounds operator[]) instead of a clean test failure.
    ASSERT_FALSE(fuzzer_data.instruction_blocks.empty());
    EXPECT_TRUE(fuzzer_data.instruction_blocks[0].empty());
}

0 commit comments

Comments
 (0)