diff --git a/barretenberg/cpp/src/barretenberg/avm_fuzzer/CMakeLists.txt b/barretenberg/cpp/src/barretenberg/avm_fuzzer/CMakeLists.txt index c3701c0bb0f8..22ef1b7448fb 100644 --- a/barretenberg/cpp/src/barretenberg/avm_fuzzer/CMakeLists.txt +++ b/barretenberg/cpp/src/barretenberg/avm_fuzzer/CMakeLists.txt @@ -11,13 +11,19 @@ if(FUZZING_AVM) file(GLOB_RECURSE BENCH_SOURCE_FILES "${CMAKE_CURRENT_SOURCE_DIR}/*.bench.cpp") file(GLOB_RECURSE FUZZERS_SOURCE_FILES "${CMAKE_CURRENT_SOURCE_DIR}/*.fuzzer.cpp") - # Exclude harness subdirectory files from this module + # Exclude harness and tools subdirectory files from this module list(FILTER SOURCE_FILES EXCLUDE REGEX ".*/harness/.*") list(FILTER HEADER_FILES EXCLUDE REGEX ".*/harness/.*") list(FILTER TEST_SOURCE_FILES EXCLUDE REGEX ".*/harness/.*") list(FILTER BENCH_SOURCE_FILES EXCLUDE REGEX ".*/harness/.*") list(FILTER FUZZERS_SOURCE_FILES EXCLUDE REGEX ".*/harness/.*") + list(FILTER SOURCE_FILES EXCLUDE REGEX ".*/tools/.*") + list(FILTER HEADER_FILES EXCLUDE REGEX ".*/tools/.*") + list(FILTER TEST_SOURCE_FILES EXCLUDE REGEX ".*/tools/.*") + list(FILTER BENCH_SOURCE_FILES EXCLUDE REGEX ".*/tools/.*") + list(FILTER FUZZERS_SOURCE_FILES EXCLUDE REGEX ".*/tools/.*") + # Filter out test, bench, and fuzzer files from regular sources list(FILTER SOURCE_FILES EXCLUDE REGEX ".*\\.(fuzzer|test|bench)\\.cpp$") @@ -34,4 +40,7 @@ if(FUZZING_AVM) # Add the harness subdirectory which will create the alu_fuzzer target add_subdirectory(harness) + + # Add the tools subdirectory for CLI utilities + add_subdirectory(tools) endif() diff --git a/barretenberg/cpp/src/barretenberg/avm_fuzzer/fuzz_lib/bytecode_decompiler.cpp b/barretenberg/cpp/src/barretenberg/avm_fuzzer/fuzz_lib/bytecode_decompiler.cpp new file mode 100644 index 000000000000..482625db75fe --- /dev/null +++ b/barretenberg/cpp/src/barretenberg/avm_fuzzer/fuzz_lib/bytecode_decompiler.cpp @@ -0,0 +1,526 @@ +#include "barretenberg/avm_fuzzer/fuzz_lib/bytecode_decompiler.hpp" + 
+#include "barretenberg/vm2/common/memory_types.hpp" +#include "barretenberg/vm2/simulation/lib/serialization.hpp" + +#include +#include +#include + +namespace bb::avm_fuzzer { + +// Custom exception for decompilation errors with detailed info +class DecompilationError : public std::runtime_error { + public: + DecompilationError(const std::string& msg) + : std::runtime_error(msg) + {} +}; + +namespace { + +using namespace bb::avm2; +using simulation::Instruction; + +// ============================================================================ +// Addressing Mode Extraction Helpers +// ============================================================================ + +// Extract addressing mode from INDIRECT8 byte (1 bit per operand: bit = indirect flag) +AddressingMode get_addressing_mode_8(uint8_t indirect, size_t operand_idx) +{ + bool is_indirect = (indirect >> operand_idx) & 1; + return is_indirect ? AddressingMode::Indirect : AddressingMode::Direct; +} + +// Extract addressing mode from INDIRECT16 (2 bits per operand: bit0=indirect, bit1=relative) +AddressingMode get_addressing_mode_16(uint16_t indirect, size_t operand_idx) +{ + uint8_t bits = (indirect >> (operand_idx * 2)) & 0b11; + switch (bits) { + case 0b00: + return AddressingMode::Direct; + case 0b01: + return AddressingMode::Indirect; + case 0b10: + return AddressingMode::Relative; + case 0b11: + return AddressingMode::IndirectRelative; + default: + return AddressingMode::Direct; + } +} + +// ============================================================================ +// Decompiler-Specific Ref Constructors +// These store raw_operand to ensure byte-perfect bytecode reconstruction +// ============================================================================ + +// Create VariableRef for decompiled bytecode (8-bit operand) +VariableRef make_var_ref_8(MemoryTag tag, uint8_t address, AddressingMode mode) +{ + return VariableRef{ .tag = MemoryTagWrapper(tag), + .index = address, + .pointer_address_seed = 
0, + .base_offset_seed = 0, + .mode = AddressingModeWrapper(mode), + .raw_operand = address }; +} + +// Create VariableRef for decompiled bytecode (16-bit operand) +VariableRef make_var_ref_16(MemoryTag tag, uint16_t address, AddressingMode mode) +{ + return VariableRef{ .tag = MemoryTagWrapper(tag), + .index = address, + .pointer_address_seed = 0, + .base_offset_seed = 0, + .mode = AddressingModeWrapper(mode), + .raw_operand = address }; +} + +// Create AddressRef for decompiled bytecode (8-bit operand) +AddressRef make_addr_ref_8(uint8_t address, AddressingMode mode) +{ + return AddressRef{ .address = address, + .pointer_address_seed = 0, + .base_offset_seed = 0, + .mode = AddressingModeWrapper(mode), + .raw_operand = address }; +} + +// Create AddressRef for decompiled bytecode (16-bit operand) +AddressRef make_addr_ref_16(uint16_t address, AddressingMode mode) +{ + return AddressRef{ .address = address, + .pointer_address_seed = 0, + .base_offset_seed = 0, + .mode = AddressingModeWrapper(mode), + .raw_operand = address }; +} + +// Map a vm2 Instruction to a FuzzInstruction +FuzzInstruction map_vm2_to_fuzz(const Instruction& instr) +{ + // Helper lambdas to extract operands with proper types + // NOTE: INDIRECT bytes are NOT in the operands vector - they set instr.indirect instead + // So operands[0] is the first non-indirect operand + auto get_u8 = [&](size_t i) { return instr.operands[i].to(); }; + auto get_u16 = [&](size_t i) { return instr.operands[i].to(); }; + auto get_u32 = [&](size_t i) { return instr.operands[i].to(); }; + auto get_u64 = [&](size_t i) { return instr.operands[i].to(); }; + auto get_u128 = [&](size_t i) { return instr.operands[i].to(); }; + auto get_ff = [&](size_t i) { return instr.operands[i].to(); }; + auto get_tag = [&](size_t i) { return static_cast(instr.operands[i].to()); }; + + // Helper to get addressing mode for INDIRECT8 opcodes (most opcodes) + auto mode8 = [&](size_t i) { return 
get_addressing_mode_8(static_cast(instr.indirect), i); }; + // Helper to get addressing mode for INDIRECT16 opcodes (CALL, STATICCALL, etc.) + // NOTE: Used by the CALL and STATICCALL cases below (decoded as RAW_CALL_Instruction). + // The (void) cast that follows is therefore redundant. + auto mode16 = [&](size_t i) { return get_addressing_mode_16(instr.indirect, i); }; + (void)mode16; + + switch (instr.opcode) { + // ========================== + // Arithmetic 8-bit variants + // ========================== + // Wire format: [indirect, a, b, dst] → operands: [a, b, dst] + case WireOpCode::ADD_8: + return ADD_8_Instruction{ .a_address = make_var_ref_8(MemoryTag::FF, get_u8(0), mode8(0)), + .b_address = make_var_ref_8(MemoryTag::FF, get_u8(1), mode8(1)), + .result_address = make_addr_ref_8(get_u8(2), mode8(2)) }; + case WireOpCode::SUB_8: + return SUB_8_Instruction{ .a_address = make_var_ref_8(MemoryTag::FF, get_u8(0), mode8(0)), + .b_address = make_var_ref_8(MemoryTag::FF, get_u8(1), mode8(1)), + .result_address = make_addr_ref_8(get_u8(2), mode8(2)) }; + case WireOpCode::MUL_8: + return MUL_8_Instruction{ .a_address = make_var_ref_8(MemoryTag::FF, get_u8(0), mode8(0)), + .b_address = make_var_ref_8(MemoryTag::FF, get_u8(1), mode8(1)), + .result_address = make_addr_ref_8(get_u8(2), mode8(2)) }; + case WireOpCode::DIV_8: + return DIV_8_Instruction{ .a_address = make_var_ref_8(MemoryTag::FF, get_u8(0), mode8(0)), + .b_address = make_var_ref_8(MemoryTag::FF, get_u8(1), mode8(1)), + .result_address = make_addr_ref_8(get_u8(2), mode8(2)) }; + case WireOpCode::FDIV_8: + return FDIV_8_Instruction{ .a_address = make_var_ref_8(MemoryTag::FF, get_u8(0), mode8(0)), + .b_address = make_var_ref_8(MemoryTag::FF, get_u8(1), mode8(1)), + .result_address = make_addr_ref_8(get_u8(2), mode8(2)) }; + case WireOpCode::EQ_8: + return EQ_8_Instruction{ .a_address = make_var_ref_8(MemoryTag::FF, get_u8(0), mode8(0)), + .b_address = make_var_ref_8(MemoryTag::FF, 
get_u8(1), mode8(1)), + .result_address = make_addr_ref_8(get_u8(2), mode8(2)) }; + case WireOpCode::LT_8: + return LT_8_Instruction{ .a_address = make_var_ref_8(MemoryTag::FF, get_u8(0), mode8(0)), + .b_address = make_var_ref_8(MemoryTag::FF, get_u8(1), mode8(1)), + .result_address = make_addr_ref_8(get_u8(2), mode8(2)) }; + case WireOpCode::LTE_8: + return LTE_8_Instruction{ .a_address = make_var_ref_8(MemoryTag::FF, get_u8(0), mode8(0)), + .b_address = make_var_ref_8(MemoryTag::FF, get_u8(1), mode8(1)), + .result_address = make_addr_ref_8(get_u8(2), mode8(2)) }; + case WireOpCode::AND_8: + return AND_8_Instruction{ .a_address = make_var_ref_8(MemoryTag::FF, get_u8(0), mode8(0)), + .b_address = make_var_ref_8(MemoryTag::FF, get_u8(1), mode8(1)), + .result_address = make_addr_ref_8(get_u8(2), mode8(2)) }; + case WireOpCode::OR_8: + return OR_8_Instruction{ .a_address = make_var_ref_8(MemoryTag::FF, get_u8(0), mode8(0)), + .b_address = make_var_ref_8(MemoryTag::FF, get_u8(1), mode8(1)), + .result_address = make_addr_ref_8(get_u8(2), mode8(2)) }; + case WireOpCode::XOR_8: + return XOR_8_Instruction{ .a_address = make_var_ref_8(MemoryTag::FF, get_u8(0), mode8(0)), + .b_address = make_var_ref_8(MemoryTag::FF, get_u8(1), mode8(1)), + .result_address = make_addr_ref_8(get_u8(2), mode8(2)) }; + case WireOpCode::SHL_8: + return SHL_8_Instruction{ .a_address = make_var_ref_8(MemoryTag::FF, get_u8(0), mode8(0)), + .b_address = make_var_ref_8(MemoryTag::FF, get_u8(1), mode8(1)), + .result_address = make_addr_ref_8(get_u8(2), mode8(2)) }; + case WireOpCode::SHR_8: + return SHR_8_Instruction{ .a_address = make_var_ref_8(MemoryTag::FF, get_u8(0), mode8(0)), + .b_address = make_var_ref_8(MemoryTag::FF, get_u8(1), mode8(1)), + .result_address = make_addr_ref_8(get_u8(2), mode8(2)) }; + // NOT_8: wire [indirect, src, dst] → operands [src, dst] + case WireOpCode::NOT_8: + return NOT_8_Instruction{ .a_address = make_var_ref_8(MemoryTag::FF, get_u8(0), mode8(0)), + .result_address = 
make_addr_ref_8(get_u8(1), mode8(1)) }; + + // ========================== + // Arithmetic 16-bit variants + // ========================== + // Wire format: [indirect, a, b, dst] → operands: [a, b, dst] + case WireOpCode::ADD_16: + return ADD_16_Instruction{ .a_address = make_var_ref_16(MemoryTag::FF, get_u16(0), mode8(0)), + .b_address = make_var_ref_16(MemoryTag::FF, get_u16(1), mode8(1)), + .result_address = make_addr_ref_16(get_u16(2), mode8(2)) }; + case WireOpCode::SUB_16: + return SUB_16_Instruction{ .a_address = make_var_ref_16(MemoryTag::FF, get_u16(0), mode8(0)), + .b_address = make_var_ref_16(MemoryTag::FF, get_u16(1), mode8(1)), + .result_address = make_addr_ref_16(get_u16(2), mode8(2)) }; + case WireOpCode::MUL_16: + return MUL_16_Instruction{ .a_address = make_var_ref_16(MemoryTag::FF, get_u16(0), mode8(0)), + .b_address = make_var_ref_16(MemoryTag::FF, get_u16(1), mode8(1)), + .result_address = make_addr_ref_16(get_u16(2), mode8(2)) }; + case WireOpCode::DIV_16: + return DIV_16_Instruction{ .a_address = make_var_ref_16(MemoryTag::FF, get_u16(0), mode8(0)), + .b_address = make_var_ref_16(MemoryTag::FF, get_u16(1), mode8(1)), + .result_address = make_addr_ref_16(get_u16(2), mode8(2)) }; + case WireOpCode::FDIV_16: + return FDIV_16_Instruction{ .a_address = make_var_ref_16(MemoryTag::FF, get_u16(0), mode8(0)), + .b_address = make_var_ref_16(MemoryTag::FF, get_u16(1), mode8(1)), + .result_address = make_addr_ref_16(get_u16(2), mode8(2)) }; + case WireOpCode::EQ_16: + return EQ_16_Instruction{ .a_address = make_var_ref_16(MemoryTag::FF, get_u16(0), mode8(0)), + .b_address = make_var_ref_16(MemoryTag::FF, get_u16(1), mode8(1)), + .result_address = make_addr_ref_16(get_u16(2), mode8(2)) }; + case WireOpCode::LT_16: + return LT_16_Instruction{ .a_address = make_var_ref_16(MemoryTag::FF, get_u16(0), mode8(0)), + .b_address = make_var_ref_16(MemoryTag::FF, get_u16(1), mode8(1)), + .result_address = make_addr_ref_16(get_u16(2), mode8(2)) }; + case 
WireOpCode::LTE_16: + return LTE_16_Instruction{ .a_address = make_var_ref_16(MemoryTag::FF, get_u16(0), mode8(0)), + .b_address = make_var_ref_16(MemoryTag::FF, get_u16(1), mode8(1)), + .result_address = make_addr_ref_16(get_u16(2), mode8(2)) }; + case WireOpCode::AND_16: + return AND_16_Instruction{ .a_address = make_var_ref_16(MemoryTag::FF, get_u16(0), mode8(0)), + .b_address = make_var_ref_16(MemoryTag::FF, get_u16(1), mode8(1)), + .result_address = make_addr_ref_16(get_u16(2), mode8(2)) }; + case WireOpCode::OR_16: + return OR_16_Instruction{ .a_address = make_var_ref_16(MemoryTag::FF, get_u16(0), mode8(0)), + .b_address = make_var_ref_16(MemoryTag::FF, get_u16(1), mode8(1)), + .result_address = make_addr_ref_16(get_u16(2), mode8(2)) }; + case WireOpCode::XOR_16: + return XOR_16_Instruction{ .a_address = make_var_ref_16(MemoryTag::FF, get_u16(0), mode8(0)), + .b_address = make_var_ref_16(MemoryTag::FF, get_u16(1), mode8(1)), + .result_address = make_addr_ref_16(get_u16(2), mode8(2)) }; + case WireOpCode::SHL_16: + return SHL_16_Instruction{ .a_address = make_var_ref_16(MemoryTag::FF, get_u16(0), mode8(0)), + .b_address = make_var_ref_16(MemoryTag::FF, get_u16(1), mode8(1)), + .result_address = make_addr_ref_16(get_u16(2), mode8(2)) }; + case WireOpCode::SHR_16: + return SHR_16_Instruction{ .a_address = make_var_ref_16(MemoryTag::FF, get_u16(0), mode8(0)), + .b_address = make_var_ref_16(MemoryTag::FF, get_u16(1), mode8(1)), + .result_address = make_addr_ref_16(get_u16(2), mode8(2)) }; + // NOT_16: wire [indirect, src, dst] → operands [src, dst] + case WireOpCode::NOT_16: + return NOT_16_Instruction{ .a_address = make_var_ref_16(MemoryTag::FF, get_u16(0), mode8(0)), + .result_address = make_addr_ref_16(get_u16(1), mode8(1)) }; + + // ========================== + // CAST instructions + // ========================== + // CAST_8: wire [indirect, src, dst, tag] → operands [src, dst, tag] + case WireOpCode::CAST_8: { + MemoryTag src_tag = MemoryTag::FF; + MemoryTag 
target_tag = get_tag(2); + return CAST_8_Instruction{ .src_tag = MemoryTagWrapper(src_tag), + .src_address = make_var_ref_8(src_tag, get_u8(0), mode8(0)), + .result_address = make_addr_ref_8(get_u8(1), mode8(1)), + .target_tag = MemoryTagWrapper(target_tag) }; + } + // CAST_16: wire [indirect, src, dst, tag] → operands [src, dst, tag] + case WireOpCode::CAST_16: { + MemoryTag src_tag = MemoryTag::FF; + MemoryTag target_tag = get_tag(2); + return CAST_16_Instruction{ .src_tag = MemoryTagWrapper(src_tag), + .src_address = make_var_ref_16(src_tag, get_u16(0), mode8(0)), + .result_address = make_addr_ref_16(get_u16(1), mode8(1)), + .target_tag = MemoryTagWrapper(target_tag) }; + } + + // ========================== + // SET instructions + // ========================== + // SET_8: wire [indirect, dst, tag, val] → operands [dst, tag, val] + case WireOpCode::SET_8: + return SET_8_Instruction{ .value_tag = MemoryTagWrapper(get_tag(1)), + .result_address = make_addr_ref_8(get_u8(0), mode8(0)), + .value = get_u8(2) }; + // SET_16: wire [indirect, dst, tag, val] → operands [dst, tag, val] + case WireOpCode::SET_16: + return SET_16_Instruction{ .value_tag = MemoryTagWrapper(get_tag(1)), + .result_address = make_addr_ref_16(get_u16(0), mode8(0)), + .value = get_u16(2) }; + // SET_32: wire [indirect, dst, tag, val] → operands [dst, tag, val] + case WireOpCode::SET_32: + return SET_32_Instruction{ .value_tag = MemoryTagWrapper(get_tag(1)), + .result_address = make_addr_ref_16(get_u16(0), mode8(0)), + .value = get_u32(2) }; + // SET_64: wire [indirect, dst, tag, val] → operands [dst, tag, val] + case WireOpCode::SET_64: + return SET_64_Instruction{ .value_tag = MemoryTagWrapper(get_tag(1)), + .result_address = make_addr_ref_16(get_u16(0), mode8(0)), + .value = get_u64(2) }; + // SET_128: wire [indirect, dst, tag, val] → operands [dst, tag, val] + case WireOpCode::SET_128: { + uint128_t value = get_u128(2); + return SET_128_Instruction{ .value_tag = MemoryTagWrapper(get_tag(1)), + 
.result_address = make_addr_ref_16(get_u16(0), mode8(0)), + .value_low = static_cast(value), + .value_high = static_cast(value >> 64) }; + } + // SET_FF: wire [indirect, dst, tag, val] → operands [dst, tag, val] + case WireOpCode::SET_FF: + return SET_FF_Instruction{ .value_tag = MemoryTagWrapper(get_tag(1)), + .result_address = make_addr_ref_16(get_u16(0), mode8(0)), + .value = get_ff(2) }; + + // ========================== + // MOV instructions + // ========================== + // MOV_8: wire [indirect, src, dst] → operands [src, dst] + case WireOpCode::MOV_8: + return MOV_8_Instruction{ .value_tag = MemoryTagWrapper(MemoryTag::FF), + .src_address = make_var_ref_8(MemoryTag::FF, get_u8(0), mode8(0)), + .result_address = make_addr_ref_8(get_u8(1), mode8(1)) }; + // MOV_16: wire [indirect, src, dst] → operands [src, dst] + case WireOpCode::MOV_16: + return MOV_16_Instruction{ .value_tag = MemoryTagWrapper(MemoryTag::FF), + .src_address = make_var_ref_16(MemoryTag::FF, get_u16(0), mode8(0)), + .result_address = make_addr_ref_16(get_u16(1), mode8(1)) }; + + // ========================== + // Control Flow Instructions + // ========================== + // JUMP_32: wire [dst] → operands [dst] (no indirect) + case WireOpCode::JUMP_32: + return JUMP_32_Instruction{ .destination = get_u32(0) }; + // JUMPI_32: wire [indirect, cond, dst] → operands [cond, dst] + case WireOpCode::JUMPI_32: + return JUMPI_32_Instruction{ .condition_address = make_addr_ref_16(get_u16(0), mode8(0)), + .destination = get_u32(1) }; + // RETURN: wire [indirect, size_offset, data_offset] → operands [size_offset, data_offset] + case WireOpCode::RETURN: + return RETURN_Instruction{ .return_data_size_address = make_addr_ref_16(get_u16(0), mode8(0)), + .return_data_address = make_addr_ref_16(get_u16(1), mode8(1)) }; + // REVERT_8: wire [indirect, size_offset, data_offset] → operands [size_offset, data_offset] + case WireOpCode::REVERT_8: + return REVERT_8_Instruction{ .return_data_size_address = 
make_addr_ref_8(get_u8(0), mode8(0)), + .return_data_address = make_addr_ref_8(get_u8(1), mode8(1)) }; + // REVERT_16: wire [indirect, size_offset, data_offset] → operands [size_offset, data_offset] + case WireOpCode::REVERT_16: + return REVERT_16_Instruction{ .return_data_size_address = make_addr_ref_16(get_u16(0), mode8(0)), + .return_data_address = make_addr_ref_16(get_u16(1), mode8(1)) }; + // INTERNALCALL: wire [dst] → operands [dst] (no indirect) + case WireOpCode::INTERNALCALL: + return INTERNALCALL_Instruction{ .destination = get_u32(0) }; + // INTERNALRETURN: wire [] → operands [] (no operands) + case WireOpCode::INTERNALRETURN: + return INTERNALRETURN_Instruction{}; + + // ========================== + // Environment Instructions + // ========================== + // GETENVVAR_16: wire [indirect, dst, type] → operands [dst, type] + case WireOpCode::GETENVVAR_16: + return GETENVVAR_Instruction{ .result_address = make_addr_ref_16(get_u16(0), mode8(0)), .type = get_u8(1) }; + + // ========================== + // Storage Instructions + // ========================== + // SLOAD: wire [indirect, slot, dst] → operands [slot, dst] + case WireOpCode::SLOAD: + return SLOAD_Instruction{ .slot_index = 0, + .slot_address = make_addr_ref_16(get_u16(0), mode8(0)), + .result_address = make_addr_ref_16(get_u16(1), mode8(1)) }; + // SSTORE: wire [indirect, src, slot] → operands [src, slot] + case WireOpCode::SSTORE: + return SSTORE_Instruction{ .src_address = make_var_ref_16(MemoryTag::FF, get_u16(0), mode8(0)), + .result_address = make_addr_ref_16(get_u16(1), mode8(1)), + .slot = FF::zero() }; + + // ========================== + // Calldata Instructions + // ========================== + // CALLDATACOPY: wire [indirect, dst, cdStart, copySize] → operands [dst, cdStart, copySize] + case WireOpCode::CALLDATACOPY: + return CALLDATACOPY_Instruction{ .dst_address = make_addr_ref_16(get_u16(0), mode8(0)), + .copy_size = 0, + .copy_size_address = make_addr_ref_16(get_u16(2), 
mode8(2)), + .cd_start = 0, + .cd_start_address = make_addr_ref_16(get_u16(1), mode8(1)) }; + + // ========================== + // Note/Nullifier Instructions + // ========================== + // EMITNULLIFIER: wire [indirect, nullifier] → operands [nullifier] + case WireOpCode::EMITNULLIFIER: + return EMITNULLIFIER_Instruction{ .nullifier_address = make_var_ref_16(MemoryTag::FF, get_u16(0), mode8(0)) }; + // EMITNOTEHASH: wire [indirect, notehash] → operands [notehash] + case WireOpCode::EMITNOTEHASH: + return EMITNOTEHASH_Instruction{ .note_hash_address = make_addr_ref_16(get_u16(0), mode8(0)), + .note_hash = FF::zero() }; + // NULLIFIEREXISTS: wire [indirect, nullifier, address, result] → operands [nullifier, address, result] + case WireOpCode::NULLIFIEREXISTS: + return NULLIFIEREXISTS_Instruction{ .nullifier_address = make_var_ref_16(MemoryTag::FF, get_u16(0), mode8(0)), + .contract_address_address = make_addr_ref_16(get_u16(1), mode8(1)), + .result_address = make_addr_ref_16(get_u16(2), mode8(2)) }; + // NOTEHASHEXISTS: wire [indirect, notehash, leafIndex, result] → operands [notehash, leafIndex, result] + case WireOpCode::NOTEHASHEXISTS: + return NOTEHASHEXISTS_Instruction{ .notehash_index = 0, + .notehash_address = make_addr_ref_16(get_u16(0), mode8(0)), + .leaf_index_address = make_addr_ref_16(get_u16(1), mode8(1)), + .result_address = make_addr_ref_16(get_u16(2), mode8(2)) }; + + // ========================== + // Log/Message Instructions + // ========================== + // SENDL2TOL1MSG: wire [indirect, recipient, content] → operands [recipient, content] + case WireOpCode::SENDL2TOL1MSG: + return SENDL2TOL1MSG_Instruction{ .recipient = FF::zero(), + .recipient_address = make_addr_ref_16(get_u16(0), mode8(0)), + .content = FF::zero(), + .content_address = make_addr_ref_16(get_u16(1), mode8(1)) }; + // EMITUNENCRYPTEDLOG: wire [indirect, logSize, logStart] → operands [logSize, logStart] + case WireOpCode::EMITUNENCRYPTEDLOG: + return 
EMITUNENCRYPTEDLOG_Instruction{ .log_size = 0, + .log_size_address = make_addr_ref_16(get_u16(0), mode8(0)), + .log_values = {}, + .log_values_address_start = get_u16(1) }; + + // ========================== + // Return Data Instructions + // ========================== + // RETURNDATASIZE: wire [indirect, dstOffset] → operands [dstOffset] + case WireOpCode::RETURNDATASIZE: + return RAW_RETURNDATASIZE_Instruction{ .dst_address = make_addr_ref_16(get_u16(0), mode8(0)) }; + // RETURNDATACOPY: wire [indirect, dstOffset, rdStartOffset, copySizeOffset] → operands [...] + case WireOpCode::RETURNDATACOPY: + return RAW_RETURNDATACOPY_Instruction{ .dst_address = make_addr_ref_16(get_u16(0), mode8(0)), + .rd_start_address = make_addr_ref_16(get_u16(1), mode8(1)), + .copy_size_address = make_addr_ref_16(get_u16(2), mode8(2)) }; + + // ========================== + // Contract Instance Instructions + // ========================== + // L1TOL2MSGEXISTS: wire [indirect, msgKeyOffset, msgLeafIndexOffset, resultOffset] → operands [...] + case WireOpCode::L1TOL2MSGEXISTS: + return RAW_L1TOL2MSGEXISTS_Instruction{ .msg_key_address = make_addr_ref_16(get_u16(0), mode8(0)), + .msg_leaf_index_address = make_addr_ref_16(get_u16(1), mode8(1)), + .result_address = make_addr_ref_16(get_u16(2), mode8(2)) }; + // GETCONTRACTINSTANCE: wire [indirect, addrOffset, resultOffset, memberIndex] → operands [...] 
+ case WireOpCode::GETCONTRACTINSTANCE: + return RAW_GETCONTRACTINSTANCE_Instruction{ .addr_address = make_addr_ref_16(get_u16(0), mode8(0)), + .result_address = make_addr_ref_16(get_u16(1), mode8(1)), + .member_index = get_u8(2) }; + + // ========================== + // External Call Instructions + // ========================== + // CALL: wire [indirect16, l2GasOffset, daGasOffset, addrOffset, argsOffset, argsSizeOffset] + case WireOpCode::CALL: + return RAW_CALL_Instruction{ .l2_gas_address = make_addr_ref_16(get_u16(0), mode16(0)), + .da_gas_address = make_addr_ref_16(get_u16(1), mode16(1)), + .addr_address = make_addr_ref_16(get_u16(2), mode16(2)), + .args_address = make_addr_ref_16(get_u16(3), mode16(3)), + .args_size_address = make_addr_ref_16(get_u16(4), mode16(4)), + .is_static_call = false }; + // STATICCALL: same format as CALL + case WireOpCode::STATICCALL: + return RAW_CALL_Instruction{ .l2_gas_address = make_addr_ref_16(get_u16(0), mode16(0)), + .da_gas_address = make_addr_ref_16(get_u16(1), mode16(1)), + .addr_address = make_addr_ref_16(get_u16(2), mode16(2)), + .args_address = make_addr_ref_16(get_u16(3), mode16(3)), + .args_size_address = make_addr_ref_16(get_u16(4), mode16(4)), + .is_static_call = true }; + + // ========================== + // Unsupported opcodes + // ========================== + default: + // For unsupported opcodes, create a placeholder SET_8 instruction + // This preserves the bytecode position but loses semantic information + return SET_8_Instruction{ .value_tag = MemoryTagWrapper(MemoryTag::U8), + .result_address = make_addr_ref_8(0, AddressingMode::Direct), + .value = 0 }; + } +} + +} // anonymous namespace + +FuzzerData decompile_bytecode(const std::vector& bytecode, const std::vector& calldata) +{ + std::vector instructions; + + size_t pc = 0; + while (pc < bytecode.size()) { + try { + // Use vm2's instruction parsing + auto instr = simulation::deserialize_instruction(std::span(bytecode), pc); + 
instructions.push_back(map_vm2_to_fuzz(instr)); + pc += instr.size_in_bytes(); + } catch (const simulation::InstrDeserializationError& e) { + // Convert to std::exception with detailed message + std::string msg = "Deserialization failed at pc=" + std::to_string(pc); + if (e.message.has_value()) { + msg += ": " + e.message.value(); + } + switch (e.type) { + case simulation::InstrDeserializationEventError::PC_OUT_OF_RANGE: + msg += " (PC_OUT_OF_RANGE)"; + break; + case simulation::InstrDeserializationEventError::OPCODE_OUT_OF_RANGE: + msg += " (OPCODE_OUT_OF_RANGE)"; + break; + case simulation::InstrDeserializationEventError::INSTRUCTION_OUT_OF_RANGE: + msg += " (INSTRUCTION_OUT_OF_RANGE)"; + break; + default: + msg += " (UNKNOWN_ERROR)"; + break; + } + throw DecompilationError(msg); + } + } + + // Build FuzzerData with single block, minimal CFG + // Note: NO FinalizeWithReturn - bytecode already contains RETURN + FuzzerData result; + result.instruction_blocks = { instructions }; + result.cfg_instructions = { InsertSimpleInstructionBlock{ .instruction_block_idx = 0 } }; + result.calldata = calldata; + result.return_options = ReturnOptions{ .return_size = 0, + .return_value_tag = MemoryTagWrapper(MemoryTag::FF), + .return_value_offset_index = 0 }; // Unused - explicit RETURN in bytecode + + return result; +} + +} // namespace bb::avm_fuzzer diff --git a/barretenberg/cpp/src/barretenberg/avm_fuzzer/fuzz_lib/bytecode_decompiler.hpp b/barretenberg/cpp/src/barretenberg/avm_fuzzer/fuzz_lib/bytecode_decompiler.hpp new file mode 100644 index 000000000000..5ea3ddae944f --- /dev/null +++ b/barretenberg/cpp/src/barretenberg/avm_fuzzer/fuzz_lib/bytecode_decompiler.hpp @@ -0,0 +1,32 @@ +#pragma once + +#include "barretenberg/avm_fuzzer/fuzz_lib/fuzzer_data.hpp" +#include "barretenberg/avm_fuzzer/fuzz_lib/instruction.hpp" +#include "barretenberg/vm2/common/field.hpp" +#include "barretenberg/vm2/common/opcodes.hpp" + +#include +#include + +namespace bb::avm_fuzzer { + +/** + * 
@brief Decompile raw bytecode into FuzzerData format + * + * Converts every supported instruction, including control flow, into FuzzInstruction variants. + * Rebuilding the result reproduces the input bytecode, except unsupported opcodes, which become a placeholder SET_8. + * + * This enables seeding the fuzzer corpus with real-world transactions and + * bytecode generated by TypeScript tooling. + * + * @param bytecode Raw AVM bytecode + * @param calldata Function calldata (field elements) + * @return FuzzerData Structured representation for the fuzzer + * + * @note Addressing modes are decoded from each instruction's indirect operand and kept alongside the raw operand + * @note Control flow instructions (JUMP, JUMPI, RETURN, REVERT, INTERNALCALL, INTERNALRETURN) + * are preserved with their raw operands for bytecode equivalence + */ +FuzzerData decompile_bytecode(const std::vector& bytecode, const std::vector& calldata); + +} // namespace bb::avm_fuzzer diff --git a/barretenberg/cpp/src/barretenberg/avm_fuzzer/fuzz_lib/bytecode_decompiler.test.cpp b/barretenberg/cpp/src/barretenberg/avm_fuzzer/fuzz_lib/bytecode_decompiler.test.cpp new file mode 100644 index 000000000000..42af3d4ac836 --- /dev/null +++ b/barretenberg/cpp/src/barretenberg/avm_fuzzer/fuzz_lib/bytecode_decompiler.test.cpp @@ -0,0 +1,299 @@ +#include + +#include "barretenberg/avm_fuzzer/fuzz_lib/bytecode_decompiler.hpp" +#include "barretenberg/avm_fuzzer/fuzz_lib/control_flow.hpp" +#include "barretenberg/avm_fuzzer/fuzz_lib/instruction.hpp" +#include "barretenberg/vm2/common/field.hpp" +#include "barretenberg/vm2/common/opcodes.hpp" +#include "barretenberg/vm2/testing/instruction_builder.hpp" + +using namespace bb::avm_fuzzer; +using namespace bb::avm2; +namespace avm2_testing = bb::avm2::testing; + +namespace { + +// Helper to build bytecode from raw simulation::Instruction +std::vector build_bytecode(const std::vector& instructions) +{ + std::vector bytecode; + for (const auto& instr : instructions) { + auto serialized = instr.serialize(); + bytecode.insert(bytecode.end(), serialized.begin(), 
serialized.end()); + } + return bytecode; +} + +// Rebuild bytecode from FuzzerData to test round-trip +std::vector rebuild_bytecode(const FuzzerData& data) +{ + // Make a mutable copy since ControlFlow modifies the blocks + auto instruction_blocks = data.instruction_blocks; + ControlFlow control_flow(instruction_blocks); + for (const auto& cfg_instr : data.cfg_instructions) { + control_flow.process_cfg_instruction(cfg_instr); + } + return control_flow.build_bytecode(data.return_options); +} + +} // namespace + +// Test that decompiling and recompiling produces identical bytecode +TEST(BytecodeDecompiler, RoundTripSimpleSetReturn) +{ + // Build: SET_16 (value 42 at address 0) + RETURN + auto set_instr = avm2_testing::InstructionBuilder(WireOpCode::SET_16) + .operand(uint16_t{ 0 }) + .operand(MemoryTag::U32) + .operand(uint16_t{ 42 }) + .build(); + auto return_instr = + avm2_testing::InstructionBuilder(WireOpCode::RETURN).operand(uint16_t{ 0 }).operand(uint16_t{ 0 }).build(); + + auto original_bytecode = build_bytecode({ set_instr, return_instr }); + + // Decompile + auto fuzzer_data = decompile_bytecode(original_bytecode, {}); + + // Rebuild + auto rebuilt_bytecode = rebuild_bytecode(fuzzer_data); + + EXPECT_EQ(original_bytecode.size(), rebuilt_bytecode.size()); + EXPECT_EQ(original_bytecode, rebuilt_bytecode); +} + +TEST(BytecodeDecompiler, RoundTripAdd16) +{ + // Build: SET_16 (value 5 at address 0) + SET_16 (value 3 at address 1) + ADD_16 + RETURN + auto set1 = avm2_testing::InstructionBuilder(WireOpCode::SET_16) + .operand(uint16_t{ 0 }) + .operand(MemoryTag::U32) + .operand(uint16_t{ 5 }) + .build(); + auto set2 = avm2_testing::InstructionBuilder(WireOpCode::SET_16) + .operand(uint16_t{ 1 }) + .operand(MemoryTag::U32) + .operand(uint16_t{ 3 }) + .build(); + auto add_instr = avm2_testing::InstructionBuilder(WireOpCode::ADD_16) + .operand(uint16_t{ 0 }) + .operand(uint16_t{ 1 }) + .operand(uint16_t{ 2 }) + .build(); + auto return_instr = + 
avm2_testing::InstructionBuilder(WireOpCode::RETURN).operand(uint16_t{ 0 }).operand(uint16_t{ 2 }).build(); + + auto original_bytecode = build_bytecode({ set1, set2, add_instr, return_instr }); + + // Decompile + auto fuzzer_data = decompile_bytecode(original_bytecode, {}); + + // Rebuild + auto rebuilt_bytecode = rebuild_bytecode(fuzzer_data); + + EXPECT_EQ(original_bytecode, rebuilt_bytecode); +} + +TEST(BytecodeDecompiler, RoundTripJump) +{ + // Build: JUMP to offset 0 + RETURN + auto jump_instr = avm2_testing::InstructionBuilder(WireOpCode::JUMP_32).operand(uint32_t{ 0 }).build(); + auto return_instr = + avm2_testing::InstructionBuilder(WireOpCode::RETURN).operand(uint16_t{ 0 }).operand(uint16_t{ 0 }).build(); + + auto original_bytecode = build_bytecode({ jump_instr, return_instr }); + + // Decompile + auto fuzzer_data = decompile_bytecode(original_bytecode, {}); + + // Rebuild + auto rebuilt_bytecode = rebuild_bytecode(fuzzer_data); + + EXPECT_EQ(original_bytecode, rebuilt_bytecode); +} + +TEST(BytecodeDecompiler, RoundTripJumpI) +{ + // Build: SET_8 (condition at address 0) + JUMPI_32 + RETURN + auto set_cond = avm2_testing::InstructionBuilder(WireOpCode::SET_8) + .operand(uint8_t{ 0 }) + .operand(MemoryTag::U1) + .operand(uint8_t{ 1 }) + .build(); + auto jumpi_instr = + avm2_testing::InstructionBuilder(WireOpCode::JUMPI_32).operand(uint16_t{ 0 }).operand(uint32_t{ 5 }).build(); + auto return_instr = + avm2_testing::InstructionBuilder(WireOpCode::RETURN).operand(uint16_t{ 0 }).operand(uint16_t{ 0 }).build(); + + auto original_bytecode = build_bytecode({ set_cond, jumpi_instr, return_instr }); + + // Decompile + auto fuzzer_data = decompile_bytecode(original_bytecode, {}); + + // Rebuild + auto rebuilt_bytecode = rebuild_bytecode(fuzzer_data); + + EXPECT_EQ(original_bytecode, rebuilt_bytecode); +} + +TEST(BytecodeDecompiler, RoundTripRevert) +{ + // Build: SET_16 + REVERT_16 + auto set_instr = avm2_testing::InstructionBuilder(WireOpCode::SET_16) + 
.operand(uint16_t{ 0 }) + .operand(MemoryTag::U32) + .operand(uint16_t{ 0 }) + .build(); + auto revert_instr = + avm2_testing::InstructionBuilder(WireOpCode::REVERT_16).operand(uint16_t{ 0 }).operand(uint16_t{ 0 }).build(); + + auto original_bytecode = build_bytecode({ set_instr, revert_instr }); + + // Decompile + auto fuzzer_data = decompile_bytecode(original_bytecode, {}); + + // Rebuild + auto rebuilt_bytecode = rebuild_bytecode(fuzzer_data); + + EXPECT_EQ(original_bytecode, rebuilt_bytecode); +} + +TEST(BytecodeDecompiler, RoundTripInternalCallReturn) +{ + // Build: INTERNALCALL + INTERNALRETURN + RETURN + auto internal_call = avm2_testing::InstructionBuilder(WireOpCode::INTERNALCALL).operand(uint32_t{ 5 }).build(); + auto internal_return = avm2_testing::InstructionBuilder(WireOpCode::INTERNALRETURN).build(); + auto return_instr = + avm2_testing::InstructionBuilder(WireOpCode::RETURN).operand(uint16_t{ 0 }).operand(uint16_t{ 0 }).build(); + + auto original_bytecode = build_bytecode({ internal_call, internal_return, return_instr }); + + // Decompile + auto fuzzer_data = decompile_bytecode(original_bytecode, {}); + + // Rebuild + auto rebuilt_bytecode = rebuild_bytecode(fuzzer_data); + + EXPECT_EQ(original_bytecode, rebuilt_bytecode); +} + +TEST(BytecodeDecompiler, RoundTripCast) +{ + // Build: SET_16 + CAST_16 + RETURN + auto set_instr = avm2_testing::InstructionBuilder(WireOpCode::SET_16) + .operand(uint16_t{ 0 }) + .operand(MemoryTag::U32) + .operand(uint16_t{ 255 }) + .build(); + auto cast_instr = avm2_testing::InstructionBuilder(WireOpCode::CAST_16) + .operand(uint16_t{ 0 }) + .operand(uint16_t{ 1 }) + .operand(MemoryTag::U64) + .build(); + auto return_instr = + avm2_testing::InstructionBuilder(WireOpCode::RETURN).operand(uint16_t{ 0 }).operand(uint16_t{ 1 }).build(); + + auto original_bytecode = build_bytecode({ set_instr, cast_instr, return_instr }); + + // Decompile + auto fuzzer_data = decompile_bytecode(original_bytecode, {}); + + // Rebuild + auto 
rebuilt_bytecode = rebuild_bytecode(fuzzer_data); + + EXPECT_EQ(original_bytecode, rebuilt_bytecode); +} + +TEST(BytecodeDecompiler, RoundTripMov) +{ + // Build: SET_16 + MOV_16 + RETURN + auto set_instr = avm2_testing::InstructionBuilder(WireOpCode::SET_16) + .operand(uint16_t{ 0 }) + .operand(MemoryTag::U32) + .operand(uint16_t{ 42 }) + .build(); + auto mov_instr = + avm2_testing::InstructionBuilder(WireOpCode::MOV_16).operand(uint16_t{ 0 }).operand(uint16_t{ 1 }).build(); + auto return_instr = + avm2_testing::InstructionBuilder(WireOpCode::RETURN).operand(uint16_t{ 0 }).operand(uint16_t{ 1 }).build(); + + auto original_bytecode = build_bytecode({ set_instr, mov_instr, return_instr }); + + // Decompile + auto fuzzer_data = decompile_bytecode(original_bytecode, {}); + + // Rebuild + auto rebuilt_bytecode = rebuild_bytecode(fuzzer_data); + + EXPECT_EQ(original_bytecode, rebuilt_bytecode); +} + +TEST(BytecodeDecompiler, RoundTripNot) +{ + // Build: SET_16 + NOT_16 + RETURN + auto set_instr = avm2_testing::InstructionBuilder(WireOpCode::SET_16) + .operand(uint16_t{ 0 }) + .operand(MemoryTag::U8) + .operand(uint16_t{ 0x0F }) + .build(); + auto not_instr = + avm2_testing::InstructionBuilder(WireOpCode::NOT_16).operand(uint16_t{ 0 }).operand(uint16_t{ 1 }).build(); + auto return_instr = + avm2_testing::InstructionBuilder(WireOpCode::RETURN).operand(uint16_t{ 0 }).operand(uint16_t{ 1 }).build(); + + auto original_bytecode = build_bytecode({ set_instr, not_instr, return_instr }); + + // Decompile + auto fuzzer_data = decompile_bytecode(original_bytecode, {}); + + // Rebuild + auto rebuilt_bytecode = rebuild_bytecode(fuzzer_data); + + EXPECT_EQ(original_bytecode, rebuilt_bytecode); +} + +TEST(BytecodeDecompiler, RoundTripSetFF) +{ + // Build: SET_FF + RETURN + FF large_value = FF::random_element(); + auto set_instr = avm2_testing::InstructionBuilder(WireOpCode::SET_FF) + .operand(uint16_t{ 0 }) + .operand(MemoryTag::FF) + .operand(large_value) + .build(); + auto 
return_instr = + avm2_testing::InstructionBuilder(WireOpCode::RETURN).operand(uint16_t{ 0 }).operand(uint16_t{ 0 }).build(); + + auto original_bytecode = build_bytecode({ set_instr, return_instr }); + + // Decompile + auto fuzzer_data = decompile_bytecode(original_bytecode, {}); + + // Rebuild + auto rebuilt_bytecode = rebuild_bytecode(fuzzer_data); + + EXPECT_EQ(original_bytecode, rebuilt_bytecode); +} + +TEST(BytecodeDecompiler, PreservesCalldata) +{ + std::vector calldata = { FF(1), FF(2), FF(42) }; + + auto return_instr = + avm2_testing::InstructionBuilder(WireOpCode::RETURN).operand(uint16_t{ 0 }).operand(uint16_t{ 0 }).build(); + auto bytecode = build_bytecode({ return_instr }); + + auto fuzzer_data = decompile_bytecode(bytecode, calldata); + + EXPECT_EQ(fuzzer_data.calldata, calldata); +} + +TEST(BytecodeDecompiler, EmptyBytecode) +{ + std::vector empty_bytecode; + std::vector calldata; + + auto fuzzer_data = decompile_bytecode(empty_bytecode, calldata); + + EXPECT_TRUE(fuzzer_data.instruction_blocks[0].empty()); +} diff --git a/barretenberg/cpp/src/barretenberg/avm_fuzzer/fuzz_lib/control_flow.cpp b/barretenberg/cpp/src/barretenberg/avm_fuzzer/fuzz_lib/control_flow.cpp index 6ebd989dfd5a..6a93cd10ca67 100644 --- a/barretenberg/cpp/src/barretenberg/avm_fuzzer/fuzz_lib/control_flow.cpp +++ b/barretenberg/cpp/src/barretenberg/avm_fuzzer/fuzz_lib/control_flow.cpp @@ -237,6 +237,13 @@ int predict_block_size(ProgramBlock* block) const int JMP_SIZE = 1 + 4; // opcode + destination offset const int JMP_IF_SIZE = 1 + 1 + 2 + 4; // opcode + direct/indirect + condition offset + destination offset auto bytecode_length = static_cast(create_bytecode(block->get_instructions()).size()); + + // If block has explicit terminator (from decompiler), just return bytecode length + // No additional terminators will be added + if (block->has_explicit_terminator) { + return bytecode_length; + } + switch (block->terminator_type) { case TerminatorType::RETURN: return bytecode_length; 
// finalized with return, already counted @@ -262,6 +269,10 @@ int predict_block_size(ProgramBlock* block) } return bytecode_length + JMP_IF_SIZE + JMP_SIZE; // finalized with jumpi } + case TerminatorType::NONE: + // Block with no terminator type but has explicit terminator (already handled above) + // This shouldn't be reached, but return bytecode length just in case + return bytecode_length; default: throw std::runtime_error("Predict block size: Every block should be terminated with return, jump, or jumpi, " "got " + @@ -286,8 +297,9 @@ std::vector ControlFlow::build_bytecode(const ReturnOptions& return_opt std::vector blocks = dfs_traverse(start_block); // Step 2 terminate all non-terminated blocks with return + // Skip blocks that have explicit terminators (from decompiler) for (ProgramBlock* block : blocks) { - if (block->terminator_type == TerminatorType::NONE) { + if (block->terminator_type == TerminatorType::NONE && !block->has_explicit_terminator) { block->finalize_with_return( /*TODO(defkit) fix return size */ 1, return_options.return_value_tag, @@ -308,9 +320,17 @@ std::vector ControlFlow::build_bytecode(const ReturnOptions& return_opt } // Step 4 terminate unterminated blocks with jumps with known offsets, get the bytecode for each block + // Skip adding terminators for blocks with explicit terminators (from decompiler) std::vector> block_bytecodes; for (ProgramBlock* block : blocks) { std::vector instructions = block->get_instructions(); + + // If block has explicit terminator (from decompiler), just use instructions as-is + if (block->has_explicit_terminator) { + block_bytecodes.push_back(create_bytecode(instructions)); + continue; + } + switch (block->terminator_type) { case TerminatorType::RETURN: // finalized with return // already terminated with return @@ -345,6 +365,10 @@ std::vector ControlFlow::build_bytecode(const ReturnOptions& return_opt instructions.push_back(jump_else_instruction); break; } + case TerminatorType::NONE: + // Block with no 
terminator type but has explicit terminator + // This shouldn't be reached, but handle it gracefully + break; default: throw std::runtime_error( "Inject terminators: Every block should be terminated with return, jump, or jumpi"); diff --git a/barretenberg/cpp/src/barretenberg/avm_fuzzer/fuzz_lib/fuzz.cpp b/barretenberg/cpp/src/barretenberg/avm_fuzzer/fuzz_lib/fuzz.cpp index a43a83483d98..200ef71e51b6 100644 --- a/barretenberg/cpp/src/barretenberg/avm_fuzzer/fuzz_lib/fuzz.cpp +++ b/barretenberg/cpp/src/barretenberg/avm_fuzzer/fuzz_lib/fuzz.cpp @@ -35,9 +35,10 @@ SimulatorResult fuzz_against_ts_simulator(FuzzerData& fuzzer_data) auto contract_address = ContractDBProxy::register_contract_from_bytecode(bytecode); FuzzerContractDB contract_db = *contract_db_proxy->get_contract_db(); - // Create the transaction + // Create the transaction and globals auto tx = create_default_tx( contract_address, MSG_SENDER, fuzzer_data.calldata, TRANSACTION_FEE, IS_STATIC_CALL, GAS_LIMIT); + auto globals = create_default_globals(); FF fee_required_da = FF(tx.effective_gas_fees.fee_per_da_gas) * FF(tx.gas_settings.gas_limits.da_gas); FF fee_required_l2 = FF(tx.effective_gas_fees.fee_per_l2_gas) * FF(tx.gas_settings.gas_limits.l2_gas); @@ -45,14 +46,14 @@ SimulatorResult fuzz_against_ts_simulator(FuzzerData& fuzzer_data) try { ws_mgr->checkpoint(); - cpp_result = cpp_simulator.simulate(*ws_mgr, contract_db, tx); + cpp_result = cpp_simulator.simulate(*ws_mgr, contract_db, tx, globals); ws_mgr->revert(); } catch (const std::exception& e) { throw std::runtime_error(std::string("CppSimulator threw an exception: ") + e.what()); } ws_mgr->checkpoint(); - auto js_result = js_simulator->simulate(*ws_mgr, contract_db, tx); + auto js_result = js_simulator->simulate(*ws_mgr, contract_db, tx, globals); ContractDBProxy::reset_instance(); diff --git a/barretenberg/cpp/src/barretenberg/avm_fuzzer/fuzz_lib/fuzz.test.cpp b/barretenberg/cpp/src/barretenberg/avm_fuzzer/fuzz_lib/fuzz.test.cpp index 
0a8058048d1b..40ac85a11eea 100644 --- a/barretenberg/cpp/src/barretenberg/avm_fuzzer/fuzz_lib/fuzz.test.cpp +++ b/barretenberg/cpp/src/barretenberg/avm_fuzzer/fuzz_lib/fuzz.test.cpp @@ -45,11 +45,12 @@ SimulatorResult simulate_with_default_tx(std::vector& bytecode, std::ve FF fee_required_da = FF(tx.effective_gas_fees.fee_per_da_gas) * FF(tx.gas_settings.gas_limits.da_gas); FF fee_required_l2 = FF(tx.effective_gas_fees.fee_per_l2_gas) * FF(tx.gas_settings.gas_limits.l2_gas); ws_mgr->write_fee_payer_balance(tx.fee_payer, fee_required_da + fee_required_l2); + auto globals = create_default_globals(); auto cpp_simulator = CppSimulator(); ws_mgr->checkpoint(); try { - auto result = cpp_simulator.simulate(*ws_mgr, contract_db, tx); + auto result = cpp_simulator.simulate(*ws_mgr, contract_db, tx, globals); ws_mgr->revert(); ws_mgr->reset_world_state(); return result; diff --git a/barretenberg/cpp/src/barretenberg/avm_fuzzer/fuzz_lib/instruction.hpp b/barretenberg/cpp/src/barretenberg/avm_fuzzer/fuzz_lib/instruction.hpp index cc0280825b04..a6980aa7b4ed 100644 --- a/barretenberg/cpp/src/barretenberg/avm_fuzzer/fuzz_lib/instruction.hpp +++ b/barretenberg/cpp/src/barretenberg/avm_fuzzer/fuzz_lib/instruction.hpp @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -111,7 +112,12 @@ struct VariableRef { uint32_t base_offset_seed = 0; AddressingModeWrapper mode = AddressingMode::Direct; - MSGPACK_FIELDS(tag, index, pointer_address_seed, base_offset_seed, mode); + /// @brief Raw operand value for decompiled bytecode + /// When set, this exact value is used as the operand instead of computing from seeds. + /// This ensures byte-perfect reconstruction of decompiled bytecode. 
+ std::optional raw_operand = std::nullopt; + + MSGPACK_FIELDS(tag, index, pointer_address_seed, base_offset_seed, mode, raw_operand); }; struct AddressRef { @@ -126,7 +132,13 @@ struct AddressRef { /// Sets M[0] = base_offset uint32_t base_offset_seed = 0; AddressingModeWrapper mode = AddressingMode::Direct; - MSGPACK_FIELDS(address, pointer_address_seed, base_offset_seed, mode); + + /// @brief Raw operand value for decompiled bytecode + /// When set, this exact value is used as the operand instead of computing from seeds. + /// This ensures byte-perfect reconstruction of decompiled bytecode. + std::optional raw_operand = std::nullopt; + + MSGPACK_FIELDS(address, pointer_address_seed, base_offset_seed, mode, raw_operand); }; using ParamRef = std::variant; @@ -561,6 +573,19 @@ struct CALL_Instruction { is_static_call); }; +/// @brief RAW_CALL: Raw CALL/STATICCALL instruction from decompiled bytecode +/// Wire format: [INDIRECT16, l2GasOffset, daGasOffset, addrOffset, argsOffset, argsSizeOffset] +/// Used for byte-perfect reconstruction of bytecode - no semantic processing +struct RAW_CALL_Instruction { + AddressRef l2_gas_address; + AddressRef da_gas_address; + AddressRef addr_address; + AddressRef args_address; + AddressRef args_size_address; + bool is_static_call; + MSGPACK_FIELDS(l2_gas_address, da_gas_address, addr_address, args_address, args_size_address, is_static_call); +}; + /// @brief: RETURNDATASIZE + RETURNDATACOPY: // M[copySizeOffset] = nestedReturndata.size() // M[dstOffset:dstOffset+M[copySizeOffset]] = @@ -624,6 +649,93 @@ struct SHA256COMPRESSION_Instruction { MSGPACK_FIELDS(state_address, input_address, dst_address); }; +// ============================================================================ +// Control Flow Instructions (opaque - serialize directly for decompiler) +// These instructions are used by the bytecode decompiler to preserve +// bytecode equivalence when decompiling and recompiling. 
+// ============================================================================ + +/// @brief JUMP_32: Unconditional jump to destination PC +struct JUMP_32_Instruction { + uint32_t destination; // Raw PC offset from bytecode + MSGPACK_FIELDS(destination); +}; + +/// @brief JUMPI_32: Conditional jump based on condition at memory offset +struct JUMPI_32_Instruction { + AddressRef condition_address; // Memory address of condition (with addressing mode) + uint32_t destination; // Jump target if condition is true + MSGPACK_FIELDS(condition_address, destination); +}; + +/// @brief RETURN: Return from execution with data +struct RETURN_Instruction { + AddressRef return_data_size_address; // Memory address containing size + AddressRef return_data_address; // Memory address of return data start + MSGPACK_FIELDS(return_data_size_address, return_data_address); +}; + +/// @brief REVERT_8: Revert execution (8-bit operands) +struct REVERT_8_Instruction { + AddressRef return_data_size_address; + AddressRef return_data_address; + MSGPACK_FIELDS(return_data_size_address, return_data_address); +}; + +/// @brief REVERT_16: Revert execution (16-bit operands) +struct REVERT_16_Instruction { + AddressRef return_data_size_address; + AddressRef return_data_address; + MSGPACK_FIELDS(return_data_size_address, return_data_address); +}; + +/// @brief INTERNALCALL: Call internal function at destination PC +struct INTERNALCALL_Instruction { + uint32_t destination; // Target PC of internal function + MSGPACK_FIELDS(destination); +}; + +/// @brief INTERNALRETURN: Return from internal function call +struct INTERNALRETURN_Instruction { + // No operands - a dummy member is listed in MSGPACK_FIELDS instead of leaving it empty + uint8_t _dummy = 0; // Placeholder for msgpack serialization + MSGPACK_FIELDS(_dummy); +}; + +// ============================================================================ +// Raw Instructions (for byte-perfect bytecode reconstruction from decompiler) +// 
============================================================================ + +/// @brief RAW_RETURNDATASIZE: [INDIRECT8, dstOffset] +struct RAW_RETURNDATASIZE_Instruction { + AddressRef dst_address; + MSGPACK_FIELDS(dst_address); +}; + +/// @brief RAW_RETURNDATACOPY: [INDIRECT8, dstOffset, rdStartOffset, copySizeOffset] +struct RAW_RETURNDATACOPY_Instruction { + AddressRef dst_address; + AddressRef rd_start_address; + AddressRef copy_size_address; + MSGPACK_FIELDS(dst_address, rd_start_address, copy_size_address); +}; + +/// @brief RAW_L1TOL2MSGEXISTS: [INDIRECT8, msgKeyOffset, msgLeafIndexOffset, resultOffset] +struct RAW_L1TOL2MSGEXISTS_Instruction { + AddressRef msg_key_address; + AddressRef msg_leaf_index_address; + AddressRef result_address; + MSGPACK_FIELDS(msg_key_address, msg_leaf_index_address, result_address); +}; + +/// @brief RAW_GETCONTRACTINSTANCE: [INDIRECT8, addrOffset, resultOffset, memberIndex] +struct RAW_GETCONTRACTINSTANCE_Instruction { + AddressRef addr_address; + AddressRef result_address; + uint8_t member_index; + MSGPACK_FIELDS(addr_address, result_address, member_index); +}; + using FuzzInstruction = std::variant; + SHA256COMPRESSION_Instruction, + // Control flow instructions (for decompiler) + JUMP_32_Instruction, + JUMPI_32_Instruction, + RETURN_Instruction, + REVERT_8_Instruction, + REVERT_16_Instruction, + INTERNALCALL_Instruction, + INTERNALRETURN_Instruction, + // Raw instructions for byte-perfect bytecode reconstruction + RAW_RETURNDATASIZE_Instruction, + RAW_RETURNDATACOPY_Instruction, + RAW_L1TOL2MSGEXISTS_Instruction, + RAW_GETCONTRACTINSTANCE_Instruction>; template struct overloaded : Ts... 
{ using Ts::operator()...; @@ -886,6 +1012,25 @@ inline std::ostream& operator<<(std::ostream& os, const FuzzInstruction& instruc os << "SHA256COMPRESSION_Instruction " << arg.state_address << " " << arg.input_address << " " << arg.dst_address; }, + // Control flow instructions + [&](JUMP_32_Instruction arg) { os << "JUMP_32_Instruction dest=" << arg.destination; }, + [&](JUMPI_32_Instruction arg) { + os << "JUMPI_32_Instruction cond=" << arg.condition_address << " dest=" << arg.destination; + }, + [&](RETURN_Instruction arg) { + os << "RETURN_Instruction size_off=" << arg.return_data_size_address + << " data_off=" << arg.return_data_address; + }, + [&](REVERT_8_Instruction arg) { + os << "REVERT_8_Instruction size_off=" << arg.return_data_size_address + << " data_off=" << arg.return_data_address; + }, + [&](REVERT_16_Instruction arg) { + os << "REVERT_16_Instruction size_off=" << arg.return_data_size_address + << " data_off=" << arg.return_data_address; + }, + [&](INTERNALCALL_Instruction arg) { os << "INTERNALCALL_Instruction dest=" << arg.destination; }, + [&](INTERNALRETURN_Instruction) { os << "INTERNALRETURN_Instruction"; }, [&](auto) { os << "Unknown instruction"; }, }, instruction); diff --git a/barretenberg/cpp/src/barretenberg/avm_fuzzer/fuzz_lib/memory_manager.cpp b/barretenberg/cpp/src/barretenberg/avm_fuzzer/fuzz_lib/memory_manager.cpp index 64183a192a18..0090a6dcfb0e 100644 --- a/barretenberg/cpp/src/barretenberg/avm_fuzzer/fuzz_lib/memory_manager.cpp +++ b/barretenberg/cpp/src/barretenberg/avm_fuzzer/fuzz_lib/memory_manager.cpp @@ -95,6 +95,14 @@ ResolvedAddress MemoryManager::resolve_address(VariableRef variable, uint32_t absolute_address, uint32_t max_operand_address) { + // For decompiled bytecode: use raw_operand directly if set + if (variable.raw_operand.has_value()) { + return ResolvedAddress{ + .absolute_address = absolute_address, + .operand_address = variable.raw_operand.value(), + }; + } + ResolvedAddress resolved_address = { 
.absolute_address = absolute_address, }; @@ -130,6 +138,13 @@ ResolvedAddress MemoryManager::resolve_address(VariableRef variable, ResolvedAddress MemoryManager::resolve_address(AddressRef address, uint32_t max_operand_address) { + // For decompiled bytecode: use raw_operand directly if set + if (address.raw_operand.has_value()) { + return ResolvedAddress{ + .absolute_address = address.address, + .operand_address = address.raw_operand.value(), + }; + } ResolvedAddress resolved_address = { .absolute_address = address.address, @@ -196,8 +211,23 @@ std::optional> Mem std::optional> MemoryManager:: get_resolved_address_and_operand_8(VariableRef address) { + // For decompiled bytecode: use raw_operand directly with proper addressing mode + if (address.raw_operand.has_value()) { + auto resolved_address = resolve_address(address, address.index, 255); + auto operand = OperandBuilder::from(static_cast(resolved_address.operand_address)); + return std::make_pair(resolved_address, get_memory_address_operand(operand, address.mode)); + } + auto actual_address = get_variable_address(address.tag, address.index, get_max_variable_address(address.mode, 255)); if (!actual_address.has_value()) { + // For Direct mode, if no stored variable found, use the index directly as the address + // This supports decompiled bytecode where addresses aren't tracked in stored_variables + if (address.mode == AddressingMode::Direct) { + uint32_t direct_address = address.index % 256; + auto resolved_address = resolve_address(address, direct_address, 255); + auto operand = OperandBuilder::from(static_cast(resolved_address.operand_address)); + return std::make_pair(resolved_address, get_memory_address_operand(operand, address.mode)); + } return std::nullopt; } auto resolved_address = resolve_address(address, actual_address.value(), 255); @@ -228,9 +258,24 @@ std::optional> Mem std::optional> MemoryManager:: get_resolved_address_and_operand_16(VariableRef address) { + // For decompiled bytecode: use 
raw_operand directly with proper addressing mode + if (address.raw_operand.has_value()) { + auto resolved_address = resolve_address(address, address.index, 65535); + auto operand = OperandBuilder::from(static_cast(resolved_address.operand_address)); + return std::make_pair(resolved_address, get_memory_address_operand(operand, address.mode)); + } + auto actual_address = get_variable_address(address.tag, address.index, get_max_variable_address(address.mode, 65535)); if (!actual_address.has_value()) { + // For Direct mode, if no stored variable found, use the index directly as the address + // This supports decompiled bytecode where addresses aren't tracked in stored_variables + if (address.mode == AddressingMode::Direct) { + uint32_t direct_address = address.index % 65536; + auto resolved_address = resolve_address(address, direct_address, 65535); + auto operand = OperandBuilder::from(static_cast(resolved_address.operand_address)); + return std::make_pair(resolved_address, get_memory_address_operand(operand, address.mode)); + } return std::nullopt; } diff --git a/barretenberg/cpp/src/barretenberg/avm_fuzzer/fuzz_lib/program_block.cpp b/barretenberg/cpp/src/barretenberg/avm_fuzzer/fuzz_lib/program_block.cpp index bad8e439c839..95abc644c08a 100644 --- a/barretenberg/cpp/src/barretenberg/avm_fuzzer/fuzz_lib/program_block.cpp +++ b/barretenberg/cpp/src/barretenberg/avm_fuzzer/fuzz_lib/program_block.cpp @@ -10,6 +10,98 @@ #include "barretenberg/vm2/simulation/lib/serialization.hpp" #include "barretenberg/vm2/testing/instruction_builder.hpp" +namespace { + +// Helper to check if a VariableRef is from decompiled bytecode +bool is_decompiled_ref(const VariableRef& ref) +{ + return ref.raw_operand.has_value(); +} + +// Helper to check if an AddressRef is from decompiled bytecode +bool is_decompiled_ref(const AddressRef& ref) +{ + return ref.raw_operand.has_value(); +} + +// Helper to check if a ParamRef is from decompiled bytecode +bool is_decompiled_ref(const ParamRef& ref) 
+{ + return std::visit([](const auto& r) { return is_decompiled_ref(r); }, ref); +} + +// Create an OperandBuilder from a decompiled VariableRef (16-bit) +bb::avm2::testing::OperandBuilder decompiled_operand_16(const VariableRef& ref) +{ + auto operand = + bb::avm2::testing::OperandBuilder::from(static_cast(ref.raw_operand.value_or(ref.index))); + switch (ref.mode.value) { + case AddressingMode::Indirect: + return operand.indirect(); + case AddressingMode::Relative: + return operand.relative(); + case AddressingMode::IndirectRelative: + return operand.indirect().relative(); + case AddressingMode::Direct: + default: + return operand; + } +} + +// Create an OperandBuilder from a decompiled AddressRef (16-bit) +bb::avm2::testing::OperandBuilder decompiled_operand_16(const AddressRef& ref) +{ + auto operand = + bb::avm2::testing::OperandBuilder::from(static_cast(ref.raw_operand.value_or(ref.address))); + switch (ref.mode.value) { + case AddressingMode::Indirect: + return operand.indirect(); + case AddressingMode::Relative: + return operand.relative(); + case AddressingMode::IndirectRelative: + return operand.indirect().relative(); + case AddressingMode::Direct: + default: + return operand; + } +} + +// Create an OperandBuilder from a decompiled ParamRef (16-bit) +bb::avm2::testing::OperandBuilder decompiled_operand_16(const ParamRef& ref) +{ + return std::visit([](const auto& r) { return decompiled_operand_16(r); }, ref); +} + +// Create an OperandBuilder from a decompiled VariableRef (8-bit) +[[maybe_unused]] bb::avm2::testing::OperandBuilder decompiled_operand_8(const VariableRef& ref) +{ + auto operand = + bb::avm2::testing::OperandBuilder::from(static_cast(ref.raw_operand.value_or(ref.index))); + switch (ref.mode.value) { + case AddressingMode::Indirect: + return operand.indirect(); + case AddressingMode::Direct: + default: + return operand; + } +} + +// Create an OperandBuilder from a decompiled AddressRef (8-bit) +[[maybe_unused]] 
bb::avm2::testing::OperandBuilder decompiled_operand_8(const AddressRef& ref) +{ + auto operand = + bb::avm2::testing::OperandBuilder::from(static_cast(ref.raw_operand.value_or(ref.address))); + switch (ref.mode.value) { + case AddressingMode::Indirect: + return operand.indirect(); + case AddressingMode::Direct: + default: + return operand; + } +} + +} // namespace + void ProgramBlock::preprocess_memory_addresses(ResolvedAddress resolved_address) { if (resolved_address.base_pointer.has_value()) { @@ -1642,31 +1734,147 @@ void ProgramBlock::process_instruction(FuzzInstruction instruction) [this](SHR_16_Instruction instruction) { return this->process_shr_16_instruction(instruction); }, [this](CAST_8_Instruction instruction) { return this->process_cast_8_instruction(instruction); }, [this](CAST_16_Instruction instruction) { return this->process_cast_16_instruction(instruction); }, - [this](SSTORE_Instruction instruction) { return this->process_sstore_instruction(instruction); }, - [this](SLOAD_Instruction instruction) { return this->process_sload_instruction(instruction); }, - [this](GETENVVAR_Instruction instruction) { return this->process_getenvvar_instruction(instruction); }, + [this](SSTORE_Instruction instruction) { + // Direct serialization for decompiled bytecode + if (is_decompiled_ref(instruction.src_address) || is_decompiled_ref(instruction.result_address)) { + auto sstore_instr = bb::avm2::testing::InstructionBuilder(bb::avm2::WireOpCode::SSTORE) + .operand(decompiled_operand_16(instruction.src_address)) + .operand(decompiled_operand_16(instruction.result_address)) + .build(); + this->instructions.push_back(sstore_instr); + } else { + return this->process_sstore_instruction(instruction); + } + }, + [this](SLOAD_Instruction instruction) { + // Direct serialization for decompiled bytecode + if (is_decompiled_ref(instruction.slot_address) || is_decompiled_ref(instruction.result_address)) { + auto sload_instr = 
bb::avm2::testing::InstructionBuilder(bb::avm2::WireOpCode::SLOAD) + .operand(decompiled_operand_16(instruction.slot_address)) + .operand(decompiled_operand_16(instruction.result_address)) + .build(); + this->instructions.push_back(sload_instr); + } else { + return this->process_sload_instruction(instruction); + } + }, + [this](GETENVVAR_Instruction instruction) { + // Direct serialization for decompiled bytecode + if (is_decompiled_ref(instruction.result_address)) { + auto getenvvar_instr = bb::avm2::testing::InstructionBuilder(bb::avm2::WireOpCode::GETENVVAR_16) + .operand(decompiled_operand_16(instruction.result_address)) + .operand(instruction.type) + .build(); + this->instructions.push_back(getenvvar_instr); + } else { + return this->process_getenvvar_instruction(instruction); + } + }, [this](EMITNULLIFIER_Instruction instruction) { - return this->process_emitnulifier_instruction(instruction); + // Direct serialization for decompiled bytecode + if (is_decompiled_ref(instruction.nullifier_address)) { + auto emit_instr = bb::avm2::testing::InstructionBuilder(bb::avm2::WireOpCode::EMITNULLIFIER) + .operand(decompiled_operand_16(instruction.nullifier_address)) + .build(); + this->instructions.push_back(emit_instr); + } else { + return this->process_emitnulifier_instruction(instruction); + } }, [this](NULLIFIEREXISTS_Instruction instruction) { - return this->process_nullifierexists_instruction(instruction); + // Direct serialization for decompiled bytecode + if (is_decompiled_ref(instruction.nullifier_address) || + is_decompiled_ref(instruction.contract_address_address) || + is_decompiled_ref(instruction.result_address)) { + auto exists_instr = bb::avm2::testing::InstructionBuilder(bb::avm2::WireOpCode::NULLIFIEREXISTS) + .operand(decompiled_operand_16(instruction.nullifier_address)) + .operand(decompiled_operand_16(instruction.contract_address_address)) + .operand(decompiled_operand_16(instruction.result_address)) + .build(); + 
this->instructions.push_back(exists_instr); + } else { + return this->process_nullifierexists_instruction(instruction); + } }, [this](EMITNOTEHASH_Instruction instruction) { - return this->process_emitnotehash_instruction(instruction); + // Direct serialization for decompiled bytecode + if (is_decompiled_ref(instruction.note_hash_address)) { + auto emit_instr = bb::avm2::testing::InstructionBuilder(bb::avm2::WireOpCode::EMITNOTEHASH) + .operand(decompiled_operand_16(instruction.note_hash_address)) + .build(); + this->instructions.push_back(emit_instr); + } else { + return this->process_emitnotehash_instruction(instruction); + } }, [this](NOTEHASHEXISTS_Instruction instruction) { - return this->process_notehashexists_instruction(instruction); + // Direct serialization for decompiled bytecode + if (is_decompiled_ref(instruction.notehash_address) || + is_decompiled_ref(instruction.leaf_index_address) || + is_decompiled_ref(instruction.result_address)) { + auto exists_instr = bb::avm2::testing::InstructionBuilder(bb::avm2::WireOpCode::NOTEHASHEXISTS) + .operand(decompiled_operand_16(instruction.notehash_address)) + .operand(decompiled_operand_16(instruction.leaf_index_address)) + .operand(decompiled_operand_16(instruction.result_address)) + .build(); + this->instructions.push_back(exists_instr); + } else { + return this->process_notehashexists_instruction(instruction); + } }, [this](CALLDATACOPY_Instruction instruction) { - return this->process_calldatacopy_instruction(instruction); + // Direct serialization for decompiled bytecode + if (is_decompiled_ref(instruction.dst_address) || is_decompiled_ref(instruction.copy_size_address) || + is_decompiled_ref(instruction.cd_start_address)) { + auto copy_instr = bb::avm2::testing::InstructionBuilder(bb::avm2::WireOpCode::CALLDATACOPY) + .operand(decompiled_operand_16(instruction.dst_address)) + .operand(decompiled_operand_16(instruction.cd_start_address)) + .operand(decompiled_operand_16(instruction.copy_size_address)) + 
.build(); + this->instructions.push_back(copy_instr); + } else { + return this->process_calldatacopy_instruction(instruction); + } }, [this](SENDL2TOL1MSG_Instruction instruction) { - return this->process_sendl2tol1msg_instruction(instruction); + // Direct serialization for decompiled bytecode + if (is_decompiled_ref(instruction.recipient_address) || + is_decompiled_ref(instruction.content_address)) { + auto msg_instr = bb::avm2::testing::InstructionBuilder(bb::avm2::WireOpCode::SENDL2TOL1MSG) + .operand(decompiled_operand_16(instruction.recipient_address)) + .operand(decompiled_operand_16(instruction.content_address)) + .build(); + this->instructions.push_back(msg_instr); + } else { + return this->process_sendl2tol1msg_instruction(instruction); + } }, [this](EMITUNENCRYPTEDLOG_Instruction instruction) { - return this->process_emitunencryptedlog_instruction(instruction); + // Direct serialization for decompiled bytecode + if (is_decompiled_ref(instruction.log_size_address)) { + auto log_instr = bb::avm2::testing::InstructionBuilder(bb::avm2::WireOpCode::EMITUNENCRYPTEDLOG) + .operand(decompiled_operand_16(instruction.log_size_address)) + .operand(instruction.log_values_address_start) + .build(); + this->instructions.push_back(log_instr); + } else { + return this->process_emitunencryptedlog_instruction(instruction); + } }, [this](CALL_Instruction instruction) { return this->process_call_instruction(instruction); }, + [this](RAW_CALL_Instruction instruction) { + // Direct serialization for decompiled CALL/STATICCALL + auto opcode = + instruction.is_static_call ? 
bb::avm2::WireOpCode::STATICCALL : bb::avm2::WireOpCode::CALL; + auto call_instr = bb::avm2::testing::InstructionBuilder(opcode) + .operand(decompiled_operand_16(instruction.l2_gas_address)) + .operand(decompiled_operand_16(instruction.da_gas_address)) + .operand(decompiled_operand_16(instruction.addr_address)) + .operand(decompiled_operand_16(instruction.args_address)) + .operand(decompiled_operand_16(instruction.args_size_address)) + .build(); + this->instructions.push_back(call_instr); + }, [this](RETURNDATASIZE_WITH_RETURNDATACOPY_Instruction instruction) { return this->process_returndatasize_with_returndatacopy_instruction(instruction); }, @@ -1682,6 +1890,89 @@ void ProgramBlock::process_instruction(FuzzInstruction instruction) [this](SHA256COMPRESSION_Instruction instruction) { return this->process_sha256compression_instruction(instruction); }, + // Control flow instructions (direct serialization for decompiler) + [this](JUMP_32_Instruction instruction) { + auto jump_instr = bb::avm2::testing::InstructionBuilder(bb::avm2::WireOpCode::JUMP_32) + .operand(instruction.destination) + .build(); + this->instructions.push_back(jump_instr); + this->has_explicit_terminator = true; + }, + [this](JUMPI_32_Instruction instruction) { + auto jumpi_instr = bb::avm2::testing::InstructionBuilder(bb::avm2::WireOpCode::JUMPI_32) + .operand(decompiled_operand_16(instruction.condition_address)) + .operand(instruction.destination) + .build(); + this->instructions.push_back(jumpi_instr); + // Note: JUMPI doesn't fully terminate - fall-through continues + }, + [this](RETURN_Instruction instruction) { + auto return_instr = bb::avm2::testing::InstructionBuilder(bb::avm2::WireOpCode::RETURN) + .operand(decompiled_operand_16(instruction.return_data_size_address)) + .operand(decompiled_operand_16(instruction.return_data_address)) + .build(); + this->instructions.push_back(return_instr); + this->has_explicit_terminator = true; + }, + [this](REVERT_8_Instruction instruction) { + auto 
revert_instr = bb::avm2::testing::InstructionBuilder(bb::avm2::WireOpCode::REVERT_8) + .operand(decompiled_operand_8(instruction.return_data_size_address)) + .operand(decompiled_operand_8(instruction.return_data_address)) + .build(); + this->instructions.push_back(revert_instr); + this->has_explicit_terminator = true; + }, + [this](REVERT_16_Instruction instruction) { + auto revert_instr = bb::avm2::testing::InstructionBuilder(bb::avm2::WireOpCode::REVERT_16) + .operand(decompiled_operand_16(instruction.return_data_size_address)) + .operand(decompiled_operand_16(instruction.return_data_address)) + .build(); + this->instructions.push_back(revert_instr); + this->has_explicit_terminator = true; + }, + [this](INTERNALCALL_Instruction instruction) { + auto internalcall_instr = bb::avm2::testing::InstructionBuilder(bb::avm2::WireOpCode::INTERNALCALL) + .operand(instruction.destination) + .build(); + this->instructions.push_back(internalcall_instr); + }, + [this](INTERNALRETURN_Instruction) { + auto internalreturn_instr = + bb::avm2::testing::InstructionBuilder(bb::avm2::WireOpCode::INTERNALRETURN).build(); + this->instructions.push_back(internalreturn_instr); + this->has_explicit_terminator = true; + }, + // Raw instructions for byte-perfect bytecode reconstruction + [this](RAW_RETURNDATASIZE_Instruction instruction) { + auto instr = bb::avm2::testing::InstructionBuilder(bb::avm2::WireOpCode::RETURNDATASIZE) + .operand(decompiled_operand_16(instruction.dst_address)) + .build(); + this->instructions.push_back(instr); + }, + [this](RAW_RETURNDATACOPY_Instruction instruction) { + auto instr = bb::avm2::testing::InstructionBuilder(bb::avm2::WireOpCode::RETURNDATACOPY) + .operand(decompiled_operand_16(instruction.dst_address)) + .operand(decompiled_operand_16(instruction.rd_start_address)) + .operand(decompiled_operand_16(instruction.copy_size_address)) + .build(); + this->instructions.push_back(instr); + }, + [this](RAW_L1TOL2MSGEXISTS_Instruction instruction) { + auto instr 
= bb::avm2::testing::InstructionBuilder(bb::avm2::WireOpCode::L1TOL2MSGEXISTS) + .operand(decompiled_operand_16(instruction.msg_key_address)) + .operand(decompiled_operand_16(instruction.msg_leaf_index_address)) + .operand(decompiled_operand_16(instruction.result_address)) + .build(); + this->instructions.push_back(instr); + }, + [this](RAW_GETCONTRACTINSTANCE_Instruction instruction) { + auto instr = bb::avm2::testing::InstructionBuilder(bb::avm2::WireOpCode::GETCONTRACTINSTANCE) + .operand(decompiled_operand_16(instruction.addr_address)) + .operand(decompiled_operand_16(instruction.result_address)) + .operand(instruction.member_index) + .build(); + this->instructions.push_back(instr); + }, [](auto) { throw std::runtime_error("Unknown instruction"); }, }, instruction); diff --git a/barretenberg/cpp/src/barretenberg/avm_fuzzer/fuzz_lib/program_block.hpp b/barretenberg/cpp/src/barretenberg/avm_fuzzer/fuzz_lib/program_block.hpp index e247300a15f8..fdb68892598a 100644 --- a/barretenberg/cpp/src/barretenberg/avm_fuzzer/fuzz_lib/program_block.hpp +++ b/barretenberg/cpp/src/barretenberg/avm_fuzzer/fuzz_lib/program_block.hpp @@ -121,6 +121,11 @@ class ProgramBlock { TerminatorType terminator_type = TerminatorType::NONE; int offset = -1; + /// @brief True if block contains an explicit terminator (RETURN, REVERT, JUMP, INTERNALRETURN) + /// When true, build_bytecode should NOT add implicit terminators. + /// This is used by the decompiler to preserve bytecode equivalence. 
+ bool has_explicit_terminator = false; + ProgramBlock() = default; /// @brief process the instruction /// @param instruction the instruction to process diff --git a/barretenberg/cpp/src/barretenberg/avm_fuzzer/fuzz_lib/simulator.cpp b/barretenberg/cpp/src/barretenberg/avm_fuzzer/fuzz_lib/simulator.cpp index 2b93b296868d..f2ce9e9a4d16 100644 --- a/barretenberg/cpp/src/barretenberg/avm_fuzzer/fuzz_lib/simulator.cpp +++ b/barretenberg/cpp/src/barretenberg/avm_fuzzer/fuzz_lib/simulator.cpp @@ -71,7 +71,8 @@ GlobalVariables create_default_globals() SimulatorResult CppSimulator::simulate(fuzzer::FuzzerWorldStateManager& ws_mgr, fuzzer::FuzzerContractDB& contract_db, - const Tx& tx) + const Tx& tx, + const GlobalVariables& globals) { const PublicSimulatorConfig config{ @@ -82,16 +83,26 @@ SimulatorResult CppSimulator::simulate(fuzzer::FuzzerWorldStateManager& ws_mgr, ProtocolContracts protocol_contracts{}; - auto globals = create_default_globals(); - WorldState& ws = ws_mgr.get_world_state(); WorldStateRevision ws_rev = ws_mgr.get_current_revision(); + // Log transaction details + vinfo("[CPP_SIM] === Starting C++ Simulation ==="); + vinfo("[CPP_SIM] tx.fee_payer: ", tx.fee_payer); + vinfo("[CPP_SIM] tx.gas_settings.gas_limits.da_gas: ", tx.gas_settings.gas_limits.da_gas); + vinfo("[CPP_SIM] tx.gas_settings.gas_limits.l2_gas: ", tx.gas_settings.gas_limits.l2_gas); + vinfo("[CPP_SIM] tx.effective_gas_fees.fee_per_da_gas: ", tx.effective_gas_fees.fee_per_da_gas); + vinfo("[CPP_SIM] tx.effective_gas_fees.fee_per_l2_gas: ", tx.effective_gas_fees.fee_per_l2_gas); + vinfo("[CPP_SIM] globals.gas_fees.fee_per_da_gas: ", globals.gas_fees.fee_per_da_gas); + vinfo("[CPP_SIM] globals.gas_fees.fee_per_l2_gas: ", globals.gas_fees.fee_per_l2_gas); + vinfo("[CPP_SIM] app_logic_enqueued_calls count: ", tx.app_logic_enqueued_calls.size()); + AvmSimulationHelper helper; TxSimulationResult result = helper.simulate_fast_with_existing_ws(contract_db, ws_rev, ws, config, tx, globals, 
protocol_contracts); bool reverted = result.revert_code != RevertCode::OK; // Just process the top level call's output + vinfo("[CPP_SIM] === C++ Simulation Complete ==="); vinfo( "C++ Simulator result - reverted: ", reverted, ", output size: ", result.call_stack_metadata[0].output.size()); std::vector values = {}; @@ -106,17 +117,33 @@ SimulatorResult CppSimulator::simulate(fuzzer::FuzzerWorldStateManager& ws_mgr, } } if (result.public_inputs.has_value()) { + const auto& snapshots = result.public_inputs->end_tree_snapshots; + vinfo("[CPP_SIM] end_tree_snapshots.publicDataTree.root: ", snapshots.public_data_tree.root); + vinfo("[CPP_SIM] end_tree_snapshots.nullifierTree.root: ", snapshots.nullifier_tree.root); + vinfo("[CPP_SIM] end_tree_snapshots.noteHashTree.root: ", snapshots.note_hash_tree.root); return { .reverted = reverted, .output = values, .end_tree_snapshots = result.public_inputs->end_tree_snapshots }; } + vinfo("[CPP_SIM] WARNING: No public_inputs in result!"); return { .reverted = reverted, .output = values }; } JsSimulator* JsSimulator::instance = nullptr; + +std::string get_js_simulator_command(const std::string& simulator_path) +{ + // Allow overriding LOG_LEVEL via environment variable (default: silent) + const char* log_level_env = std::getenv("JS_SIMULATOR_LOG_LEVEL"); + std::string log_level = log_level_env ? log_level_env : "silent"; + // If not silent, don't redirect stderr to /dev/null + std::string stderr_redirect = (log_level == "silent") ? 
" 2>/dev/null" : ""; + return "LOG_LEVEL=" + log_level + " node " + simulator_path + stderr_redirect; +} + JsSimulator::JsSimulator(std::string& simulator_path) : simulator_path(simulator_path) - , process("LOG_LEVEL=silent node " + simulator_path + " 2>/dev/null") + , process(get_js_simulator_command(simulator_path)) {} JsSimulator* JsSimulator::getInstance() @@ -139,10 +166,9 @@ void JsSimulator::initialize(std::string& simulator_path) SimulatorResult JsSimulator::simulate([[maybe_unused]] fuzzer::FuzzerWorldStateManager& ws_mgr, fuzzer::FuzzerContractDB& contract_db, - const Tx& tx) + const Tx& tx, + const GlobalVariables& globals) { - auto globals = create_default_globals(); - std::string serialized = serialize_simulation_request(tx, globals, contract_db); // Send the request diff --git a/barretenberg/cpp/src/barretenberg/avm_fuzzer/fuzz_lib/simulator.hpp b/barretenberg/cpp/src/barretenberg/avm_fuzzer/fuzz_lib/simulator.hpp index 57d4c68fb05c..fc759daeff89 100644 --- a/barretenberg/cpp/src/barretenberg/avm_fuzzer/fuzz_lib/simulator.hpp +++ b/barretenberg/cpp/src/barretenberg/avm_fuzzer/fuzz_lib/simulator.hpp @@ -47,7 +47,8 @@ class Simulator { Simulator() = default; virtual SimulatorResult simulate(fuzzer::FuzzerWorldStateManager& ws_mgr, fuzzer::FuzzerContractDB& contract_db, - const Tx& tx) = 0; + const Tx& tx, + const GlobalVariables& globals) = 0; }; /// @brief uses barretenberg/vm2 to simulate the bytecode @@ -55,7 +56,8 @@ class CppSimulator : public Simulator { public: SimulatorResult simulate(fuzzer::FuzzerWorldStateManager& ws_mgr, fuzzer::FuzzerContractDB& contract_db, - const Tx& tx) override; + const Tx& tx, + const GlobalVariables& globals) override; }; /// @brief uses the yarn-project/simulator to simulate the bytecode @@ -79,7 +81,8 @@ class JsSimulator : public Simulator { SimulatorResult simulate(fuzzer::FuzzerWorldStateManager& ws_mgr, fuzzer::FuzzerContractDB& contract_db, - const Tx& tx) override; + const Tx& tx, + const GlobalVariables& 
globals) override; }; GlobalVariables create_default_globals(); diff --git a/barretenberg/cpp/src/barretenberg/avm_fuzzer/fuzzer_lib.cpp b/barretenberg/cpp/src/barretenberg/avm_fuzzer/fuzzer_lib.cpp index 03a29d1e4371..de4cc1ce35dd 100644 --- a/barretenberg/cpp/src/barretenberg/avm_fuzzer/fuzzer_lib.cpp +++ b/barretenberg/cpp/src/barretenberg/avm_fuzzer/fuzzer_lib.cpp @@ -68,7 +68,7 @@ SimulatorResult fuzz_tx(FuzzerWorldStateManager& ws_mgr, FuzzerContractDB& contr try { ws_mgr.checkpoint(); - cpp_result = cpp_simulator.simulate(ws_mgr, contract_db, tx_data.tx); + cpp_result = cpp_simulator.simulate(ws_mgr, contract_db, tx_data.tx, tx_data.global_variables); ws_mgr.revert(); } catch (const std::exception& e) { fuzz_info("CppSimulator threw an exception: ", e.what()); @@ -82,7 +82,7 @@ SimulatorResult fuzz_tx(FuzzerWorldStateManager& ws_mgr, FuzzerContractDB& contr } ws_mgr.checkpoint(); - auto js_result = js_simulator->simulate(ws_mgr, contract_db, tx_data.tx); + auto js_result = js_simulator->simulate(ws_mgr, contract_db, tx_data.tx, tx_data.global_variables); // If the results do not match if (!compare_simulator_results(cpp_result, js_result)) { diff --git a/barretenberg/cpp/src/barretenberg/avm_fuzzer/tools/CMakeLists.txt b/barretenberg/cpp/src/barretenberg/avm_fuzzer/tools/CMakeLists.txt new file mode 100644 index 000000000000..7c3c7d96cdb4 --- /dev/null +++ b/barretenberg/cpp/src/barretenberg/avm_fuzzer/tools/CMakeLists.txt @@ -0,0 +1,28 @@ +# CLI tools for the AVM fuzzer + +# corpus_seed - Convert bytecode JSON files to fuzzer corpus entries (FuzzerData) +add_executable(corpus_seed corpus_seed.cpp) +target_link_libraries(corpus_seed PRIVATE + avm_fuzzer + vm2 + crypto_sha256 + nlohmann_json::nlohmann_json +) + +# corpus_seed_tx - Convert TS ExecutionHints JSON to FuzzerTxData corpus entries +add_executable(corpus_seed_tx corpus_seed_tx.cpp) +target_link_libraries(corpus_seed_tx PRIVATE + avm_fuzzer + vm2 + crypto_sha256 + nlohmann_json::nlohmann_json +) + +# 
corpus_from_avm_inputs - Convert AvmCircuitInputs msgpack to FuzzerTxData corpus +# This reads the binary files produced by DumpingCppPublicTxSimulator +add_executable(corpus_from_avm_inputs corpus_from_avm_inputs.cpp) +target_link_libraries(corpus_from_avm_inputs PRIVATE + avm_fuzzer + vm2 + crypto_sha256 +) diff --git a/barretenberg/cpp/src/barretenberg/avm_fuzzer/tools/corpus_from_avm_inputs.cpp b/barretenberg/cpp/src/barretenberg/avm_fuzzer/tools/corpus_from_avm_inputs.cpp new file mode 100644 index 000000000000..76272c804042 --- /dev/null +++ b/barretenberg/cpp/src/barretenberg/avm_fuzzer/tools/corpus_from_avm_inputs.cpp @@ -0,0 +1,415 @@ +/** + * corpus_from_avm_inputs - Convert AvmCircuitInputs msgpack to FuzzerTxData corpus + * + * Usage: corpus_from_avm_inputs [--verbose] [--dry-run] + * + * This tool reads msgpack binary files produced by DumpingCppPublicTxSimulator + * and converts them to FuzzerTxData format for the tx.fuzzer corpus. + * + * The input file contains AvmCircuitInputs (AvmExecutionHints + AvmCircuitPublicInputs). 
+ * We extract ExecutionHints and convert: + * - contract_classes → ContractClass + decompile bytecode to FuzzerData + * - contract_instances → ContractInstance + extract addresses + * - tx, global_variables, protocol_contracts → direct copy + */ + +#include "barretenberg/avm_fuzzer/fuzz_lib/bytecode_decompiler.hpp" +#include "barretenberg/avm_fuzzer/fuzz_lib/control_flow.hpp" +#include "barretenberg/avm_fuzzer/fuzz_lib/fuzzer_data.hpp" +#include "barretenberg/avm_fuzzer/tx.fuzzer.hpp" +#include "barretenberg/crypto/sha256/sha256.hpp" +#include "barretenberg/serialize/msgpack_impl.hpp" +#include "barretenberg/vm2/common/avm_io.hpp" + +#include +#include +#include +#include +#include +#include +#include + +using namespace bb::avm2; + +namespace { + +/** + * Read entire file into a byte vector + */ +std::vector read_file(const std::string& path) +{ + std::ifstream file(path, std::ios::binary | std::ios::ate); + if (!file) { + throw std::runtime_error("Cannot open file: " + path); + } + + auto size = file.tellg(); + file.seekg(0, std::ios::beg); + + std::vector data(static_cast(size)); + if (!file.read(reinterpret_cast(data.data()), size)) { + throw std::runtime_error("Failed to read file: " + path); + } + + return data; +} + +/** + * Convert bytes to hex string (for filename generation) + */ +std::string bytes_to_hex(const std::vector& bytes) +{ + std::ostringstream ss; + for (auto byte : bytes) { + ss << std::hex << std::setfill('0') << std::setw(2) << static_cast(byte); + } + return ss.str(); +} + +/** + * Convert PublicKeysHint to PublicKeys + */ +PublicKeys to_public_keys(const PublicKeysHint& hint) +{ + return PublicKeys{ + .nullifier_key = hint.master_nullifier_public_key, + .incoming_viewing_key = hint.master_incoming_viewing_public_key, + .outgoing_viewing_key = hint.master_outgoing_viewing_public_key, + .tagging_key = hint.master_tagging_public_key, + }; +} + +/** + * Convert ContractClassHint to ContractClass + */ +ContractClass to_contract_class(const 
ContractClassHint& hint) +{ + return ContractClass{ + .id = hint.class_id, + .artifact_hash = hint.artifact_hash, + .private_functions_root = hint.private_functions_root, + .packed_bytecode = hint.packed_bytecode, + }; +} + +/** + * Convert ContractInstanceHint to ContractInstance + */ +ContractInstance to_contract_instance(const ContractInstanceHint& hint) +{ + return ContractInstance{ + .salt = hint.salt, + .deployer = hint.deployer, + .current_contract_class_id = hint.current_contract_class_id, + .original_contract_class_id = hint.original_contract_class_id, + .initialization_hash = hint.initialization_hash, + .public_keys = to_public_keys(hint.public_keys), + }; +} + +/** + * Rebuild bytecode from FuzzerData to verify roundtrip fidelity. + * Returns the recompiled bytecode. + */ +std::vector rebuild_bytecode(const FuzzerData& data) +{ + // Make a mutable copy since ControlFlow modifies the blocks + auto instruction_blocks = data.instruction_blocks; + ControlFlow control_flow(instruction_blocks); + for (const auto& cfg_instr : data.cfg_instructions) { + control_flow.process_cfg_instruction(cfg_instr); + } + return control_flow.build_bytecode(data.return_options); +} + +/** + * Verify that decompiled FuzzerData produces identical bytecode when recompiled. + * Returns true if bytecode matches, false otherwise. 
+ */ +bool verify_bytecode_roundtrip(const std::vector& original_bytecode, + const FuzzerData& fuzzer_data, + bool verbose) +{ + try { + auto rebuilt = rebuild_bytecode(fuzzer_data); + + if (rebuilt.size() != original_bytecode.size()) { + if (verbose) { + std::cerr << " ROUNDTRIP FAILED: size mismatch (original=" << original_bytecode.size() + << ", rebuilt=" << rebuilt.size() << ")\n"; + } + return false; + } + + for (size_t i = 0; i < original_bytecode.size(); i++) { + if (original_bytecode[i] != rebuilt[i]) { + if (verbose) { + std::cerr << " ROUNDTRIP FAILED: byte mismatch at offset " << i << " (original=0x" << std::hex + << static_cast(original_bytecode[i]) << ", rebuilt=0x" + << static_cast(rebuilt[i]) << std::dec << ")\n"; + + // Print context: bytes around the mismatch + size_t start = (i >= 8) ? (i - 8) : 0; + size_t end = std::min(i + 16, original_bytecode.size()); + std::cerr << " Context (offsets " << start << "-" << end << "):\n"; + std::cerr << " Original: "; + for (size_t j = start; j < end; j++) { + if (j == i) { + std::cerr << "["; + } + std::cerr << std::hex << std::setfill('0') << std::setw(2) + << static_cast(original_bytecode[j]); + if (j == i) { + std::cerr << "]"; + } + std::cerr << " "; + } + std::cerr << std::dec << "\n"; + std::cerr << " Rebuilt: "; + for (size_t j = start; j < end; j++) { + if (j == i) { + std::cerr << "["; + } + std::cerr << std::hex << std::setfill('0') << std::setw(2) << static_cast(rebuilt[j]); + if (j == i) { + std::cerr << "]"; + } + std::cerr << " "; + } + std::cerr << std::dec << "\n"; + } + return false; + } + } + + return true; + } catch (const std::exception& e) { + if (verbose) { + std::cerr << " ROUNDTRIP FAILED: rebuild error - " << e.what() << "\n"; + } + return false; + } +} + +void print_usage() +{ + std::cerr << "Usage: corpus_from_avm_inputs [--verbose] [--dry-run]\n\n" + << " input.bin - Msgpack AvmCircuitInputs file from DumpingCppPublicTxSimulator\n" + << " corpus_dir - Directory to write FuzzerTxData 
corpus entry\n\n" + << "Options:\n" + << " --verbose - Print conversion details\n" + << " --dry-run - Parse and validate without writing\n"; +} + +} // namespace + +int main(int argc, char** argv) +{ + if (argc < 3) { + print_usage(); + return 1; + } + + std::string input_path = argv[1]; + std::string corpus_dir = argv[2]; + bool verbose = false; + bool dry_run = false; + + // Parse optional flags + for (int i = 3; i < argc; i++) { + std::string arg = argv[i]; + if (arg == "--verbose" || arg == "-v") { + verbose = true; + } else if (arg == "--dry-run" || arg == "-n") { + dry_run = true; + } else { + std::cerr << "Unknown option: " << arg << "\n"; + print_usage(); + return 1; + } + } + + // 1. Read msgpack binary + std::vector data; + try { + data = read_file(input_path); + if (verbose) { + std::cout << "Read " << data.size() << " bytes from " << input_path << "\n"; + } + } catch (const std::exception& e) { + std::cerr << "Error: " << e.what() << "\n"; + return 1; + } + + // 2. Parse as AvmProvingInputs + AvmProvingInputs inputs; + try { + inputs = AvmProvingInputs::from(data); + if (verbose) { + std::cout << "Parsed AvmProvingInputs successfully\n"; + } + } catch (const std::exception& e) { + std::cerr << "Error parsing AvmProvingInputs: " << e.what() << "\n"; + return 1; + } + + const auto& hints = inputs.hints; + + // 3. Build FuzzerTxData + FuzzerTxData tx_data; + + // 3a. 
Convert contract classes and decompile bytecode + if (verbose) { + std::cout << "Processing " << hints.contract_classes.size() << " contract classes...\n"; + } + + for (const auto& cc_hint : hints.contract_classes) { + // Convert to ContractClass + tx_data.contract_classes.push_back(to_contract_class(cc_hint)); + + // Decompile bytecode to FuzzerData + if (!cc_hint.packed_bytecode.empty()) { + bool decompile_success = false; + try { + FuzzerData fuzzer_data = bb::avm_fuzzer::decompile_bytecode(cc_hint.packed_bytecode, {}); + + // Verify roundtrip: recompiled bytecode must match original + if (!verify_bytecode_roundtrip(cc_hint.packed_bytecode, fuzzer_data, verbose)) { + std::cerr << "WARNING: Class " << cc_hint.class_id << ": bytecode roundtrip verification failed!\n" + << " This will cause contract address mismatch at runtime.\n"; + // Still add the FuzzerData, but warn the user + } + + tx_data.input_programs.push_back(std::move(fuzzer_data)); + decompile_success = true; + + if (verbose) { + size_t instr_count = 0; + for (const auto& block : tx_data.input_programs.back().instruction_blocks) { + instr_count += block.size(); + } + std::cout << " Class " << cc_hint.class_id << ": " << instr_count << " instructions from " + << cc_hint.packed_bytecode.size() << " bytes (roundtrip OK)\n"; + } + } catch (const std::exception& e) { + if (verbose) { + // Print first 32 bytes of bytecode for debugging + std::cerr << " Class " << cc_hint.class_id << ": decompile failed (" << e.what() << ")\n"; + std::cerr << " Bytecode (" << cc_hint.packed_bytecode.size() << " bytes): "; + size_t dump_len = std::min(cc_hint.packed_bytecode.size(), size_t(32)); + for (size_t i = 0; i < dump_len; i++) { + std::cerr << std::hex << std::setfill('0') << std::setw(2) + << static_cast(cc_hint.packed_bytecode[i]) << " "; + } + if (cc_hint.packed_bytecode.size() > 32) { + std::cerr << "..."; + } + std::cerr << std::dec << "\n"; + } + } catch (...) 
{ + if (verbose) { + std::cerr << " Class " << cc_hint.class_id << ": decompile failed (unknown error)" + << ", using empty FuzzerData\n"; + } + } + + if (!decompile_success) { + // Add empty FuzzerData as placeholder + tx_data.input_programs.push_back(FuzzerData{}); + } + } else { + // Empty bytecode - add empty FuzzerData + tx_data.input_programs.push_back(FuzzerData{}); + if (verbose) { + std::cout << " Class " << cc_hint.class_id << ": empty bytecode\n"; + } + } + } + + // 3b. Convert contract instances and extract addresses + if (verbose) { + std::cout << "Processing " << hints.contract_instances.size() << " contract instances...\n"; + } + + for (const auto& ci_hint : hints.contract_instances) { + // Extract address + tx_data.contract_addresses.push_back(ci_hint.address); + + // Convert to ContractInstance + tx_data.contract_instances.push_back(to_contract_instance(ci_hint)); + + if (verbose) { + std::cout << " Instance at " << ci_hint.address << " (class " << ci_hint.current_contract_class_id + << ")\n"; + } + } + + // 3c. Copy shared types directly + tx_data.tx = hints.tx; + tx_data.global_variables = hints.global_variables; + tx_data.protocol_contracts = hints.protocol_contracts; + + // 3d. Fix maxPriorityFeesPerGas to be consistent with effective_gas_fees + // JS computes: effectiveFees = globals.gasFees + priorityFees + // So: priorityFees = effectiveFees - globals.gasFees + // This ensures C++ and JS compute the same fee when using the same tx data. + const auto& effective = tx_data.tx.effective_gas_fees; + const auto& globals = tx_data.global_variables.gas_fees; + + // Compute the priority fees needed to match effective_gas_fees + uint128_t priority_da = + (effective.fee_per_da_gas > globals.fee_per_da_gas) ? (effective.fee_per_da_gas - globals.fee_per_da_gas) : 0; + uint128_t priority_l2 = + (effective.fee_per_l2_gas > globals.fee_per_l2_gas) ? 
(effective.fee_per_l2_gas - globals.fee_per_l2_gas) : 0; + + tx_data.tx.gas_settings.max_priority_fees_per_gas = GasFees{ + .fee_per_da_gas = priority_da, + .fee_per_l2_gas = priority_l2, + }; + + if (verbose) { + std::cout << " Fixed maxPriorityFeesPerGas: (" << priority_da << ", " << priority_l2 << ") " + << "to match effective_gas_fees: (" << effective.fee_per_da_gas << ", " << effective.fee_per_l2_gas + << ") with globals: (" << globals.fee_per_da_gas << ", " << globals.fee_per_l2_gas << ")\n"; + } + + if (verbose) { + std::cout << "\nFuzzerTxData summary:\n" + << " input_programs: " << tx_data.input_programs.size() << "\n" + << " contract_classes: " << tx_data.contract_classes.size() << "\n" + << " contract_instances: " << tx_data.contract_instances.size() << "\n" + << " contract_addresses: " << tx_data.contract_addresses.size() << "\n" + << " setup_enqueued_calls: " << tx_data.tx.setup_enqueued_calls.size() << "\n" + << " app_logic_enqueued_calls: " << tx_data.tx.app_logic_enqueued_calls.size() << "\n"; + } + + if (dry_run) { + std::cout << "Dry run - not writing output\n"; + return 0; + } + + // 4. Serialize to msgpack + msgpack::sbuffer buffer; + msgpack::pack(buffer, tx_data); + + // 5. Ensure corpus directory exists + std::filesystem::create_directories(corpus_dir); + + // 6. Generate filename from content hash (first 16 bytes of SHA256) + auto hash = bb::crypto::sha256(std::vector(buffer.data(), buffer.data() + buffer.size())); + std::string filename = bytes_to_hex(std::vector(hash.begin(), hash.begin() + 16)); + std::string output_path = corpus_dir + "/" + filename; + + // 7. 
Write output + std::ofstream output_file(output_path, std::ios::binary); + if (!output_file) { + std::cerr << "Error: Cannot write to " << output_path << "\n"; + return 1; + } + + output_file.write(buffer.data(), static_cast(buffer.size())); + output_file.close(); + + std::cout << "Written: " << output_path << " (" << buffer.size() << " bytes)\n"; + return 0; +} diff --git a/barretenberg/cpp/src/barretenberg/avm_fuzzer/tools/corpus_seed.cpp b/barretenberg/cpp/src/barretenberg/avm_fuzzer/tools/corpus_seed.cpp new file mode 100644 index 000000000000..eb4c62a03c3b --- /dev/null +++ b/barretenberg/cpp/src/barretenberg/avm_fuzzer/tools/corpus_seed.cpp @@ -0,0 +1,235 @@ +/** + * corpus_seed - Convert bytecode JSON files to fuzzer corpus entries + * + * Usage: corpus_seed [--verbose] [--dry-run] + * + * Input JSON format: + * { + * "bytecode": "0x...", // Hex-encoded bytecode + * "calldata": ["0x...", ...] // Optional hex-encoded field elements + * } + * + * Output: Msgpack-serialized FuzzerData written to corpus_dir with content-hash filename + */ + +#include "barretenberg/avm_fuzzer/fuzz_lib/bytecode_decompiler.hpp" +#include "barretenberg/avm_fuzzer/fuzz_lib/fuzzer_data.hpp" +#include "barretenberg/crypto/sha256/sha256.hpp" +#include "barretenberg/serialize/msgpack_impl.hpp" + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +using json = nlohmann::json; +using bb::avm2::FF; + +namespace { + +/** + * Parse hex string to bytes (with or without 0x prefix) + */ +std::vector hex_to_bytes(const std::string& hex) +{ + std::string h = hex; + // Remove 0x prefix if present + if (h.size() >= 2 && h[0] == '0' && (h[1] == 'x' || h[1] == 'X')) { + h = h.substr(2); + } + + std::vector bytes; + bytes.reserve(h.size() / 2); + + for (size_t i = 0; i + 1 < h.size(); i += 2) { + auto byte = static_cast(std::stoul(h.substr(i, 2), nullptr, 16)); + bytes.push_back(byte); + } + + return bytes; +} + +/** + * Convert bytes to hex string (for 
filename generation) + */ +std::string bytes_to_hex(const std::vector& bytes) +{ + std::ostringstream ss; + for (auto byte : bytes) { + ss << std::hex << std::setfill('0') << std::setw(2) << static_cast(byte); + } + return ss.str(); +} + +/** + * Parse hex string to field element + * Handles arbitrary length hex, pads to 32 bytes on the left + */ +FF hex_to_ff(const std::string& hex) +{ + auto bytes = hex_to_bytes(hex); + + // Pad to 32 bytes (big-endian, so pad on the left) + while (bytes.size() < 32) { + bytes.insert(bytes.begin(), 0); + } + + // Truncate if too long (take the rightmost 32 bytes) + if (bytes.size() > 32) { + bytes = std::vector(bytes.end() - 32, bytes.end()); + } + + return FF::serialize_from_buffer(bytes.data()); +} + +void print_usage() +{ + std::cerr << "Usage: corpus_seed [--verbose] [--dry-run]\n\n" + << " input.json - JSON file with bytecode and optional calldata\n" + << " corpus_dir - Directory to write corpus entry\n\n" + << "Options:\n" + << " --verbose - Print decompiled instruction count\n" + << " --dry-run - Parse and validate without writing\n\n" + << "Input JSON format:\n" + << " {\n" + << " \"bytecode\": \"0x...\",\n" + << " \"calldata\": [\"0x...\", ...]\n" + << " }\n"; +} + +} // namespace + +int main(int argc, char** argv) +{ + if (argc < 3) { + print_usage(); + return 1; + } + + std::string input_path = argv[1]; + std::string corpus_dir = argv[2]; + bool verbose = false; + bool dry_run = false; + + // Parse optional flags + for (int i = 3; i < argc; i++) { + std::string arg = argv[i]; + if (arg == "--verbose" || arg == "-v") { + verbose = true; + } else if (arg == "--dry-run" || arg == "-n") { + dry_run = true; + } else { + std::cerr << "Unknown option: " << arg << "\n"; + print_usage(); + return 1; + } + } + + // Read and parse JSON input + std::ifstream input_file(input_path); + if (!input_file) { + std::cerr << "Error: Cannot open " << input_path << "\n"; + return 1; + } + + json input_json; + try { + input_file >> 
input_json; + } catch (const json::parse_error& e) { + std::cerr << "Error: Failed to parse JSON: " << e.what() << "\n"; + return 1; + } + + // Extract bytecode + if (!input_json.contains("bytecode")) { + std::cerr << "Error: JSON must contain 'bytecode' field\n"; + return 1; + } + + std::vector bytecode; + try { + bytecode = hex_to_bytes(input_json["bytecode"].get()); + } catch (const std::exception& e) { + std::cerr << "Error: Failed to parse bytecode: " << e.what() << "\n"; + return 1; + } + + // Extract calldata (optional) + std::vector calldata; + if (input_json.contains("calldata")) { + try { + for (const auto& cd : input_json["calldata"]) { + calldata.push_back(hex_to_ff(cd.get())); + } + } catch (const std::exception& e) { + std::cerr << "Error: Failed to parse calldata: " << e.what() << "\n"; + return 1; + } + } + + // Print bytecode for debugging + if (verbose) { + std::cout << "Bytecode (" << bytecode.size() << " bytes): "; + for (size_t i = 0; i < std::min(bytecode.size(), size_t(32)); ++i) { + std::cout << std::hex << std::setfill('0') << std::setw(2) << static_cast(bytecode[i]); + } + if (bytecode.size() > 32) { + std::cout << "..."; + } + std::cout << std::dec << "\n"; + } + + // Decompile bytecode to FuzzerData + FuzzerData fuzzer_data; + try { + fuzzer_data = bb::avm_fuzzer::decompile_bytecode(bytecode, calldata); + } catch (const std::exception& e) { + std::cerr << "Error: Failed to decompile bytecode: " << e.what() << "\n"; + return 1; + } + + if (verbose) { + size_t instr_count = 0; + for (const auto& block : fuzzer_data.instruction_blocks) { + instr_count += block.size(); + } + std::cout << "Decompiled " << instr_count << " instructions from " << bytecode.size() << " bytes\n"; + std::cout << "Calldata: " << calldata.size() << " elements\n"; + } + + if (dry_run) { + std::cout << "Dry run - not writing output\n"; + return 0; + } + + // Serialize to msgpack + msgpack::sbuffer buffer; + msgpack::pack(buffer, fuzzer_data); + + // Ensure corpus 
directory exists + std::filesystem::create_directories(corpus_dir); + + // Generate filename from content hash (first 16 bytes of SHA256) + auto hash = bb::crypto::sha256(std::vector(buffer.data(), buffer.data() + buffer.size())); + std::string filename = bytes_to_hex(std::vector(hash.begin(), hash.begin() + 16)); + std::string output_path = corpus_dir + "/" + filename; + + // Write output + std::ofstream output_file(output_path, std::ios::binary); + if (!output_file) { + std::cerr << "Error: Cannot write to " << output_path << "\n"; + return 1; + } + + output_file.write(buffer.data(), static_cast(buffer.size())); + output_file.close(); + + std::cout << "Written: " << output_path << "\n"; + return 0; +} diff --git a/barretenberg/cpp/src/barretenberg/avm_fuzzer/tools/corpus_seed_tx.cpp b/barretenberg/cpp/src/barretenberg/avm_fuzzer/tools/corpus_seed_tx.cpp new file mode 100644 index 000000000000..c7609ae6d969 --- /dev/null +++ b/barretenberg/cpp/src/barretenberg/avm_fuzzer/tools/corpus_seed_tx.cpp @@ -0,0 +1,549 @@ +/** + * corpus_seed_tx - Convert TS ExecutionHints JSON to FuzzerTxData corpus entries + * + * Usage: corpus_seed_tx [--verbose] [--dry-run] + * + * This tool takes a JSON file generated by TypeScript that contains: + * - Contract classes with bytecode + * - Contract instances + * - Transaction metadata + * - Global variables + * - Protocol contracts + * + * It decompiles bytecode fields to FuzzerData and produces FuzzerTxData for the tx.fuzzer corpus. 
+ */ + +#include "barretenberg/avm_fuzzer/fuzz_lib/bytecode_decompiler.hpp" +#include "barretenberg/avm_fuzzer/fuzz_lib/fuzzer_data.hpp" +#include "barretenberg/avm_fuzzer/tx.fuzzer.hpp" +#include "barretenberg/crypto/sha256/sha256.hpp" +#include "barretenberg/serialize/msgpack_impl.hpp" +#include "barretenberg/vm2/common/aztec_types.hpp" + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +using json = nlohmann::json; +using namespace bb::avm2; + +namespace { + +/** + * Parse hex string to bytes (with or without 0x prefix) + */ +std::vector hex_to_bytes(const std::string& hex) +{ + std::string h = hex; + // Remove 0x prefix if present + if (h.size() >= 2 && h[0] == '0' && (h[1] == 'x' || h[1] == 'X')) { + h = h.substr(2); + } + + std::vector bytes; + bytes.reserve(h.size() / 2); + + for (size_t i = 0; i + 1 < h.size(); i += 2) { + auto byte = static_cast(std::stoul(h.substr(i, 2), nullptr, 16)); + bytes.push_back(byte); + } + + return bytes; +} + +/** + * Convert bytes to hex string (for filename generation) + */ +std::string bytes_to_hex(const std::vector& bytes) +{ + std::ostringstream ss; + for (auto byte : bytes) { + ss << std::hex << std::setfill('0') << std::setw(2) << static_cast(byte); + } + return ss.str(); +} + +/** + * Parse hex string to field element + * Handles arbitrary length hex, pads to 32 bytes on the left + */ +FF hex_to_ff(const std::string& hex) +{ + auto bytes = hex_to_bytes(hex); + + // Pad to 32 bytes (big-endian, so pad on the left) + while (bytes.size() < 32) { + bytes.insert(bytes.begin(), 0); + } + + // Truncate if too long (take the rightmost 32 bytes) + if (bytes.size() > 32) { + bytes = std::vector(bytes.end() - 32, bytes.end()); + } + + return FF::serialize_from_buffer(bytes.data()); +} + +/** + * Parse hex string to uint128_t + */ +uint128_t hex_to_u128(const std::string& hex) +{ + auto bytes = hex_to_bytes(hex); + + // Pad to 16 bytes (big-endian, so pad on the left) + while 
(bytes.size() < 16) { + bytes.insert(bytes.begin(), 0); + } + + // Take the rightmost 16 bytes if too long + if (bytes.size() > 16) { + bytes = std::vector(bytes.end() - 16, bytes.end()); + } + + // Convert to uint128_t (big-endian) + uint128_t result = 0; + for (size_t i = 0; i < 16; i++) { + result = (result << 8) | bytes[i]; + } + return result; +} + +/** + * Parse a GasFees object from JSON + */ +GasFees parse_gas_fees(const json& j) +{ + GasFees fees; + if (j.contains("feePerDaGas")) { + fees.fee_per_da_gas = hex_to_u128(j["feePerDaGas"].get()); + } + if (j.contains("feePerL2Gas")) { + fees.fee_per_l2_gas = hex_to_u128(j["feePerL2Gas"].get()); + } + return fees; +} + +/** + * Parse a Gas object from JSON + */ +Gas parse_gas(const json& j) +{ + Gas gas; + if (j.contains("l2Gas")) { + gas.l2_gas = j["l2Gas"].get(); + } + if (j.contains("daGas")) { + gas.da_gas = j["daGas"].get(); + } + return gas; +} + +/** + * Parse a GasSettings object from JSON + */ +GasSettings parse_gas_settings(const json& j) +{ + GasSettings settings; + if (j.contains("gasLimits")) { + settings.gas_limits = parse_gas(j["gasLimits"]); + } + if (j.contains("teardownGasLimits")) { + settings.teardown_gas_limits = parse_gas(j["teardownGasLimits"]); + } + if (j.contains("maxFeesPerGas")) { + settings.max_fees_per_gas = parse_gas_fees(j["maxFeesPerGas"]); + } + if (j.contains("maxPriorityFeesPerGas")) { + settings.max_priority_fees_per_gas = parse_gas_fees(j["maxPriorityFeesPerGas"]); + } + return settings; +} + +/** + * Parse a PublicKeys object from JSON + */ +PublicKeys parse_public_keys(const json& /* j */) +{ + PublicKeys keys; + // Public keys are typically points, parse if present + // For now, leave as default (zeros) if not provided + // TODO: Implement proper point parsing when needed + return keys; +} + +/** + * Parse a ContractClass from JSON + */ +ContractClass parse_contract_class(const json& j) +{ + ContractClass contract_class; + + if (j.contains("id")) { + contract_class.id = 
hex_to_ff(j["id"].get()); + } + if (j.contains("artifactHash")) { + contract_class.artifact_hash = hex_to_ff(j["artifactHash"].get()); + } + if (j.contains("privateFunctionsRoot")) { + contract_class.private_functions_root = hex_to_ff(j["privateFunctionsRoot"].get()); + } + if (j.contains("packedBytecode")) { + contract_class.packed_bytecode = hex_to_bytes(j["packedBytecode"].get()); + } + + return contract_class; +} + +/** + * Parse a ContractInstance from JSON + */ +ContractInstance parse_contract_instance(const json& j) +{ + ContractInstance instance; + + if (j.contains("salt")) { + instance.salt = hex_to_ff(j["salt"].get()); + } + if (j.contains("deployer")) { + instance.deployer = hex_to_ff(j["deployer"].get()); + } + if (j.contains("currentContractClassId")) { + instance.current_contract_class_id = hex_to_ff(j["currentContractClassId"].get()); + } + if (j.contains("originalContractClassId")) { + instance.original_contract_class_id = hex_to_ff(j["originalContractClassId"].get()); + } + if (j.contains("initializationHash")) { + instance.initialization_hash = hex_to_ff(j["initializationHash"].get()); + } + if (j.contains("publicKeys")) { + instance.public_keys = parse_public_keys(j["publicKeys"]); + } + + return instance; +} + +/** + * Parse a GlobalVariables from JSON + */ +GlobalVariables parse_global_variables(const json& j) +{ + GlobalVariables gv; + + if (j.contains("chainId")) { + gv.chain_id = hex_to_ff(j["chainId"].get()); + } + if (j.contains("version")) { + gv.version = hex_to_ff(j["version"].get()); + } + if (j.contains("blockNumber")) { + gv.block_number = j["blockNumber"].get(); + } + if (j.contains("slotNumber")) { + gv.slot_number = hex_to_ff(j["slotNumber"].get()); + } + if (j.contains("timestamp")) { + gv.timestamp = j["timestamp"].get(); + } + if (j.contains("coinbase")) { + gv.coinbase = hex_to_ff(j["coinbase"].get()); + } + if (j.contains("feeRecipient")) { + gv.fee_recipient = hex_to_ff(j["feeRecipient"].get()); + } + if 
(j.contains("gasFees")) { + gv.gas_fees = parse_gas_fees(j["gasFees"]); + } + + return gv; +} + +/** + * Parse a PublicCallRequest from JSON + */ +PublicCallRequest parse_public_call_request(const json& j) +{ + PublicCallRequest req; + + if (j.contains("msgSender")) { + req.msg_sender = hex_to_ff(j["msgSender"].get()); + } + if (j.contains("contractAddress")) { + req.contract_address = hex_to_ff(j["contractAddress"].get()); + } + if (j.contains("isStaticCall")) { + req.is_static_call = j["isStaticCall"].get(); + } + if (j.contains("calldataHash")) { + req.calldata_hash = hex_to_ff(j["calldataHash"].get()); + } + + return req; +} + +/** + * Parse a PublicCallRequestWithCalldata from JSON + */ +PublicCallRequestWithCalldata parse_public_call_request_with_calldata(const json& j) +{ + PublicCallRequestWithCalldata req_with_cd; + + if (j.contains("request")) { + req_with_cd.request = parse_public_call_request(j["request"]); + } + if (j.contains("calldata")) { + for (const auto& cd : j["calldata"]) { + req_with_cd.calldata.push_back(hex_to_ff(cd.get())); + } + } + + return req_with_cd; +} + +/** + * Parse a Tx object from JSON + */ +Tx parse_tx(const json& j) +{ + Tx tx; + + if (j.contains("hash")) { + tx.hash = j["hash"].get(); + } + if (j.contains("gasSettings")) { + tx.gas_settings = parse_gas_settings(j["gasSettings"]); + } + if (j.contains("effectiveGasFees")) { + tx.effective_gas_fees = parse_gas_fees(j["effectiveGasFees"]); + } + if (j.contains("setupEnqueuedCalls")) { + for (const auto& call : j["setupEnqueuedCalls"]) { + tx.setup_enqueued_calls.push_back(parse_public_call_request_with_calldata(call)); + } + } + if (j.contains("appLogicEnqueuedCalls")) { + for (const auto& call : j["appLogicEnqueuedCalls"]) { + tx.app_logic_enqueued_calls.push_back(parse_public_call_request_with_calldata(call)); + } + } + if (j.contains("teardownEnqueuedCall") && !j["teardownEnqueuedCall"].is_null()) { + tx.teardown_enqueued_call = 
parse_public_call_request_with_calldata(j["teardownEnqueuedCall"]); + } + if (j.contains("gasUsedByPrivate")) { + tx.gas_used_by_private = parse_gas(j["gasUsedByPrivate"]); + } + if (j.contains("feePayer")) { + tx.fee_payer = hex_to_ff(j["feePayer"].get()); + } + + return tx; +} + +/** + * Parse ProtocolContracts from JSON + */ +ProtocolContracts parse_protocol_contracts(const json& j) +{ + ProtocolContracts pc; + + if (j.contains("derivedAddresses")) { + size_t idx = 0; + for (const auto& addr : j["derivedAddresses"]) { + if (idx < pc.derived_addresses.size()) { + pc.derived_addresses[idx] = hex_to_ff(addr.get()); + idx++; + } + } + } + + return pc; +} + +void print_usage() +{ + std::cerr << "Usage: corpus_seed_tx [--verbose] [--dry-run]\n\n" + << " input.json - JSON file with TS ExecutionHints-like structure\n" + << " corpus_dir - Directory to write corpus entry\n\n" + << "Options:\n" + << " --verbose - Print decompiled instruction counts per contract\n" + << " --dry-run - Parse and validate without writing\n\n" + << "Input JSON format:\n" + << " {\n" + << " \"contractClasses\": [...],\n" + << " \"contractInstances\": [...],\n" + << " \"contractAddresses\": [...],\n" + << " \"globalVariables\": {...},\n" + << " \"tx\": {...},\n" + << " \"protocolContracts\": {...}\n" + << " }\n"; +} + +} // namespace + +int main(int argc, char** argv) +{ + if (argc < 3) { + print_usage(); + return 1; + } + + std::string input_path = argv[1]; + std::string corpus_dir = argv[2]; + bool verbose = false; + bool dry_run = false; + + // Parse optional flags + for (int i = 3; i < argc; i++) { + std::string arg = argv[i]; + if (arg == "--verbose" || arg == "-v") { + verbose = true; + } else if (arg == "--dry-run" || arg == "-n") { + dry_run = true; + } else { + std::cerr << "Unknown option: " << arg << "\n"; + print_usage(); + return 1; + } + } + + // Read and parse JSON input + std::ifstream input_file(input_path); + if (!input_file) { + std::cerr << "Error: Cannot open " << input_path 
<< "\n"; + return 1; + } + + json input_json; + try { + input_file >> input_json; + } catch (const json::parse_error& e) { + std::cerr << "Error: Failed to parse JSON: " << e.what() << "\n"; + return 1; + } + + // Build FuzzerTxData + FuzzerTxData tx_data; + + // Parse contract classes and decompile bytecode + if (input_json.contains("contractClasses")) { + for (const auto& cc_json : input_json["contractClasses"]) { + try { + ContractClass cc = parse_contract_class(cc_json); + tx_data.contract_classes.push_back(cc); + + // Decompile bytecode to FuzzerData + if (!cc.packed_bytecode.empty()) { + FuzzerData fuzzer_data = bb::avm_fuzzer::decompile_bytecode(cc.packed_bytecode, {}); + tx_data.input_programs.push_back(fuzzer_data); + + if (verbose) { + size_t instr_count = 0; + for (const auto& block : fuzzer_data.instruction_blocks) { + instr_count += block.size(); + } + std::cout << "Contract class " << cc.id << ": " << instr_count << " instructions from " + << cc.packed_bytecode.size() << " bytes\n"; + } + } + } catch (const std::exception& e) { + std::cerr << "Error parsing contract class: " << e.what() << "\n"; + return 1; + } + } + } + + // Parse contract instances + if (input_json.contains("contractInstances")) { + for (const auto& ci_json : input_json["contractInstances"]) { + try { + tx_data.contract_instances.push_back(parse_contract_instance(ci_json)); + } catch (const std::exception& e) { + std::cerr << "Error parsing contract instance: " << e.what() << "\n"; + return 1; + } + } + } + + // Parse contract addresses + if (input_json.contains("contractAddresses")) { + for (const auto& addr : input_json["contractAddresses"]) { + tx_data.contract_addresses.push_back(hex_to_ff(addr.get())); + } + } + + // Parse global variables + if (input_json.contains("globalVariables")) { + try { + tx_data.global_variables = parse_global_variables(input_json["globalVariables"]); + } catch (const std::exception& e) { + std::cerr << "Error parsing global variables: " << e.what() << 
"\n"; + return 1; + } + } + + // Parse tx + if (input_json.contains("tx")) { + try { + tx_data.tx = parse_tx(input_json["tx"]); + } catch (const std::exception& e) { + std::cerr << "Error parsing tx: " << e.what() << "\n"; + return 1; + } + } + + // Parse protocol contracts + if (input_json.contains("protocolContracts")) { + try { + tx_data.protocol_contracts = parse_protocol_contracts(input_json["protocolContracts"]); + } catch (const std::exception& e) { + std::cerr << "Error parsing protocol contracts: " << e.what() << "\n"; + return 1; + } + } + + if (verbose) { + std::cout << "\nFuzzerTxData summary:\n" + << " input_programs: " << tx_data.input_programs.size() << "\n" + << " contract_classes: " << tx_data.contract_classes.size() << "\n" + << " contract_instances: " << tx_data.contract_instances.size() << "\n" + << " contract_addresses: " << tx_data.contract_addresses.size() << "\n"; + } + + if (dry_run) { + std::cout << "Dry run - not writing output\n"; + return 0; + } + + // Serialize to msgpack + msgpack::sbuffer buffer; + msgpack::pack(buffer, tx_data); + + // Ensure corpus directory exists + std::filesystem::create_directories(corpus_dir); + + // Generate filename from content hash (first 16 bytes of SHA256) + auto hash = bb::crypto::sha256(std::vector(buffer.data(), buffer.data() + buffer.size())); + std::string filename = bytes_to_hex(std::vector(hash.begin(), hash.begin() + 16)); + std::string output_path = corpus_dir + "/" + filename; + + // Write output + std::ofstream output_file(output_path, std::ios::binary); + if (!output_file) { + std::cerr << "Error: Cannot write to " << output_path << "\n"; + return 1; + } + + output_file.write(buffer.data(), static_cast(buffer.size())); + output_file.close(); + + std::cout << "Written: " << output_path << "\n"; + return 0; +} diff --git a/yarn-project/simulator/src/public/fuzzing/avm_fuzzer_simulator.ts b/yarn-project/simulator/src/public/fuzzing/avm_fuzzer_simulator.ts index a5ff9ef70ba1..b0b726422016 100644 
--- a/yarn-project/simulator/src/public/fuzzing/avm_fuzzer_simulator.ts +++ b/yarn-project/simulator/src/public/fuzzing/avm_fuzzer_simulator.ts @@ -210,9 +210,9 @@ export class AvmFuzzerSimulator extends BaseAvmSimulationTester { */ public async simulate(txHint: AvmTxHint): Promise { // Compute fee from gas limits and max fees per gas (upper bound on fee) - const totalFee = - BigInt(txHint.gasSettings.gasLimits.daGas) * txHint.gasSettings.maxFeesPerGas.feePerDaGas + - BigInt(txHint.gasSettings.gasLimits.l2Gas) * txHint.gasSettings.maxFeesPerGas.feePerL2Gas; + const feeRequiredDa = BigInt(txHint.gasSettings.gasLimits.daGas) * txHint.gasSettings.maxFeesPerGas.feePerDaGas; + const feeRequiredL2 = BigInt(txHint.gasSettings.gasLimits.l2Gas) * txHint.gasSettings.maxFeesPerGas.feePerL2Gas; + const totalFee = feeRequiredDa + feeRequiredL2; await this.setFeePayerBalance(txHint.feePayer, new Fr(totalFee)); diff --git a/yarn-project/simulator/src/public/fuzzing/avm_simulator_bin.ts b/yarn-project/simulator/src/public/fuzzing/avm_simulator_bin.ts index e36af508da40..f948986312b7 100644 --- a/yarn-project/simulator/src/public/fuzzing/avm_simulator_bin.ts +++ b/yarn-project/simulator/src/public/fuzzing/avm_simulator_bin.ts @@ -6,6 +6,7 @@ import { deserializeFromMessagePack, serializeWithMessagePack, } from '@aztec/stdlib/avm'; +import { AztecAddress } from '@aztec/stdlib/aztec-address'; import { GlobalVariables, TreeSnapshots } from '@aztec/stdlib/tx'; import { NativeWorldStateService } from '@aztec/world-state'; @@ -44,6 +45,21 @@ async function simulateWithFuzzer( rawContractClasses: any[], // Replace these when we are moving contract classes to TS rawContractInstances: [any, any][], // Replace these when we are moving contract instances to TS ): Promise<{ reverted: boolean; output: Fr[]; revertReason?: string; publicInputs: AvmCircuitPublicInputs }> { + // Log to stderr since stdout is used for communication + // console.error('[JS_SIM] === Starting JS Simulation ==='); + // 
console.error('[JS_SIM] txHint.feePayer:', txHint.feePayer.toString()); + // console.error('[JS_SIM] txHint.gasSettings.gasLimits.daGas:', txHint.gasSettings.gasLimits.daGas); + // console.error('[JS_SIM] txHint.gasSettings.gasLimits.l2Gas:', txHint.gasSettings.gasLimits.l2Gas); + // console.error('[JS_SIM] txHint.gasSettings.maxFeesPerGas.feePerDaGas:', txHint.gasSettings.maxFeesPerGas.feePerDaGas); + // console.error('[JS_SIM] txHint.gasSettings.maxFeesPerGas.feePerL2Gas:', txHint.gasSettings.maxFeesPerGas.feePerL2Gas); + // console.error('[JS_SIM] txHint.effectiveGasFees.feePerDaGas:', txHint.effectiveGasFees.feePerDaGas); + // console.error('[JS_SIM] txHint.effectiveGasFees.feePerL2Gas:', txHint.effectiveGasFees.feePerL2Gas); + // console.error('[JS_SIM] globals.gasFees.feePerDaGas:', globals.gasFees.feePerDaGas); + // console.error('[JS_SIM] globals.gasFees.feePerL2Gas:', globals.gasFees.feePerL2Gas); + // console.error('[JS_SIM] appLogicEnqueuedCalls count:', txHint.appLogicEnqueuedCalls?.length ?? 0); + // console.error('[JS_SIM] contractClasses count:', rawContractClasses.length); + // console.error('[JS_SIM] contractInstances count:', rawContractInstances.length); + const worldStateService = await openExistingWorldState(dataDir, mapSizeKb); const simulator = await AvmFuzzerSimulator.create(worldStateService, globals); @@ -54,16 +70,55 @@ async function simulateWithFuzzer( } // Register contract instances from C++ - for (const [rawAddress, rawInstance] of rawContractInstances) { + // Sort by address and deduplicate to match C++ registration order + // (C++ sorts and uses std::unique before inserting nullifiers) + const sortedContractInstances = [...rawContractInstances].sort((a, b) => { + const addrA = AztecAddress.fromPlainObject(a[0]); + const addrB = AztecAddress.fromPlainObject(b[0]); + return addrA.toBigInt() < addrB.toBigInt() ? -1 : addrA.toBigInt() > addrB.toBigInt() ? 
1 : 0; + }); + // Deduplicate consecutive entries with same address (like std::unique after sort) + const uniqueContractInstances = sortedContractInstances.filter((item, index) => { + if (index === 0) { + return true; + } + const prevAddr = AztecAddress.fromPlainObject(sortedContractInstances[index - 1][0]); + const currAddr = AztecAddress.fromPlainObject(item[0]); + return !prevAddr.equals(currAddr); + }); + for (const [rawAddress, rawInstance] of uniqueContractInstances) { + // console.error('[JS_SIM] Registering contract instance:', rawAddress); await simulator.addContractInstanceFromCpp(rawAddress, rawInstance); } + // console.error('[JS_SIM] Running simulation...'); const result = await simulator.simulate(txHint); + // console.error('[JS_SIM] Simulation complete'); const output = result .getAppLogicReturnValues() .flatMap((rv: { values?: Fr[] } | undefined) => rv?.values?.filter((v: Fr | null | undefined) => v != null) ?? []); + // console.error('[JS_SIM] === JS Simulation Complete ==='); + // console.error('[JS_SIM] reverted:', !result.revertCode.isOK()); + // console.error('[JS_SIM] output length:', output.length); + // if (result.publicInputs) { + // console.error( + // '[JS_SIM] end_tree_snapshots.publicDataTree.root:', + // result.publicInputs.endTreeSnapshots.publicDataTree.root.toString(), + // ); + // console.error( + // '[JS_SIM] end_tree_snapshots.nullifierTree.root:', + // result.publicInputs.endTreeSnapshots.nullifierTree.root.toString(), + // ); + // console.error( + // '[JS_SIM] end_tree_snapshots.noteHashTree.root:', + // result.publicInputs.endTreeSnapshots.noteHashTree.root.toString(), + // ); + // } else { + // console.error('[JS_SIM] WARNING: No publicInputs in result!'); + // } + return { reverted: !result.revertCode.isOK(), output, diff --git a/yarn-project/simulator/src/public/public_tx_simulator/apps_tests/opcode_spam.test.ts b/yarn-project/simulator/src/public/public_tx_simulator/apps_tests/opcode_spam.test.ts index 
9dfe67395332..476c9b4f1571 100644 --- a/yarn-project/simulator/src/public/public_tx_simulator/apps_tests/opcode_spam.test.ts +++ b/yarn-project/simulator/src/public/public_tx_simulator/apps_tests/opcode_spam.test.ts @@ -15,6 +15,7 @@ import { SimpleContractDataSource } from '../../fixtures/simple_contract_data_so import { TestExecutorMetrics } from '../../test_executor_metrics.js'; import { MeasuredCppPublicTxSimulator } from '../cpp_public_tx_simulator.js'; import { MeasuredCppVsTsPublicTxSimulator } from '../cpp_vs_ts_public_tx_simulator.js'; +import { MeasuredDumpingCppPublicTxSimulator } from '../dumping_cpp_public_tx_simulator.js'; // NOTE: This test is meant to be run for benchmarking. Set RUN_AVM_OPCODE_SPAM=1 to enable. const describeOrSkip = process.env.RUN_AVM_OPCODE_SPAM ? describe : describe.skip; @@ -30,6 +31,9 @@ const MAX_CALL_STACK_ITEMS = COLLECT_META_CHECK_RET ? 10000 : 0; const MAX_CALL_STACK_DEPTH = COLLECT_META_CHECK_RET ? 10000 : 0; const expectToBeTrue = COLLECT_META_CHECK_RET ? 
(x: boolean) => expect(x).toBe(true) : () => {}; +// Environment variable for dumping AVM circuit inputs to disk +const DUMP_DIR = process.env.DUMP_AVM_INPUTS_TO_DIR; + describeOrSkip('Opcode Spammer Benchmarks', () => { const logger = createLogger('opcode-spam-bench'); @@ -39,13 +43,14 @@ describeOrSkip('Opcode Spammer Benchmarks', () => { // Shared metrics instance for benchmark collection const metrics = new TestExecutorMetrics(); - // Benchmark config - disable most collection for speed + // Benchmark config - disable most collection for speed, unless dumping const config: PublicSimulatorConfig = PublicSimulatorConfig.from({ skipFeeEnforcement: false, collectCallMetadata: COLLECT_META_CHECK_RET, collectDebugLogs: false, - collectHints: false, - collectPublicInputs: false, + // Enable hints/public inputs collection when dumping + collectHints: !!DUMP_DIR, + collectPublicInputs: !!DUMP_DIR, collectStatistics: false, // Increase call stack limits for nested call tests (defaults are too low for some opcodes) collectionLimits: CollectionLimitsConfig.from({ @@ -80,9 +85,15 @@ describeOrSkip('Opcode Spammer Benchmarks', () => { worldStateService = await NativeWorldStateService.tmp(); const contractDataSource = new SimpleContractDataSource(); const merkleTree = await worldStateService.fork(); - const simulatorFactory: MeasuredSimulatorFactory = useCppSimulator - ? 
(mt, cdb, g, m, c) => new MeasuredCppPublicTxSimulator(mt, cdb, g, m, c) - : (mt, cdb, g, m, c) => new MeasuredCppVsTsPublicTxSimulator(mt, cdb, g, m, c); + // Use MeasuredDumpingCppPublicTxSimulator when DUMP_DIR is set, otherwise use regular simulator + let simulatorFactory: MeasuredSimulatorFactory; + if (DUMP_DIR && useCppSimulator) { + simulatorFactory = (mt, cdb, g, m, c) => new MeasuredDumpingCppPublicTxSimulator(mt, cdb, g, m, c, DUMP_DIR); + } else if (useCppSimulator) { + simulatorFactory = (mt, cdb, g, m, c) => new MeasuredCppPublicTxSimulator(mt, cdb, g, m, c); + } else { + simulatorFactory = (mt, cdb, g, m, c) => new MeasuredCppVsTsPublicTxSimulator(mt, cdb, g, m, c); + } tester = new PublicTxSimulationTester( merkleTree, contractDataSource, diff --git a/yarn-project/simulator/src/public/public_tx_simulator/dumping_cpp_public_tx_simulator.ts b/yarn-project/simulator/src/public/public_tx_simulator/dumping_cpp_public_tx_simulator.ts index 865af531378e..f9215a36b641 100644 --- a/yarn-project/simulator/src/public/public_tx_simulator/dumping_cpp_public_tx_simulator.ts +++ b/yarn-project/simulator/src/public/public_tx_simulator/dumping_cpp_public_tx_simulator.ts @@ -13,8 +13,10 @@ import { strict as assert } from 'assert'; import { mkdirSync, writeFileSync } from 'fs'; import { join } from 'path'; +import type { ExecutorMetricsInterface } from '../executor_metrics_interface.js'; import type { PublicContractsDB } from '../public_db_sources.js'; import { CppPublicTxSimulator } from './cpp_public_tx_simulator.js'; +import type { MeasuredPublicTxSimulatorInterface } from './public_tx_simulator_interface.js'; /** * A C++ public tx simulator that dumps AVM circuit inputs to disk after simulation. @@ -79,3 +81,34 @@ export class DumpingCppPublicTxSimulator extends CppPublicTxSimulator { } } } + +/** + * A C++ public tx simulator that both dumps AVM circuit inputs and tracks metrics. 
+ * Combines DumpingCppPublicTxSimulator and MeasuredCppPublicTxSimulator functionality. + */ +export class MeasuredDumpingCppPublicTxSimulator + extends DumpingCppPublicTxSimulator + implements MeasuredPublicTxSimulatorInterface +{ + constructor( + merkleTree: MerkleTreeWriteOperations, + contractsDB: PublicContractsDB, + globalVariables: GlobalVariables, + protected readonly metrics: ExecutorMetricsInterface, + config: Partial, + outputDir: string, + ) { + super(merkleTree, contractsDB, globalVariables, config, outputDir); + } + + public override async simulate(tx: Tx, txLabel: string = 'unlabeledTx'): Promise { + this.metrics.startRecordingTxSimulation(txLabel); + let result: PublicTxResult | undefined; + try { + result = await super.simulate(tx); + } finally { + this.metrics.stopRecordingTxSimulation(txLabel, result?.gasUsed, result?.revertCode); + } + return result; + } +} diff --git a/yarn-project/simulator/src/public/public_tx_simulator/index.ts b/yarn-project/simulator/src/public/public_tx_simulator/index.ts index fa7d44e03264..23be84239d6a 100644 --- a/yarn-project/simulator/src/public/public_tx_simulator/index.ts +++ b/yarn-project/simulator/src/public/public_tx_simulator/index.ts @@ -1,6 +1,6 @@ export * from './public_tx_simulator.js'; export { CppPublicTxSimulator, TelemetryCppPublicTxSimulator } from './cpp_public_tx_simulator.js'; -export { DumpingCppPublicTxSimulator } from './dumping_cpp_public_tx_simulator.js'; +export { DumpingCppPublicTxSimulator, MeasuredDumpingCppPublicTxSimulator } from './dumping_cpp_public_tx_simulator.js'; export { createPublicTxSimulatorForBlockBuilding } from './factories.js'; export type { PublicTxSimulatorInterface } from './public_tx_simulator_interface.js'; export { TelemetryPublicTxSimulator } from './telemetry_public_tx_simulator.js';