From c09a5a99f7ef1beb9e1da97d0fc5393359a5fb0b Mon Sep 17 00:00:00 2001 From: rodiazet Date: Wed, 3 Dec 2025 15:35:58 +0100 Subject: [PATCH] Optimize BN254 ecmul with field endomorphism MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add `ecc::decompose()` procedure to split ECC scalar into two smaller ones. Use the decomposition to speed up BN254 scalar multiplication. Co-authored-by: Paweł Bylica --- lib/evmone_precompiles/bn254.cpp | 14 +- lib/evmone_precompiles/bn254.hpp | 17 +++ lib/evmone_precompiles/ecc.hpp | 112 ++++++++++++++ test/unittests/evmmax_bn254_mul_test.cpp | 186 +++++++++++++++++++++++ 4 files changed, 328 insertions(+), 1 deletion(-) diff --git a/lib/evmone_precompiles/bn254.cpp b/lib/evmone_precompiles/bn254.cpp index b2358e9281..8971d23af9 100644 --- a/lib/evmone_precompiles/bn254.cpp +++ b/lib/evmone_precompiles/bn254.cpp @@ -18,7 +18,19 @@ bool validate(const AffinePoint& pt) noexcept AffinePoint mul(const AffinePoint& pt, const uint256& c) noexcept { - const auto pr = ecc::mul(pt, c); + if (pt == 0) + return pt; + + if (c == 0) + return {}; + + // Optimized using field endomorphism with scalar decomposition. + // See ecc::decompose() for more details. + const auto [k1, k2] = ecc::decompose(c); + + const auto q = AffinePoint{Curve::BETA * pt.x, !k2.sign ? pt.y : -pt.y}; + const auto p = AffinePoint{pt.x, !k1.sign ? pt.y : -pt.y}; + const auto pr = msm(k1.value, p, k2.value, q); return ecc::to_affine(pr); } } // namespace evmmax::bn254 diff --git a/lib/evmone_precompiles/bn254.hpp b/lib/evmone_precompiles/bn254.hpp index 4e6ac5a9db..d153054fe1 100644 --- a/lib/evmone_precompiles/bn254.hpp +++ b/lib/evmone_precompiles/bn254.hpp @@ -31,6 +31,23 @@ struct Curve static constexpr auto A = 0; static constexpr auto B = ecc::FieldElement(3); + + /// Endomorphism parameters. See ecc::decompose(). + /// @{ + /// λ + static constexpr auto LAMBDA = 0xb3c4d79d41a917585bfc41088d8daaa78b17ea66b99c90dd_u256; + /// β + static constexpr ecc::FieldElement BETA{ + 0x59e26bcea0d48bacd4f263f1acdb5c4f5763473177fffffe_u256}; + /// x₁ + static constexpr auto X1 = 0x6f4d8248eeb859fd95b806bca6f338ee_u256; + /// -y₁ + static constexpr auto MINUS_Y1 = 0x6f4d8248eeb859fbf83e9682e87cfd45_u256; + /// x₂ + static constexpr auto X2 = 0x6f4d8248eeb859fc8211bbeb7d4f1128_u256; + /// y₂ + static constexpr auto Y2 = 0x6f4d8248eeb859fd0be4e1541221250b_u256; + /// @} }; using AffinePoint = ecc::AffinePoint; diff --git a/lib/evmone_precompiles/ecc.hpp b/lib/evmone_precompiles/ecc.hpp index ca95abd9cd..34d2d5189e 100644 --- a/lib/evmone_precompiles/ecc.hpp +++ b/lib/evmone_precompiles/ecc.hpp @@ -513,4 +513,116 @@ ProjPoint msm(const typename Curve::uint_type& u, const AffinePoint +struct SignedScalar +{ + bool sign = false; // The sign of the scalar: false = positive, true = negative. + UIntT value; +}; + + +/// Verifies k ≡ k₁ + k₂·λ (mod N) and checks that k₁ and k₂ are "short" scalars. +template +[[maybe_unused, nodiscard]] bool verify_scalar_decomposition(const typename Curve::uint_type& k, + const SignedScalar& k1, + const SignedScalar& k2) noexcept +{ + // Verify k ≡ k₁ + k₂·λ (mod N). + { + static constexpr ModArith N{Curve::ORDER}; + auto r_k1 = N.to_mont(k1.value); + if (k1.sign) + r_k1 = N.sub(0, r_k1); + auto r_k2 = N.to_mont(k2.value); + if (k2.sign) + r_k2 = N.sub(0, r_k2); + + const auto r_k = N.to_mont(k); + + const auto right = N.add(r_k1, N.mul(r_k2, N.to_mont(Curve::LAMBDA))); + if (r_k != right) + return false; + } + + // Verify for u = (k₁, k₂) that ‖u‖ <= max(‖v₁‖, ‖v₂‖). + { + static constexpr auto V1_NORM_SQUARED = + Curve::X1 * Curve::X1 + Curve::MINUS_Y1 * Curve::MINUS_Y1; + static constexpr auto V2_NORM_SQUARED = Curve::X2 * Curve::X2 + Curve::Y2 * Curve::Y2; + static constexpr auto MAX_NORM_SQUARED = std::max(V1_NORM_SQUARED, V2_NORM_SQUARED); + const auto u_norm_squared = k1.value * k1.value + k2.value * k2.value; + return u_norm_squared <= MAX_NORM_SQUARED; + } +} + +/// Decomposes a scalar k into "short" scalars k₁ and k₂ such that k₁ + k₂·λ ≡ k (mod N). +/// +/// This decomposition allows more efficient scalar multiplication by using the multi-scalar +/// multiplication (MSM) and the GLV endomorphism. +/// The endomorphism ϕ: E → E defined as (x,y) → (βx,y) with eigenvalue λ allows computing +/// [λ](x,y) = (βx,y) with only one multiplication in 𝔽ₚ instead of a full scalar multiplication. +/// +/// Moreover, to compute the short scalars k₁ and k₂, we need linearly independent short vectors +/// (v₁=(x₁,y₁), v₂=(x₂,y₂)) such that f(v₁) = f(v₂) = 0, +/// where f: ℤ×ℤ → ℤₙ is defined as (x,y) → (x + y·λ), where λ² + λ ≡ -1 mod N. +/// +/// See https://www.iacr.org/archive/crypto2001/21390189.pdf for details. +/// +/// The Curve type must provide the endomorphism parameters: LAMBDA, BETA, X1, MINUS_Y1, X2, Y2. +template +std::array, 2> decompose( + const typename Curve::uint_type& k) noexcept +{ + using UIntT = Curve::uint_type; + + // Validate the provided setup parameters. + // λ² + λ ≡ -1 mod n + static_assert((umul(Curve::LAMBDA, Curve::LAMBDA) + Curve::LAMBDA + 1) % Curve::ORDER == 0); + // f: (x, y) → (x + λy) mod N + // f(v₁) = 0 + static_assert( + (Curve::X1 + umul(Curve::ORDER - Curve::MINUS_Y1, Curve::LAMBDA)) % Curve::ORDER == 0); + // f(v₂) = 0 + static_assert((Curve::X2 + umul(Curve::Y2, Curve::LAMBDA)) % Curve::ORDER == 0); + + static constexpr auto round_div = [](const auto& a) noexcept { + // DET is the (v₁, v₂) matrix determinant. + static constexpr auto WIDE_DET = + umul(Curve::X1, Curve::Y2) + umul(Curve::X2, Curve::MINUS_Y1); + static_assert(WIDE_DET <= std::numeric_limits::max()); + static constexpr auto DET = static_cast(WIDE_DET); + static constexpr auto HALF_DET = DET / 2; + + const auto [wide_q, r] = udivrem(a, DET); + // Division reduces the quotient enough to fit into a single uint. + // This can be shown at compile-time by inspecting the DET and Y2/-Y1 values. + assert(wide_q < std::numeric_limits::max()); + const auto q = static_cast(wide_q); + return q + (r > HALF_DET); // Round to nearest. + }; + + // Solve a system of two equations using Cramer method. + // ⎡X1 X2⎤ * ⎡b1⎤ = ⎡k⎤ + // ⎣Y1 Y2⎦ ⎣b2⎦ ⎣0⎦ + // and then approximate to the nearest integers: + // b1 = ⌊ Y2·k ÷ DET⌉ + // b2 = ⌊-Y1·k ÷ DET⌉ + const auto b1 = round_div(umul(k, Curve::Y2)); + const auto b2 = round_div(umul(k, Curve::MINUS_Y1)); + + // k1 = k - (x1*b1 + x2*b2) + const auto x1b1_x2b2 = umul(b1, Curve::X1) + umul(b2, Curve::X2); + const auto [wide_k1, k1_is_neg] = subc(decltype(x1b1_x2b2){k}, x1b1_x2b2); + const auto k1_abs = k1_is_neg ? -static_cast(wide_k1) : static_cast(wide_k1); + + // k2 = 0 - (y1*b1 + y2*b2) + const auto [wide_k2, k2_is_neg] = subc(umul(b1, Curve::MINUS_Y1), umul(b2, Curve::Y2)); + const auto k2_abs = k2_is_neg ? -static_cast(wide_k2) : static_cast(wide_k2); + + const SignedScalar k1{k1_is_neg, k1_abs}; + const SignedScalar k2{k2_is_neg, k2_abs}; + assert(verify_scalar_decomposition(k, k1, k2)); + return {k1, k2}; +} + } // namespace evmmax::ecc diff --git a/test/unittests/evmmax_bn254_mul_test.cpp b/test/unittests/evmmax_bn254_mul_test.cpp index 103e4a8899..2bd24be096 100644 --- a/test/unittests/evmmax_bn254_mul_test.cpp +++ b/test/unittests/evmmax_bn254_mul_test.cpp @@ -9,6 +9,192 @@ using namespace evmmax::bn254; using namespace evmone::test; +TEST(evmmax, bn254_decompose) +{ + struct TestCase + { + uint256 k; + bool sign1 = false; + uint256 k1; + bool sign2 = false; + uint256 k2; + }; + + static const std::vector TEST_CASES = { + {0, false, 0, false, 0}, + {1, false, 1, false, 0}, + { + // For k = LAMBDA, the optimal decomposition is (0, 1) + // but the algorithm returns some other solution. + Curve::LAMBDA, + false, + 0x89d3256894d213e3, + true, + Curve::X2 - 1, + }, + { + Curve::ORDER - Curve::LAMBDA, + false, + 0, + true, + 1, + }, + { + 2 * Curve::ORDER, // DET + false, + 0, + false, + 0, + }, + { + 2 * Curve::ORDER - 1, // DET-1 + true, + 1, + false, + 0, + }, + { + 2 * Curve::ORDER + 1, // DET+1 + false, + 1, + false, + 0, + }, + { + Curve::ORDER, // DET/2 + false, + Curve::Y2, + false, + 0x89d3256894d213e3, + }, + { + Curve::ORDER - 1, // DET/2-1 + false, + Curve::Y2 - 1, + false, + 0x89d3256894d213e3, + }, + { + Curve::ORDER + 1, // DET/2+1 + true, + Curve::Y2 - 1, + true, + 0x89d3256894d213e3, + }, + { + 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff_u256, + true, + 0x272d9e49b8c8ca4335756fc61411a7a3_u128, + false, + 0x3f296ebc4b455178a6a2b71572d476d6_u128, + }, + { + 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff_u256 % Curve::ORDER, + true, + 0x272d9e49b8c8ca42aba24a5d7f3f93c0_u128, + true, + 0x3024138ca3730883db6f04d60a7a9a52_u128, + }, + { + 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff_u256 % + Curve::FIELD_PRIME, + true, + 0x272d9e49b8c8ca3dd335f9b043dce0ca_u128, + false, + 0x3f296ebc4b45517b57c272205aeeda45_u128, + }, + { + Curve::X1, + false, + 0, + false, + Curve::MINUS_Y1, + }, + { + Curve::X2, + false, + Curve::X2, + false, + 0, + }, + { + Curve::MINUS_Y1, + false, + Curve::MINUS_Y1, + false, + 0, + }, + { + Curve::Y2, + true, + 0x89d3256894d213e3, + false, + Curve::MINUS_Y1, + }, + // Fuzzer finds: + { + 0x30644e72e131a029b85045b68181585d2833e84879b9709143e1fd91c6ea5404_u256, + true, + 0x6f4d8248eeb859fd0be4d9563b36d108_u128, + true, + 0x89d3256894d213e3, + }, + { + 0x30644e72e131a029b85045b68181585d9781875b000000000000000000000000_u256, + false, + 0x1cc9978e3571b0392917fddedaf4_u128, + true, + 0x89d3256894d213e3, + }, + { + 0x00b3c4d79d41a917585bfc41088d8daaa78b17e6af48a03bbfd25e8cd0364141_u256, + true, + 0x3b770fc551d2da1732fc9bebf_u128, + false, + 0x100000000000000, + }, + { + 0x30644e72e131a02a6c151d53c32a6fb58431295107473e18805b7c56ba7d94de_u256, + false, + 0x10000000022dfb1619c5c10e10400_u128, + false, + 1, + }, + { + 0xfffffffffc2f0000000000000000000000000000000000000000000000000000_u256, + true, + 0x4b3beb451625cff3c66c0effee355638_u128, + true, + 0x11e8b394e6bd3d13de2f7f7d38887a8c_u128, + }, + { + 0x000000000000000059e26bcea0d48bac65a4e1a8be2302524b7e65dd65dedb2a_u256, + false, + 0x36, + true, + 0x44e992b44a6909f1, + }, + }; + + static constexpr auto decompose = evmmax::ecc::decompose; + + for (const auto& t : TEST_CASES) + { + const auto& [first, second] = decompose(t.k); + const auto& [sign1, k1] = first; + const auto& [sign2, k2] = second; + + SCOPED_TRACE(hex(t.k)); + + EXPECT_EQ(sign1, t.sign1); + EXPECT_EQ(k1, t.k1) << hex(k1) << " != " << hex(t.k1); + EXPECT_EQ(sign2, t.sign2); + EXPECT_EQ(k2, t.k2) << hex(k2) << " != " << hex(t.k2); + + EXPECT_TRUE(verify_scalar_decomposition(t.k, first, second)); + } +} + namespace { struct TestCase