Skip to content

Commit fd5bd44

Browse files
rodiazetchfast
andcommitted
Optimize BN254 ecmul with field endomorphism
Add `ecc::decompose()` procedure to split ECC scalar into two smaller ones. Use the decomposition to speed up BN254 scalar multiplication. Co-authored-by: Paweł Bylica <pawel@hepcolgum.band>
1 parent eeb288a commit fd5bd44

File tree

4 files changed

+326
-1
lines changed

4 files changed

+326
-1
lines changed

lib/evmone_precompiles/bn254.cpp

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,17 @@ bool validate(const AffinePoint& pt) noexcept
1818

1919
AffinePoint mul(const AffinePoint& pt, const uint256& c) noexcept
2020
{
21-
const auto pr = ecc::mul(pt, c);
21+
if (pt == 0)
22+
return pt;
23+
24+
if (c == 0)
25+
return {};
26+
27+
const auto [k1, k2] = ecc::decompose<Curve>(c);
28+
29+
const auto q = AffinePoint{Curve::BETA * pt.x, !k2.sign ? pt.y : -pt.y};
30+
const auto p = AffinePoint{pt.x, !k1.sign ? pt.y : -pt.y};
31+
const auto pr = msm(k1.value, p, k2.value, q);
2232
return ecc::to_affine(pr);
2333
}
2434
} // namespace evmmax::bn254

lib/evmone_precompiles/bn254.hpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,23 @@ struct Curve
3131

3232
static constexpr auto A = 0;
3333
static constexpr auto B = ecc::FieldElement<Curve>(3);
34+
35+
/// Endomorphism parameters. See ecc::decompose().
36+
/// @{
37+
/// λ
38+
static constexpr auto LAMBDA = 0xb3c4d79d41a917585bfc41088d8daaa78b17ea66b99c90dd_u256;
39+
/// β
40+
static constexpr ecc::FieldElement<Curve> BETA{
41+
0x59e26bcea0d48bacd4f263f1acdb5c4f5763473177fffffe_u256};
42+
/// x₁
43+
static constexpr auto X1 = 0x6f4d8248eeb859fd95b806bca6f338ee_u256;
44+
/// -y₁
45+
static constexpr auto MINUS_Y1 = 0x6f4d8248eeb859fbf83e9682e87cfd45_u256;
46+
/// x₂
47+
static constexpr auto X2 = 0x6f4d8248eeb859fc8211bbeb7d4f1128_u256;
48+
/// y₂
49+
static constexpr auto Y2 = 0x6f4d8248eeb859fd0be4e1541221250b_u256;
50+
/// @}
3451
};
3552

3653
using AffinePoint = ecc::AffinePoint<Curve>;

lib/evmone_precompiles/ecc.hpp

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -513,4 +513,116 @@ ProjPoint<Curve> msm(const typename Curve::uint_type& u, const AffinePoint<Curve
513513
return r;
514514
}
515515

516+
template <typename UIntT>
517+
struct SignedScalar
518+
{
519+
bool sign = false; // The sign of the scalar: false = positive, true = negative.
520+
UIntT value;
521+
};
522+
523+
524+
/// Verifies k ≡ k₁ + k₂·λ (mod N) and checks that k₁ and k₂ are "short" scalars.
525+
template <typename Curve>
526+
[[maybe_unused, nodiscard]] bool verify_scalar_decomposition(const typename Curve::uint_type& k,
527+
const SignedScalar<typename Curve::uint_type>& k1,
528+
const SignedScalar<typename Curve::uint_type>& k2) noexcept
529+
{
530+
// Verify k ≡ k₁ + k₂·λ (mod N).
531+
{
532+
static constexpr ModArith N{Curve::ORDER};
533+
auto r_k1 = N.to_mont(k1.value);
534+
if (k1.sign)
535+
r_k1 = N.sub(0, r_k1);
536+
auto r_k2 = N.to_mont(k2.value);
537+
if (k2.sign)
538+
r_k2 = N.sub(0, r_k2);
539+
540+
const auto r_k = N.to_mont(k);
541+
542+
const auto right = N.add(r_k1, N.mul(r_k2, N.to_mont(Curve::LAMBDA)));
543+
if (r_k != right)
544+
return false;
545+
}
546+
547+
// Verify for u = (k₁, k₂) that ‖u‖ <= max(‖v₁‖, ‖v₂‖).
548+
{
549+
static constexpr auto V1_NORM_SQUARED =
550+
Curve::X1 * Curve::X1 + Curve::MINUS_Y1 * Curve::MINUS_Y1;
551+
static constexpr auto V2_NORM_SQUARED = Curve::X2 * Curve::X2 + Curve::Y2 * Curve::Y2;
552+
static constexpr auto MAX_NORM_SQUARED = std::max(V1_NORM_SQUARED, V2_NORM_SQUARED);
553+
const auto u_norm_squared = k1.value * k1.value + k2.value * k2.value;
554+
return u_norm_squared <= MAX_NORM_SQUARED;
555+
}
556+
}
557+
558+
/// Decomposes a scalar k into "short" scalars k₁ and k₂ such that k₁ + k₂·λ ≡ k (mod N).
559+
///
560+
/// This decomposition allows more efficient scalar multiplication by using the multi-scalar
561+
/// multiplication (MSM) and the GLV endomorphism.
562+
/// The endomorphism ϕ: E → E defined as (x,y) → (βx,y) with eigenvalue λ allows computing
563+
/// [λ](x,y) = (βx,y) with only one multiplication in 𝔽ₚ instead of a full scalar multiplication.
564+
///
565+
/// Moreover, to compute the short scalars k₁ and k₂, we need linearly independent short vectors
566+
/// (v₁=(x₁,y₁), v₂=(x₂,y₂)) such that f(v₁) = f(v₂) = 0,
567+
/// where f: ℤ×ℤ → ℤₙ is defined as (x,y) → (x + y·λ), where λ² + λ ≡ -1 mod N.
568+
///
569+
/// See https://www.iacr.org/archive/crypto2001/21390189.pdf for details.
570+
///
571+
/// The Curve type must provide the endomorphism parameters: LAMBDA, BETA, X1, MINUS_Y1, X2, Y2.
572+
template <typename Curve>
573+
std::array<SignedScalar<typename Curve::uint_type>, 2> decompose(
574+
const typename Curve::uint_type& k) noexcept
575+
{
576+
using UIntT = Curve::uint_type;
577+
578+
// Validate the provided setup parameters.
579+
// λ² + λ ≡ -1 mod n
580+
static_assert((umul(Curve::LAMBDA, Curve::LAMBDA) + Curve::LAMBDA + 1) % Curve::ORDER == 0);
581+
// f: (x, y) → (x + λy) mod N
582+
// f(v₁) = 0
583+
static_assert(
584+
(Curve::X1 + umul(Curve::ORDER - Curve::MINUS_Y1, Curve::LAMBDA)) % Curve::ORDER == 0);
585+
// f(v₂) = 0
586+
static_assert((Curve::X2 + umul(Curve::Y2, Curve::LAMBDA)) % Curve::ORDER == 0);
587+
588+
static constexpr auto round_div = [](const auto& a) noexcept {
589+
// DET is the (v₁, v₂) matrix determinant.
590+
static constexpr auto WIDE_DET =
591+
umul(Curve::X1, Curve::Y2) + umul(Curve::X2, Curve::MINUS_Y1);
592+
static_assert(WIDE_DET <= std::numeric_limits<UIntT>::max());
593+
static constexpr auto DET = static_cast<UIntT>(WIDE_DET);
594+
static constexpr auto HALF_DET = DET / 2;
595+
596+
const auto [wide_q, r] = udivrem(a, DET);
597+
// Division reduces the quotient enough to fit into a single uint.
598+
// This can be shown at compile-time by inspecting the DET and Y2/-Y1 values.
599+
assert(wide_q < std::numeric_limits<UIntT>::max());
600+
const auto q = static_cast<UIntT>(wide_q);
601+
return q + (r > HALF_DET); // Round to nearest.
602+
};
603+
604+
// Solve a system of two equations using Cramer method.
605+
// ⎡X1 X2⎤ * ⎡b1⎤ = ⎡k⎤
606+
// ⎣Y1 Y2⎦ ⎣b2⎦ ⎣0⎦
607+
// and then approximate to the nearest integers:
608+
// b1 = ⌊ Y2·k ÷ DET⌉
609+
// b2 = ⌊-Y1·k ÷ DET⌉
610+
const auto b1 = round_div(umul(k, Curve::Y2));
611+
const auto b2 = round_div(umul(k, Curve::MINUS_Y1));
612+
613+
// k1 = k - (x1*b1 + x2*b2)
614+
const auto x1b1_x2b2 = umul(b1, Curve::X1) + umul(b2, Curve::X2);
615+
const auto [wide_k1, k1_is_neg] = subc(decltype(x1b1_x2b2){k}, x1b1_x2b2);
616+
const auto k1_abs = k1_is_neg ? -static_cast<UIntT>(wide_k1) : static_cast<UIntT>(wide_k1);
617+
618+
// k2 = 0 - (y1*b1 + y2*b2)
619+
const auto [wide_k2, k2_is_neg] = subc(umul(b1, Curve::MINUS_Y1), umul(b2, Curve::Y2));
620+
const auto k2_abs = k2_is_neg ? -static_cast<UIntT>(wide_k2) : static_cast<UIntT>(wide_k2);
621+
622+
const SignedScalar k1{k1_is_neg, k1_abs};
623+
const SignedScalar k2{k2_is_neg, k2_abs};
624+
assert(verify_scalar_decomposition<Curve>(k, k1, k2));
625+
return {k1, k2};
626+
}
627+
516628
} // namespace evmmax::ecc

test/unittests/evmmax_bn254_mul_test.cpp

Lines changed: 186 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,192 @@
99
using namespace evmmax::bn254;
1010
using namespace evmone::test;
1111

12+
TEST(evmmax, bn254_decompose)
13+
{
14+
struct TestCase
15+
{
16+
uint256 k;
17+
bool sign1 = false;
18+
uint256 k1;
19+
bool sign2 = false;
20+
uint256 k2;
21+
};
22+
23+
static const std::vector<TestCase> TEST_CASES = {
24+
{0, false, 0, false, 0},
25+
{1, false, 1, false, 0},
26+
{
27+
// For k = LAMBDA, the optimal decomposition is (0, 1)
28+
// but the algorithm returns some other solution.
29+
Curve::LAMBDA,
30+
false,
31+
0x89d3256894d213e3,
32+
true,
33+
Curve::X2 - 1,
34+
},
35+
{
36+
Curve::ORDER - Curve::LAMBDA,
37+
false,
38+
0,
39+
true,
40+
1,
41+
},
42+
{
43+
2 * Curve::ORDER, // DET
44+
false,
45+
0,
46+
false,
47+
0,
48+
},
49+
{
50+
2 * Curve::ORDER - 1, // DET-1
51+
true,
52+
1,
53+
false,
54+
0,
55+
},
56+
{
57+
2 * Curve::ORDER + 1, // DET+1
58+
false,
59+
1,
60+
false,
61+
0,
62+
},
63+
{
64+
Curve::ORDER, // DET/2
65+
false,
66+
Curve::Y2,
67+
false,
68+
0x89d3256894d213e3,
69+
},
70+
{
71+
Curve::ORDER - 1, // DET/2-1
72+
false,
73+
Curve::Y2 - 1,
74+
false,
75+
0x89d3256894d213e3,
76+
},
77+
{
78+
Curve::ORDER + 1, // DET/2+1
79+
true,
80+
Curve::Y2 - 1,
81+
true,
82+
0x89d3256894d213e3,
83+
},
84+
{
85+
0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff_u256,
86+
true,
87+
0x272d9e49b8c8ca4335756fc61411a7a3_u128,
88+
false,
89+
0x3f296ebc4b455178a6a2b71572d476d6_u128,
90+
},
91+
{
92+
0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff_u256 % Curve::ORDER,
93+
true,
94+
0x272d9e49b8c8ca42aba24a5d7f3f93c0_u128,
95+
true,
96+
0x3024138ca3730883db6f04d60a7a9a52_u128,
97+
},
98+
{
99+
0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff_u256 %
100+
Curve::FIELD_PRIME,
101+
true,
102+
0x272d9e49b8c8ca3dd335f9b043dce0ca_u128,
103+
false,
104+
0x3f296ebc4b45517b57c272205aeeda45_u128,
105+
},
106+
{
107+
Curve::X1,
108+
false,
109+
0,
110+
false,
111+
Curve::MINUS_Y1,
112+
},
113+
{
114+
Curve::X2,
115+
false,
116+
Curve::X2,
117+
false,
118+
0,
119+
},
120+
{
121+
Curve::MINUS_Y1,
122+
false,
123+
Curve::MINUS_Y1,
124+
false,
125+
0,
126+
},
127+
{
128+
Curve::Y2,
129+
true,
130+
0x89d3256894d213e3,
131+
false,
132+
Curve::MINUS_Y1,
133+
},
134+
// Fuzzer finds:
135+
{
136+
0x30644e72e131a029b85045b68181585d2833e84879b9709143e1fd91c6ea5404_u256,
137+
true,
138+
0x6f4d8248eeb859fd0be4d9563b36d108_u128,
139+
true,
140+
0x89d3256894d213e3,
141+
},
142+
{
143+
0x30644e72e131a029b85045b68181585d9781875b000000000000000000000000_u256,
144+
false,
145+
0x1cc9978e3571b0392917fddedaf4_u128,
146+
true,
147+
0x89d3256894d213e3,
148+
},
149+
{
150+
0x00b3c4d79d41a917585bfc41088d8daaa78b17e6af48a03bbfd25e8cd0364141_u256,
151+
true,
152+
0x3b770fc551d2da1732fc9bebf_u128,
153+
false,
154+
0x100000000000000,
155+
},
156+
{
157+
0x30644e72e131a02a6c151d53c32a6fb58431295107473e18805b7c56ba7d94de_u256,
158+
false,
159+
0x10000000022dfb1619c5c10e10400_u128,
160+
false,
161+
1,
162+
},
163+
{
164+
0xfffffffffc2f0000000000000000000000000000000000000000000000000000_u256,
165+
true,
166+
0x4b3beb451625cff3c66c0effee355638_u128,
167+
true,
168+
0x11e8b394e6bd3d13de2f7f7d38887a8c_u128,
169+
},
170+
{
171+
0x000000000000000059e26bcea0d48bac65a4e1a8be2302524b7e65dd65dedb2a_u256,
172+
false,
173+
0x36,
174+
true,
175+
0x44e992b44a6909f1,
176+
},
177+
};
178+
179+
static constexpr auto decompose = evmmax::ecc::decompose<Curve>;
180+
181+
for (const auto& t : TEST_CASES)
182+
{
183+
const auto& [first, second] = decompose(t.k);
184+
const auto& [sign1, k1] = first;
185+
const auto& [sign2, k2] = second;
186+
187+
SCOPED_TRACE(hex(t.k));
188+
189+
EXPECT_EQ(sign1, t.sign1);
190+
EXPECT_EQ(k1, t.k1) << hex(k1) << " != " << hex(t.k1);
191+
EXPECT_EQ(sign2, t.sign2);
192+
EXPECT_EQ(k2, t.k2) << hex(k2) << " != " << hex(t.k2);
193+
194+
EXPECT_TRUE(verify_scalar_decomposition<Curve>(t.k, first, second));
195+
}
196+
}
197+
12198
namespace
13199
{
14200
struct TestCase

0 commit comments

Comments
 (0)