Skip to content

Commit 89a422f

Browse files
committed
improve
1 parent 8444dd5 commit 89a422f

File tree

1 file changed

+26 -26 lines changed

1 file changed

+26 -26 lines changed

cp-algo/math/sieve.hpp

Lines changed: 26 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -134,50 +134,50 @@ namespace cp_algo::math {
134134
uint32_t wl = l / width;
135135
uint32_t wr = (r + width - 1) / width;
136136
uint32_t N = (uint32_t)wheel.mask.words;
137-
138-
for (uint32_t i = wl; i < wr; i += N) {
139-
uint32_t block = std::min(N, wr - i);
140-
uint32_t j = 0;
141-
for (; j + 4 <= block; j += 4) {
142-
auto &p_vec = vector_cast<u64x4>(prime.word(i + j));
143-
auto m_vec = vector_cast<const u64x4>(wheel.mask.word(j));
144-
p_vec &= m_vec;
145-
}
146-
for (; j < block; j++) {
147-
prime.word(i + j) &= wheel.mask.word(j);
137+
auto loop = [&](uint32_t i, uint32_t block) {
138+
auto p_ptr = std::assume_aligned<32>(&prime.word(i));
139+
auto m_ptr = std::assume_aligned<32>(&wheel.mask.word(0));
140+
#pragma GCC unroll coprime
141+
for (uint32_t j = 0; j < block; j++) {
142+
p_ptr[j] &= m_ptr[j];
148143
}
144+
};
145+
while (wl + N <= wr) {
146+
loop(wl, N);
147+
wl += N;
149148
}
149+
loop(wl, wr - wl);
150150
}
151151

152152
template <class BitArray>
153153
constexpr void sieve210(BitArray& prime, uint32_t l, uint32_t r, size_t i, int state) {
154-
static const auto [ord_step, step_sum] = []() {
155-
big_vector<std::array<uint32_t, 2 * coprime>> ord_steps(num_primes);
156-
big_vector<uint32_t> sums(num_primes);
154+
static const auto ord_step = []() {
155+
std::array<std::array<uint32_t, 2 * coprime>, num_primes> ord_steps;
157156
for (uint32_t i = 0; i < size(sqrt_primes); i++) {
158157
auto p = sqrt_primes[i];
158+
auto &ords = ord_steps[i];
159+
auto last = to_ord(p);
159160
for(uint32_t j = 0; j < coprime; j++) {
160-
ord_steps[i][j] = to_ord(p * (res210[j] + gap210[j])) - to_ord(p * res210[j]);
161-
}
162-
sums[i] = std::ranges::fold_left(ord_steps[i], 0u, std::plus{});
163-
for(uint32_t j = 0; j < coprime; j++) {
164-
ord_steps[i][j + coprime] = ord_steps[i][j];
161+
auto next = to_ord(p * (res210[j] + gap210[j]));
162+
ords[j] = ords[j + coprime] = next - last;
163+
last = next;
165164
}
166165
}
167-
return std::pair{ord_steps, sums};
166+
return ord_steps;
168167
}();
169-
while (l + step_sum[i] <= r) {
168+
auto advance = [&]() {
169+
prime.reset(std::exchange(l, l + ord_step[i][state++]));
170+
};
171+
uint32_t p = sqrt_primes[i];
172+
while (l + p * coprime <= r) {
170173
#pragma GCC unroll coprime
171174
for (size_t j = 0; j < coprime; j++) {
172-
prime.reset(l);
173-
l += ord_step[i][state++];
175+
advance();
174176
}
175177
state -= coprime;
176178
}
177179
while (l < r) {
178-
prime.reset(l);
179-
l += ord_step[i][state++];
180-
state = state == coprime ? 0 : state;
180+
advance();
181181
}
182182
}
183183

0 commit comments

Comments
 (0)