Skip to content

Commit 9acf475

Browse files
committed
deps: V8: cherry-pick c135d5e81f82
Original commit message: [api][strings] Optimize ContainsOnlyOneByte with SIMD Add Highway-based SIMD implementation for checking if strings contain only one-byte (Latin-1) characters. Highway provides portable SIMD abstraction across different architectures with automatic target selection at runtime. The SIMD implementation processes 8 uint16_t values (128 bits) at once, checking if any character has the high byte set. This provides significant speedup over the previous implementation which processed 2-4 values per iteration and only checked every 16 iterations. The optimization applies to both String::ContainsOnlyOneByte() in the public API and the internal IsOnly8Bit() helper used during string hashing. This improves performance for string validation, encoding decisions, and UTF-8/UTF-16 conversion operations. Additionally, it resolves a TODO by leszeks. Change-Id: I41f519339fb96f3bf3f4fa30283f84ccbb5115d0 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/7159233 Reviewed-by: Erik Corry <erikcorry@chromium.org> Commit-Queue: Erik Corry <erikcorry@chromium.org> Reviewed-by: Leszek Swirski <leszeks@chromium.org> Cr-Commit-Position: refs/heads/main@{#104120} Refs: v8/v8@c135d5e
1 parent 1d41c8f commit 9acf475

File tree

5 files changed

+48
-64
lines changed

5 files changed

+48
-64
lines changed

common.gypi

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838

3939
# Reset this number to 0 on major V8 upgrades.
4040
# Increment by one for each non-official patch applied to deps/v8.
41-
'v8_embedder_string': '-node.13',
41+
'v8_embedder_string': '-node.14',
4242

4343
##### V8 defaults for Node.js #####
4444

deps/v8/src/api/api.cc

Lines changed: 3 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,8 @@
159159
#include "src/objects/intl-objects.h"
160160
#endif // V8_INTL_SUPPORT
161161

162+
#include "src/strings/string-hasher-inl.h"
163+
162164
#if V8_OS_LINUX || V8_OS_DARWIN || V8_OS_FREEBSD
163165
#include <signal.h>
164166
#include <unistd.h>
@@ -5575,28 +5577,6 @@ bool String::IsOneByte() const {
55755577
return Utils::OpenDirectHandle(this)->IsOneByteRepresentation();
55765578
}
55775579

5578-
// Helpers for ContainsOnlyOneByteHelper
5579-
template <size_t size>
5580-
struct OneByteMask;
5581-
template <>
5582-
struct OneByteMask<4> {
5583-
static const uint32_t value = 0xFF00FF00;
5584-
};
5585-
template <>
5586-
struct OneByteMask<8> {
5587-
static const uint64_t value = 0xFF00'FF00'FF00'FF00;
5588-
};
5589-
static const uintptr_t kOneByteMask = OneByteMask<sizeof(uintptr_t)>::value;
5590-
static const uintptr_t kAlignmentMask = sizeof(uintptr_t) - 1;
5591-
static inline bool Unaligned(const uint16_t* chars) {
5592-
return reinterpret_cast<const uintptr_t>(chars) & kAlignmentMask;
5593-
}
5594-
5595-
static inline const uint16_t* Align(const uint16_t* chars) {
5596-
return reinterpret_cast<uint16_t*>(reinterpret_cast<uintptr_t>(chars) &
5597-
~kAlignmentMask);
5598-
}
5599-
56005580
class ContainsOnlyOneByteHelper {
56015581
public:
56025582
ContainsOnlyOneByteHelper() : is_one_byte_(true) {}
@@ -5613,35 +5593,7 @@ class ContainsOnlyOneByteHelper {
56135593
// Nothing to do.
56145594
}
56155595
void VisitTwoByteString(const uint16_t* chars, int length) {
5616-
// Accumulated bits.
5617-
uintptr_t acc = 0;
5618-
// Align to uintptr_t.
5619-
const uint16_t* end = chars + length;
5620-
while (Unaligned(chars) && chars != end) {
5621-
acc |= *chars++;
5622-
}
5623-
// Read word aligned in blocks,
5624-
// checking the return value at the end of each block.
5625-
const uint16_t* aligned_end = Align(end);
5626-
const int increment = sizeof(uintptr_t) / sizeof(uint16_t);
5627-
const int inner_loops = 16;
5628-
while (chars + inner_loops * increment < aligned_end) {
5629-
for (int i = 0; i < inner_loops; i++) {
5630-
acc |= *reinterpret_cast<const uintptr_t*>(chars);
5631-
chars += increment;
5632-
}
5633-
// Check for early return.
5634-
if ((acc & kOneByteMask) != 0) {
5635-
is_one_byte_ = false;
5636-
return;
5637-
}
5638-
}
5639-
// Read the rest.
5640-
while (chars != end) {
5641-
acc |= *chars++;
5642-
}
5643-
// Check result.
5644-
if ((acc & kOneByteMask) != 0) is_one_byte_ = false;
5596+
is_one_byte_ = internal::detail::IsOnly8Bit(chars, length);
56455597
}
56465598

56475599
private:

deps/v8/src/strings/string-hasher-inl.h

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,6 @@
1111
#include "src/common/globals.h"
1212
#include "src/utils/utils.h"
1313

14-
#ifdef __SSE2__
15-
#include <emmintrin.h>
16-
#elif defined(__ARM_NEON__)
17-
#include <arm_neon.h>
18-
#endif
19-
2014
// Comment inserted to prevent header reordering.
2115
#include <type_traits>
2216

@@ -43,14 +37,18 @@ uint32_t ConvertRawHashToUsableHash(T raw_hash) {
4337
}
4438

4539
V8_INLINE bool IsOnly8Bit(const uint16_t* chars, unsigned len) {
46-
// TODO(leszeks): This could be SIMD for efficiency on large strings, if we
47-
// need it.
48-
for (unsigned i = 0; i < len; ++i) {
49-
if (chars[i] > 255) {
50-
return false;
40+
// For small strings, use a simple scalar loop to avoid SIMD overhead.
41+
// Threshold of 16 is chosen to balance setup cost vs benefit.
42+
if (len <= 16) {
43+
for (unsigned i = 0; i < len; i++) {
44+
if (chars[i] > 0xFF) {
45+
return false;
46+
}
5147
}
48+
return true;
5249
}
53-
return true;
50+
// For larger strings, use the non-inlined SIMD implementation.
51+
return IsOnly8BitSIMD(chars, len);
5452
}
5553

5654
V8_INLINE uint64_t GetRapidHash(const uint8_t* chars, uint32_t length,

deps/v8/src/strings/string-hasher.cc

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
#include "src/strings/string-hasher.h"
77

8+
#include "hwy/highway.h"
89
#include "src/strings/string-hasher-inl.h"
910

1011
namespace v8::internal {
@@ -73,6 +74,33 @@ uint64_t HashConvertingTo8Bit(const uint16_t* chars, uint32_t length,
7374
return rapidhash<ConvertTo8BitHashReader>(
7475
reinterpret_cast<const uint8_t*>(chars), length, seed, secret);
7576
}
77+
78+
bool IsOnly8BitSIMD(const uint16_t* chars, unsigned len) {
79+
namespace hw = hwy::HWY_NAMESPACE;
80+
hw::FixedTag<uint16_t, 8> tag;
81+
const size_t stride = hw::Lanes(tag);
82+
const auto high_byte_mask = hw::Set(tag, static_cast<uint16_t>(0xFF00));
83+
const auto zero = hw::Zero(tag);
84+
85+
const uint16_t* end = chars + len;
86+
while (chars + stride <= end) {
87+
const auto data = hw::LoadU(tag, chars);
88+
const auto high_bytes = hw::And(data, high_byte_mask);
89+
const auto cmp = hw::Eq(high_bytes, zero);
90+
if (!hw::AllTrue(tag, cmp)) {
91+
return false;
92+
}
93+
chars += stride;
94+
}
95+
// Handle remaining characters.
96+
while (chars < end) {
97+
if (*chars > 0xFF) {
98+
return false;
99+
}
100+
chars++;
101+
}
102+
return true;
103+
}
76104
} // namespace detail
77105

78106
} // namespace v8::internal

deps/v8/src/strings/string-hasher.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,12 @@ class Vector;
1717

1818
namespace internal {
1919

20+
namespace detail {
21+
// Non-inlined SIMD implementation for checking if a uint16_t string contains
22+
// only Latin1 characters. Used by the inline IsOnly8Bit wrapper.
23+
V8_EXPORT_PRIVATE bool IsOnly8BitSIMD(const uint16_t* chars, unsigned len);
24+
} // namespace detail
25+
2026
// A simple incremental string hasher. Slow but allows for special casing each
2127
// individual character.
2228
class RunningStringHasher final {

0 commit comments

Comments
 (0)