From a215fe8e54c10db2dcc60b8004fb708d4713ff59 Mon Sep 17 00:00:00 2001 From: Dan Carney Date: Fri, 21 Nov 2025 08:29:57 +0000 Subject: [PATCH] deps: V8: cherry-pick 67507b2a88f4 Original commit message: Reland "use highway to check and copy leading ascii" This is a reland of commit a3e84e5f01540cec142f4d4f41f1921373c220e5 Original change's description: > use highway to check and copy leading ascii > > Change-Id: I065532aeeee95273821aa1f25b5ffc5c5c23cbf1 > Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/7172479 > Reviewed-by: Patrick Thier > Reviewed-by: Toon Verwaest > Commit-Queue: Dan Carney > Cr-Commit-Position: refs/heads/main@{#103820} Change-Id: I43b4ad18817eb52b701e112d2d0a5f685374ae1f Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/7184338 Reviewed-by: Toon Verwaest Reviewed-by: Patrick Thier Commit-Queue: Dan Carney Cr-Commit-Position: refs/heads/main@{#103865} Refs: https://github.com/v8/v8/commit/67507b2a88f4cf6eb18ed351661d218e498fa3e7 --- common.gypi | 2 +- deps/v8/src/strings/unicode-inl.h | 20 ++++------------ deps/v8/src/strings/unicode.cc | 39 +++++++++++++++++++++++++++++++ deps/v8/src/strings/unicode.h | 10 ++++---- 4 files changed, 49 insertions(+), 22 deletions(-) diff --git a/common.gypi b/common.gypi index a4825c5429d761..c58aa7fd89305d 100644 --- a/common.gypi +++ b/common.gypi @@ -38,7 +38,7 @@ # Reset this number to 0 on major V8 upgrades. # Increment by one for each non-official patch applied to deps/v8. 
- 'v8_embedder_string': '-node.12', + 'v8_embedder_string': '-node.13', ##### V8 defaults for Node.js ##### diff --git a/deps/v8/src/strings/unicode-inl.h b/deps/v8/src/strings/unicode-inl.h index 782ff2ab500ee1..25f3d0375e7f1a 100644 --- a/deps/v8/src/strings/unicode-inl.h +++ b/deps/v8/src/strings/unicode-inl.h @@ -206,16 +206,6 @@ bool Utf8::IsValidCharacter(uchar c) { c != kBadChar); } -template <> -bool Utf8::IsAsciiOneByteString<uint8_t>(const uint8_t* buffer, size_t size) { - return simdutf::validate_ascii(reinterpret_cast<const char*>(buffer), size); -} - -template <> -bool Utf8::IsAsciiOneByteString<uint16_t>(const uint16_t* buffer, size_t size) { - return false; -} - template <typename Char> Utf8::EncodingResult Utf8::Encode(v8::base::Vector<const Char> string, char* buffer, size_t capacity, @@ -234,12 +224,10 @@ Utf8::EncodingResult Utf8::Encode(v8::base::Vector<const Char> string, size_t read_index = 0; if (kSourceIsOneByte) { size_t writeable = std::min(string.size(), content_capacity); - // Just memcpy when possible. - if (writeable > 0 && Utf8::IsAsciiOneByteString(characters, writeable)) { - memcpy(buffer, characters, writeable); - read_index = writeable; - write_index = writeable; - } + size_t ascii_length = + Utf8::WriteLeadingAscii(characters, buffer, writeable); + read_index = ascii_length; + write_index = ascii_length; } uint16_t last = Utf16::kNoPreviousCharacter; for (; read_index < string.size(); read_index++) { diff --git a/deps/v8/src/strings/unicode.cc b/deps/v8/src/strings/unicode.cc index 3f318556787f2a..d213ea68e8ad1d 100644 --- a/deps/v8/src/strings/unicode.cc +++ b/deps/v8/src/strings/unicode.cc @@ -21,10 +21,49 @@ #include "unicode/uchar.h" #endif +#include "hwy/highway.h" #include "third_party/simdutf/simdutf.h" namespace unibrow { +template <> +size_t Utf8::WriteLeadingAscii<uint8_t>(const uint8_t* src, char* dest, + size_t length) { + namespace hw = hwy::HWY_NAMESPACE; + const hw::ScalableTag<int8_t> d; + const size_t N = hw::Lanes(d); + // Don't bother with simd if the string isn't long enough. 
We're using 2 + // registers, so don't enter the loop unless we can iterate 2 times through. + if (length < 4 * N) { + return 0; + } + // We're checking ascii by checking the sign bit so make the strings signed. + const int8_t* src_s = reinterpret_cast<const int8_t*>(src); + int8_t* dst_s = reinterpret_cast<int8_t*>(dest); + size_t i = 0; + DCHECK_GE(length, 2 * N); + for (; i <= length - 2 * N; i += 2 * N) { + const auto v0 = hw::LoadU(d, src_s + i); + const auto v1 = hw::LoadU(d, src_s + i + N); + const auto combined = hw::Or(v0, v1); + bool is_ascii = hw::AllTrue(d, hw::Ge(combined, hw::Zero(d))); + if (is_ascii) { + hw::StoreU(v0, d, dst_s + i); + hw::StoreU(v1, d, dst_s + i + N); + } else { + break; + } + } + return i; +} + +template <> +size_t Utf8::WriteLeadingAscii<uint16_t>(const uint16_t* src, char* dest, + size_t size) { + // TODO(dcarney): this could be implemented similarly to the one byte variant + return 0; +} + #ifndef V8_INTL_SUPPORT static const int kStartBit = (1 << 30); static const int kChunkBits = (1 << 13); diff --git a/deps/v8/src/strings/unicode.h b/deps/v8/src/strings/unicode.h index e8e9cedceeadc9..1d6be916f773ab 100644 --- a/deps/v8/src/strings/unicode.h +++ b/deps/v8/src/strings/unicode.h @@ -213,7 +213,7 @@ class V8_EXPORT_PRIVATE Utf8 { static bool ValidateEncoding(const uint8_t* str, size_t length); template <typename Char> - static bool IsAsciiOneByteString(const Char* buffer, size_t size); + static size_t WriteLeadingAscii(const Char* src, char* dest, size_t size); // Encode the given characters as Utf8 into the provided output buffer. 
struct EncodingResult { @@ -227,12 +227,12 @@ class V8_EXPORT_PRIVATE Utf8 { }; template <> -inline bool Utf8::IsAsciiOneByteString<uint8_t>(const uint8_t* buffer, - size_t size); +size_t Utf8::WriteLeadingAscii<uint8_t>(const uint8_t* src, char* dest, + size_t size); template <> -inline bool Utf8::IsAsciiOneByteString<uint16_t>(const uint16_t* buffer, - size_t size); +size_t Utf8::WriteLeadingAscii<uint16_t>(const uint16_t* src, char* dest, + size_t size); #if V8_ENABLE_WEBASSEMBLY class V8_EXPORT_PRIVATE Wtf8 {