diff --git a/src/encoding_binding.cc b/src/encoding_binding.cc index 1506442f704ee1..7683d205aa6a3e 100644 --- a/src/encoding_binding.cc +++ b/src/encoding_binding.cc @@ -379,6 +379,8 @@ void BindingData::DecodeUTF8(const FunctionCallbackInfo& args) { return node::THROW_ERR_ENCODING_INVALID_ENCODED_DATA( env->isolate(), "The encoded data was not valid for encoding utf-8"); } + + // TODO(chalker): save on utf8 validity recheck in StringBytes::Encode() } if (length == 0) return args.GetReturnValue().SetEmptyString(); diff --git a/src/string_bytes.cc b/src/string_bytes.cc index 8f6bedd63e25b3..67131eb8cf7b7a 100644 --- a/src/string_bytes.cc +++ b/src/string_bytes.cc @@ -539,6 +539,24 @@ MaybeLocal StringBytes::Encode(Isolate* isolate, return ExternOneByteString::NewFromCopy(isolate, buf, buflen); } + if (simdutf::validate_utf8(buf, buflen)) { + // We know that we are non-ASCII (and are unlikely Latin1), use 2-byte + // In the most likely case of valid UTF-8, we can use this fast impl + size_t u16size = simdutf::utf16_length_from_utf8(buf, buflen); + if (u16size > static_cast(v8::String::kMaxLength)) { + isolate->ThrowException(ERR_STRING_TOO_LONG(isolate)); + return MaybeLocal(); + } + uint16_t* dst = node::UncheckedMalloc(u16size); + if (u16size != 0 && dst == nullptr) { + THROW_ERR_MEMORY_ALLOCATION_FAILED(isolate); + return MaybeLocal(); + } + size_t utf16len = simdutf::convert_valid_utf8_to_utf16( + buf, buflen, reinterpret_cast(dst)); + return ExternTwoByteString::New(isolate, dst, utf16len); + } + val = String::NewFromUtf8(isolate, buf, v8::NewStringType::kNormal, buflen); Local str;