Skip to content

Commit 0847010

Browse files
ChALkeR and mertcanaltin committed
src: move all 1-byte encodings to native
Co-authored-by: Mert Can Altin <mertgold60@gmail.com>
1 parent 70ec5c0 commit 0847010

File tree

8 files changed

+518
-160
lines changed

8 files changed

+518
-160
lines changed

lib/internal/encoding.js

Lines changed: 46 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,13 @@
44
// https://encoding.spec.whatwg.org
55

66
const {
7+
ArrayPrototypeMap,
78
Boolean,
89
ObjectDefineProperties,
910
ObjectGetOwnPropertyDescriptors,
1011
ObjectSetPrototypeOf,
1112
ObjectValues,
13+
SafeArrayIterator,
1214
SafeMap,
1315
StringPrototypeSlice,
1416
Symbol,
@@ -32,8 +34,6 @@ const kFatal = Symbol('kFatal');
3234
const kUTF8FastPath = Symbol('kUTF8FastPath');
3335
const kIgnoreBOM = Symbol('kIgnoreBOM');
3436

35-
const { isSinglebyteEncoding, createSinglebyteDecoder } = require('internal/encoding/single-byte');
36-
3737
const {
3838
getConstructorOf,
3939
customInspectSymbol: inspect,
@@ -58,6 +58,7 @@ const {
5858
encodeIntoResults,
5959
encodeUtf8String,
6060
decodeUTF8,
61+
decodeSingleByte,
6162
} = binding;
6263

6364
function validateDecoder(obj) {
@@ -71,6 +72,47 @@ const CONVERTER_FLAGS_IGNORE_BOM = 0x4;
7172

7273
const empty = new FastBuffer();
7374

75+
// Maps every supported single-byte encoding name to its position in the list
// below (name -> index). That index is what createSinglebyteDecoder() passes
// to the native decodeSingleByte() binding as the encoding id, so the order
// of this list has to be kept in sync with src/: the native side indexes its
// per-encoding tables with it and expects 'x-user-defined' to be index 28
// (kXUserDefined in src/encoding_binding.cc).
76+
const encodingsSinglebyte = new SafeMap(new SafeArrayIterator(ArrayPrototypeMap([
77+
'ibm866',
78+
'koi8-r',
79+
'koi8-u',
80+
'macintosh',
81+
'x-mac-cyrillic',
82+
'iso-8859-2',
83+
'iso-8859-3',
84+
'iso-8859-4',
85+
'iso-8859-5',
86+
'iso-8859-6',
87+
'iso-8859-7',
88+
'iso-8859-8',
89+
'iso-8859-8-i',
90+
'iso-8859-10',
91+
'iso-8859-13',
92+
'iso-8859-14',
93+
'iso-8859-15',
94+
'iso-8859-16',
95+
'windows-874',
96+
'windows-1250',
97+
'windows-1251',
98+
'windows-1252',
99+
'windows-1253',
100+
'windows-1254',
101+
'windows-1255',
102+
'windows-1256',
103+
'windows-1257',
104+
'windows-1258',
105+
'x-user-defined', // Has to be last, special case
106+
], (e, i) => [e, i])));
107+
108+
// Returns true when `enc` is one of the keys of encodingsSinglebyte, i.e. an
// encoding that can be handed to createSinglebyteDecoder().
const isSinglebyteEncoding = (enc) => encodingsSinglebyte.has(enc);
109+
110+
// Builds a decode function for the given single-byte `encoding`.
// Throws ERR_ENCODING_NOT_SUPPORTED when `encoding` is not a key of
// encodingsSinglebyte. The returned function forwards its buffer argument,
// the numeric encoding index and `fatal` to the native decodeSingleByte()
// binding and returns whatever that binding returns.
function createSinglebyteDecoder(encoding, fatal) {
111+
// Numeric id of the encoding: its position in the encodingsSinglebyte list.
const key = encodingsSinglebyte.get(encoding);
112+
if (key === undefined) throw new ERR_ENCODING_NOT_SUPPORTED(encoding);
113+
return (buf) => decodeSingleByte(buf, key, fatal);
114+
}
115+
74116
const encodings = new SafeMap([
75117
['unicode-1-1-utf-8', 'utf-8'],
76118
['unicode11utf8', 'utf-8'],
@@ -462,7 +504,7 @@ function makeTextDecoderICU() {
462504
validateDecoder(this);
463505
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
464506

465-
if (this[kMethod]) return this[kMethod](parseInput(input));
507+
if (this[kMethod]) return this[kMethod](input);
466508

467509
this[kUTF8FastPath] &&= !(options?.stream);
468510

@@ -532,7 +574,6 @@ function makeTextDecoderJS() {
532574

533575
decode(input = empty, options = kEmptyObject) {
534576
validateDecoder(this);
535-
input = parseInput(input);
536577
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
537578

538579
if (this[kMethod]) return this[kMethod](input);
@@ -547,6 +588,7 @@ function makeTextDecoderJS() {
547588
this[kFlags] |= CONVERTER_FLAGS_FLUSH;
548589
}
549590

591+
input = parseInput(input);
550592
let result = this[kFlags] & CONVERTER_FLAGS_FLUSH ?
551593
this[kHandle].end(input) :
552594
this[kHandle].write(input);

lib/internal/encoding/single-byte.js

Lines changed: 0 additions & 155 deletions
This file was deleted.

src/encoding_binding.cc

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#include "encoding_binding.h"
22
#include "ada.h"
3+
#include "encoding_singlebyte.h"
34
#include "env-inl.h"
45
#include "node_errors.h"
56
#include "node_external_reference.h"
@@ -389,6 +390,70 @@ void BindingData::DecodeUTF8(const FunctionCallbackInfo<Value>& args) {
389390
}
390391
}
391392

393+
// Decodes a buffer of single-byte-encoded data into a JS string.
//
// Arguments (at least two are required, enforced with CHECK_GE):
//   args[0]  input bytes; must be an ArrayBuffer, SharedArrayBuffer or
//            ArrayBufferView, otherwise ERR_INVALID_ARG_TYPE is thrown.
//   args[1]  Int32 encoding index in [0, kXUserDefined]; both the type and
//            the range are enforced with CHECK, not with a JS exception.
//   args[2]  "fatal" flag; only an actual `true` value enables it, and it is
//            only consulted for encodings other than x-user-defined.
//
// The result is delivered through args.GetReturnValue(). Empty input yields
// the empty string. In fatal mode ERR_ENCODING_INVALID_ENCODED_DATA is thrown
// when the decoded output contains 0xfffd for an encoding flagged in
// fSingleByteEncodings.
void BindingData::DecodeSingleByte(const FunctionCallbackInfo<Value>& args) {
394+
Environment* env = Environment::GetCurrent(args);
395+
396+
CHECK_GE(args.Length(), 2);
397+
Isolate* isolate = env->isolate();
398+
399+
if (!(args[0]->IsArrayBuffer() || args[0]->IsSharedArrayBuffer() ||
400+
args[0]->IsArrayBufferView())) {
401+
return node::THROW_ERR_INVALID_ARG_TYPE(
402+
isolate,
403+
"The \"input\" argument must be an instance of SharedArrayBuffer, "
404+
"ArrayBuffer or ArrayBufferView.");
405+
}
406+
407+
static constexpr int kXUserDefined = 28; // Last one, see encoding.js
408+
409+
CHECK(args[1]->IsInt32());
410+
const int encoding = args[1].As<v8::Int32>()->Value();
411+
CHECK(encoding >= 0 && encoding <= kXUserDefined);
412+
413+
ArrayBufferViewContents<uint8_t> buffer(args[0]);
414+
const uint8_t* data = buffer.data();
415+
size_t length = buffer.length();
416+
417+
if (length == 0) return args.GetReturnValue().SetEmptyString();
418+
419+
const char* dataChar = reinterpret_cast<const char*>(data);
420+
// Fast path: when simdutf reports no ASCII validation error, the bytes are
// turned into a string directly (as LATIN1) and the per-encoding handling
// below is skipped for every encoding index.
if (!simdutf::validate_ascii_with_errors(dataChar, length).error) {
421+
Local<Value> ret;
422+
if (StringBytes::Encode(isolate, dataChar, length, LATIN1).ToLocal(&ret)) {
423+
args.GetReturnValue().Set(ret);
424+
}
425+
return;
426+
}
427+
428+
// Output buffer: one 16-bit code unit per input byte.
uint16_t* dst = node::UncheckedMalloc<uint16_t>(length);
429+
if (dst == nullptr) return node::THROW_ERR_MEMORY_ALLOCATION_FAILED(isolate);
430+
431+
if (encoding == kXUserDefined) {
432+
// x-user-defined
// No lookup table: bytes below 0x80 are copied as-is, bytes >= 0x80 are
// offset by 0xf700 (0x80 -> 0xf780, 0xff -> 0xf7ff). The fatal flag is not
// read on this branch.
433+
for (size_t i = 0; i < length; i++) {
434+
dst[i] = data[i] >= 0x80 ? data[i] + 0xf700 : data[i];
435+
}
436+
} else {
437+
bool has_fatal = args[2]->IsTrue();
438+
439+
// Translate every input byte through the lookup table of this encoding.
const uint16_t* table = tSingleByteEncodings[encoding];
440+
for (size_t i = 0; i < length; i++) dst[i] = table[data[i]];
441+
442+
const char16_t* dst16 = reinterpret_cast<char16_t*>(dst);
443+
// Fatal mode: the output is scanned for 0xfffd only when
// fSingleByteEncodings[encoding] is set. dst is released here because this
// path returns without handing it to StringBytes::Raw().
if (has_fatal && fSingleByteEncodings[encoding] &&
444+
simdutf::find(dst16, dst16 + length, 0xfffd) != dst16 + length) {
445+
free(dst);
446+
return node::THROW_ERR_ENCODING_INVALID_ENCODED_DATA(
447+
isolate, "The encoded data was not valid for this encoding");
448+
}
449+
}
450+
451+
Local<Value> ret;
452+
// NOTE(review): dst is not freed after this call, so StringBytes::Raw()
// presumably takes ownership of the buffer (also on failure) - confirm.
if (StringBytes::Raw(isolate, dst, length).ToLocal(&ret)) {
453+
args.GetReturnValue().Set(ret);
454+
}
455+
}
456+
392457
void BindingData::ToASCII(const FunctionCallbackInfo<Value>& args) {
393458
Environment* env = Environment::GetCurrent(args);
394459
CHECK_GE(args.Length(), 1);
@@ -421,6 +486,7 @@ void BindingData::CreatePerIsolateProperties(IsolateData* isolate_data,
421486
SetMethod(isolate, target, "encodeInto", EncodeInto);
422487
SetMethodNoSideEffect(isolate, target, "encodeUtf8String", EncodeUtf8String);
423488
SetMethodNoSideEffect(isolate, target, "decodeUTF8", DecodeUTF8);
489+
SetMethodNoSideEffect(isolate, target, "decodeSingleByte", DecodeSingleByte);
424490
SetMethodNoSideEffect(isolate, target, "toASCII", ToASCII);
425491
SetMethodNoSideEffect(isolate, target, "toUnicode", ToUnicode);
426492
}
@@ -438,6 +504,7 @@ void BindingData::RegisterTimerExternalReferences(
438504
registry->Register(EncodeInto);
439505
registry->Register(EncodeUtf8String);
440506
registry->Register(DecodeUTF8);
507+
registry->Register(DecodeSingleByte);
441508
registry->Register(ToASCII);
442509
registry->Register(ToUnicode);
443510
}

0 commit comments

Comments
 (0)