Skip to content

Commit d65e2da

Browse files
committed
lib: unify ICU and no-ICU TextDecoder
1 parent 0847010 commit d65e2da

File tree

1 file changed

+86
-128
lines changed

1 file changed

+86
-128
lines changed

lib/internal/encoding.js

Lines changed: 86 additions & 128 deletions
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@ const {
2525
ERR_INVALID_THIS,
2626
ERR_NO_ICU,
2727
} = require('internal/errors').codes;
28-
const kMethod = Symbol('method');
28+
const kSingleByte = Symbol('method');
2929
const kHandle = Symbol('handle');
3030
const kFlags = Symbol('flags');
3131
const kEncoding = Symbol('encoding');
@@ -52,6 +52,8 @@ const {
5252
validateObject,
5353
kValidateObjectAllowObjectsAndNull,
5454
} = require('internal/validators');
55+
56+
const { hasIntl } = internalBinding('config');
5557
const binding = internalBinding('encoding_binding');
5658
const {
5759
encodeInto,
@@ -447,166 +449,122 @@ function parseInput(input) {
447449
}
448450
}
449451

450-
const TextDecoder =
451-
internalBinding('config').hasIntl ?
452-
makeTextDecoderICU() :
453-
makeTextDecoderJS();
454-
455-
function makeTextDecoderICU() {
456-
const {
452+
let _decode, getConverter;
453+
if (hasIntl) {
454+
;({
457455
decode: _decode,
458456
getConverter,
459-
} = internalBinding('icu');
457+
} = internalBinding('icu'));
458+
}
460459

461-
class TextDecoder {
462-
constructor(encoding = 'utf-8', options = kEmptyObject) {
463-
encoding = `${encoding}`;
464-
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
460+
const kBOMSeen = Symbol('BOM seen');
465461

466-
const enc = getEncodingFromLabel(encoding);
467-
if (enc === undefined)
468-
throw new ERR_ENCODING_NOT_SUPPORTED(encoding);
462+
let StringDecoder;
463+
function lazyStringDecoder() {
464+
if (StringDecoder === undefined)
465+
({ StringDecoder } = require('string_decoder'));
466+
return StringDecoder;
467+
}
469468

470-
let flags = 0;
471-
if (options !== null) {
472-
flags |= options.fatal ? CONVERTER_FLAGS_FATAL : 0;
473-
flags |= options.ignoreBOM ? CONVERTER_FLAGS_IGNORE_BOM : 0;
474-
}
469+
class TextDecoder {
470+
constructor(encoding = 'utf-8', options = kEmptyObject) {
471+
encoding = `${encoding}`;
472+
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
475473

476-
this[kDecoder] = true;
477-
this[kFlags] = flags;
478-
this[kEncoding] = enc;
479-
this[kIgnoreBOM] = Boolean(options?.ignoreBOM);
480-
this[kFatal] = Boolean(options?.fatal);
481-
// Only support fast path for UTF-8.
482-
this[kUTF8FastPath] = enc === 'utf-8';
483-
this[kHandle] = undefined;
484-
this[kMethod] = undefined;
485-
486-
if (isSinglebyteEncoding(this.encoding)) {
487-
this[kMethod] = createSinglebyteDecoder(this.encoding, this[kFatal]);
488-
} else if (!this[kUTF8FastPath]) {
489-
this.#prepareConverter();
490-
}
474+
const enc = getEncodingFromLabel(encoding);
475+
if (enc === undefined)
476+
throw new ERR_ENCODING_NOT_SUPPORTED(encoding);
477+
478+
let flags = 0;
479+
if (options !== null) {
480+
flags |= options.fatal ? CONVERTER_FLAGS_FATAL : 0;
481+
flags |= options.ignoreBOM ? CONVERTER_FLAGS_IGNORE_BOM : 0;
482+
}
483+
484+
this[kDecoder] = true;
485+
this[kFlags] = flags;
486+
this[kEncoding] = enc;
487+
this[kIgnoreBOM] = Boolean(options?.ignoreBOM);
488+
this[kFatal] = Boolean(options?.fatal);
489+
// Only support fast path for UTF-8.
490+
this[kUTF8FastPath] = enc === 'utf-8';
491+
this[kHandle] = undefined;
492+
this[kSingleByte] = undefined; // Does not care about streaming or BOM
493+
494+
if (isSinglebyteEncoding(enc)) {
495+
this[kSingleByte] = createSinglebyteDecoder(enc, this[kFatal]);
496+
} else if (!this[kUTF8FastPath]) {
497+
this.#prepareConverter();
491498
}
499+
}
492500

493-
#prepareConverter() {
494-
if (this[kHandle] !== undefined) return;
501+
#prepareConverter() {
502+
if (this[kHandle] !== undefined) return;
503+
if (hasIntl) {
495504
let icuEncoding = this[kEncoding];
496505
if (icuEncoding === 'gbk') icuEncoding = 'gb18030'; // 10.1.1. GBK's decoder is gb18030's decoder
497506
const handle = getConverter(icuEncoding, this[kFlags]);
498507
if (handle === undefined)
499508
throw new ERR_ENCODING_NOT_SUPPORTED(this[kEncoding]);
500509
this[kHandle] = handle;
501-
}
502-
503-
decode(input = empty, options = kEmptyObject) {
504-
validateDecoder(this);
505-
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
506-
507-
if (this[kMethod]) return this[kMethod](input);
508-
509-
this[kUTF8FastPath] &&= !(options?.stream);
510-
511-
if (this[kUTF8FastPath]) {
512-
return decodeUTF8(input, this[kIgnoreBOM], this[kFatal]);
510+
} else {
511+
if (this.encoding !== 'utf-8' && this.encoding !== 'utf-16le') {
512+
throw new ERR_ENCODING_NOT_SUPPORTED(`${this.encoding}`);
513513
}
514514

515-
this.#prepareConverter();
516-
517-
let flags = 0;
518-
if (options !== null)
519-
flags |= options.stream ? 0 : CONVERTER_FLAGS_FLUSH;
520-
521-
return _decode(this[kHandle], input, flags, this.encoding);
515+
if (this[kFatal]) throw new ERR_NO_ICU('"fatal" option');
516+
// StringDecoder will normalize WHATWG encoding to Node.js encoding.
517+
this[kHandle] = new (lazyStringDecoder())(this.encoding);
518+
this[kBOMSeen] = false;
522519
}
523520
}
524521

525-
return TextDecoder;
526-
}
527-
528-
function makeTextDecoderJS() {
529-
let StringDecoder;
530-
function lazyStringDecoder() {
531-
if (StringDecoder === undefined)
532-
({ StringDecoder } = require('string_decoder'));
533-
return StringDecoder;
534-
}
535-
536-
const kBOMSeen = Symbol('BOM seen');
522+
decode(input = empty, options = kEmptyObject) {
523+
validateDecoder(this);
524+
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
537525

538-
function hasConverter(encoding) {
539-
return encoding === 'utf-8' || encoding === 'utf-16le';
540-
}
526+
if (this[kSingleByte]) return this[kSingleByte](input);
541527

542-
class TextDecoder {
543-
constructor(encoding = 'utf-8', options = kEmptyObject) {
544-
encoding = `${encoding}`;
545-
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
528+
const stream = options?.stream;
529+
if (this[kUTF8FastPath]) {
530+
if (!stream) return decodeUTF8(input, this[kIgnoreBOM], this[kFatal]);
531+
this[kUTF8FastPath] = false;
532+
}
546533

547-
const enc = getEncodingFromLabel(encoding);
548-
if (enc === undefined)
549-
throw new ERR_ENCODING_NOT_SUPPORTED(encoding);
534+
this.#prepareConverter();
550535

551-
let flags = 0;
552-
if (options !== null) {
553-
flags |= options.fatal ? CONVERTER_FLAGS_FATAL : 0;
554-
flags |= options.ignoreBOM ? CONVERTER_FLAGS_IGNORE_BOM : 0;
555-
}
536+
if (hasIntl) {
537+
const flags = stream ? 0 : CONVERTER_FLAGS_FLUSH;
538+
return _decode(this[kHandle], input, flags, this.encoding);
539+
}
556540

557-
this[kDecoder] = true;
558-
this[kFlags] = flags;
559-
this[kEncoding] = enc;
560-
this[kIgnoreBOM] = Boolean(options?.ignoreBOM);
561-
this[kFatal] = Boolean(options?.fatal);
541+
if (this[kFlags] & CONVERTER_FLAGS_FLUSH) {
562542
this[kBOMSeen] = false;
563-
this[kMethod] = undefined;
564-
565-
if (isSinglebyteEncoding(enc)) {
566-
this[kMethod] = createSinglebyteDecoder(enc, this[kFatal]);
567-
} else {
568-
if (!hasConverter(enc)) throw new ERR_ENCODING_NOT_SUPPORTED(encoding);
569-
if (this[kFatal]) throw new ERR_NO_ICU('"fatal" option');
570-
// StringDecoder will normalize WHATWG encoding to Node.js encoding.
571-
this[kHandle] = new (lazyStringDecoder())(enc);
572-
}
573543
}
574544

575-
decode(input = empty, options = kEmptyObject) {
576-
validateDecoder(this);
577-
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
578-
579-
if (this[kMethod]) return this[kMethod](input);
545+
if (stream) {
546+
this[kFlags] &= ~CONVERTER_FLAGS_FLUSH;
547+
} else {
548+
this[kFlags] |= CONVERTER_FLAGS_FLUSH;
549+
}
580550

581-
if (this[kFlags] & CONVERTER_FLAGS_FLUSH) {
582-
this[kBOMSeen] = false;
583-
}
551+
input = parseInput(input);
552+
let result = this[kFlags] & CONVERTER_FLAGS_FLUSH ?
553+
this[kHandle].end(input) :
554+
this[kHandle].write(input);
584555

585-
if (options !== null && options.stream) {
586-
this[kFlags] &= ~CONVERTER_FLAGS_FLUSH;
587-
} else {
588-
this[kFlags] |= CONVERTER_FLAGS_FLUSH;
556+
if (result.length > 0 && !this[kBOMSeen] && !this[kIgnoreBOM]) {
557+
// If the very first result in the stream is a BOM, and we are not
558+
// explicitly told to ignore it, then we discard it.
559+
if (result[0] === '\ufeff') {
560+
result = StringPrototypeSlice(result, 1);
589561
}
562+
this[kBOMSeen] = true;
563+
}
590564

591-
input = parseInput(input);
592-
let result = this[kFlags] & CONVERTER_FLAGS_FLUSH ?
593-
this[kHandle].end(input) :
594-
this[kHandle].write(input);
595-
596-
if (result.length > 0 && !this[kBOMSeen] && !this[kIgnoreBOM]) {
597-
// If the very first result in the stream is a BOM, and we are not
598-
// explicitly told to ignore it, then we discard it.
599-
if (result[0] === '\ufeff') {
600-
result = StringPrototypeSlice(result, 1);
601-
}
602-
this[kBOMSeen] = true;
603-
}
565+
return result;
604566

605-
return result;
606-
}
607567
}
608-
609-
return TextDecoder;
610568
}
611569

612570
// Mix in some shared properties.

0 commit comments

Comments
 (0)