Skip to content

Commit 367908c

Browse files
committed
lib: unify ICU and no-ICU TextDecoder
1 parent 0847010 commit 367908c

File tree

1 file changed

+84
-123
lines changed

1 file changed

+84
-123
lines changed

lib/internal/encoding.js

Lines changed: 84 additions & 123 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ const {
2525
ERR_INVALID_THIS,
2626
ERR_NO_ICU,
2727
} = require('internal/errors').codes;
28-
const kMethod = Symbol('method');
28+
const kSingleByte = Symbol('method');
2929
const kHandle = Symbol('handle');
3030
const kFlags = Symbol('flags');
3131
const kEncoding = Symbol('encoding');
@@ -447,166 +447,127 @@ function parseInput(input) {
447447
}
448448
}
449449

450-
const TextDecoder =
451-
internalBinding('config').hasIntl ?
452-
makeTextDecoderICU() :
453-
makeTextDecoderJS();
450+
const { hasIntl } = internalBinding('config');
454451

455-
function makeTextDecoderICU() {
456-
const {
452+
let _decode, getConverter;
453+
if (hasIntl) {
454+
;({
457455
decode: _decode,
458456
getConverter,
459-
} = internalBinding('icu');
457+
} = internalBinding('icu'));
458+
}
460459

461-
class TextDecoder {
462-
constructor(encoding = 'utf-8', options = kEmptyObject) {
463-
encoding = `${encoding}`;
464-
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
460+
const kBOMSeen = Symbol('BOM seen');
465461

466-
const enc = getEncodingFromLabel(encoding);
467-
if (enc === undefined)
468-
throw new ERR_ENCODING_NOT_SUPPORTED(encoding);
462+
let StringDecoder;
463+
function lazyStringDecoder() {
464+
if (StringDecoder === undefined)
465+
({ StringDecoder } = require('string_decoder'));
466+
return StringDecoder;
467+
}
469468

470-
let flags = 0;
471-
if (options !== null) {
472-
flags |= options.fatal ? CONVERTER_FLAGS_FATAL : 0;
473-
flags |= options.ignoreBOM ? CONVERTER_FLAGS_IGNORE_BOM : 0;
474-
}
469+
class TextDecoder {
470+
constructor(encoding = 'utf-8', options = kEmptyObject) {
471+
encoding = `${encoding}`;
472+
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
475473

476-
this[kDecoder] = true;
477-
this[kFlags] = flags;
478-
this[kEncoding] = enc;
479-
this[kIgnoreBOM] = Boolean(options?.ignoreBOM);
480-
this[kFatal] = Boolean(options?.fatal);
481-
// Only support fast path for UTF-8.
482-
this[kUTF8FastPath] = enc === 'utf-8';
483-
this[kHandle] = undefined;
484-
this[kMethod] = undefined;
485-
486-
if (isSinglebyteEncoding(this.encoding)) {
487-
this[kMethod] = createSinglebyteDecoder(this.encoding, this[kFatal]);
488-
} else if (!this[kUTF8FastPath]) {
489-
this.#prepareConverter();
490-
}
474+
const enc = getEncodingFromLabel(encoding);
475+
if (enc === undefined)
476+
throw new ERR_ENCODING_NOT_SUPPORTED(encoding);
477+
478+
let flags = 0;
479+
if (options !== null) {
480+
flags |= options.fatal ? CONVERTER_FLAGS_FATAL : 0;
481+
flags |= options.ignoreBOM ? CONVERTER_FLAGS_IGNORE_BOM : 0;
491482
}
492483

493-
#prepareConverter() {
494-
if (this[kHandle] !== undefined) return;
484+
this[kDecoder] = true;
485+
this[kFlags] = flags;
486+
this[kEncoding] = enc;
487+
this[kIgnoreBOM] = Boolean(options?.ignoreBOM);
488+
this[kFatal] = Boolean(options?.fatal);
489+
// Only support fast path for UTF-8.
490+
this[kUTF8FastPath] = enc === 'utf-8';
491+
this[kHandle] = undefined;
492+
this[kSingleByte] = undefined; // Does not care about streaming or BOM
493+
494+
if (isSinglebyteEncoding(enc)) {
495+
this[kSingleByte] = createSinglebyteDecoder(enc, this[kFatal]);
496+
} else if (!this[kUTF8FastPath]) {
497+
this.#prepareConverter();
498+
}
499+
}
500+
501+
#prepareConverter() {
502+
if (this[kHandle] !== undefined) return;
503+
if (hasIntl) {
495504
let icuEncoding = this[kEncoding];
496505
if (icuEncoding === 'gbk') icuEncoding = 'gb18030'; // 10.1.1. GBK's decoder is gb18030's decoder
497506
const handle = getConverter(icuEncoding, this[kFlags]);
498507
if (handle === undefined)
499508
throw new ERR_ENCODING_NOT_SUPPORTED(this[kEncoding]);
500509
this[kHandle] = handle;
510+
} else {
511+
if (this.encoding !== 'utf-8' && this.encoding !== 'utf-16le') {
512+
throw new ERR_ENCODING_NOT_SUPPORTED(`${this.encoding}`);
513+
}
514+
515+
if (this[kFatal]) throw new ERR_NO_ICU('"fatal" option');
516+
// StringDecoder will normalize WHATWG encoding to Node.js encoding.
517+
this[kHandle] = new (lazyStringDecoder())(this.encoding);
518+
this[kBOMSeen] = false;
501519
}
520+
}
502521

503-
decode(input = empty, options = kEmptyObject) {
504-
validateDecoder(this);
505-
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
522+
decode(input = empty, options = kEmptyObject) {
523+
validateDecoder(this);
524+
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
506525

507-
if (this[kMethod]) return this[kMethod](input);
526+
if (this[kSingleByte]) return this[kSingleByte](input);
508527

509-
this[kUTF8FastPath] &&= !(options?.stream);
528+
this[kUTF8FastPath] &&= !(options?.stream);
510529

511-
if (this[kUTF8FastPath]) {
512-
return decodeUTF8(input, this[kIgnoreBOM], this[kFatal]);
513-
}
530+
if (this[kUTF8FastPath]) {
531+
return decodeUTF8(input, this[kIgnoreBOM], this[kFatal]);
532+
}
514533

515-
this.#prepareConverter();
534+
this.#prepareConverter();
516535

536+
if (hasIntl) {
517537
let flags = 0;
518538
if (options !== null)
519539
flags |= options.stream ? 0 : CONVERTER_FLAGS_FLUSH;
520540

521541
return _decode(this[kHandle], input, flags, this.encoding);
522542
}
523-
}
524543

525-
return TextDecoder;
526-
}
527-
528-
function makeTextDecoderJS() {
529-
let StringDecoder;
530-
function lazyStringDecoder() {
531-
if (StringDecoder === undefined)
532-
({ StringDecoder } = require('string_decoder'));
533-
return StringDecoder;
534-
}
535-
536-
const kBOMSeen = Symbol('BOM seen');
537-
538-
function hasConverter(encoding) {
539-
return encoding === 'utf-8' || encoding === 'utf-16le';
540-
}
541-
542-
class TextDecoder {
543-
constructor(encoding = 'utf-8', options = kEmptyObject) {
544-
encoding = `${encoding}`;
545-
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
546-
547-
const enc = getEncodingFromLabel(encoding);
548-
if (enc === undefined)
549-
throw new ERR_ENCODING_NOT_SUPPORTED(encoding);
550-
551-
let flags = 0;
552-
if (options !== null) {
553-
flags |= options.fatal ? CONVERTER_FLAGS_FATAL : 0;
554-
flags |= options.ignoreBOM ? CONVERTER_FLAGS_IGNORE_BOM : 0;
555-
}
556-
557-
this[kDecoder] = true;
558-
this[kFlags] = flags;
559-
this[kEncoding] = enc;
560-
this[kIgnoreBOM] = Boolean(options?.ignoreBOM);
561-
this[kFatal] = Boolean(options?.fatal);
544+
if (this[kFlags] & CONVERTER_FLAGS_FLUSH) {
562545
this[kBOMSeen] = false;
563-
this[kMethod] = undefined;
564-
565-
if (isSinglebyteEncoding(enc)) {
566-
this[kMethod] = createSinglebyteDecoder(enc, this[kFatal]);
567-
} else {
568-
if (!hasConverter(enc)) throw new ERR_ENCODING_NOT_SUPPORTED(encoding);
569-
if (this[kFatal]) throw new ERR_NO_ICU('"fatal" option');
570-
// StringDecoder will normalize WHATWG encoding to Node.js encoding.
571-
this[kHandle] = new (lazyStringDecoder())(enc);
572-
}
573546
}
574547

575-
decode(input = empty, options = kEmptyObject) {
576-
validateDecoder(this);
577-
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
578-
579-
if (this[kMethod]) return this[kMethod](input);
548+
if (options !== null && options.stream) {
549+
this[kFlags] &= ~CONVERTER_FLAGS_FLUSH;
550+
} else {
551+
this[kFlags] |= CONVERTER_FLAGS_FLUSH;
552+
}
580553

581-
if (this[kFlags] & CONVERTER_FLAGS_FLUSH) {
582-
this[kBOMSeen] = false;
583-
}
554+
input = parseInput(input);
555+
let result = this[kFlags] & CONVERTER_FLAGS_FLUSH ?
556+
this[kHandle].end(input) :
557+
this[kHandle].write(input);
584558

585-
if (options !== null && options.stream) {
586-
this[kFlags] &= ~CONVERTER_FLAGS_FLUSH;
587-
} else {
588-
this[kFlags] |= CONVERTER_FLAGS_FLUSH;
559+
if (result.length > 0 && !this[kBOMSeen] && !this[kIgnoreBOM]) {
560+
// If the very first result in the stream is a BOM, and we are not
561+
// explicitly told to ignore it, then we discard it.
562+
if (result[0] === '\ufeff') {
563+
result = StringPrototypeSlice(result, 1);
589564
}
565+
this[kBOMSeen] = true;
566+
}
590567

591-
input = parseInput(input);
592-
let result = this[kFlags] & CONVERTER_FLAGS_FLUSH ?
593-
this[kHandle].end(input) :
594-
this[kHandle].write(input);
595-
596-
if (result.length > 0 && !this[kBOMSeen] && !this[kIgnoreBOM]) {
597-
// If the very first result in the stream is a BOM, and we are not
598-
// explicitly told to ignore it, then we discard it.
599-
if (result[0] === '\ufeff') {
600-
result = StringPrototypeSlice(result, 1);
601-
}
602-
this[kBOMSeen] = true;
603-
}
568+
return result;
604569

605-
return result;
606-
}
607570
}
608-
609-
return TextDecoder;
610571
}
611572

612573
// Mix in some shared properties.

0 commit comments

Comments
 (0)