Skip to content

Commit b2e1ece

Browse files
committed
lib: unify ICU and no-ICU TextDecoder
1 parent 70ec5c0 commit b2e1ece

File tree

1 file changed

+85
-127
lines changed

1 file changed

+85
-127
lines changed

lib/internal/encoding.js

Lines changed: 85 additions & 127 deletions
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,7 @@ const {
2323
ERR_INVALID_THIS,
2424
ERR_NO_ICU,
2525
} = require('internal/errors').codes;
26-
const kMethod = Symbol('method');
26+
const kSingleByte = Symbol('method');
2727
const kHandle = Symbol('handle');
2828
const kFlags = Symbol('flags');
2929
const kEncoding = Symbol('encoding');
@@ -52,6 +52,8 @@ const {
5252
validateObject,
5353
kValidateObjectAllowObjectsAndNull,
5454
} = require('internal/validators');
55+
56+
const { hasIntl } = internalBinding('config');
5557
const binding = internalBinding('encoding_binding');
5658
const {
5759
encodeInto,
@@ -405,166 +407,122 @@ function parseInput(input) {
405407
}
406408
}
407409

408-
const TextDecoder =
409-
internalBinding('config').hasIntl ?
410-
makeTextDecoderICU() :
411-
makeTextDecoderJS();
412-
413-
function makeTextDecoderICU() {
414-
const {
410+
let _decode, getConverter;
411+
if (hasIntl) {
412+
;({
415413
decode: _decode,
416414
getConverter,
417-
} = internalBinding('icu');
415+
} = internalBinding('icu'));
416+
}
418417

419-
class TextDecoder {
420-
constructor(encoding = 'utf-8', options = kEmptyObject) {
421-
encoding = `${encoding}`;
422-
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
418+
const kBOMSeen = Symbol('BOM seen');
423419

424-
const enc = getEncodingFromLabel(encoding);
425-
if (enc === undefined)
426-
throw new ERR_ENCODING_NOT_SUPPORTED(encoding);
420+
let StringDecoder;
421+
function lazyStringDecoder() {
422+
if (StringDecoder === undefined)
423+
({ StringDecoder } = require('string_decoder'));
424+
return StringDecoder;
425+
}
427426

428-
let flags = 0;
429-
if (options !== null) {
430-
flags |= options.fatal ? CONVERTER_FLAGS_FATAL : 0;
431-
flags |= options.ignoreBOM ? CONVERTER_FLAGS_IGNORE_BOM : 0;
432-
}
427+
class TextDecoder {
428+
constructor(encoding = 'utf-8', options = kEmptyObject) {
429+
encoding = `${encoding}`;
430+
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
433431

434-
this[kDecoder] = true;
435-
this[kFlags] = flags;
436-
this[kEncoding] = enc;
437-
this[kIgnoreBOM] = Boolean(options?.ignoreBOM);
438-
this[kFatal] = Boolean(options?.fatal);
439-
// Only support fast path for UTF-8.
440-
this[kUTF8FastPath] = enc === 'utf-8';
441-
this[kHandle] = undefined;
442-
this[kMethod] = undefined;
443-
444-
if (isSinglebyteEncoding(this.encoding)) {
445-
this[kMethod] = createSinglebyteDecoder(this.encoding, this[kFatal]);
446-
} else if (!this[kUTF8FastPath]) {
447-
this.#prepareConverter();
448-
}
432+
const enc = getEncodingFromLabel(encoding);
433+
if (enc === undefined)
434+
throw new ERR_ENCODING_NOT_SUPPORTED(encoding);
435+
436+
let flags = 0;
437+
if (options !== null) {
438+
flags |= options.fatal ? CONVERTER_FLAGS_FATAL : 0;
439+
flags |= options.ignoreBOM ? CONVERTER_FLAGS_IGNORE_BOM : 0;
449440
}
450441

451-
#prepareConverter() {
452-
if (this[kHandle] !== undefined) return;
442+
this[kDecoder] = true;
443+
this[kFlags] = flags;
444+
this[kEncoding] = enc;
445+
this[kIgnoreBOM] = Boolean(options?.ignoreBOM);
446+
this[kFatal] = Boolean(options?.fatal);
447+
// Only support fast path for UTF-8.
448+
this[kUTF8FastPath] = enc === 'utf-8';
449+
this[kHandle] = undefined;
450+
this[kSingleByte] = undefined; // Does not care about streaming or BOM
451+
452+
if (isSinglebyteEncoding(enc)) {
453+
this[kSingleByte] = createSinglebyteDecoder(enc, this[kFatal]);
454+
} else if (!this[kUTF8FastPath]) {
455+
this.#prepareConverter();
456+
}
457+
}
458+
459+
#prepareConverter() {
460+
if (this[kHandle] !== undefined) return;
461+
if (hasIntl) {
453462
let icuEncoding = this[kEncoding];
454463
if (icuEncoding === 'gbk') icuEncoding = 'gb18030'; // 10.1.1. GBK's decoder is gb18030's decoder
455464
const handle = getConverter(icuEncoding, this[kFlags]);
456465
if (handle === undefined)
457466
throw new ERR_ENCODING_NOT_SUPPORTED(this[kEncoding]);
458467
this[kHandle] = handle;
459-
}
460-
461-
decode(input = empty, options = kEmptyObject) {
462-
validateDecoder(this);
463-
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
464-
465-
if (this[kMethod]) return this[kMethod](parseInput(input));
466-
467-
this[kUTF8FastPath] &&= !(options?.stream);
468-
469-
if (this[kUTF8FastPath]) {
470-
return decodeUTF8(input, this[kIgnoreBOM], this[kFatal]);
468+
} else {
469+
if (this.encoding !== 'utf-8' && this.encoding !== 'utf-16le') {
470+
throw new ERR_ENCODING_NOT_SUPPORTED(`${this.encoding}`);
471471
}
472472

473-
this.#prepareConverter();
474-
475-
let flags = 0;
476-
if (options !== null)
477-
flags |= options.stream ? 0 : CONVERTER_FLAGS_FLUSH;
478-
479-
return _decode(this[kHandle], input, flags, this.encoding);
473+
if (this[kFatal]) throw new ERR_NO_ICU('"fatal" option');
474+
this[kHandle] = new (lazyStringDecoder())(this.encoding);
475+
this[kBOMSeen] = false;
480476
}
481477
}
482478

483-
return TextDecoder;
484-
}
479+
decode(input = empty, options = kEmptyObject) {
480+
validateDecoder(this);
481+
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
485482

486-
function makeTextDecoderJS() {
487-
let StringDecoder;
488-
function lazyStringDecoder() {
489-
if (StringDecoder === undefined)
490-
({ StringDecoder } = require('string_decoder'));
491-
return StringDecoder;
492-
}
483+
if (this[kSingleByte]) return this[kSingleByte](parseInput(input));
493484

494-
const kBOMSeen = Symbol('BOM seen');
485+
const stream = options?.stream;
486+
if (this[kUTF8FastPath]) {
487+
if (!stream) return decodeUTF8(input, this[kIgnoreBOM], this[kFatal]);
488+
this[kUTF8FastPath] = false;
489+
}
495490

496-
function hasConverter(encoding) {
497-
return encoding === 'utf-8' || encoding === 'utf-16le';
498-
}
491+
this.#prepareConverter();
499492

500-
class TextDecoder {
501-
constructor(encoding = 'utf-8', options = kEmptyObject) {
502-
encoding = `${encoding}`;
503-
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
493+
if (hasIntl) {
494+
const flags = stream ? 0 : CONVERTER_FLAGS_FLUSH;
495+
return _decode(this[kHandle], input, flags, this.encoding);
496+
}
504497

505-
const enc = getEncodingFromLabel(encoding);
506-
if (enc === undefined)
507-
throw new ERR_ENCODING_NOT_SUPPORTED(encoding);
498+
input = parseInput(input); // Can throw, has to come before state changes
508499

509-
let flags = 0;
510-
if (options !== null) {
511-
flags |= options.fatal ? CONVERTER_FLAGS_FATAL : 0;
512-
flags |= options.ignoreBOM ? CONVERTER_FLAGS_IGNORE_BOM : 0;
513-
}
514-
515-
this[kDecoder] = true;
516-
this[kFlags] = flags;
517-
this[kEncoding] = enc;
518-
this[kIgnoreBOM] = Boolean(options?.ignoreBOM);
519-
this[kFatal] = Boolean(options?.fatal);
500+
if (this[kFlags] & CONVERTER_FLAGS_FLUSH) {
520501
this[kBOMSeen] = false;
521-
this[kMethod] = undefined;
522-
523-
if (isSinglebyteEncoding(enc)) {
524-
this[kMethod] = createSinglebyteDecoder(enc, this[kFatal]);
525-
} else {
526-
if (!hasConverter(enc)) throw new ERR_ENCODING_NOT_SUPPORTED(encoding);
527-
if (this[kFatal]) throw new ERR_NO_ICU('"fatal" option');
528-
// StringDecoder will normalize WHATWG encoding to Node.js encoding.
529-
this[kHandle] = new (lazyStringDecoder())(enc);
530-
}
531502
}
532503

533-
decode(input = empty, options = kEmptyObject) {
534-
validateDecoder(this);
535-
input = parseInput(input);
536-
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
537-
538-
if (this[kMethod]) return this[kMethod](input);
504+
if (stream) {
505+
this[kFlags] &= ~CONVERTER_FLAGS_FLUSH;
506+
} else {
507+
this[kFlags] |= CONVERTER_FLAGS_FLUSH;
508+
}
539509

540-
if (this[kFlags] & CONVERTER_FLAGS_FLUSH) {
541-
this[kBOMSeen] = false;
542-
}
510+
let result = this[kFlags] & CONVERTER_FLAGS_FLUSH ?
511+
this[kHandle].end(input) :
512+
this[kHandle].write(input);
543513

544-
if (options !== null && options.stream) {
545-
this[kFlags] &= ~CONVERTER_FLAGS_FLUSH;
546-
} else {
547-
this[kFlags] |= CONVERTER_FLAGS_FLUSH;
514+
if (result.length > 0 && !this[kBOMSeen] && !this[kIgnoreBOM]) {
515+
// If the very first result in the stream is a BOM, and we are not
516+
// explicitly told to ignore it, then we discard it.
517+
if (result[0] === '\ufeff') {
518+
result = StringPrototypeSlice(result, 1);
548519
}
520+
this[kBOMSeen] = true;
521+
}
549522

550-
let result = this[kFlags] & CONVERTER_FLAGS_FLUSH ?
551-
this[kHandle].end(input) :
552-
this[kHandle].write(input);
553-
554-
if (result.length > 0 && !this[kBOMSeen] && !this[kIgnoreBOM]) {
555-
// If the very first result in the stream is a BOM, and we are not
556-
// explicitly told to ignore it, then we discard it.
557-
if (result[0] === '\ufeff') {
558-
result = StringPrototypeSlice(result, 1);
559-
}
560-
this[kBOMSeen] = true;
561-
}
523+
return result;
562524

563-
return result;
564-
}
565525
}
566-
567-
return TextDecoder;
568526
}
569527

570528
// Mix in some shared properties.

0 commit comments

Comments
 (0)