Skip to content

Commit fb0ca32

Browse files
committed
lib: unify ICU and no-ICU TextDecoder
1 parent 70ec5c0 commit fb0ca32

File tree

1 file changed

+84
-137
lines changed

1 file changed

+84
-137
lines changed

lib/internal/encoding.js

Lines changed: 84 additions & 137 deletions
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,7 @@ const {
2323
ERR_INVALID_THIS,
2424
ERR_NO_ICU,
2525
} = require('internal/errors').codes;
26-
const kMethod = Symbol('method');
26+
const kSingleByte = Symbol('single-byte');
2727
const kHandle = Symbol('handle');
2828
const kFlags = Symbol('flags');
2929
const kEncoding = Symbol('encoding');
@@ -52,6 +52,8 @@ const {
5252
validateObject,
5353
kValidateObjectAllowObjectsAndNull,
5454
} = require('internal/validators');
55+
56+
const { hasIntl } = internalBinding('config');
5557
const binding = internalBinding('encoding_binding');
5658
const {
5759
encodeInto,
@@ -405,166 +407,111 @@ function parseInput(input) {
405407
}
406408
}
407409

408-
const TextDecoder =
409-
internalBinding('config').hasIntl ?
410-
makeTextDecoderICU() :
411-
makeTextDecoderJS();
412-
413-
function makeTextDecoderICU() {
414-
const {
415-
decode: _decode,
416-
getConverter,
417-
} = internalBinding('icu');
418-
419-
class TextDecoder {
420-
constructor(encoding = 'utf-8', options = kEmptyObject) {
421-
encoding = `${encoding}`;
422-
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
423-
424-
const enc = getEncodingFromLabel(encoding);
425-
if (enc === undefined)
426-
throw new ERR_ENCODING_NOT_SUPPORTED(encoding);
427-
428-
let flags = 0;
429-
if (options !== null) {
430-
flags |= options.fatal ? CONVERTER_FLAGS_FATAL : 0;
431-
flags |= options.ignoreBOM ? CONVERTER_FLAGS_IGNORE_BOM : 0;
432-
}
433-
434-
this[kDecoder] = true;
435-
this[kFlags] = flags;
436-
this[kEncoding] = enc;
437-
this[kIgnoreBOM] = Boolean(options?.ignoreBOM);
438-
this[kFatal] = Boolean(options?.fatal);
439-
// Only support fast path for UTF-8.
440-
this[kUTF8FastPath] = enc === 'utf-8';
441-
this[kHandle] = undefined;
442-
this[kMethod] = undefined;
443-
444-
if (isSinglebyteEncoding(this.encoding)) {
445-
this[kMethod] = createSinglebyteDecoder(this.encoding, this[kFatal]);
446-
} else if (!this[kUTF8FastPath]) {
447-
this.#prepareConverter();
448-
}
449-
}
450-
451-
#prepareConverter() {
452-
if (this[kHandle] !== undefined) return;
453-
let icuEncoding = this[kEncoding];
454-
if (icuEncoding === 'gbk') icuEncoding = 'gb18030'; // 10.1.1. GBK's decoder is gb18030's decoder
455-
const handle = getConverter(icuEncoding, this[kFlags]);
456-
if (handle === undefined)
457-
throw new ERR_ENCODING_NOT_SUPPORTED(this[kEncoding]);
458-
this[kHandle] = handle;
459-
}
410+
let icuDecode, icuGetConverter;
411+
if (hasIntl) {
412+
;({
413+
decode: icuDecode,
414+
getConverter: icuGetConverter,
415+
} = internalBinding('icu'));
416+
}
460417

461-
decode(input = empty, options = kEmptyObject) {
462-
validateDecoder(this);
463-
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
418+
const kBOMSeen = Symbol('BOM seen');
464419

465-
if (this[kMethod]) return this[kMethod](parseInput(input));
420+
let StringDecoder;
421+
function lazyStringDecoder() {
422+
if (StringDecoder === undefined)
423+
({ StringDecoder } = require('string_decoder'));
424+
return StringDecoder;
425+
}
466426

467-
this[kUTF8FastPath] &&= !(options?.stream);
427+
class TextDecoder {
428+
constructor(encoding = 'utf-8', options = kEmptyObject) {
429+
encoding = `${encoding}`;
430+
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
468431

469-
if (this[kUTF8FastPath]) {
470-
return decodeUTF8(input, this[kIgnoreBOM], this[kFatal]);
471-
}
432+
const enc = getEncodingFromLabel(encoding);
433+
if (enc === undefined)
434+
throw new ERR_ENCODING_NOT_SUPPORTED(encoding);
472435

473-
this.#prepareConverter();
474-
475-
let flags = 0;
476-
if (options !== null)
477-
flags |= options.stream ? 0 : CONVERTER_FLAGS_FLUSH;
436+
let flags = 0;
437+
if (options !== null) {
438+
flags |= options.fatal ? CONVERTER_FLAGS_FATAL : 0;
439+
flags |= options.ignoreBOM ? CONVERTER_FLAGS_IGNORE_BOM : 0;
440+
}
478441

479-
return _decode(this[kHandle], input, flags, this.encoding);
442+
this[kDecoder] = true;
443+
this[kFlags] = flags;
444+
this[kEncoding] = enc;
445+
this[kIgnoreBOM] = Boolean(options?.ignoreBOM);
446+
this[kFatal] = Boolean(options?.fatal);
447+
this[kUTF8FastPath] = false;
448+
this[kHandle] = undefined;
449+
this[kSingleByte] = undefined; // Does not care about streaming or BOM
450+
451+
if (enc === 'utf-8') {
452+
this[kUTF8FastPath] = true;
453+
} else if (isSinglebyteEncoding(enc)) {
454+
this[kSingleByte] = createSinglebyteDecoder(enc, this[kFatal]);
455+
} else {
456+
this.#prepareConverter(); // Need to throw early if we don't support the encoding
480457
}
481458
}
482459

483-
return TextDecoder;
484-
}
485-
486-
function makeTextDecoderJS() {
487-
let StringDecoder;
488-
function lazyStringDecoder() {
489-
if (StringDecoder === undefined)
490-
({ StringDecoder } = require('string_decoder'));
491-
return StringDecoder;
460+
#prepareConverter() {
461+
if (this[kHandle] !== undefined) return;
462+
if (hasIntl) {
463+
let icuEncoding = this[kEncoding];
464+
if (icuEncoding === 'gbk') icuEncoding = 'gb18030'; // 10.1.1. GBK's decoder is gb18030's decoder
465+
const handle = icuGetConverter(icuEncoding, this[kFlags]);
466+
if (handle === undefined)
467+
throw new ERR_ENCODING_NOT_SUPPORTED(this[kEncoding]);
468+
this[kHandle] = handle;
469+
} else if (this[kEncoding] === 'utf-8' || this[kEncoding] === 'utf-16le') {
470+
if (this[kFatal]) throw new ERR_NO_ICU('"fatal" option');
471+
this[kHandle] = new (lazyStringDecoder())(this[kEncoding]);
472+
this[kBOMSeen] = false;
473+
} else {
474+
throw new ERR_ENCODING_NOT_SUPPORTED(this[kEncoding]);
475+
}
492476
}
493477

494-
const kBOMSeen = Symbol('BOM seen');
495-
496-
function hasConverter(encoding) {
497-
return encoding === 'utf-8' || encoding === 'utf-16le';
498-
}
478+
decode(input = empty, options = kEmptyObject) {
479+
validateDecoder(this);
480+
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
499481

500-
class TextDecoder {
501-
constructor(encoding = 'utf-8', options = kEmptyObject) {
502-
encoding = `${encoding}`;
503-
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
482+
if (this[kSingleByte]) return this[kSingleByte](parseInput(input));
504483

505-
const enc = getEncodingFromLabel(encoding);
506-
if (enc === undefined)
507-
throw new ERR_ENCODING_NOT_SUPPORTED(encoding);
484+
const stream = options?.stream;
485+
if (this[kUTF8FastPath]) {
486+
if (!stream) return decodeUTF8(input, this[kIgnoreBOM], this[kFatal]);
487+
this[kUTF8FastPath] = false;
488+
}
508489

509-
let flags = 0;
510-
if (options !== null) {
511-
flags |= options.fatal ? CONVERTER_FLAGS_FATAL : 0;
512-
flags |= options.ignoreBOM ? CONVERTER_FLAGS_IGNORE_BOM : 0;
513-
}
490+
this.#prepareConverter();
514491

515-
this[kDecoder] = true;
516-
this[kFlags] = flags;
517-
this[kEncoding] = enc;
518-
this[kIgnoreBOM] = Boolean(options?.ignoreBOM);
519-
this[kFatal] = Boolean(options?.fatal);
520-
this[kBOMSeen] = false;
521-
this[kMethod] = undefined;
522-
523-
if (isSinglebyteEncoding(enc)) {
524-
this[kMethod] = createSinglebyteDecoder(enc, this[kFatal]);
525-
} else {
526-
if (!hasConverter(enc)) throw new ERR_ENCODING_NOT_SUPPORTED(encoding);
527-
if (this[kFatal]) throw new ERR_NO_ICU('"fatal" option');
528-
// StringDecoder will normalize WHATWG encoding to Node.js encoding.
529-
this[kHandle] = new (lazyStringDecoder())(enc);
530-
}
492+
if (hasIntl) {
493+
const flags = stream ? 0 : CONVERTER_FLAGS_FLUSH;
494+
return icuDecode(this[kHandle], input, flags, this[kEncoding]);
531495
}
532496

533-
decode(input = empty, options = kEmptyObject) {
534-
validateDecoder(this);
535-
input = parseInput(input);
536-
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
497+
input = parseInput(input);
537498

538-
if (this[kMethod]) return this[kMethod](input);
499+
let result = stream ? this[kHandle].write(input) : this[kHandle].end(input);
539500

540-
if (this[kFlags] & CONVERTER_FLAGS_FLUSH) {
541-
this[kBOMSeen] = false;
501+
if (result.length > 0 && !this[kBOMSeen] && !this[kIgnoreBOM]) {
502+
// If the very first result in the stream is a BOM, and we are not
503+
// explicitly told to ignore it, then we discard it.
504+
if (result[0] === '\ufeff') {
505+
result = StringPrototypeSlice(result, 1);
542506
}
507+
this[kBOMSeen] = true;
508+
}
543509

544-
if (options !== null && options.stream) {
545-
this[kFlags] &= ~CONVERTER_FLAGS_FLUSH;
546-
} else {
547-
this[kFlags] |= CONVERTER_FLAGS_FLUSH;
548-
}
510+
if (!stream) this[kBOMSeen] = false;
549511

550-
let result = this[kFlags] & CONVERTER_FLAGS_FLUSH ?
551-
this[kHandle].end(input) :
552-
this[kHandle].write(input);
553-
554-
if (result.length > 0 && !this[kBOMSeen] && !this[kIgnoreBOM]) {
555-
// If the very first result in the stream is a BOM, and we are not
556-
// explicitly told to ignore it, then we discard it.
557-
if (result[0] === '\ufeff') {
558-
result = StringPrototypeSlice(result, 1);
559-
}
560-
this[kBOMSeen] = true;
561-
}
512+
return result;
562513

563-
return result;
564-
}
565514
}
566-
567-
return TextDecoder;
568515
}
569516

570517
// Mix in some shared properties.

0 commit comments

Comments
 (0)