From e679ea5db02c9db5fe8c80ac4bbd442072f4a905 Mon Sep 17 00:00:00 2001 From: Jason Dent Date: Sun, 28 Dec 2025 09:32:07 +0100 Subject: [PATCH 1/4] refactor: A bit of refactoring of utf8 naming --- packages/cspell-trie-lib/perf/Utf8.perf.ts | 16 +- .../src/lib/TrieBlob/CharIndex.ts | 6 +- .../src/lib/TrieBlob/FastTrieBlobBuilder.ts | 27 ++-- .../src/lib/TrieBlob/TrieBlob.ts | 9 +- .../src/lib/TrieBlob/Utf8.test.ts | 32 ++-- .../cspell-trie-lib/src/lib/TrieBlob/Utf8.ts | 145 +++++++++++------- 6 files changed, 135 insertions(+), 100 deletions(-) diff --git a/packages/cspell-trie-lib/perf/Utf8.perf.ts b/packages/cspell-trie-lib/perf/Utf8.perf.ts index d5b892852a07..e609cde2287c 100644 --- a/packages/cspell-trie-lib/perf/Utf8.perf.ts +++ b/packages/cspell-trie-lib/perf/Utf8.perf.ts @@ -3,16 +3,16 @@ import { Buffer } from 'node:buffer'; import { suite } from 'perf-insight'; import { + decodeUtf8_32, + decodeUtf8_32Rev, decodeUtf8ByteStream, - decodeUtf8N_BE, - decodeUtf8N_LE, encodeCodePointsToUtf8Into, encodeTextToUtf8, encodeTextToUtf8_32, encodeTextToUtf8_32Into, encodeTextToUtf8Into, - encodeUtf8N_BE, - encodeUtf8N_LE, + encodeToUtf8_32, + encodeToUtf8_32Rev, textToCodePoints, } from '../src/lib/TrieBlob/Utf8.ts'; import { Utf8Encoder, Utf8Encoder2 } from '../src/lib/TrieBlob/Utf8Encoder.ts'; @@ -270,8 +270,8 @@ suite('Utf8 encode/decode', async (test) => { for (let i = iterations; i > 0; --i) { for (const char of chars) { const cp = char.codePointAt(0) || 0; - const u8 = encodeUtf8N_BE(cp); - const dcp = decodeUtf8N_BE(u8); + const u8 = encodeToUtf8_32(cp); + const dcp = decodeUtf8_32(u8); String.fromCodePoint(dcp); } } @@ -281,8 +281,8 @@ suite('Utf8 encode/decode', async (test) => { for (let i = iterations; i > 0; --i) { for (const char of chars) { const cp = char.codePointAt(0) || 0; - const u8 = encodeUtf8N_LE(cp); - const dcp = decodeUtf8N_LE(u8); + const u8 = encodeToUtf8_32Rev(cp); + const dcp = decodeUtf8_32Rev(u8); String.fromCodePoint(dcp); } } diff --git a/packages/cspell-trie-lib/src/lib/TrieBlob/CharIndex.ts b/packages/cspell-trie-lib/src/lib/TrieBlob/CharIndex.ts index a3cf74c5a0e5..2fafb89025d1 100644 --- a/packages/cspell-trie-lib/src/lib/TrieBlob/CharIndex.ts +++ b/packages/cspell-trie-lib/src/lib/TrieBlob/CharIndex.ts @@ -1,8 +1,8 @@ -import { encodeTextToUtf8, encodeUtf8N_BE, type Utf8BE32 } from './Utf8.ts'; +import { encodeTextToUtf8, encodeToUtf8_32, type Utf8_32 } from './Utf8.ts'; export type Utf8Seq = Readonly; -export type CharIndexMap = Map; +export type CharIndexMap = Map; export type RO_CharIndexMap = Readonly; @@ -103,7 +103,7 @@ export class CharIndexBuilder { } const nc = c.normalize('NFC'); this.charIndex.add(nc); - const utf8 = encodeUtf8N_BE(nc.codePointAt(0) || 0); + const utf8 = encodeToUtf8_32(nc.codePointAt(0) || 0); this.charIndexMap.set(c, utf8); this.charIndexMap.set(nc, utf8); this.charIndexMap.set(c.normalize('NFD'), utf8); diff --git a/packages/cspell-trie-lib/src/lib/TrieBlob/FastTrieBlobBuilder.ts b/packages/cspell-trie-lib/src/lib/TrieBlob/FastTrieBlobBuilder.ts index 3f70e9d14b3a..9fd7c33178ca 100644 --- a/packages/cspell-trie-lib/src/lib/TrieBlob/FastTrieBlobBuilder.ts +++ b/packages/cspell-trie-lib/src/lib/TrieBlob/FastTrieBlobBuilder.ts @@ -11,6 +11,7 @@ import type { FastTrieBlobBitMaskInfo } from './FastTrieBlobBitMaskInfo.ts'; import { FastTrieBlobInternals, sortNodes } from './FastTrieBlobInternals.ts'; import { resolveMap } from './resolveMap.ts'; import { TrieBlob } from './TrieBlob.ts'; +import { encodeToUtf8_32Rev } from './Utf8.ts'; type FastTrieBlobNode = number[]; @@ -106,24 +107,24 @@ export class FastTrieBlobBuilder implements TrieBuilder { let nodeIdx = 0; let depth = 0; - const insertChar = (char: string) => { + function insertChar(char: string) { if (!nodes[nodeIdx]) { refNodes.push(nodeIdx); } // console.warn('insertChar %o', { nodeIdx, depth, char }); const pDepth = depth; - const utf8Seq = this.letterToUtf8Seq(char); - for (let i = 0; i < utf8Seq.length; ++i) { - insertCharIndexes(utf8Seq[i], pDepth); + + for (let encoded = encodeToUtf8_32Rev(char.codePointAt(0) || 0); encoded; encoded >>>= 8) { + insertCharIndexes(encoded & 0xff, pDepth); } - }; + } /** * A single character can result in multiple nodes being created * because it takes multiple bytes to represent a character. * @param seq - partial character index. */ - const insertCharIndexes = (seq: number, pDepth: number) => { + function insertCharIndexes(seq: number, pDepth: number) { // console.warn('i %o at %o', char, nodeIdx); if (nodes[nodeIdx] && Object.isFrozen(nodes[nodeIdx])) { nodeIdx = nodes.push([...nodes[nodeIdx]]) - 1; @@ -148,9 +149,9 @@ export class FastTrieBlobBuilder implements TrieBuilder { stack[depth] = { nodeIdx, pos, pDepth }; } nodeIdx = childIdx; - }; + } - const markEOW = () => { + function markEOW() { // console.warn('$'); if (nodeIdx === eow) return; const node = nodes[nodeIdx]; @@ -164,9 +165,9 @@ export class FastTrieBlobBuilder implements TrieBuilder { node[0] |= NodeMaskEOW; } nodeIdx = eow; - }; + } - const reference = (refId: number) => { + function reference(refId: number) { const refNodeIdx = refNodes[refId]; assert(refNodeIdx !== undefined); // console.warn('r %o', { refId, nodeIdx, refNodeIdx, depth }); @@ -178,9 +179,9 @@ export class FastTrieBlobBuilder implements TrieBuilder { const pos = s.pos; const node = nodes[nodeIdx]; node[pos] = (refNodeIdx << NodeChildRefShift) | (node[pos] & LetterMask); - }; + } - const backStep = (num: number) => { + function backStep(num: number) { if (!num) return; // console.warn('<< %o', num); assert(num <= depth && num > 0); @@ -188,7 +189,7 @@ export class FastTrieBlobBuilder implements TrieBuilder { depth = stack[depth].pDepth; } nodeIdx = stack[depth + 1].nodeIdx; - }; + } const c: BuilderCursor = { insertChar, diff --git a/packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlob.ts b/packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlob.ts index 262651683f40..ae3e1048b451 100644 --- a/packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlob.ts +++ b/packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlob.ts @@ -6,7 +6,7 @@ import { endianness } from '../utils/endian.ts'; import { mergeOptionalWithDefaults } from '../utils/mergeOptionalWithDefaults.ts'; import { decodeTrieBlobToBTrie, encodeTrieBlobToBTrie } from './TrieBlobEncoder.ts'; import { TrieBlobInternals, TrieBlobIRoot } from './TrieBlobIRoot.ts'; -import { encodeTextToUtf8_32, Utf8Accumulator } from './Utf8.ts'; +import { encodeTextToUtf8_32Rev, Utf8Accumulator } from './Utf8.ts'; const NodeHeaderNumChildrenBits = 8 as const; const NodeHeaderNumChildrenShift = 0 as const; @@ -143,15 +143,12 @@ export class TrieBlob implements TrieData { const _nodes8 = this.#nodes8; for (; p.offset < p.text.length; ) { - const code = encodeTextToUtf8_32(p); - const nodes = _nodes; const nodes8 = _nodes8; let node = nodes[nodeIdx]; - let s = (p.bytes - 1) * 8; - for (let mask = 0xff << s; mask; mask >>>= 8, s -= 8) { - const charVal = (code & mask) >>> s; + for (let code = encodeTextToUtf8_32Rev(p); code; code >>>= 8) { + const charVal = code & 0xff; const count = node & 0xff; // TrieBlob.NodeMaskNumChildren const idx4 = nodeIdx << 2; // Binary search for the character in the child nodes. diff --git a/packages/cspell-trie-lib/src/lib/TrieBlob/Utf8.test.ts b/packages/cspell-trie-lib/src/lib/TrieBlob/Utf8.test.ts index a5e8e2ca6564..29f63ec639b5 100644 --- a/packages/cspell-trie-lib/src/lib/TrieBlob/Utf8.test.ts +++ b/packages/cspell-trie-lib/src/lib/TrieBlob/Utf8.test.ts @@ -4,14 +4,14 @@ import { describe, expect, test } from 'vitest'; import { decodeUtf8ByteStream, - decodeUtf8N_BE, - decodeUtf8N_BE_StreamToString, - decodeUtf8N_LE, + decodeUtf8_32, + decodeUtf8_32_StreamToString, + decodeUtf8_32Rev, encodeCodePointsToUtf8Into, encodeTextToUtf8, encodeTextToUtf8_32Into, - encodeUtf8N_BE, - encodeUtf8N_LE, + encodeToUtf8_32, + encodeToUtf8_32Rev, hex32, textToCodePoints, Utf8Accumulator, @@ -35,16 +35,16 @@ describe('Utf8 lib', () => { encoder.encodeInto(char, buf8); const expectedUtf8_BE = extractUtf8BE(view); - const utf8BE = encodeUtf8N_BE(codePoint); + const utf8BE = encodeToUtf8_32(codePoint); expect(utf8BE).toBe(expectedUtf8_BE); - expect(decodeUtf8N_BE(utf8BE)).toBe(codePoint); + expect(decodeUtf8_32(utf8BE)).toBe(codePoint); const expectedUtf8_LE = view.getUint32(0, true); - const utf8LE = encodeUtf8N_LE(codePoint); + const utf8LE = encodeToUtf8_32Rev(codePoint); expect(utf8LE).toBe(expectedUtf8_LE); - expect(decodeUtf8N_LE(utf8LE)).toBe(codePoint); + expect(decodeUtf8_32Rev(utf8LE)).toBe(codePoint); } }); @@ -79,14 +79,14 @@ describe('Utf8 lib', () => { ${'é'} | ${[0xc3a9]} ${'🇺🇸'} | ${[0xf09f_87ba, 0xf09f_87b8]} `('encodeUtf8N_BE $text', ({ text, expected }) => { - const utf = textToCodePoints(text).map((cp) => encodeUtf8N_BE(cp)); + const utf = textToCodePoints(text).map((cp) => encodeToUtf8_32(cp)); expect(utf).toEqual(expected); expect( String.fromCodePoint( ...utf .map((v) => v ^ ~1) // force it to be native .map((v) => v ^ ~1) - .map((c) => decodeUtf8N_BE(c)), + .map((c) => decodeUtf8_32(c)), ), ).toEqual(text); }); @@ -104,11 +104,11 @@ describe('Utf8 lib', () => { }); test('decodeUtf8N_BE invalid', () => { - expect(decodeUtf8N_BE(0xff)).toBe(0xfffd); + expect(decodeUtf8_32(0xff)).toBe(0xfffd); }); test('decodeUtf8N_LE invalid', () => { - expect(decodeUtf8N_LE(0xff)).toBe(0xfffd); + expect(decodeUtf8_32Rev(0xff)).toBe(0xfffd); }); test.each` @@ -119,14 +119,14 @@ describe('Utf8 lib', () => { ${'ë'} | ${[0xabc3]} ${'🇺🇸'} | ${[0xba87_9ff0, 0xb887_9ff0]} `('encodeUtf8N_LE $text', ({ text, expected }) => { - const utf = textToCodePoints(text).map((cp) => encodeUtf8N_LE(cp)); + const utf = textToCodePoints(text).map((cp) => encodeToUtf8_32Rev(cp)); expect(utf).toEqual(expected); expect( String.fromCodePoint( ...utf .map((v) => v ^ ~1) // force it to be native .map((v) => v ^ ~1) - .map((c) => decodeUtf8N_LE(c)), + .map((c) => decodeUtf8_32Rev(c)), ), ).toEqual(text); }); @@ -147,7 +147,7 @@ describe('Utf8 lib', () => { const len = encodeTextToUtf8_32Into(text, buffer); expect(buffer.length).toBe(len); - expect(decodeUtf8N_BE_StreamToString(buffer)).toBe(text); + expect(decodeUtf8_32_StreamToString(buffer)).toBe(text); }); }); diff --git a/packages/cspell-trie-lib/src/lib/TrieBlob/Utf8.ts b/packages/cspell-trie-lib/src/lib/TrieBlob/Utf8.ts index 8967098182c6..11a7c442485b 100644 --- a/packages/cspell-trie-lib/src/lib/TrieBlob/Utf8.ts +++ b/packages/cspell-trie-lib/src/lib/TrieBlob/Utf8.ts @@ -1,10 +1,44 @@ /* eslint-disable unicorn/prefer-code-point */ -/** A utf8 value represented as big endian 32bit number */ -export type Utf8BE32 = number; +/** + * A utf8 value represented as 32bit number + * + * Utf8_32 number are comparable in utf8 order. + * + * hightest byte lowest byte Code Point Range + * - 1 byte: 00000000 00000000 00000000 0xxxxxxx - 0x0000_0000 - 0x0000_007f + * - 2 bytes: 00000000 00000000 110xxxxx 10xxxxxx - 0x0000_0080 - 0x0000_07ff + * - 3 bytes: 00000000 1110xxxx 10xxxxxx 10xxxxxx - 0x0000_0800 - 0x0000_ffff + * - 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx - 0x0001_0000 - 0x001f_ffff + * + */ +export type Utf8_32 = number; -/** A utf8 value represented as little endian 32bit number */ -export type Utf8LE32 = number; +/** + * A utf8 value represented as little endian 32bit number + * + * These numbers DO NOT sort into the correct order for utf8. + * + * hightest byte lowest byte Code Point Range + * - 1 byte: 00000000 00000000 00000000 0xxxxxxx - 0x0000_0000 - 0x0000_007f + * - 2 bytes: 00000000 00000000 10xxxxxx 110xxxxx - 0x0000_0080 - 0x0000_07ff + * - 3 bytes: 00000000 10xxxxxx 10xxxxxx 1110xxxx - 0x0000_0800 - 0x0000_ffff + * - 4 bytes: 10xxxxxx 10xxxxxx 10xxxxxx 11110xxx - 0x0001_0000 - 0x001f_ffff + * + * This number is useful when emitting code points to a byte stream: + * + * Example: + * ```ts + * for (const letter of text) { + * const codePoint = letter.codePointAt(0) || 0; + * for (let utf8_32Rev = encodeToUtf8_32Rev(codePoint); utf8_32Rev !== 0; utf8_32Rev >>>= 8) { + * const byte = utf8_32Rev & 0xff; + * emit(byte); // write byte to stream + * } + * } + * ``` + */ +export type Utf8_32Rev = number; export type CodePoint = number; @@ -21,7 +55,7 @@ export type CodePoint = number; * @param code - the code point to encode * @returns number containing the utf8 value. */ -export function encodeUtf8N_BE(code: CodePoint): Utf8BE32 { +export function encodeToUtf8_32(code: CodePoint): Utf8_32 { if (code < 0x80) { return code; } @@ -36,7 +70,7 @@ export function encodeUtf8N_BE(code: CodePoint): Utf8BE32 { ); } -export function decodeUtf8N_BE(utf8: Utf8BE32): CodePoint { +export function decodeUtf8_32(utf8: Utf8_32): CodePoint { if (utf8 >= 0 && utf8 < 0x80) { return utf8; } @@ -52,14 +86,14 @@ export function decodeUtf8N_BE(utf8: Utf8BE32): CodePoint { return 0xfffd; } -export function* decodeUtf8N_BE_Stream(utf8s: Iterable): Iterable { +export function* decodeUtf8_32_Stream(utf8s: Iterable): Iterable { for (const utf8 of utf8s) { - yield decodeUtf8N_BE(utf8); + yield decodeUtf8_32(utf8); } } -export function decodeUtf8N_BE_StreamToString(utf8s: Iterable): string { - return String.fromCodePoint(...decodeUtf8N_BE_Stream(utf8s)); +export function decodeUtf8_32_StreamToString(utf8s: Iterable): string { + return String.fromCodePoint(...decodeUtf8_32_Stream(utf8s)); } /** @@ -76,7 +110,7 @@ export function decodeUtf8N_BE_StreamToString(utf8s: Iterable): string * @param code - the code point to encode * @returns number containing the utf8 value. */ -export function encodeUtf8N_LE(code: CodePoint): Utf8LE32 { +export function encodeToUtf8_32Rev(code: CodePoint): Utf8_32Rev { if (code < 0x80) { return code; } @@ -92,7 +126,7 @@ export function encodeUtf8N_LE(code: CodePoint): Utf8LE32 { ); } -export function decodeUtf8N_LE(utf8: Utf8LE32): CodePoint { +export function decodeUtf8_32Rev(utf8: Utf8_32Rev): CodePoint { if (utf8 < 0) utf8 = 0x1_0000_0000 + utf8; if (utf8 < 0x80) { @@ -233,41 +267,7 @@ export function encodeUtf8into(code: CodePoint, into: Array | Uint8Array return 4; } -export function encodeTextToUtf8Into(text: string, into: Array | Uint8Array, offset = 0): number { - let i = offset; - const len = text.length; - for (let j = 0; j < len; j++) { - let code = text.charCodeAt(j); - code = (code & 0xf800) === 0xd800 ? text.codePointAt(j++) || 0 : code; - if (code < 0x80) { - into[i++] = code; - continue; - } - if (code < 0x800) { - const u = 0xc080 | ((code & 0x7c0) << 2) | (code & 0x3f); - into[i++] = u >>> 8; - into[i++] = u & 0xff; - continue; - } - if (code < 0x1_0000) { - const u = 0xe0_8080 | ((code & 0xf000) << 4) | ((code & 0x0fc0) << 2) | (code & 0x3f); - into[i++] = u >>> 16; - into[i++] = (u >>> 8) & 0xff; - into[i++] = u & 0xff; - continue; - } - const u = - 0xf080_8080 | - (((code & 0x1c_0000) << 6) | ((code & 0x03_f000) << 4) | ((code & 0x0fc0) << 2) | (code & 0x3f)); - into[i++] = (u >>> 24) & 0x0ff; - into[i++] = (u >>> 16) & 0xff; - into[i++] = (u >>> 8) & 0xff; - into[i++] = u & 0xff; - } - return i - offset; -} - -export function encodeTextToUtf8_32Into(text: string, into: Utf8BE32[]): number { +export function encodeTextToUtf8_32Into(text: string, into: Utf8_32[]): number { const len = text.length; let i = 0; for (let p = { text, offset: 0, bytes: 0 }; p.offset < len; ) { @@ -279,10 +279,13 @@ export function encodeTextToUtf8_32Into(text: string, into: Utf8BE32[]): number export interface TextOffset { text: string; offset: number; +} + +export interface TextOffsetWithByteCount extends TextOffset { bytes?: number; } -export function encodeTextToUtf8_32(offset: TextOffset): Utf8BE32 { +export function encodeTextToUtf8_32(offset: TextOffsetWithByteCount): Utf8_32 { const text = offset.text; let code = text.charCodeAt(offset.offset); code = (code & 0xf800) === 0xd800 ? text.codePointAt(offset.offset++) || 0 : code; @@ -306,13 +309,47 @@ export function encodeTextToUtf8_32(offset: TextOffset): Utf8BE32 { ); } -export function encodeTextToUtf8(text: string): number[] { - const array: number[] = new Array(text.length); - const len = encodeTextToUtf8Into(text, array); - if (array.length !== len) { - array.length = len; +export function encodeTextToUtf8_32Rev(offset: TextOffset): Utf8_32Rev { + const text = offset.text; + let code = text.charCodeAt(offset.offset); + code = (code & 0xf800) === 0xd800 ? text.codePointAt(offset.offset++) || 0 : code; + offset.offset++; + + if (code < 0x80) { + return code; } - return array; + if (code < 0x800) { + return 0x80c0 | ((code & 0x7c0) >> 6) | ((code & 0x3f) << 8); + } + if (code < 0x1_0000) { + return 0x80_80e0 | ((code & 0xf000) >>> 12) | ((code & 0xfc0) << 2) | ((code & 0x3f) << 16); + } + return ( + 0x8080_80f0 + + (((code & 0x1c_0000) >>> 18) | ((code & 0x03_f000) >>> 4) | ((code & 0xfc0) << 10) | ((code & 0x3f) << 24)) + ); +} + +export function encodeTextToUtf8Into(text: string, into: Array | Uint8Array, offset = 0): number { + const t = { text, offset: 0 }; + + let i = offset; + + for (; t.offset < text.length; ) { + const code = encodeTextToUtf8_32Rev(t); + for (let utf8_32Rev = code; utf8_32Rev !== 0; utf8_32Rev >>>= 8) { + into[i++] = utf8_32Rev & 0xff; + } + } + return i; +} + +export function encodeTextToUtf8(text: string): number[] { + const into: number[] = new Array(text.length); + + encodeTextToUtf8Into(text, into); + + return into; } export function textToCodePoints(text: string): CodePoint[] { From 73d4f41dce0492574dfd833fba2d3c6501478d6a Mon Sep 17 00:00:00 2001 From: "autofix-ci[bot]" <114827586+autofix-ci[bot]@users.noreply.github.com> Date: Sun, 28 Dec 2025 08:35:51 +0000 Subject: [PATCH 2/4] [autofix.ci] apply automated fixes --- packages/cspell-trie-lib/src/lib/TrieBlob/Utf8.test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/cspell-trie-lib/src/lib/TrieBlob/Utf8.test.ts b/packages/cspell-trie-lib/src/lib/TrieBlob/Utf8.test.ts index 29f63ec639b5..09e54792800c 100644 --- a/packages/cspell-trie-lib/src/lib/TrieBlob/Utf8.test.ts +++ b/packages/cspell-trie-lib/src/lib/TrieBlob/Utf8.test.ts @@ -3,10 +3,10 @@ import assert from 'node:assert'; import { describe, expect, test } from 'vitest'; import { - decodeUtf8ByteStream, decodeUtf8_32, decodeUtf8_32_StreamToString, decodeUtf8_32Rev, + decodeUtf8ByteStream, encodeCodePointsToUtf8Into, encodeTextToUtf8, encodeTextToUtf8_32Into, From 768a8b71f5e572e1245a6c9dae4c66fca119e25c Mon Sep 17 00:00:00 2001 From: Jason Dent Date: Mon, 29 Dec 2025 10:01:52 +0100 Subject: [PATCH 3/4] Apply suggestion from @Copilot Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Signed-off-by: Jason Dent --- packages/cspell-trie-lib/src/lib/TrieBlob/Utf8.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/cspell-trie-lib/src/lib/TrieBlob/Utf8.ts b/packages/cspell-trie-lib/src/lib/TrieBlob/Utf8.ts index 11a7c442485b..4b6404a02f25 100644 --- a/packages/cspell-trie-lib/src/lib/TrieBlob/Utf8.ts +++ b/packages/cspell-trie-lib/src/lib/TrieBlob/Utf8.ts @@ -341,7 +341,7 @@ export function encodeTextToUtf8Into(text: string, into: Array | Uint8Ar into[i++] = utf8_32Rev & 0xff; } } - return i; + return i - offset; } export function encodeTextToUtf8(text: string): number[] { From 6fc771c2cdadec927ec364573ca134041ebceae3 Mon Sep 17 00:00:00 2001 From: Copilot <198982749+Copilot@users.noreply.github.com> Date: Mon, 29 Dec 2025 10:08:30 +0100 Subject: [PATCH 4/4] fix: Correct spelling 'hightest' to 'highest' in Utf8.ts documentation (#8244) Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: Jason3S <3740137+Jason3S@users.noreply.github.com> --- packages/cspell-trie-lib/src/lib/TrieBlob/Utf8.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/cspell-trie-lib/src/lib/TrieBlob/Utf8.ts b/packages/cspell-trie-lib/src/lib/TrieBlob/Utf8.ts index 4b6404a02f25..3a481e3a5dc1 100644 --- a/packages/cspell-trie-lib/src/lib/TrieBlob/Utf8.ts +++ b/packages/cspell-trie-lib/src/lib/TrieBlob/Utf8.ts @@ -5,7 +5,7 @@ * * Utf8_32 number are comparable in utf8 order. * - * hightest byte lowest byte Code Point Range + * highest byte lowest byte Code Point Range * - 1 byte: 00000000 00000000 00000000 0xxxxxxx - 0x0000_0000 - 0x0000_007f * - 2 bytes: 00000000 00000000 110xxxxx 10xxxxxx - 0x0000_0080 - 0x0000_07ff * - 3 bytes: 00000000 1110xxxx 10xxxxxx 10xxxxxx - 0x0000_0800 - 0x0000_ffff @@ -19,7 +19,7 @@ export type Utf8_32 = number; * * These numbers DO NOT sort into the correct order for utf8. * - * hightest byte lowest byte Code Point Range + * highest byte lowest byte Code Point Range * - 1 byte: 00000000 00000000 00000000 0xxxxxxx - 0x0000_0000 - 0x0000_007f * - 2 bytes: 00000000 00000000 10xxxxxx 110xxxxx - 0x0000_0080 - 0x0000_07ff * - 3 bytes: 00000000 10xxxxxx 10xxxxxx 1110xxxx - 0x0000_0800 - 0x0000_ffff