From e679ea5db02c9db5fe8c80ac4bbd442072f4a905 Mon Sep 17 00:00:00 2001
From: Jason Dent <jason@streetsidesoftware.nl>
Date: Sun, 28 Dec 2025 09:32:07 +0100
Subject: [PATCH 1/4] refactor: A bit of refactoring of utf8 naming

---
 packages/cspell-trie-lib/perf/Utf8.perf.ts    |  16 +-
 .../src/lib/TrieBlob/CharIndex.ts             |   6 +-
 .../src/lib/TrieBlob/FastTrieBlobBuilder.ts   |  27 ++--
 .../src/lib/TrieBlob/TrieBlob.ts              |   9 +-
 .../src/lib/TrieBlob/Utf8.test.ts             |  32 ++--
 .../cspell-trie-lib/src/lib/TrieBlob/Utf8.ts  | 145 +++++++++++-------
 6 files changed, 135 insertions(+), 100 deletions(-)
diff --git a/packages/cspell-trie-lib/perf/Utf8.perf.ts b/packages/cspell-trie-lib/perf/Utf8.perf.ts
index d5b892852a07..e609cde2287c 100644
--- a/packages/cspell-trie-lib/perf/Utf8.perf.ts
+++ b/packages/cspell-trie-lib/perf/Utf8.perf.ts
@@ -3,16 +3,16 @@ import { Buffer } from 'node:buffer';
 import { suite } from 'perf-insight';
 
 import {
+    decodeUtf8_32,
+    decodeUtf8_32Rev,
     decodeUtf8ByteStream,
-    decodeUtf8N_BE,
-    decodeUtf8N_LE,
     encodeCodePointsToUtf8Into,
     encodeTextToUtf8,
     encodeTextToUtf8_32,
     encodeTextToUtf8_32Into,
     encodeTextToUtf8Into,
-    encodeUtf8N_BE,
-    encodeUtf8N_LE,
+    encodeToUtf8_32,
+    encodeToUtf8_32Rev,
     textToCodePoints,
 } from '../src/lib/TrieBlob/Utf8.ts';
 import { Utf8Encoder, Utf8Encoder2 } from '../src/lib/TrieBlob/Utf8Encoder.ts';
@@ -270,8 +270,8 @@ suite('Utf8 encode/decode', async (test) => {
         for (let i = iterations; i > 0; --i) {
             for (const char of chars) {
                 const cp = char.codePointAt(0) || 0;
-                const u8 = encodeUtf8N_BE(cp);
-                const dcp = decodeUtf8N_BE(u8);
+                const u8 = encodeToUtf8_32(cp);
+                const dcp = decodeUtf8_32(u8);
                 String.fromCodePoint(dcp);
             }
         }
@@ -281,8 +281,8 @@ suite('Utf8 encode/decode', async (test) => {
         for (let i = iterations; i > 0; --i) {
             for (const char of chars) {
                 const cp = char.codePointAt(0) || 0;
-                const u8 = encodeUtf8N_LE(cp);
-                const dcp = decodeUtf8N_LE(u8);
+                const u8 = encodeToUtf8_32Rev(cp);
+                const dcp = decodeUtf8_32Rev(u8);
                 String.fromCodePoint(dcp);
             }
         }
diff --git a/packages/cspell-trie-lib/src/lib/TrieBlob/CharIndex.ts b/packages/cspell-trie-lib/src/lib/TrieBlob/CharIndex.ts
index a3cf74c5a0e5..2fafb89025d1 100644
--- a/packages/cspell-trie-lib/src/lib/TrieBlob/CharIndex.ts
+++ b/packages/cspell-trie-lib/src/lib/TrieBlob/CharIndex.ts
@@ -1,8 +1,8 @@
-import { encodeTextToUtf8, encodeUtf8N_BE, type Utf8BE32 } from './Utf8.ts';
+import { encodeTextToUtf8, encodeToUtf8_32, type Utf8_32 } from './Utf8.ts';
 
 export type Utf8Seq = Readonly<number[]>;
 
-export type CharIndexMap = Map<string, Utf8BE32>;
+export type CharIndexMap = Map<string, Utf8_32>;
 
 export type RO_CharIndexMap = Readonly<CharIndexMap>;
 
@@ -103,7 +103,7 @@ export class CharIndexBuilder {
         }
         const nc = c.normalize('NFC');
         this.charIndex.add(nc);
-        const utf8 = encodeUtf8N_BE(nc.codePointAt(0) || 0);
+        const utf8 = encodeToUtf8_32(nc.codePointAt(0) || 0);
         this.charIndexMap.set(c, utf8);
         this.charIndexMap.set(nc, utf8);
         this.charIndexMap.set(c.normalize('NFD'), utf8);
diff --git a/packages/cspell-trie-lib/src/lib/TrieBlob/FastTrieBlobBuilder.ts b/packages/cspell-trie-lib/src/lib/TrieBlob/FastTrieBlobBuilder.ts
index 3f70e9d14b3a..9fd7c33178ca 100644
--- a/packages/cspell-trie-lib/src/lib/TrieBlob/FastTrieBlobBuilder.ts
+++ b/packages/cspell-trie-lib/src/lib/TrieBlob/FastTrieBlobBuilder.ts
@@ -11,6 +11,7 @@ import type { FastTrieBlobBitMaskInfo } from './FastTrieBlobBitMaskInfo.ts';
 import { FastTrieBlobInternals, sortNodes } from './FastTrieBlobInternals.ts';
 import { resolveMap } from './resolveMap.ts';
 import { TrieBlob } from './TrieBlob.ts';
+import { encodeToUtf8_32Rev } from './Utf8.ts';
 
 type FastTrieBlobNode = number[];
 
@@ -106,24 +107,24 @@ export class FastTrieBlobBuilder implements TrieBuilder<FastTrieBlob> {
         let nodeIdx = 0;
         let depth = 0;
 
-        const insertChar = (char: string) => {
+        function insertChar(char: string) {
             if (!nodes[nodeIdx]) {
                 refNodes.push(nodeIdx);
             }
             // console.warn('insertChar %o', { nodeIdx, depth, char });
             const pDepth = depth;
-            const utf8Seq = this.letterToUtf8Seq(char);
-            for (let i = 0; i < utf8Seq.length; ++i) {
-                insertCharIndexes(utf8Seq[i], pDepth);
+
+            for (let encoded = encodeToUtf8_32Rev(char.codePointAt(0) || 0); encoded; encoded >>>= 8) {
+                insertCharIndexes(encoded & 0xff, pDepth);
             }
-        };
+        }
 
         /**
          * A single character can result in multiple nodes being created
          * because it takes multiple bytes to represent a character.
          * @param seq - partial character index.
          */
-        const insertCharIndexes = (seq: number, pDepth: number) => {
+        function insertCharIndexes(seq: number, pDepth: number) {
             // console.warn('i %o at %o', char, nodeIdx);
             if (nodes[nodeIdx] && Object.isFrozen(nodes[nodeIdx])) {
                 nodeIdx = nodes.push([...nodes[nodeIdx]]) - 1;
@@ -148,9 +149,9 @@ export class FastTrieBlobBuilder implements TrieBuilder<FastTrieBlob> {
                 stack[depth] = { nodeIdx, pos, pDepth };
             }
             nodeIdx = childIdx;
-        };
+        }
 
-        const markEOW = () => {
+        function markEOW() {
             // console.warn('$');
             if (nodeIdx === eow) return;
             const node = nodes[nodeIdx];
@@ -164,9 +165,9 @@ export class FastTrieBlobBuilder implements TrieBuilder<FastTrieBlob> {
                 node[0] |= NodeMaskEOW;
             }
             nodeIdx = eow;
-        };
+        }
 
-        const reference = (refId: number) => {
+        function reference(refId: number) {
             const refNodeIdx = refNodes[refId];
             assert(refNodeIdx !== undefined);
             // console.warn('r %o', { refId, nodeIdx, refNodeIdx, depth });
@@ -178,9 +179,9 @@ export class FastTrieBlobBuilder implements TrieBuilder<FastTrieBlob> {
             const pos = s.pos;
             const node = nodes[nodeIdx];
             node[pos] = (refNodeIdx << NodeChildRefShift) | (node[pos] & LetterMask);
-        };
+        }
 
-        const backStep = (num: number) => {
+        function backStep(num: number) {
             if (!num) return;
             // console.warn('<< %o', num);
             assert(num <= depth && num > 0);
@@ -188,7 +189,7 @@ export class FastTrieBlobBuilder implements TrieBuilder<FastTrieBlob> {
                 depth = stack[depth].pDepth;
             }
             nodeIdx = stack[depth + 1].nodeIdx;
-        };
+        }
 
         const c: BuilderCursor = {
             insertChar,
diff --git a/packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlob.ts b/packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlob.ts
index 262651683f40..ae3e1048b451 100644
--- a/packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlob.ts
+++ b/packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlob.ts
@@ -6,7 +6,7 @@ import { endianness } from '../utils/endian.ts';
 import { mergeOptionalWithDefaults } from '../utils/mergeOptionalWithDefaults.ts';
 import { decodeTrieBlobToBTrie, encodeTrieBlobToBTrie } from './TrieBlobEncoder.ts';
 import { TrieBlobInternals, TrieBlobIRoot } from './TrieBlobIRoot.ts';
-import { encodeTextToUtf8_32, Utf8Accumulator } from './Utf8.ts';
+import { encodeTextToUtf8_32Rev, Utf8Accumulator } from './Utf8.ts';
 
 const NodeHeaderNumChildrenBits = 8 as const;
 const NodeHeaderNumChildrenShift = 0 as const;
@@ -143,15 +143,12 @@ export class TrieBlob implements TrieData {
         const _nodes8 = this.#nodes8;
 
         for (; p.offset < p.text.length; ) {
-            const code = encodeTextToUtf8_32(p);
-
             const nodes = _nodes;
             const nodes8 = _nodes8;
             let node = nodes[nodeIdx];
-            let s = (p.bytes - 1) * 8;
 
-            for (let mask = 0xff << s; mask; mask >>>= 8, s -= 8) {
-                const charVal = (code & mask) >>> s;
+            for (let code = encodeTextToUtf8_32Rev(p); code; code >>>= 8) {
+                const charVal = code & 0xff;
                 const count = node & 0xff; // TrieBlob.NodeMaskNumChildren
                 const idx4 = nodeIdx << 2;
                 // Binary search for the character in the child nodes.
diff --git a/packages/cspell-trie-lib/src/lib/TrieBlob/Utf8.test.ts b/packages/cspell-trie-lib/src/lib/TrieBlob/Utf8.test.ts
index a5e8e2ca6564..29f63ec639b5 100644
--- a/packages/cspell-trie-lib/src/lib/TrieBlob/Utf8.test.ts
+++ b/packages/cspell-trie-lib/src/lib/TrieBlob/Utf8.test.ts
@@ -4,14 +4,14 @@ import { describe, expect, test } from 'vitest';
 
 import {
     decodeUtf8ByteStream,
-    decodeUtf8N_BE,
-    decodeUtf8N_BE_StreamToString,
-    decodeUtf8N_LE,
+    decodeUtf8_32,
+    decodeUtf8_32_StreamToString,
+    decodeUtf8_32Rev,
     encodeCodePointsToUtf8Into,
     encodeTextToUtf8,
     encodeTextToUtf8_32Into,
-    encodeUtf8N_BE,
-    encodeUtf8N_LE,
+    encodeToUtf8_32,
+    encodeToUtf8_32Rev,
     hex32,
     textToCodePoints,
     Utf8Accumulator,
@@ -35,16 +35,16 @@ describe('Utf8 lib', () => {
             encoder.encodeInto(char, buf8);
             const expectedUtf8_BE = extractUtf8BE(view);
 
-            const utf8BE = encodeUtf8N_BE(codePoint);
+            const utf8BE = encodeToUtf8_32(codePoint);
 
             expect(utf8BE).toBe(expectedUtf8_BE);
-            expect(decodeUtf8N_BE(utf8BE)).toBe(codePoint);
+            expect(decodeUtf8_32(utf8BE)).toBe(codePoint);
 
             const expectedUtf8_LE = view.getUint32(0, true);
-            const utf8LE = encodeUtf8N_LE(codePoint);
+            const utf8LE = encodeToUtf8_32Rev(codePoint);
 
             expect(utf8LE).toBe(expectedUtf8_LE);
-            expect(decodeUtf8N_LE(utf8LE)).toBe(codePoint);
+            expect(decodeUtf8_32Rev(utf8LE)).toBe(codePoint);
         }
     });
 
@@ -79,14 +79,14 @@ describe('Utf8 lib', () => {
         ${'é'}  | ${[0xc3a9]}
         ${'🇺🇸'} | ${[0xf09f_87ba, 0xf09f_87b8]}
     `('encodeUtf8N_BE $text', ({ text, expected }) => {
-        const utf = textToCodePoints(text).map((cp) => encodeUtf8N_BE(cp));
+        const utf = textToCodePoints(text).map((cp) => encodeToUtf8_32(cp));
         expect(utf).toEqual(expected);
         expect(
             String.fromCodePoint(
                 ...utf
                     .map((v) => v ^ ~1) // force it to be native
                     .map((v) => v ^ ~1)
-                    .map((c) => decodeUtf8N_BE(c)),
+                    .map((c) => decodeUtf8_32(c)),
             ),
         ).toEqual(text);
     });
@@ -104,11 +104,11 @@ describe('Utf8 lib', () => {
     });
 
     test('decodeUtf8N_BE invalid', () => {
-        expect(decodeUtf8N_BE(0xff)).toBe(0xfffd);
+        expect(decodeUtf8_32(0xff)).toBe(0xfffd);
     });
 
     test('decodeUtf8N_LE invalid', () => {
-        expect(decodeUtf8N_LE(0xff)).toBe(0xfffd);
+        expect(decodeUtf8_32Rev(0xff)).toBe(0xfffd);
     });
 
     test.each`
@@ -119,14 +119,14 @@ describe('Utf8 lib', () => {
         ${'ë'}  | ${[0xabc3]}
         ${'🇺🇸'} | ${[0xba87_9ff0, 0xb887_9ff0]}
     `('encodeUtf8N_LE $text', ({ text, expected }) => {
-        const utf = textToCodePoints(text).map((cp) => encodeUtf8N_LE(cp));
+        const utf = textToCodePoints(text).map((cp) => encodeToUtf8_32Rev(cp));
         expect(utf).toEqual(expected);
         expect(
             String.fromCodePoint(
                 ...utf
                     .map((v) => v ^ ~1) // force it to be native
                     .map((v) => v ^ ~1)
-                    .map((c) => decodeUtf8N_LE(c)),
+                    .map((c) => decodeUtf8_32Rev(c)),
             ),
         ).toEqual(text);
     });
@@ -147,7 +147,7 @@ describe('Utf8 lib', () => {
         const len = encodeTextToUtf8_32Into(text, buffer);
 
         expect(buffer.length).toBe(len);
-        expect(decodeUtf8N_BE_StreamToString(buffer)).toBe(text);
+        expect(decodeUtf8_32_StreamToString(buffer)).toBe(text);
     });
 });
 
diff --git a/packages/cspell-trie-lib/src/lib/TrieBlob/Utf8.ts b/packages/cspell-trie-lib/src/lib/TrieBlob/Utf8.ts
index 8967098182c6..11a7c442485b 100644
--- a/packages/cspell-trie-lib/src/lib/TrieBlob/Utf8.ts
+++ b/packages/cspell-trie-lib/src/lib/TrieBlob/Utf8.ts
@@ -1,10 +1,44 @@
 /* eslint-disable unicorn/prefer-code-point */
 
-/** A utf8 value represented as big endian 32bit number */
-export type Utf8BE32 = number;
+/**
+ * A utf8 value represented as a 32-bit number
+ *
+ * Utf8_32 number are comparable in utf8 order.
+ *
+ *            hightest byte           lowest byte   Code Point Range
+ * - 1 byte:  00000000 00000000 00000000 0xxxxxxx - 0x0000_0000 - 0x0000_007f
+ * - 2 bytes: 00000000 00000000 110xxxxx 10xxxxxx - 0x0000_0080 - 0x0000_07ff
+ * - 3 bytes: 00000000 1110xxxx 10xxxxxx 10xxxxxx - 0x0000_0800 - 0x0000_ffff
+ * - 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx - 0x0001_0000 - 0x001f_ffff
+ *
+ */
+export type Utf8_32 = number;
 
-/** A utf8 value represented as little endian 32bit number */
-export type Utf8LE32 = number;
+/**
+ * A utf8 value represented as a little-endian 32-bit number
+ *
+ * These numbers DO NOT sort into the correct order for utf8.
+ *
+ *            hightest byte           lowest byte   Code Point Range
+ * - 1 byte:  00000000 00000000 00000000 0xxxxxxx - 0x0000_0000 - 0x0000_007f
+ * - 2 bytes: 00000000 00000000 10xxxxxx 110xxxxx - 0x0000_0080 - 0x0000_07ff
+ * - 3 bytes: 00000000 10xxxxxx 10xxxxxx 1110xxxx - 0x0000_0800 - 0x0000_ffff
+ * - 4 bytes: 10xxxxxx 10xxxxxx 10xxxxxx 11110xxx - 0x0001_0000 - 0x001f_ffff
+ *
+ * This number is useful when emitting code points to a byte stream:
+ *
+ * Example:
+ * ```ts
+ * for (const letter of text) {
+ *   const codePoint = letter.codePointAt(0) || 0;
+ *   for (let utf8_32Rev = encodeToUtf8_32Rev(codePoint); utf8_32Rev !== 0; utf8_32Rev >>>= 8) {
+ *      const byte = utf8_32Rev & 0xff;
+ *      emit(byte); // write byte to stream
+ *   }
+ * }
+ * ```
+ */
+export type Utf8_32Rev = number;
 
 export type CodePoint = number;
 
@@ -21,7 +55,7 @@ export type CodePoint = number;
  * @param code - the code point to encode
  * @returns number containing the utf8 value.
  */
-export function encodeUtf8N_BE(code: CodePoint): Utf8BE32 {
+export function encodeToUtf8_32(code: CodePoint): Utf8_32 {
     if (code < 0x80) {
         return code;
     }
@@ -36,7 +70,7 @@ export function encodeUtf8N_BE(code: CodePoint): Utf8BE32 {
     );
 }
 
-export function decodeUtf8N_BE(utf8: Utf8BE32): CodePoint {
+export function decodeUtf8_32(utf8: Utf8_32): CodePoint {
     if (utf8 >= 0 && utf8 < 0x80) {
         return utf8;
     }
@@ -52,14 +86,14 @@ export function decodeUtf8N_BE(utf8: Utf8BE32): CodePoint {
     return 0xfffd;
 }
 
-export function* decodeUtf8N_BE_Stream(utf8s: Iterable<Utf8BE32>): Iterable<CodePoint> {
+export function* decodeUtf8_32_Stream(utf8s: Iterable<Utf8_32>): Iterable<CodePoint> {
     for (const utf8 of utf8s) {
-        yield decodeUtf8N_BE(utf8);
+        yield decodeUtf8_32(utf8);
     }
 }
 
-export function decodeUtf8N_BE_StreamToString(utf8s: Iterable<Utf8BE32>): string {
-    return String.fromCodePoint(...decodeUtf8N_BE_Stream(utf8s));
+export function decodeUtf8_32_StreamToString(utf8s: Iterable<Utf8_32>): string {
+    return String.fromCodePoint(...decodeUtf8_32_Stream(utf8s));
 }
 
 /**
@@ -76,7 +110,7 @@ export function decodeUtf8N_BE_StreamToString(utf8s: Iterable<Utf8BE32>): string
  * @param code - the code point to encode
  * @returns number containing the utf8 value.
  */
-export function encodeUtf8N_LE(code: CodePoint): Utf8LE32 {
+export function encodeToUtf8_32Rev(code: CodePoint): Utf8_32Rev {
     if (code < 0x80) {
         return code;
     }
@@ -92,7 +126,7 @@ export function encodeUtf8N_LE(code: CodePoint): Utf8LE32 {
     );
 }
 
-export function decodeUtf8N_LE(utf8: Utf8LE32): CodePoint {
+export function decodeUtf8_32Rev(utf8: Utf8_32Rev): CodePoint {
     if (utf8 < 0) utf8 = 0x1_0000_0000 + utf8;
 
     if (utf8 < 0x80) {
@@ -233,41 +267,7 @@ export function encodeUtf8into(code: CodePoint, into: Array<number> | Uint8Array
     return 4;
 }
 
-export function encodeTextToUtf8Into(text: string, into: Array<number> | Uint8Array, offset = 0): number {
-    let i = offset;
-    const len = text.length;
-    for (let j = 0; j < len; j++) {
-        let code = text.charCodeAt(j);
-        code = (code & 0xf800) === 0xd800 ? text.codePointAt(j++) || 0 : code;
-        if (code < 0x80) {
-            into[i++] = code;
-            continue;
-        }
-        if (code < 0x800) {
-            const u = 0xc080 | ((code & 0x7c0) << 2) | (code & 0x3f);
-            into[i++] = u >>> 8;
-            into[i++] = u & 0xff;
-            continue;
-        }
-        if (code < 0x1_0000) {
-            const u = 0xe0_8080 | ((code & 0xf000) << 4) | ((code & 0x0fc0) << 2) | (code & 0x3f);
-            into[i++] = u >>> 16;
-            into[i++] = (u >>> 8) & 0xff;
-            into[i++] = u & 0xff;
-            continue;
-        }
-        const u =
-            0xf080_8080 |
-            (((code & 0x1c_0000) << 6) | ((code & 0x03_f000) << 4) | ((code & 0x0fc0) << 2) | (code & 0x3f));
-        into[i++] = (u >>> 24) & 0x0ff;
-        into[i++] = (u >>> 16) & 0xff;
-        into[i++] = (u >>> 8) & 0xff;
-        into[i++] = u & 0xff;
-    }
-    return i - offset;
-}
-
-export function encodeTextToUtf8_32Into(text: string, into: Utf8BE32[]): number {
+export function encodeTextToUtf8_32Into(text: string, into: Utf8_32[]): number {
     const len = text.length;
     let i = 0;
     for (let p = { text, offset: 0, bytes: 0 }; p.offset < len; ) {
@@ -279,10 +279,13 @@ export function encodeTextToUtf8_32Into(text: string, into: Utf8BE32[]): number
 export interface TextOffset {
     text: string;
     offset: number;
+}
+
+export interface TextOffsetWithByteCount extends TextOffset {
     bytes?: number;
 }
 
-export function encodeTextToUtf8_32(offset: TextOffset): Utf8BE32 {
+export function encodeTextToUtf8_32(offset: TextOffsetWithByteCount): Utf8_32 {
     const text = offset.text;
     let code = text.charCodeAt(offset.offset);
     code = (code & 0xf800) === 0xd800 ? text.codePointAt(offset.offset++) || 0 : code;
@@ -306,13 +309,47 @@ export function encodeTextToUtf8_32(offset: TextOffset): Utf8BE32 {
     );
 }
 
-export function encodeTextToUtf8(text: string): number[] {
-    const array: number[] = new Array(text.length);
-    const len = encodeTextToUtf8Into(text, array);
-    if (array.length !== len) {
-        array.length = len;
+export function encodeTextToUtf8_32Rev(offset: TextOffset): Utf8_32Rev {
+    const text = offset.text;
+    let code = text.charCodeAt(offset.offset);
+    code = (code & 0xf800) === 0xd800 ? text.codePointAt(offset.offset++) || 0 : code;
+    offset.offset++;
+
+    if (code < 0x80) {
+        return code;
     }
-    return array;
+    if (code < 0x800) {
+        return 0x80c0 | ((code & 0x7c0) >> 6) | ((code & 0x3f) << 8);
+    }
+    if (code < 0x1_0000) {
+        return 0x80_80e0 | ((code & 0xf000) >>> 12) | ((code & 0xfc0) << 2) | ((code & 0x3f) << 16);
+    }
+    return (
+        0x8080_80f0 +
+        (((code & 0x1c_0000) >>> 18) | ((code & 0x03_f000) >>> 4) | ((code & 0xfc0) << 10) | ((code & 0x3f) << 24))
+    );
+}
+
+export function encodeTextToUtf8Into(text: string, into: Array<number> | Uint8Array, offset = 0): number {
+    const t = { text, offset: 0 };
+
+    let i = offset;
+
+    for (; t.offset < text.length; ) {
+        const code = encodeTextToUtf8_32Rev(t);
+        for (let utf8_32Rev = code; utf8_32Rev !== 0; utf8_32Rev >>>= 8) {
+            into[i++] = utf8_32Rev & 0xff;
+        }
+    }
+    return i;
+}
+
+export function encodeTextToUtf8(text: string): number[] {
+    const into: number[] = new Array(text.length);
+
+    encodeTextToUtf8Into(text, into);
+
+    return into;
 }
 
 export function textToCodePoints(text: string): CodePoint[] {

From 73d4f41dce0492574dfd833fba2d3c6501478d6a Mon Sep 17 00:00:00 2001
From: "autofix-ci[bot]" <114827586+autofix-ci[bot]@users.noreply.github.com>
Date: Sun, 28 Dec 2025 08:35:51 +0000
Subject: [PATCH 2/4] [autofix.ci] apply automated fixes

---
 packages/cspell-trie-lib/src/lib/TrieBlob/Utf8.test.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/packages/cspell-trie-lib/src/lib/TrieBlob/Utf8.test.ts b/packages/cspell-trie-lib/src/lib/TrieBlob/Utf8.test.ts
index 29f63ec639b5..09e54792800c 100644
--- a/packages/cspell-trie-lib/src/lib/TrieBlob/Utf8.test.ts
+++ b/packages/cspell-trie-lib/src/lib/TrieBlob/Utf8.test.ts
@@ -3,10 +3,10 @@ import assert from 'node:assert';
 import { describe, expect, test } from 'vitest';
 
 import {
-    decodeUtf8ByteStream,
     decodeUtf8_32,
     decodeUtf8_32_StreamToString,
     decodeUtf8_32Rev,
+    decodeUtf8ByteStream,
     encodeCodePointsToUtf8Into,
     encodeTextToUtf8,
     encodeTextToUtf8_32Into,

From 768a8b71f5e572e1245a6c9dae4c66fca119e25c Mon Sep 17 00:00:00 2001
From: Jason Dent <Jason3S@users.noreply.github.com>
Date: Mon, 29 Dec 2025 10:01:52 +0100
Subject: [PATCH 3/4] Apply suggestion from @Copilot

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Signed-off-by: Jason Dent <Jason3S@users.noreply.github.com>
---
 packages/cspell-trie-lib/src/lib/TrieBlob/Utf8.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/packages/cspell-trie-lib/src/lib/TrieBlob/Utf8.ts b/packages/cspell-trie-lib/src/lib/TrieBlob/Utf8.ts
index 11a7c442485b..4b6404a02f25 100644
--- a/packages/cspell-trie-lib/src/lib/TrieBlob/Utf8.ts
+++ b/packages/cspell-trie-lib/src/lib/TrieBlob/Utf8.ts
@@ -341,7 +341,7 @@ export function encodeTextToUtf8Into(text: string, into: Array<number> | Uint8Ar
             into[i++] = utf8_32Rev & 0xff;
         }
     }
-    return i;
+    return i - offset;
 }
 
 export function encodeTextToUtf8(text: string): number[] {

From 6fc771c2cdadec927ec364573ca134041ebceae3 Mon Sep 17 00:00:00 2001
From: Copilot <198982749+Copilot@users.noreply.github.com>
Date: Mon, 29 Dec 2025 10:08:30 +0100
Subject: [PATCH 4/4] fix: Correct spelling 'hightest' to 'highest' in Utf8.ts
 documentation (#8244)

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: Jason3S <3740137+Jason3S@users.noreply.github.com>
---
 packages/cspell-trie-lib/src/lib/TrieBlob/Utf8.ts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/packages/cspell-trie-lib/src/lib/TrieBlob/Utf8.ts b/packages/cspell-trie-lib/src/lib/TrieBlob/Utf8.ts
index 4b6404a02f25..3a481e3a5dc1 100644
--- a/packages/cspell-trie-lib/src/lib/TrieBlob/Utf8.ts
+++ b/packages/cspell-trie-lib/src/lib/TrieBlob/Utf8.ts
@@ -5,7 +5,7 @@
  *
  * Utf8_32 number are comparable in utf8 order.
  *
- *            hightest byte           lowest byte   Code Point Range
+ *            highest byte            lowest byte   Code Point Range
  * - 1 byte:  00000000 00000000 00000000 0xxxxxxx - 0x0000_0000 - 0x0000_007f
  * - 2 bytes: 00000000 00000000 110xxxxx 10xxxxxx - 0x0000_0080 - 0x0000_07ff
  * - 3 bytes: 00000000 1110xxxx 10xxxxxx 10xxxxxx - 0x0000_0800 - 0x0000_ffff
@@ -19,7 +19,7 @@ export type Utf8_32 = number;
  *
  * These numbers DO NOT sort into the correct order for utf8.
  *
- *            hightest byte           lowest byte   Code Point Range
+ *            highest byte            lowest byte   Code Point Range
  * - 1 byte:  00000000 00000000 00000000 0xxxxxxx - 0x0000_0000 - 0x0000_007f
  * - 2 bytes: 00000000 00000000 10xxxxxx 110xxxxx - 0x0000_0080 - 0x0000_07ff
  * - 3 bytes: 00000000 10xxxxxx 10xxxxxx 1110xxxx - 0x0000_0800 - 0x0000_ffff