Skip to content

Commit 64203ac

Browse files
committed
handle more edge cases
1 parent e48105c commit 64203ac

File tree

4 files changed

+279
-46
lines changed

4 files changed

+279
-46
lines changed

src/utils/utf8-wasm-binary.ts

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ AGFzbQEAAAABNQhedwFgAW8Bf2ACb38Bf2ADb2QAfwF/YANkAH9/AWRvYAJ/ZAABf2ABfwFkAGADZA
66
B/fwFvAnsEDndhc206anMtc3RyaW5nBmxlbmd0aAABDndhc206anMtc3RyaW5nCmNoYXJDb2RlQXQA
77
Ag53YXNtOmpzLXN0cmluZxFpbnRvQ2hhckNvZGVBcnJheQADDndhc206anMtc3RyaW5nEWZyb21DaG
88
FyQ29kZUFycmF5AAQDBgUBAgUGBwUDAQABB1QGBm1lbW9yeQIACXV0ZjhDb3VudAAECnV0ZjhFbmNv
9-
ZGUABRF1dGY4RGVjb2RlVG9BcnJheQAGCmFsbG9jQXJyYXkABw1hcnJheVRvU3RyaW5nAAgKsQcFlA
9+
ZGUABRF1dGY4RGVjb2RlVG9BcnJheQAGCmFsbG9jQXJyYXkABw1hcnJheVRvU3RyaW5nAAgKlwkFlA
1010
EBBH8gABAAIQQDQCADIARPRQRAIAAgAxABIgJBgAFJBH8gAUEBagUgAkGAEEkEfyABQQJqBSACQf+3
1111
A00gAkGAsANPcQR/IANBAWoiAiAESQR/IAAgAhABQYD4A3FBgLgDRgR/IAIhAyABQQRqBSABQQNqCw
1212
UgAUEDagsFIAFBA2oLCwshASADQQFqIQMMAQsLIAELwgMCBn8BZAAgASECIAAgABAAIgX7BwAiCEEA
@@ -17,11 +17,15 @@ EMdkE/cUGAAXI6AAAgAkECaiADQQZ2QT9xQYABcjoAACACQQNqIANBP3FBgAFyOgAAIAJBBGoFIAIg
1717
A0EMdkHgAXI6AAAgAkEBaiADQQZ2QT9xQYABcjoAACACQQJqIANBP3FBgAFyOgAAIAJBA2oLBSACIA
1818
NBDHZB4AFyOgAAIAJBAWogA0EGdkE/cUGAAXI6AAAgAkECaiADQT9xQYABcjoAACACQQNqCwUgAiAD
1919
QQx2QeABcjoAACACQQFqIANBBnZBP3FBgAFyOgAAIAJBAmogA0E/cUGAAXI6AAAgAkEDagsLCyECIA
20-
RBAWohBAwBCwsgAiABawvBAgEDfwNAIAAgA01FBEAgAy0AACIEQYABcQR/IARB4AFxQcABRgR/IAEg
21-
AiADQQFqLQAAQT9xIARBH3FBBnRy+w4AIAJBAWohAiADQQJqBSAEQfABcUHgAUYEfyABIAIgA0ECai
22-
0AAEE/cSAEQQ9xQQx0IANBAWotAABBP3FBBnRycvsOACACQQFqIQIgA0EDagUgBEH4AXFB8AFGBH8g
23-
ASACIANBA2otAABBP3EgBEEHcUESdCADQQFqLQAAQT9xQQx0ciADQQJqLQAAQT9xQQZ0cnJBgIAEay
24-
IEQQp2QYCwA3L7DgAgASACQQFqIgIgBEH/B3FBgLgDcvsOACACQQFqIQIgA0EEagUgASACIAT7DgAg
25-
AkEBaiECIANBAWoLCwsFIAEgAiAE+w4AIAJBAWohAiADQQFqCyEDDAELCyACCwcAIAD7BwALCgAgAC
26-
ABIAIQAws=
20+
RBAWohBAwBCwsgAiABawunBAEEfwNAAkAgACADTQ0AIAMtAAAiBEGAAXEEfyAAIANrIQUgBEHgAXFB
21+
wAFGBH8gBUECSQR/IAEgAiAE+w4AIAJBAWohAiADQQFqIQMDQCAAIANNRQRAIAEgAiADLQAA+w4AIA
22+
JBAWohAiADQQFqIQMMAQsLDAMFIAEgAiADQQFqLQAAQT9xIARBH3FBBnRy+w4AIAJBAWohAiADQQJq
23+
CwUgBEHwAXFB4AFGBH8gBUEDSQR/IAEgAiAE+w4AIAJBAWohAiADQQFqIQMDQCAAIANNRQRAIAEgAi
24+
ADLQAA+w4AIAJBAWohAiADQQFqIQMMAQsLDAQFIAEgAiADQQJqLQAAQT9xIARBD3FBDHQgA0EBai0A
25+
AEE/cUEGdHJy+w4AIAJBAWohAiADQQNqCwUgBEH4AXFB8AFGBH8gBUEESQR/IAEgAiAE+w4AIAJBAW
26+
ohAiADQQFqIQMDQCAAIANNRQRAIAEgAiADLQAA+w4AIAJBAWohAiADQQFqIQMMAQsLDAUFIAEgAiAD
27+
QQNqLQAAQT9xIARBB3FBEnQgA0EBai0AAEE/cUEMdHIgA0ECai0AAEE/cUEGdHJyQYCABGsiBEEKdk
28+
GAsANy+w4AIAEgAkEBaiICIARB/wdxQYC4A3L7DgAgAkEBaiECIANBBGoLBSABIAIgBPsOACACQQFq
29+
IQIgA0EBagsLCwUgASACIAT7DgAgAkEBaiECIANBAWoLIQMMAQsLIAILBwAgAPsHAAsKACAAIAEgAh
30+
ADCw==
2731
`;

src/utils/utf8.ts

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,15 +150,36 @@ export function utf8DecodeJs(bytes: Uint8Array, inputOffset: number, byteLength:
150150
units.push(byte1);
151151
} else if ((byte1 & 0xe0) === 0xc0) {
152152
// 2 bytes
153+
if (offset >= end) {
154+
// Truncated sequence: preserve lead byte
155+
units.push(byte1);
156+
break;
157+
}
153158
const byte2 = bytes[offset++]! & 0x3f;
154159
units.push(((byte1 & 0x1f) << 6) | byte2);
155160
} else if ((byte1 & 0xf0) === 0xe0) {
156161
// 3 bytes
162+
if (offset + 1 >= end) {
163+
// Truncated sequence: preserve remaining bytes individually
164+
units.push(byte1);
165+
while (offset < end) {
166+
units.push(bytes[offset++]!);
167+
}
168+
break;
169+
}
157170
const byte2 = bytes[offset++]! & 0x3f;
158171
const byte3 = bytes[offset++]! & 0x3f;
159172
units.push(((byte1 & 0x1f) << 12) | (byte2 << 6) | byte3);
160173
} else if ((byte1 & 0xf8) === 0xf0) {
161174
// 4 bytes
175+
if (offset + 2 >= end) {
176+
// Truncated sequence: preserve remaining bytes individually
177+
units.push(byte1);
178+
while (offset < end) {
179+
units.push(bytes[offset++]!);
180+
}
181+
break;
182+
}
162183
const byte2 = bytes[offset++]! & 0x3f;
163184
const byte3 = bytes[offset++]! & 0x3f;
164185
const byte4 = bytes[offset++]! & 0x3f;

test/utf8-wasm.test.ts

Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -361,4 +361,158 @@ describe("utf8-wasm", () => {
361361
});
362362
});
363363
});
364+
365+
describe("edge cases: truncated multi-byte sequences at end of input", () => {
366+
// These test cases verify behavior when multi-byte UTF-8 sequences
367+
// are truncated at the end of input (missing continuation bytes)
368+
//
369+
// Expected behavior: preserve each byte individually when the sequence
370+
// cannot be completed due to end of input.
371+
372+
describe("truncated 2-byte at end preserves lead byte", () => {
373+
it("0xC2 at end becomes char(0xC2)", () => {
374+
const bytes = new Uint8Array([0xC2]);
375+
const jsResult = utf8DecodeJs(bytes, 0, 1);
376+
377+
assert.strictEqual(jsResult.length, 1);
378+
assert.strictEqual(jsResult.charCodeAt(0), 0xC2);
379+
380+
if (WASM_AVAILABLE) {
381+
const wasmResult = utf8DecodeWasm(bytes, 0, 1);
382+
assert.strictEqual(wasmResult.length, 1);
383+
assert.strictEqual(wasmResult.charCodeAt(0), 0xC2);
384+
}
385+
});
386+
387+
it("'A' then 0xC2 at end preserves both", () => {
388+
const bytes = new Uint8Array([0x41, 0xC2]); // 'A' + truncated 2-byte
389+
const jsResult = utf8DecodeJs(bytes, 0, 2);
390+
391+
assert.strictEqual(jsResult.length, 2);
392+
assert.strictEqual(jsResult.charCodeAt(0), 0x41); // 'A'
393+
assert.strictEqual(jsResult.charCodeAt(1), 0xC2); // preserved lead byte
394+
395+
if (WASM_AVAILABLE) {
396+
const wasmResult = utf8DecodeWasm(bytes, 0, 2);
397+
assert.strictEqual(wasmResult.length, 2);
398+
assert.strictEqual(wasmResult, jsResult);
399+
}
400+
});
401+
});
402+
403+
describe("truncated 3-byte at end preserves bytes", () => {
404+
it("0xE2 at end becomes char(0xE2)", () => {
405+
const bytes = new Uint8Array([0xE2]);
406+
const jsResult = utf8DecodeJs(bytes, 0, 1);
407+
408+
assert.strictEqual(jsResult.length, 1);
409+
assert.strictEqual(jsResult.charCodeAt(0), 0xE2);
410+
411+
if (WASM_AVAILABLE) {
412+
const wasmResult = utf8DecodeWasm(bytes, 0, 1);
413+
assert.strictEqual(wasmResult.length, 1);
414+
assert.strictEqual(wasmResult.charCodeAt(0), 0xE2);
415+
}
416+
});
417+
418+
it("0xE2 0x82 at end becomes two chars", () => {
419+
const bytes = new Uint8Array([0xE2, 0x82]);
420+
const jsResult = utf8DecodeJs(bytes, 0, 2);
421+
422+
assert.strictEqual(jsResult.length, 2);
423+
assert.strictEqual(jsResult.charCodeAt(0), 0xE2);
424+
assert.strictEqual(jsResult.charCodeAt(1), 0x82);
425+
426+
if (WASM_AVAILABLE) {
427+
const wasmResult = utf8DecodeWasm(bytes, 0, 2);
428+
assert.strictEqual(wasmResult.length, 2);
429+
assert.strictEqual(wasmResult.charCodeAt(0), 0xE2);
430+
assert.strictEqual(wasmResult.charCodeAt(1), 0x82);
431+
}
432+
});
433+
434+
it("'A' then 0xE2 0x82 at end preserves all", () => {
435+
const bytes = new Uint8Array([0x41, 0xE2, 0x82]);
436+
const jsResult = utf8DecodeJs(bytes, 0, 3);
437+
438+
assert.strictEqual(jsResult.length, 3);
439+
assert.strictEqual(jsResult.charCodeAt(0), 0x41); // 'A'
440+
assert.strictEqual(jsResult.charCodeAt(1), 0xE2);
441+
assert.strictEqual(jsResult.charCodeAt(2), 0x82);
442+
443+
if (WASM_AVAILABLE) {
444+
const wasmResult = utf8DecodeWasm(bytes, 0, 3);
445+
assert.strictEqual(wasmResult.length, 3);
446+
assert.strictEqual(wasmResult, jsResult);
447+
}
448+
});
449+
});
450+
451+
describe("truncated 4-byte at end preserves bytes", () => {
452+
it("0xF0 at end becomes char(0xF0)", () => {
453+
const bytes = new Uint8Array([0xF0]);
454+
const jsResult = utf8DecodeJs(bytes, 0, 1);
455+
456+
assert.strictEqual(jsResult.length, 1);
457+
assert.strictEqual(jsResult.charCodeAt(0), 0xF0);
458+
459+
if (WASM_AVAILABLE) {
460+
const wasmResult = utf8DecodeWasm(bytes, 0, 1);
461+
assert.strictEqual(wasmResult.length, 1);
462+
assert.strictEqual(wasmResult.charCodeAt(0), 0xF0);
463+
}
464+
});
465+
466+
it("0xF0 0x9F at end becomes two chars", () => {
467+
const bytes = new Uint8Array([0xF0, 0x9F]);
468+
const jsResult = utf8DecodeJs(bytes, 0, 2);
469+
470+
assert.strictEqual(jsResult.length, 2);
471+
assert.strictEqual(jsResult.charCodeAt(0), 0xF0);
472+
assert.strictEqual(jsResult.charCodeAt(1), 0x9F);
473+
474+
if (WASM_AVAILABLE) {
475+
const wasmResult = utf8DecodeWasm(bytes, 0, 2);
476+
assert.strictEqual(wasmResult.length, 2);
477+
assert.strictEqual(wasmResult.charCodeAt(0), 0xF0);
478+
assert.strictEqual(wasmResult.charCodeAt(1), 0x9F);
479+
}
480+
});
481+
482+
it("0xF0 0x9F 0x98 at end becomes three chars", () => {
483+
const bytes = new Uint8Array([0xF0, 0x9F, 0x98]);
484+
const jsResult = utf8DecodeJs(bytes, 0, 3);
485+
486+
assert.strictEqual(jsResult.length, 3);
487+
assert.strictEqual(jsResult.charCodeAt(0), 0xF0);
488+
assert.strictEqual(jsResult.charCodeAt(1), 0x9F);
489+
assert.strictEqual(jsResult.charCodeAt(2), 0x98);
490+
491+
if (WASM_AVAILABLE) {
492+
const wasmResult = utf8DecodeWasm(bytes, 0, 3);
493+
assert.strictEqual(wasmResult.length, 3);
494+
assert.strictEqual(wasmResult.charCodeAt(0), 0xF0);
495+
assert.strictEqual(wasmResult.charCodeAt(1), 0x9F);
496+
assert.strictEqual(wasmResult.charCodeAt(2), 0x98);
497+
}
498+
});
499+
500+
it("'A' then 0xF0 0x9F 0x98 at end preserves all", () => {
501+
const bytes = new Uint8Array([0x41, 0xF0, 0x9F, 0x98]);
502+
const jsResult = utf8DecodeJs(bytes, 0, 4);
503+
504+
assert.strictEqual(jsResult.length, 4);
505+
assert.strictEqual(jsResult.charCodeAt(0), 0x41); // 'A'
506+
assert.strictEqual(jsResult.charCodeAt(1), 0xF0);
507+
assert.strictEqual(jsResult.charCodeAt(2), 0x9F);
508+
assert.strictEqual(jsResult.charCodeAt(3), 0x98);
509+
510+
if (WASM_AVAILABLE) {
511+
const wasmResult = utf8DecodeWasm(bytes, 0, 4);
512+
assert.strictEqual(wasmResult.length, 4);
513+
assert.strictEqual(wasmResult, jsResult);
514+
}
515+
});
516+
});
517+
});
364518
});

wasm/utf8.wat

Lines changed: 92 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,7 @@
194194
(local $b3 i32)
195195
(local $b4 i32)
196196
(local $cp i32)
197+
(local $remaining i32)
197198

198199
(local.set $end (local.get $length))
199200

@@ -202,6 +203,8 @@
202203
(br_if $break (i32.ge_u (local.get $pos) (local.get $end)))
203204

204205
(local.set $b1 (i32.load8_u (local.get $pos)))
206+
;; Calculate remaining bytes including current
207+
(local.set $remaining (i32.sub (local.get $end) (local.get $pos)))
205208

206209
(if (i32.eqz (i32.and (local.get $b1) (i32.const 0x80)))
207210
(then
@@ -212,54 +215,105 @@
212215
(else
213216
(if (i32.eq (i32.and (local.get $b1) (i32.const 0xE0)) (i32.const 0xC0))
214217
(then
215-
;; 2-byte: 110xxxxx 10xxxxxx
216-
(local.set $b2 (i32.load8_u (i32.add (local.get $pos) (i32.const 1))))
217-
(array.set $i16_array (local.get $arr) (local.get $outIdx)
218-
(i32.or
219-
(i32.shl (i32.and (local.get $b1) (i32.const 0x1F)) (i32.const 6))
220-
(i32.and (local.get $b2) (i32.const 0x3F))))
221-
(local.set $outIdx (i32.add (local.get $outIdx) (i32.const 1)))
222-
(local.set $pos (i32.add (local.get $pos) (i32.const 2))))
223-
(else
224-
(if (i32.eq (i32.and (local.get $b1) (i32.const 0xF0)) (i32.const 0xE0))
218+
;; 2-byte: 110xxxxx 10xxxxxx - need 2 bytes
219+
(if (i32.lt_u (local.get $remaining) (i32.const 2))
225220
(then
226-
;; 3-byte: 1110xxxx 10xxxxxx 10xxxxxx
221+
              ;; Truncated: $remaining is exactly 1 here, so only the lead byte is left; preserve it (the copy loop below exits immediately)
222+
(array.set $i16_array (local.get $arr) (local.get $outIdx) (local.get $b1))
223+
(local.set $outIdx (i32.add (local.get $outIdx) (i32.const 1)))
224+
(local.set $pos (i32.add (local.get $pos) (i32.const 1)))
225+
;; Preserve any remaining bytes
226+
(block $done_remaining
227+
(loop $copy_remaining
228+
(br_if $done_remaining (i32.ge_u (local.get $pos) (local.get $end)))
229+
(array.set $i16_array (local.get $arr) (local.get $outIdx)
230+
(i32.load8_u (local.get $pos)))
231+
(local.set $outIdx (i32.add (local.get $outIdx) (i32.const 1)))
232+
(local.set $pos (i32.add (local.get $pos) (i32.const 1)))
233+
(br $copy_remaining)))
234+
(br $break))
235+
(else
227236
(local.set $b2 (i32.load8_u (i32.add (local.get $pos) (i32.const 1))))
228-
(local.set $b3 (i32.load8_u (i32.add (local.get $pos) (i32.const 2))))
229237
(array.set $i16_array (local.get $arr) (local.get $outIdx)
230238
(i32.or
231-
(i32.or
232-
(i32.shl (i32.and (local.get $b1) (i32.const 0x0F)) (i32.const 12))
233-
(i32.shl (i32.and (local.get $b2) (i32.const 0x3F)) (i32.const 6)))
234-
(i32.and (local.get $b3) (i32.const 0x3F))))
239+
(i32.shl (i32.and (local.get $b1) (i32.const 0x1F)) (i32.const 6))
240+
(i32.and (local.get $b2) (i32.const 0x3F))))
235241
(local.set $outIdx (i32.add (local.get $outIdx) (i32.const 1)))
236-
(local.set $pos (i32.add (local.get $pos) (i32.const 3))))
237-
(else
238-
(if (i32.eq (i32.and (local.get $b1) (i32.const 0xF8)) (i32.const 0xF0))
242+
(local.set $pos (i32.add (local.get $pos) (i32.const 2))))))
243+
(else
244+
(if (i32.eq (i32.and (local.get $b1) (i32.const 0xF0)) (i32.const 0xE0))
245+
(then
246+
;; 3-byte: 1110xxxx 10xxxxxx 10xxxxxx - need 3 bytes
247+
(if (i32.lt_u (local.get $remaining) (i32.const 3))
239248
(then
240-
;; 4-byte: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
249+
;; Truncated: preserve all remaining bytes individually
250+
(array.set $i16_array (local.get $arr) (local.get $outIdx) (local.get $b1))
251+
(local.set $outIdx (i32.add (local.get $outIdx) (i32.const 1)))
252+
(local.set $pos (i32.add (local.get $pos) (i32.const 1)))
253+
;; Preserve any remaining bytes
254+
(block $done_remaining2
255+
(loop $copy_remaining2
256+
(br_if $done_remaining2 (i32.ge_u (local.get $pos) (local.get $end)))
257+
(array.set $i16_array (local.get $arr) (local.get $outIdx)
258+
(i32.load8_u (local.get $pos)))
259+
(local.set $outIdx (i32.add (local.get $outIdx) (i32.const 1)))
260+
(local.set $pos (i32.add (local.get $pos) (i32.const 1)))
261+
(br $copy_remaining2)))
262+
(br $break))
263+
(else
241264
(local.set $b2 (i32.load8_u (i32.add (local.get $pos) (i32.const 1))))
242265
(local.set $b3 (i32.load8_u (i32.add (local.get $pos) (i32.const 2))))
243-
(local.set $b4 (i32.load8_u (i32.add (local.get $pos) (i32.const 3))))
244-
(local.set $cp
245-
(i32.sub
246-
(i32.or
247-
(i32.or
248-
(i32.or
249-
(i32.shl (i32.and (local.get $b1) (i32.const 0x07)) (i32.const 18))
250-
(i32.shl (i32.and (local.get $b2) (i32.const 0x3F)) (i32.const 12)))
251-
(i32.shl (i32.and (local.get $b3) (i32.const 0x3F)) (i32.const 6)))
252-
(i32.and (local.get $b4) (i32.const 0x3F)))
253-
(i32.const 0x10000)))
254-
;; High surrogate
255266
(array.set $i16_array (local.get $arr) (local.get $outIdx)
256-
(i32.or (i32.const 0xD800) (i32.shr_u (local.get $cp) (i32.const 10))))
257-
(local.set $outIdx (i32.add (local.get $outIdx) (i32.const 1)))
258-
;; Low surrogate
259-
(array.set $i16_array (local.get $arr) (local.get $outIdx)
260-
(i32.or (i32.const 0xDC00) (i32.and (local.get $cp) (i32.const 0x3FF))))
267+
(i32.or
268+
(i32.or
269+
(i32.shl (i32.and (local.get $b1) (i32.const 0x0F)) (i32.const 12))
270+
(i32.shl (i32.and (local.get $b2) (i32.const 0x3F)) (i32.const 6)))
271+
(i32.and (local.get $b3) (i32.const 0x3F))))
261272
(local.set $outIdx (i32.add (local.get $outIdx) (i32.const 1)))
262-
(local.set $pos (i32.add (local.get $pos) (i32.const 4))))
273+
(local.set $pos (i32.add (local.get $pos) (i32.const 3))))))
274+
(else
275+
(if (i32.eq (i32.and (local.get $b1) (i32.const 0xF8)) (i32.const 0xF0))
276+
(then
277+
;; 4-byte: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx - need 4 bytes
278+
(if (i32.lt_u (local.get $remaining) (i32.const 4))
279+
(then
280+
;; Truncated: preserve all remaining bytes individually
281+
(array.set $i16_array (local.get $arr) (local.get $outIdx) (local.get $b1))
282+
(local.set $outIdx (i32.add (local.get $outIdx) (i32.const 1)))
283+
(local.set $pos (i32.add (local.get $pos) (i32.const 1)))
284+
;; Preserve any remaining bytes
285+
(block $done_remaining3
286+
(loop $copy_remaining3
287+
(br_if $done_remaining3 (i32.ge_u (local.get $pos) (local.get $end)))
288+
(array.set $i16_array (local.get $arr) (local.get $outIdx)
289+
(i32.load8_u (local.get $pos)))
290+
(local.set $outIdx (i32.add (local.get $outIdx) (i32.const 1)))
291+
(local.set $pos (i32.add (local.get $pos) (i32.const 1)))
292+
(br $copy_remaining3)))
293+
(br $break))
294+
(else
295+
(local.set $b2 (i32.load8_u (i32.add (local.get $pos) (i32.const 1))))
296+
(local.set $b3 (i32.load8_u (i32.add (local.get $pos) (i32.const 2))))
297+
(local.set $b4 (i32.load8_u (i32.add (local.get $pos) (i32.const 3))))
298+
(local.set $cp
299+
(i32.sub
300+
(i32.or
301+
(i32.or
302+
(i32.or
303+
(i32.shl (i32.and (local.get $b1) (i32.const 0x07)) (i32.const 18))
304+
(i32.shl (i32.and (local.get $b2) (i32.const 0x3F)) (i32.const 12)))
305+
(i32.shl (i32.and (local.get $b3) (i32.const 0x3F)) (i32.const 6)))
306+
(i32.and (local.get $b4) (i32.const 0x3F)))
307+
(i32.const 0x10000)))
308+
;; High surrogate
309+
(array.set $i16_array (local.get $arr) (local.get $outIdx)
310+
(i32.or (i32.const 0xD800) (i32.shr_u (local.get $cp) (i32.const 10))))
311+
(local.set $outIdx (i32.add (local.get $outIdx) (i32.const 1)))
312+
;; Low surrogate
313+
(array.set $i16_array (local.get $arr) (local.get $outIdx)
314+
(i32.or (i32.const 0xDC00) (i32.and (local.get $cp) (i32.const 0x3FF))))
315+
(local.set $outIdx (i32.add (local.get $outIdx) (i32.const 1)))
316+
(local.set $pos (i32.add (local.get $pos) (i32.const 4))))))
263317
(else
264318
;; Invalid byte: preserve as code unit (same as JS)
265319
(array.set $i16_array (local.get $arr) (local.get $outIdx) (local.get $b1))

0 commit comments

Comments
 (0)