@@ -361,4 +361,158 @@ describe("utf8-wasm", () => {
361361 } ) ;
362362 } ) ;
363363 } ) ;
364+
describe("edge cases: truncated multi-byte sequences at end of input", () => {
  // These cases pin decoder behavior when the input ends in the middle of a
  // multi-byte UTF-8 sequence: the lead byte is present but one or more
  // continuation bytes are missing.
  //
  // Expected behavior: when the sequence cannot be completed because the
  // input has ended, each byte is preserved individually, i.e. it is emitted
  // as one character whose code unit equals the byte value. The expected
  // code units of every case below are therefore exactly its input bytes.

  /**
   * Lists the UTF-16 code units of a decoded string.
   *
   * Indexing by position (rather than iterating the string) keeps one entry
   * per code unit, so the array length always equals `text.length`.
   *
   * @param {string} text - Decoder output.
   * @returns {number[]} One `charCodeAt` value per code unit of `text`.
   */
  const codeUnitsOf = (text) =>
    Array.from({ length: text.length }, (_, index) => text.charCodeAt(index));

  /**
   * Registers a test asserting that decoding `byteValues` yields exactly one
   * character per input byte, with the byte value as its char code.
   *
   * A single deep comparison covers both the length check and the per-index
   * char code checks, and reports the full actual/expected arrays on failure.
   *
   * @param {string} title - Test title passed to `it`.
   * @param {number[]} byteValues - Input bytes; also the expected code units.
   */
  const itPreservesBytes = (title, byteValues) => {
    it(title, () => {
      const bytes = new Uint8Array(byteValues);

      // Decode the whole buffer; the range arguments are derived from the
      // array so they cannot drift from the test data.
      const jsResult = utf8DecodeJs(bytes, 0, bytes.length);
      assert.deepStrictEqual(codeUnitsOf(jsResult), byteValues);

      // The WASM decoder is only exercised when WASM_AVAILABLE is truthy;
      // otherwise the test verifies the JS decoder alone. Checking against
      // the same expected bytes also implies WASM/JS parity.
      if (WASM_AVAILABLE) {
        const wasmResult = utf8DecodeWasm(bytes, 0, bytes.length);
        assert.deepStrictEqual(codeUnitsOf(wasmResult), byteValues);
      }
    });
  };

  // One entry per nested suite: [suite title, [[test title, input bytes], ...]].
  const suites = [
    [
      "truncated 2-byte at end preserves lead byte",
      [
        ["0xC2 at end becomes char(0xC2)", [0xC2]],
        // 'A' followed by a truncated 2-byte sequence.
        ["'A' then 0xC2 at end preserves both", [0x41, 0xC2]],
      ],
    ],
    [
      "truncated 3-byte at end preserves bytes",
      [
        ["0xE2 at end becomes char(0xE2)", [0xE2]],
        ["0xE2 0x82 at end becomes two chars", [0xE2, 0x82]],
        ["'A' then 0xE2 0x82 at end preserves all", [0x41, 0xE2, 0x82]],
      ],
    ],
    [
      "truncated 4-byte at end preserves bytes",
      [
        ["0xF0 at end becomes char(0xF0)", [0xF0]],
        ["0xF0 0x9F at end becomes two chars", [0xF0, 0x9F]],
        ["0xF0 0x9F 0x98 at end becomes three chars", [0xF0, 0x9F, 0x98]],
        ["'A' then 0xF0 0x9F 0x98 at end preserves all", [0x41, 0xF0, 0x9F, 0x98]],
      ],
    ],
  ];

  for (const [suiteTitle, cases] of suites) {
    describe(suiteTitle, () => {
      for (const [caseTitle, byteValues] of cases) {
        itPreservesBytes(caseTitle, byteValues);
      }
    });
  }
});
364518} ) ;
0 commit comments