From 8c078a6e693598ca89f4590424278f4c59c3e791 Mon Sep 17 00:00:00 2001 From: Frank Denis Date: Wed, 26 Nov 2025 23:01:23 +0100 Subject: [PATCH 1/2] utf8ByteSequenceLength: reject lead bytes > 0xF4 UTF-8 start bytes in the range [0xF5 .. 0xFF] are disallowed as they would encode code points above U+10FFFF --- lib/std/unicode.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/std/unicode.zig b/lib/std/unicode.zig index 1aae6d488fbe..acf05d652479 100644 --- a/lib/std/unicode.zig +++ b/lib/std/unicode.zig @@ -31,7 +31,7 @@ pub fn utf8ByteSequenceLength(first_byte: u8) !u3 { 0b0000_0000...0b0111_1111 => 1, 0b1100_0000...0b1101_1111 => 2, 0b1110_0000...0b1110_1111 => 3, - 0b1111_0000...0b1111_0111 => 4, + 0b1111_0000...0b1111_0100 => 4, else => error.Utf8InvalidStartByte, }; } From d22376cd13eb4fa1df2ac107522b50e5d043db49 Mon Sep 17 00:00:00 2001 From: Frank Denis Date: Wed, 26 Nov 2025 23:00:38 +0100 Subject: [PATCH 2/2] Update test --- lib/std/unicode.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/std/unicode.zig b/lib/std/unicode.zig index acf05d652479..90908e37a97a 100644 --- a/lib/std/unicode.zig +++ b/lib/std/unicode.zig @@ -752,7 +752,7 @@ test "misc invalid utf8" { fn testMiscInvalidUtf8() !void { // codepoint out of bounds try testError("\xf4\x90\x80\x80", error.Utf8CodepointTooLarge); - try testError("\xf7\xbf\xbf\xbf", error.Utf8CodepointTooLarge); + try testError("\xf7\xbf\xbf\xbf", error.Utf8InvalidStartByte); // surrogate halves try testValid("\xed\x9f\xbf", 0xd7ff); try testError("\xed\xa0\x80", error.Utf8EncodesSurrogateHalf);