diff --git a/src/UTF8.cs b/src/UTF8.cs index ab2e857..17453bc 100644 --- a/src/UTF8.cs +++ b/src/UTF8.cs @@ -124,7 +124,8 @@ private static (int utfAdjust, int scalarAdjust) GetFinalScalarUtfAdjustments(by } // Too short // range check codePoint = (uint)(firstByte & 0b00011111) << 6 | (uint)(buf[pos + 1] & 0b00111111); - if ((codePoint < 0x80) || (0x7ff < codePoint)) + // codePoint is necessarily <= 0x7ff + if (codePoint < 0x80) { return buf + pos; } // Overlong @@ -141,7 +142,8 @@ private static (int utfAdjust, int scalarAdjust) GetFinalScalarUtfAdjustments(by (uint)(buf[pos + 1] & 0b00111111) << 6 | (uint)(buf[pos + 2] & 0b00111111); // Either overlong or too large: - if ((codePoint < 0x800) || (0xffff < codePoint) || + // codePoint is necessarily <= 0xffff + if ((codePoint < 0x800) || (0xd7ff < codePoint && codePoint < 0xe000)) { return buf + pos; @@ -238,7 +240,8 @@ private static (int utfAdjust, int scalarAdjust) GetFinalScalarUtfAdjustments(by } // Too short // range check codePoint = (uint)(firstByte & 0b00011111) << 6 | (uint)(pInputBuffer[pos + 1] & 0b00111111); - if ((codePoint < 0x80) || (0x7ff < codePoint)) + // codePoint is necessarily <= 0x7ff + if (codePoint < 0x80) { utf16CodeUnitCountAdjustment = TempUtf16CodeUnitCountAdjustment; scalarCountAdjustment = TempScalarCountAdjustment; @@ -261,7 +264,8 @@ private static (int utfAdjust, int scalarAdjust) GetFinalScalarUtfAdjustments(by (uint)(pInputBuffer[pos + 1] & 0b00111111) << 6 | (uint)(pInputBuffer[pos + 2] & 0b00111111); // Either overlong or too large: - if ((codePoint < 0x800) || (0xffff < codePoint) || + // codePoint is necessarily <= 0xffff + if ((codePoint < 0x800) || (0xd7ff < codePoint && codePoint < 0xe000)) { utf16CodeUnitCountAdjustment = TempUtf16CodeUnitCountAdjustment; diff --git a/test/AsciiTest.cs b/test/AsciiTest.cs index 2928302..c771bef 100644 --- a/test/AsciiTest.cs +++ b/test/AsciiTest.cs @@ -1,4 +1,5 @@ -namespace tests; +namespace tests; + using System.Text; using SimdUnicode; diff --git a/test/UTF8ValidationTests.cs b/test/UTF8ValidationTests.cs index 9b21e5f..4aba082 100644 --- a/test/UTF8ValidationTests.cs +++ b/test/UTF8ValidationTests.cs @@ -1,4 +1,5 @@ namespace tests; + using System.Text; using SimdUnicode; using System.Diagnostics; @@ -1245,7 +1246,8 @@ public static bool ValidateUtf8Fuschia(byte[] data) if ((data[pos + 1] & 0b11000000) != 0b10000000) return false; codePoint = (uint)((byte1 & 0b00011111) << 6 | (data[pos + 1] & 0b00111111)); - if (codePoint < 0x80 || 0x7ff < codePoint) return false; + // codePoint is necessarily <= 0x7ff + if (codePoint < 0x80) return false; pos += 2; } else if ((byte1 & 0b11110000) == 0b11100000) @@ -1255,7 +1257,7 @@ public static bool ValidateUtf8Fuschia(byte[] data) if ((data[pos + 2] & 0b11000000) != 0b10000000) return false; codePoint = (uint)((byte1 & 0b00001111) << 12 | (data[pos + 1] & 0b00111111) << 6 | (data[pos + 2] & 0b00111111)); - if (codePoint < 0x800 || 0xffff < codePoint || (0xd7ff < codePoint && codePoint < 0xe000)) return false; + if (codePoint < 0x800 || (0xd7ff < codePoint && codePoint < 0xe000)) return false; pos += 3; } else if ((byte1 & 0b11111000) == 0b11110000) diff --git a/test/helpers/randomutf8.cs b/test/helpers/randomutf8.cs index 7c0ff20..73a63c0 100644 --- a/test/helpers/randomutf8.cs +++ b/test/helpers/randomutf8.cs @@ -1,4 +1,5 @@ namespace tests; + using System; using System.Collections.Generic; using System.Linq;