From 81db3798101d82d4df2b03e37875b35f13dc1f57 Mon Sep 17 00:00:00 2001 From: Daniel Duan Date: Tue, 30 Dec 2025 23:21:38 -0800 Subject: [PATCH] Defer basic-string escape validation in tokenizer Stop validating escape sequences while scanning double-quoted strings. Validation remains in unpacking, reducing work during parsing of large inputs. --- Sources/TOMLDecoder/Parsing/Parser.swift | 59 ++---------------------- 1 file changed, 3 insertions(+), 56 deletions(-) diff --git a/Sources/TOMLDecoder/Parsing/Parser.swift b/Sources/TOMLDecoder/Parsing/Parser.swift index 64ad65f..7f145b9 100644 --- a/Sources/TOMLDecoder/Parsing/Parser.swift +++ b/Sources/TOMLDecoder/Parsing/Parser.swift @@ -253,9 +253,6 @@ struct Parser { if ch == CodeUnits.doubleQuote { var i = start + 1 - var expectedHexDigit = 0 - var localExpectedHexDigit = 0 - var localEscape = false // 8x unrolling for double-quoted strings while i + 8 <= range.upperBound { @@ -272,71 +269,21 @@ struct Parser { while i < range.upperBound { let ch = bytes[i] - if localEscape { - localEscape = false - if ch == CodeUnits.lowerB || ch == CodeUnits.lowerT - || ch == CodeUnits.lowerN || ch == CodeUnits.lowerF - || ch == CodeUnits.lowerR || ch == CodeUnits.doubleQuote - || ch == CodeUnits.backslash - { - i += 1 - continue - } - - if ch == CodeUnits.lowerU { - localExpectedHexDigit = 4 - i += 1 - continue - } - - if ch == CodeUnits.upperU { - localExpectedHexDigit = 8 - i += 1 - continue - } - - // Set error flag and break - expectedHexDigit = -1 - break - } - - if localExpectedHexDigit > 0 { - localExpectedHexDigit -= 1 - if ch.isHexDigit { + if ch == CodeUnits.backslash { + i += 1 + if i < range.upperBound { i += 1 continue } - // Set error flag and break - expectedHexDigit = -2 break } - if ch == CodeUnits.backslash { - localEscape = true - i += 1 - continue - } - - if ch == CodeUnits.singleQuote { - i += 1 - continue - } - if ch == CodeUnits.lf || ch == CodeUnits.doubleQuote { break } i += 1 } - expectedHexDigit = localExpectedHexDigit - - if expectedHexDigit == -1 { - throw TOMLError( - .syntax(lineNumber: lineNumber, message: "expected escape char")) - } - if expectedHexDigit == -2 { - throw TOMLError(.syntax(lineNumber: lineNumber, message: "expect hex char")) - } if i >= range.upperBound || bytes[i] != CodeUnits.doubleQuote { throw TOMLError( .syntax(lineNumber: lineNumber, message: "unterminated quote"))