Skip to content

Commit 4ce5ba7

Browse files
committed
fix(build-infra): escape regex patterns for string literal context in Unicode transform
The Unicode property escape transform broke when converting regex literals to RegExp constructor calls. Babel's StringLiteral.value exposes the pattern with its escape sequences already interpreted, so backslashes must be re-escaped when the pattern is written back into source code as a string literal.

Changes:
- Add escapeForStringLiteral() to double-escape backslashes for string context
- Update NewExpression handler to escape transformed patterns
- Update RegExpLiteral handler to work with original source text

This fixes the compromise package RegExp error that was causing 81 test failures with invalid `?+?` quantifier syntax.
1 parent 2d154e9 commit 4ce5ba7

File tree

1 file changed

+47
-9
lines changed

1 file changed

+47
-9
lines changed

packages/build-infra/lib/unicode-property-escape-transform.mjs

Lines changed: 47 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,19 @@ function transformRegexPattern(pattern) {
155155
return transformed
156156
}
157157

158+
/**
 * Escape a string for insertion into JavaScript string literal context.
 *
 * Babel's StringLiteral.value holds the interpreted string (escape sequences
 * already resolved), so writing it back between quotes requires re-escaping.
 * Both quote characters are escaped, so the result can be wrapped in either
 * single or double quotes.
 *
 * @param {string} str - Interpreted string value to re-escape.
 * @returns {string} Text that can be placed between '...' or "..." in source.
 */
function escapeForStringLiteral(str) {
  return str
    // Backslashes go first; otherwise the backslashes added below would be doubled.
    .replace(/\\/g, '\\\\')
    // Escaping both quote kinds is harmless: \' is valid inside "..." and vice versa.
    .replace(/"/g, '\\"')
    .replace(/'/g, "\\'")
    // A raw line terminator would end the string literal with a syntax error.
    .replace(/\n/g, '\\n')
    .replace(/\r/g, '\\r')
    // U+2028/U+2029 are line terminators for pre-ES2019 parsers; escape them too.
    .replace(/\u2028/g, '\\u2028')
    .replace(/\u2029/g, '\\u2029')
}
169+
170+
158171
/**
159172
* Transform Unicode property escapes in regex patterns for ICU-free environments.
160173
*
@@ -193,7 +206,10 @@ export function transformUnicodePropertyEscapes(content) {
193206
return
194207
}
195208

196-
// Transform the pattern.
209+
// Get the original regex literal from source.
210+
const originalRegex = content.slice(start, end)
211+
212+
// Transform the pattern (using Babel's interpreted pattern for replacements).
197213
const transformedPattern = transformRegexPattern(pattern)
198214

199215
// Check if transformed pattern still has unsupported Unicode features.
@@ -203,21 +219,40 @@ export function transformUnicodePropertyEscapes(content) {
203219
return
204220
}
205221

206-
// If pattern changed, update it and remove Unicode flags.
222+
// If pattern changed, update it by doing string replacement on the original source.
207223
if (transformedPattern !== pattern) {
208-
// Remove /u and /v flags.
209-
const newFlags = flags.replace(/[uv]/g, '')
210-
const newRegex = `/${transformedPattern}/${newFlags}`
224+
// Work with the original regex source text, removing opening/closing slashes and flags.
225+
// Extract just the pattern part from /pattern/flags.
226+
const lastSlash = originalRegex.lastIndexOf('/')
227+
const originalPattern = originalRegex.slice(1, lastSlash)
228+
const originalFlags = originalRegex.slice(lastSlash + 1)
229+
230+
// Do the same transformations on the source text.
231+
let newPattern = originalPattern
232+
for (const [prop, replacement] of Object.entries(unicodePropertyMap)) {
233+
const escapedProp = prop.replace(/[\\{}]/g, '\\$&')
234+
newPattern = newPattern.replace(
235+
new RegExp(`\\\\p\\{${escapedProp}\\}`, 'g'),
236+
`[${replacement}]`,
237+
)
238+
}
239+
240+
// Remove /u and /v flags from the original flags.
241+
const newFlags = originalFlags.replace(/[uv]/g, '')
242+
const newRegex = `/${newPattern}/${newFlags}`
211243
s.overwrite(start, end, newRegex)
212244
return
213245
}
214246

215247
// Pattern unchanged but has Unicode flags - check if safe to remove flags.
216248
// Only remove flags if pattern has no \u{} escapes or other Unicode-specific syntax.
217249
if (!hasUnsupportedUnicodeFeatures(pattern)) {
218-
// Safe to remove Unicode flags.
219-
const newFlags = flags.replace(/[uv]/g, '')
220-
const newRegex = `/${pattern}/${newFlags}`
250+
// Safe to remove Unicode flags - just remove the flags from the original source.
251+
const lastSlash = originalRegex.lastIndexOf('/')
252+
const originalPattern = originalRegex.slice(1, lastSlash)
253+
const originalFlags = originalRegex.slice(lastSlash + 1)
254+
const newFlags = originalFlags.replace(/[uv]/g, '')
255+
const newRegex = `/${originalPattern}/${newFlags}`
221256
s.overwrite(start, end, newRegex)
222257
} else {
223258
// Has unsupported features, replace with no-op.
@@ -277,8 +312,11 @@ export function transformUnicodePropertyEscapes(content) {
277312
const patternQuote = content[patternArg.start]
278313
const flagsQuote = content[flagsArg.start]
279314

315+
// Escape the transformed pattern for string literal context.
316+
const escapedPattern = escapeForStringLiteral(transformedPattern)
317+
280318
// Replace pattern.
281-
s.overwrite(patternArg.start, patternArg.end, `${patternQuote}${transformedPattern}${patternQuote}`)
319+
s.overwrite(patternArg.start, patternArg.end, `${patternQuote}${escapedPattern}${patternQuote}`)
282320

283321
// Replace flags.
284322
s.overwrite(flagsArg.start, flagsArg.end, `${flagsQuote}${newFlags}${flagsQuote}`)

0 commit comments

Comments
 (0)