Skip to content

Commit 17b7d8f

Browse files
committed
Add support for nested character classes in RegExpParser
1 parent 4a03410 commit 17b7d8f

File tree

1 file changed

+26
-1
lines changed

1 file changed

+26
-1
lines changed

javascript/extractor/src/com/semmle/js/parser/RegExpParser.java

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ public List<Error> getErrors() {
6868
private List<Error> errors;
6969
private List<BackReference> backrefs;
7070
private int maxbackref;
71+
// Gates the nested character class grammar: when true, parseCharacterClass hands off to
// parseNestedCharacterClass, and parseCharacterClassAtom treats a '[' as the start of a
// nested class.
// NOTE(review): initialised to true and no assignment is visible in this diff, so the
// non-nested path in parseCharacterClass looks unreachable. Confirm whether this should be
// derived from the flags of the regular expression being parsed.
private boolean vFlag = true;
7172

7273
/** Parse the given string as a regular expression. */
7374
public Result parse(String src) {
@@ -507,9 +508,9 @@ private RegExpTerm parseAtomEscape(SourceLocation loc, boolean inCharClass) {
507508
}
508509

509510
private RegExpTerm parseCharacterClass() {
511+
if (vFlag) return parseNestedCharacterClass();
510512
SourceLocation loc = new SourceLocation(pos());
511513
List<RegExpTerm> elements = new ArrayList<>();
512-
513514
this.match("[");
514515
boolean inverted = this.match("^");
515516
while (!this.match("]")) {
@@ -522,6 +523,27 @@ private RegExpTerm parseCharacterClass() {
522523
return this.finishTerm(new CharacterClass(loc, elements, inverted));
523524
}
524525

526+
// New method to support nested character classes.
527+
private RegExpTerm parseNestedCharacterClass() {
528+
SourceLocation loc = new SourceLocation(pos());
529+
this.match("["); // consume '['
530+
boolean inverted = this.match("^");
531+
List<RegExpTerm> elements = new ArrayList<>();
532+
while (!this.match("]")) {
533+
if (this.atEOS()) {
534+
this.error(Error.EXPECTED_RBRACKET);
535+
break;
536+
}
537+
// If nested '[' is found, recursively parse it.
538+
if (vFlag && lookahead("[")) {
539+
elements.add(parseNestedCharacterClass());
540+
} else {
541+
elements.add(this.parseCharacterClassElement());
542+
}
543+
}
544+
return this.finishTerm(new CharacterClass(loc, elements, inverted));
545+
}
546+
525547
// The single-letter names "d", "D", "s", "S", "w" and "W".
// NOTE(review): presumably the letters accepted after a backslash as character class
// escapes; the code that reads this list is outside the visible diff, so confirm there.
private static final List<String> escapeClasses = Arrays.asList("d", "D", "s", "S", "w", "W");
526548

527549
private RegExpTerm parseCharacterClassElement() {
@@ -540,6 +562,9 @@ private RegExpTerm parseCharacterClassElement() {
540562

541563
private RegExpTerm parseCharacterClassAtom() {
542564
SourceLocation loc = new SourceLocation(pos());
565+
if (vFlag && peekChar(true) == '[') {
566+
return parseNestedCharacterClass();
567+
}
543568
char c = this.nextChar();
544569
if (c == '\\') {
545570
if (this.match("b")) return this.finishTerm(new ControlEscape(loc, "\b", 8, "\\b"));

0 commit comments

Comments
 (0)