Skip to content

Commit cb93017

Browse files
authored
Merge pull request #606 from markshannon/python-fix-regex-fp
Python: Fix off-by-one error in regex parsing.
2 parents 6a7b528 + a5b79e9 commit cb93017

File tree

8 files changed

+48
-5
lines changed

8 files changed

+48
-5
lines changed

python/ql/src/semmle/python/regex.qll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -565,10 +565,8 @@ abstract class RegexString extends Expr {
565565
this.sequenceOrQualified(start, end) and not this.isOptionDivider(start-1) and
566566
item_start = start
567567
or
568-
exists(int endp1 | end = endp1-1 |
569-
start = end and not this.item_end(start) and this.isOptionDivider(endp1) and
570-
item_start = start
571-
)
568+
start = end and not this.item_end(start) and this.isOptionDivider(end) and
569+
item_start = start
572570
or
573571
exists(int mid |
574572
this.subalternation(start, mid, _) and

python/ql/test/library-tests/regex/Alternation.expected

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
| (?:(?:\n\r?)\|^)( *)\\S | 3 | 12 | (?:\n\r?)\|^ | 3 | 10 | (?:\n\r?) |
22
| (?:(?:\n\r?)\|^)( *)\\S | 3 | 12 | (?:\n\r?)\|^ | 11 | 12 | ^ |
3+
| (?:(?P<n1>^(?:\|x))) | 14 | 16 | \|x | 14 | 14 | |
4+
| (?:(?P<n1>^(?:\|x))) | 14 | 16 | \|x | 15 | 16 | x |
35
| (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | 3 | 9 | [^%]\|^ | 3 | 7 | [^%] |
46
| (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | 3 | 9 | [^%]\|^ | 8 | 9 | ^ |
57
| (?P<name>[\\w]+)\| | 0 | 16 | (?P<name>[\\w]+)\| | 0 | 15 | (?P<name>[\\w]+) |
@@ -8,10 +10,13 @@
810
| (\\033\|~{) | 1 | 8 | \\033\|~{ | 6 | 8 | ~{ |
911
| \\\|\\[\\][123]\|\\{\\} | 0 | 16 | \\\|\\[\\][123]\|\\{\\} | 0 | 11 | \\\|\\[\\][123] |
1012
| \\\|\\[\\][123]\|\\{\\} | 0 | 16 | \\\|\\[\\][123]\|\\{\\} | 12 | 16 | \\{\\} |
13+
| \|x | 0 | 2 | \|x | 0 | 0 | |
14+
| \|x | 0 | 2 | \|x | 1 | 2 | x |
1115
| ^(^y\|^z)(u$\|v$)$ | 2 | 7 | ^y\|^z | 2 | 4 | ^y |
1216
| ^(^y\|^z)(u$\|v$)$ | 2 | 7 | ^y\|^z | 5 | 7 | ^z |
1317
| ^(^y\|^z)(u$\|v$)$ | 9 | 14 | u$\|v$ | 9 | 11 | u$ |
1418
| ^(^y\|^z)(u$\|v$)$ | 9 | 14 | u$\|v$ | 12 | 14 | v$ |
15-
| x\|(?<!\\w)l | 0 | 10 | x\|(?<!\\w)l | 0 | 0 | |
19+
| x\| | 0 | 2 | x\| | 0 | 1 | x |
20+
| x\| | 0 | 2 | x\| | 2 | 2 | |
1621
| x\|(?<!\\w)l | 0 | 10 | x\|(?<!\\w)l | 0 | 1 | x |
1722
| x\|(?<!\\w)l | 0 | 10 | x\|(?<!\\w)l | 2 | 10 | (?<!\\w)l |

python/ql/test/library-tests/regex/Characters.expected

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@
2525
| (?:(?:\n\r?)\|^)( *)\\S | 11 | 12 |
2626
| (?:(?:\n\r?)\|^)( *)\\S | 14 | 15 |
2727
| (?:(?:\n\r?)\|^)( *)\\S | 17 | 19 |
28+
| (?:(?P<n1>^(?:\|x))) | 10 | 11 |
29+
| (?:(?P<n1>^(?:\|x))) | 15 | 16 |
2830
| (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | 5 | 6 |
2931
| (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | 8 | 9 |
3032
| (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | 11 | 12 |
@@ -62,6 +64,7 @@
6264
| \\\|\\[\\][123]\|\\{\\} | 9 | 10 |
6365
| \\\|\\[\\][123]\|\\{\\} | 12 | 14 |
6466
| \\\|\\[\\][123]\|\\{\\} | 14 | 16 |
67+
| \|x | 1 | 2 |
6568
| ^(^y\|^z)(u$\|v$)$ | 0 | 1 |
6669
| ^(^y\|^z)(u$\|v$)$ | 2 | 3 |
6770
| ^(^y\|^z)(u$\|v$)$ | 3 | 4 |
@@ -110,6 +113,7 @@
110113
| ax{,3} | 3 | 4 |
111114
| ax{,3} | 4 | 5 |
112115
| ax{,3} | 5 | 6 |
116+
| x\| | 0 | 1 |
113117
| x\|(?<!\\w)l | 0 | 1 |
114118
| x\|(?<!\\w)l | 6 | 8 |
115119
| x\|(?<!\\w)l | 9 | 10 |

python/ql/test/library-tests/regex/FirstLast.expected

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@
1010
| (?:(?:\n\r?)\|^)( *)\\S | first | 6 | 7 |
1111
| (?:(?:\n\r?)\|^)( *)\\S | first | 11 | 12 |
1212
| (?:(?:\n\r?)\|^)( *)\\S | last | 17 | 19 |
13+
| (?:(?P<n1>^(?:\|x))) | first | 10 | 11 |
14+
| (?:(?P<n1>^(?:\|x))) | first | 15 | 16 |
15+
| (?:(?P<n1>^(?:\|x))) | last | 15 | 16 |
1316
| (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | first | 0 | 11 |
1417
| (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | first | 3 | 7 |
1518
| (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | first | 8 | 9 |
@@ -46,6 +49,8 @@
4649
| \\\|\\[\\][123]\|\\{\\} | first | 12 | 14 |
4750
| \\\|\\[\\][123]\|\\{\\} | last | 6 | 11 |
4851
| \\\|\\[\\][123]\|\\{\\} | last | 14 | 16 |
52+
| \|x | first | 1 | 2 |
53+
| \|x | last | 1 | 2 |
4954
| ^(^y\|^z)(u$\|v$)$ | first | 0 | 1 |
5055
| ^(^y\|^z)(u$\|v$)$ | first | 2 | 3 |
5156
| ^(^y\|^z)(u$\|v$)$ | first | 3 | 4 |
@@ -82,6 +87,8 @@
8287
| ax{,3} | last | 1 | 2 |
8388
| ax{,3} | last | 1 | 6 |
8489
| ax{,3} | last | 5 | 6 |
90+
| x\| | first | 0 | 1 |
91+
| x\| | last | 0 | 1 |
8592
| x\|(?<!\\w)l | first | 0 | 1 |
8693
| x\|(?<!\\w)l | first | 6 | 8 |
8794
| x\|(?<!\\w)l | first | 9 | 10 |

python/ql/test/library-tests/regex/GroupContents.expected

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22
| (?:(?:\n\r?)\|^)( *)\\S | 0 | 13 | (?:(?:\n\r?)\|^) | 3 | 12 | (?:\n\r?)\|^ |
33
| (?:(?:\n\r?)\|^)( *)\\S | 3 | 10 | (?:\n\r?) | 6 | 9 | \n\r? |
44
| (?:(?:\n\r?)\|^)( *)\\S | 13 | 17 | ( *) | 14 | 16 | * |
5+
| (?:(?P<n1>^(?:\|x))) | 0 | 19 | (?:(?P<n1>^(?:\|x))) | 3 | 18 | (?P<n1>^(?:\|x)) |
6+
| (?:(?P<n1>^(?:\|x))) | 3 | 18 | (?P<n1>^(?:\|x)) | 10 | 17 | ^(?:\|x) |
7+
| (?:(?P<n1>^(?:\|x))) | 11 | 17 | (?:\|x) | 14 | 16 | \|x |
58
| (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | 0 | 10 | (?:[^%]\|^) | 3 | 9 | [^%]\|^ |
69
| (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | 14 | 19 | (\\w*) | 15 | 18 | \\w* |
710
| (?P<name>[\\w]+)\| | 0 | 15 | (?P<name>[\\w]+) | 9 | 14 | [\\w]+ |

python/ql/test/library-tests/regex/Regex.expected

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,16 @@
4141
| (?:(?:\n\r?)\|^)( *)\\S | sequence | 3 | 10 |
4242
| (?:(?:\n\r?)\|^)( *)\\S | sequence | 6 | 9 |
4343
| (?:(?:\n\r?)\|^)( *)\\S | sequence | 11 | 12 |
44+
| (?:(?P<n1>^(?:\|x))) | ^ | 10 | 11 |
45+
| (?:(?P<n1>^(?:\|x))) | char | 15 | 16 |
46+
| (?:(?P<n1>^(?:\|x))) | choice | 14 | 16 |
47+
| (?:(?P<n1>^(?:\|x))) | non-empty group | 0 | 19 |
48+
| (?:(?P<n1>^(?:\|x))) | non-empty group | 3 | 18 |
49+
| (?:(?P<n1>^(?:\|x))) | non-empty group | 11 | 17 |
50+
| (?:(?P<n1>^(?:\|x))) | sequence | 0 | 19 |
51+
| (?:(?P<n1>^(?:\|x))) | sequence | 3 | 18 |
52+
| (?:(?P<n1>^(?:\|x))) | sequence | 10 | 17 |
53+
| (?:(?P<n1>^(?:\|x))) | sequence | 15 | 16 |
4454
| (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | ^ | 8 | 9 |
4555
| (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | char | 5 | 6 |
4656
| (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | char | 11 | 12 |
@@ -123,6 +133,9 @@
123133
| \\\|\\[\\][123]\|\\{\\} | choice | 0 | 16 |
124134
| \\\|\\[\\][123]\|\\{\\} | sequence | 0 | 11 |
125135
| \\\|\\[\\][123]\|\\{\\} | sequence | 12 | 16 |
136+
| \|x | char | 1 | 2 |
137+
| \|x | choice | 0 | 2 |
138+
| \|x | sequence | 1 | 2 |
126139
| ^(^y\|^z)(u$\|v$)$ | $ | 10 | 11 |
127140
| ^(^y\|^z)(u$\|v$)$ | $ | 13 | 14 |
128141
| ^(^y\|^z)(u$\|v$)$ | $ | 15 | 16 |
@@ -193,6 +206,9 @@
193206
| ax{,3} | char | 5 | 6 |
194207
| ax{,3} | qualified | 1 | 6 |
195208
| ax{,3} | sequence | 0 | 6 |
209+
| x\| | char | 0 | 1 |
210+
| x\| | choice | 0 | 2 |
211+
| x\| | sequence | 0 | 1 |
196212
| x\|(?<!\\w)l | char | 0 | 1 |
197213
| x\|(?<!\\w)l | char | 6 | 8 |
198214
| x\|(?<!\\w)l | char | 9 | 10 |

python/ql/test/library-tests/regex/test.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,3 +50,10 @@
5050
re.compile("", flags=re.VERBOSE|re.IGNORECASE)
5151
re.search("", None, re.UNICODE)
5252
x = re.search("", flags=re.UNICODE)
53+
54+
#empty choice
55+
re.compile(r'|x')
56+
re.compile(r'x|')
57+
58+
#Named group with caret and empty choice.
59+
re.compile(r'(?:(?P<n1>^(?:|x)))')

python/ql/test/query-tests/Expressions/Regex/test.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,3 +133,6 @@
133133

134134
# Compiled regular expression marking it as verbose
135135
ODASA_6786 = re.compile(VERBOSE_REGEX, re.VERBOSE)
136+
137+
#Named group with caret and empty choice.
138+
re.compile(r'(?:(?P<n1>^(?:|x)))')

0 commit comments

Comments
 (0)