Skip to content

Commit 07e2c11

Browse files
committed
fix(security): prevent RCE via eval() in filter expressions (#21, #20)
- Replace unrestricted eval() with AST-validated safe evaluator that blocks __import__, arbitrary function calls, and dunder attribute access
- Restrict eval namespace to {"__builtins__": {}} as defense-in-depth
- Replace eval()-based slice parsing with direct string parsing
- Support bare @ in filter expressions for primitive array filtering
- Fix len() in filters being silently dropped by _gen_obj
1 parent 0b6f508 commit 07e2c11

File tree

2 files changed

+267
-8
lines changed

2 files changed

+267
-8
lines changed

jsonpath/jsonpath.py

Lines changed: 139 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
[10]
1717
"""
1818

19+
import ast
1920
import logging
2021
import os
2122
import re
@@ -111,6 +112,62 @@ class JSONPath:
111112
REP_REGEX_PATTERN = re.compile(r"=~\s*/(.*?)/")
112113
REP_ATTR_PATH = re.compile(r"\.(\w+|'[^']*'|\"[^\"]*\")")
113114
REP_DOTDOT_BRACKET = re.compile(r"\.(\.#B)")
115+
REP_BARE_AT = re.compile(r"(?<!\w)@(?![.\[\w])")
116+
117+
# Safe expression evaluation: allowed AST node types for filter expressions
118+
_ALLOWED_AST_NODES = frozenset(
119+
{
120+
ast.Expression,
121+
# Boolean operators
122+
ast.BoolOp,
123+
ast.And,
124+
ast.Or,
125+
# Binary operators
126+
ast.BinOp,
127+
ast.Add,
128+
ast.Sub,
129+
ast.Mult,
130+
ast.Div,
131+
ast.FloorDiv,
132+
ast.Mod,
133+
ast.MatMult,
134+
# Unary operators
135+
ast.UnaryOp,
136+
ast.Not,
137+
ast.UAdd,
138+
ast.USub,
139+
# Comparisons
140+
ast.Compare,
141+
ast.Eq,
142+
ast.NotEq,
143+
ast.Lt,
144+
ast.LtE,
145+
ast.Gt,
146+
ast.GtE,
147+
ast.Is,
148+
ast.IsNot,
149+
ast.In,
150+
ast.NotIn,
151+
# Values and names
152+
ast.Constant,
153+
ast.Name,
154+
# Subscripts
155+
ast.Subscript,
156+
ast.Slice,
157+
# Collections
158+
ast.List,
159+
ast.Tuple,
160+
ast.Dict,
161+
# Attribute access (validated separately)
162+
ast.Attribute,
163+
# Function calls (validated separately)
164+
ast.Call,
165+
# Context
166+
ast.Load,
167+
}
168+
)
169+
_ALLOWED_NAMES = frozenset({"__obj", "len", "RegexPattern"})
170+
_ALLOWED_CALLS = frozenset({"len", "RegexPattern"})
114171

115172
def __init__(self, expr: str):
116173
"""Initialize JSONPath with an expression.
@@ -124,23 +181,26 @@ def __init__(self, expr: str):
124181
self.lpath = 0
125182
self.result = []
126183
self.result_type = "VALUE"
127-
self.eval_func = eval
184+
self._custom_eval_func = None
128185

129186
expr = self._parse_expr(expr)
130187
self.segments = [s for s in expr.split(JSONPath.SEP) if s]
131188
self.lpath = len(self.segments)
132189
if logger.isEnabledFor(logging.DEBUG):
133190
logger.debug(f"segments : {self.segments}")
134191

135-
def parse(self, obj, result_type="VALUE", eval_func=eval):
192+
def parse(self, obj, result_type="VALUE", eval_func=None):
136193
"""Parse JSON object using the JSONPath expression.
137194
138195
Args:
139196
obj: JSON object (dict or list) to parse
140197
result_type: Type of result to return
141198
- 'VALUE': Return matched values (default)
142199
- 'PATH': Return JSONPath strings of matched locations
143-
eval_func: Custom eval function for filter expressions (default: builtin eval)
200+
eval_func: Custom eval function for filter expressions.
201+
If None (default), uses a safe expression evaluator that
202+
prevents code injection. Pass a custom function only if
203+
you trust the JSONPath expressions being evaluated.
144204
145205
Returns:
146206
List of matched values or paths depending on result_type
@@ -155,7 +215,7 @@ def parse(self, obj, result_type="VALUE", eval_func=eval):
155215
if result_type not in JSONPath.RESULT_TYPE:
156216
raise ValueError(f"result_type must be one of {tuple(JSONPath.RESULT_TYPE.keys())}")
157217
self.result_type = result_type
158-
self.eval_func = eval_func
218+
self._custom_eval_func = eval_func
159219

160220
# Reset state for each parse call
161221
self.result = []
@@ -257,6 +317,7 @@ def _put_paren(self, m):
257317

258318
@staticmethod
259319
def _gen_obj(m):
320+
is_len = m.group(2) is not None
260321
content = m.group(1) or m.group(2) # group 2 is for len()
261322

262323
def repl(m):
@@ -266,7 +327,10 @@ def repl(m):
266327
return f"['{g}']"
267328

268329
content = JSONPath.REP_ATTR_PATH.sub(repl, content)
269-
return "__obj" + content
330+
result = "__obj" + content
331+
if is_len:
332+
result = f"len({result})"
333+
return result
270334

271335
@staticmethod
272336
def _build_path(path: str, key) -> str:
@@ -382,6 +446,66 @@ def key_func(t, k):
382446
except TypeError as e:
383447
raise JSONPathTypeError(f"not possible to compare str and int when sorting: {e}") from e
384448

449+
@staticmethod
def _validate_filter_expr(expr):
    """Validate that a filter expression only contains safe AST constructs.

    The expression is parsed in ``eval`` mode and every node of the resulting
    tree is checked against the class-level allow-lists.

    Args:
        expr: Python-syntax filter expression to check.

    Raises:
        ValueError: If the expression is not syntactically valid, contains a
            node type outside ``_ALLOWED_AST_NODES``, references a name
            outside ``_ALLOWED_NAMES``, accesses an attribute whose name
            starts with ``_``, or calls anything other than a bare name
            listed in ``_ALLOWED_CALLS``.
    """
    try:
        # eval() skips leading spaces/tabs, but ast.parse(mode="eval") reports
        # them as an IndentationError (a SyntaxError). Strip them here so a
        # padded expression is validated instead of being rejected outright.
        tree = ast.parse(expr.lstrip(" \t"), mode="eval")
    except SyntaxError as e:
        raise ValueError(f"Invalid filter expression syntax: {e}") from e

    for node in ast.walk(tree):
        # Exact type match on purpose: subclasses of allowed nodes are not
        # implicitly trusted.
        node_type = type(node)
        if node_type not in JSONPath._ALLOWED_AST_NODES:
            raise ValueError(f"Disallowed expression construct: {node_type.__name__}")
        if node_type is ast.Name and node.id not in JSONPath._ALLOWED_NAMES:
            raise ValueError(f"Disallowed name in filter expression: {node.id}")
        # Any underscore-prefixed attribute (which covers dunders such as
        # __class__) is refused.
        if node_type is ast.Attribute and node.attr.startswith("_"):
            raise ValueError(f"Disallowed attribute access: {node.attr}")
        if node_type is ast.Call:
            # Only direct calls to an allow-listed bare name pass; method
            # calls and calls on computed callables are rejected.
            if not (isinstance(node.func, ast.Name) and node.func.id in JSONPath._ALLOWED_CALLS):
                raise ValueError("Only len() and RegexPattern() calls are allowed in filter expressions")
473+
474+
@staticmethod
def _safe_eval_filter(expr, obj):
    """Evaluate a filter expression against ``obj`` in a restricted namespace.

    The expression is first passed to ``_validate_filter_expr`` (the primary
    gate); only then is it evaluated, with an empty ``__builtins__`` mapping
    as globals (the secondary, defense-in-depth gate).

    Args:
        expr: Filter expression to evaluate.
        obj: Value bound to the name ``__obj`` during evaluation.

    Returns:
        Whatever the expression evaluates to.
    """
    JSONPath._validate_filter_expr(expr)

    # Globals expose no builtins at all; locals expose exactly the three names
    # an expression is allowed to reference.
    sandbox_globals = {"__builtins__": {}}
    sandbox_locals = {"__obj": obj, "RegexPattern": RegexPattern, "len": len}
    return eval(expr, sandbox_globals, sandbox_locals)  # noqa: S307 — safe: AST-validated, builtins stripped
487+
488+
@staticmethod
489+
def _parse_slice(s):
490+
"""Parse a slice expression string into a slice object.
491+
492+
Args:
493+
s: Slice string like '1:3', '::2', '-1:', etc.
494+
495+
Returns:
496+
A slice object
497+
"""
498+
parts = s.split(":")
499+
500+
def to_int(v):
501+
v = v.strip()
502+
return int(v) if v else None
503+
504+
start = to_int(parts[0]) if len(parts) > 0 else None
505+
stop = to_int(parts[1]) if len(parts) > 1 else None
506+
step = to_int(parts[2]) if len(parts) > 2 else None
507+
return slice(start, stop, step)
508+
385509
def _filter(self, obj, i: int, path: str, step: str):
386510
"""Evaluate filter expression and continue trace if condition is true.
387511
@@ -393,7 +517,10 @@ def _filter(self, obj, i: int, path: str, step: str):
393517
"""
394518
r = False
395519
try:
396-
r = self.eval_func(step, None, {"__obj": obj, "RegexPattern": RegexPattern})
520+
if self._custom_eval_func is not None:
521+
r = self._custom_eval_func(step, None, {"__obj": obj, "RegexPattern": RegexPattern})
522+
else:
523+
r = self._safe_eval_filter(step, obj)
397524
except Exception:
398525
pass
399526
if r:
@@ -450,8 +577,8 @@ def _trace(self, obj, i: int, path):
450577

451578
# slice
452579
if isinstance(obj, list) and JSONPath.REP_SLICE_CONTENT.fullmatch(step):
453-
obj = list(enumerate(obj))
454-
vals = self.eval_func(f"obj[{step}]")
580+
indexed = list(enumerate(obj))
581+
vals = indexed[self._parse_slice(step)]
455582
for idx, v in vals:
456583
self._trace(v, i + 1, f"{path}[{idx}]")
457584
return
@@ -470,6 +597,10 @@ def _trace(self, obj, i: int, path):
470597
# filter
471598
step = step[2:-1]
472599
step = JSONPath.REP_FILTER_CONTENT.sub(self._gen_obj, step)
600+
# Replace bare @ (current element reference) with __obj
601+
# Must happen after REP_FILTER_CONTENT (handles @.x, @[x])
602+
# and before REP_REGEX_PATTERN (introduces @ as matmul operator)
603+
step = JSONPath.REP_BARE_AT.sub("__obj", step)
473604

474605
if "=~" in step:
475606
step = JSONPath.REP_REGEX_PATTERN.sub(r"@ RegexPattern(r'\1')", step)

tests/test_issues.py

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,134 @@
33
from jsonpath import JSONPath, JSONPathTypeError
44

55

6+
class TestIssue21Security:
    """Issue #21: CVE - Remote Code Execution via eval() in filter expressions.

    Each "blocked" test feeds a malicious filter expression to the default
    evaluator and expects no match; the remaining tests check that ordinary
    filters, len(), regex matching and the eval_func opt-in keep working.
    """

    def test_import_blocked(self):
        """__import__() calls in filter expressions should be blocked."""
        data = {"users": [{"name": "Alice"}]}
        result = JSONPath('$.users[?(__import__("os"))]').parse(data)
        assert result == []

    def test_os_system_blocked(self):
        """os.system() calls via __import__ should be blocked."""
        data = {"users": [{"name": "Alice"}]}
        result = JSONPath('$.users[?(__import__("os").system("echo PWNED"))]').parse(data)
        assert result == []

    def test_builtins_open_blocked(self):
        """open() calls should be blocked."""
        data = {"users": [{"name": "Alice"}]}
        result = JSONPath('$.users[?(__import__("builtins").open("/etc/passwd"))]').parse(data)
        assert result == []

    def test_type_function_blocked(self):
        """type() calls should be blocked."""
        data = {"users": [{"name": "Alice"}]}
        result = JSONPath("$.users[?(type(__obj) == dict)]").parse(data)
        assert result == []

    def test_dunder_attribute_blocked(self):
        """Access to __class__, __bases__, etc. should be blocked."""
        data = {"users": [{"name": "Alice"}]}
        result = JSONPath("$.users[?(@.__class__)]").parse(data)
        assert result == []

    def test_class_subclasses_blocked(self):
        """Sandbox escape via __class__.__subclasses__ should be blocked."""
        data = [1, 2, 3]
        result = JSONPath("$[?(@.__class__.__bases__[0].__subclasses__())]").parse(data)
        assert result == []

    def test_legitimate_filters_still_work(self):
        """Normal filter operations should still work after security fix."""
        data = {"items": [{"v": 1}, {"v": 2}, {"v": 3}]}
        assert JSONPath("$.items[?(@.v > 1)].v").parse(data) == [2, 3]
        assert JSONPath("$.items[?(@.v == 2)].v").parse(data) == [2]
        assert JSONPath("$.items[?(@.v >= 1 and @.v <= 2)].v").parse(data) == [1, 2]

    def test_len_allowed(self):
        """len() should still be allowed in filter expressions."""
        data = {"items": [{"tags": ["a", "b"]}, {"tags": ["c"]}]}
        assert JSONPath("$.items[?(len(@.tags) > 1)]").parse(data) == [{"tags": ["a", "b"]}]

    def test_regex_still_works(self):
        """Regex matching should still work after security fix."""
        data = {"items": [{"name": "apple"}, {"name": "banana"}]}
        assert JSONPath("$.items[?(@.name =~ /^app/)].name").parse(data) == ["apple"]

    def test_custom_eval_func_opt_in(self):
        """Users can opt in to custom eval via eval_func parameter."""
        # ``__builtins__`` is a dict in imported modules but the module object
        # in ``__main__`` (a CPython implementation detail), so subscripting
        # it is fragile. Reach the real eval through ``builtins`` instead.
        import builtins

        data = {"items": [{"v": 1}, {"v": 2}]}
        jp = JSONPath("$.items[?(@.v > 0)]")

        calls = []

        def custom_eval_fn(expr, *args, **kwargs):  # noqa: S307
            # Record every expression so the test can prove the hook was used.
            calls.append(expr)
            return builtins.eval(expr, *args, **kwargs)  # noqa: S307

        result = jp.parse(data, eval_func=custom_eval_fn)
        assert len(result) == 2
        assert len(calls) > 0
76+
77+
78+
class TestIssue20BareAtFilter:
    """Issue #20: Filtering primitive arrays with bare @ reference."""

    def test_bare_at_string_equality(self):
        """A bare @ compared with a string literal selects matching strings."""
        doc = {"tags": ["web", "test2"]}
        matched = JSONPath("$.tags[?(@ == 'web')]").parse(doc)
        assert matched == ["web"]

    def test_bare_at_numeric_comparison(self):
        """A bare @ can be compared numerically with >, >= and ==."""
        doc = {"nums": [1, 5, 10, 15]}
        expectations = (
            ("$.nums[?(@ > 5)]", [10, 15]),
            ("$.nums[?(@ >= 10)]", [10, 15]),
            ("$.nums[?(@ == 5)]", [5]),
        )
        for path, expected in expectations:
            assert JSONPath(path).parse(doc) == expected

    def test_bare_at_regex(self):
        """A bare @ can be the left-hand side of a =~ regex match."""
        doc = {"tags": ["web", "owasp:software_and_data_integrity_failures"]}
        matched = JSONPath("$.tags[?(@ =~ /owasp:.+/)]").parse(doc)
        assert matched == ["owasp:software_and_data_integrity_failures"]

    def test_bare_at_in_operator(self):
        """A bare @ can be the right-hand side of the 'in' operator."""
        doc = {"items": ["apple", "banana", "cherry"]}
        matched = JSONPath("$.items[?('an' in @)]").parse(doc)
        assert matched == ["banana"]

    def test_bare_at_on_root_array(self):
        """A bare @ works when the filtered array is the document root."""
        doc = [1, 2, 3, 4, 5]
        matched = JSONPath("$[?(@ > 3)]").parse(doc)
        assert matched == [4, 5]

    def test_bare_at_does_not_affect_dotted_access(self):
        """Dotted @.key access keeps working alongside bare @ support."""
        doc = {"items": [{"v": 1}, {"v": 2}, {"v": 3}]}
        matched = JSONPath("$.items[?(@.v > 1)].v").parse(doc)
        assert matched == [2, 3]
114+
115+
116+
class TestIssue19KeyExistence:
    """Issue #19: Key existence check works via truthiness."""

    def test_key_existence_with_and(self):
        """An equality test and-ed with a bare key reference keeps only room2."""
        rooms = [
            {"id": "room1", "type": "HotelRoom", "temperature": {"type": "Number"}},
            {"id": "room2", "type": "HotelRoom", "co2": {"type": "Number"}},
        ]
        finder = JSONPath('$[?(@.type == "HotelRoom" and @.co2)]')
        matched = finder.parse(rooms)
        assert len(matched) == 1
        assert matched[0]["id"] == "room2"

    def test_key_existence_simple(self):
        """A bare key reference selects only the element that has that key."""
        records = [{"a": 1, "b": 2}, {"a": 3}]
        matched = JSONPath("$[?(@.b)]").parse(records)
        assert matched == [{"a": 1, "b": 2}]
132+
133+
6134
def test_issue_17():
7135
data = [
8136
{"time": "2023-01-02T20:32:01Z", "user": "user1"},

0 commit comments

Comments
 (0)