1616 [10]
1717"""
1818
19+ import ast
1920import logging
2021import os
2122import re
@@ -111,6 +112,62 @@ class JSONPath:
111112 REP_REGEX_PATTERN = re .compile (r"=~\s*/(.*?)/" )
112113 REP_ATTR_PATH = re .compile (r"\.(\w+|'[^']*'|\"[^\"]*\")" )
113114 REP_DOTDOT_BRACKET = re .compile (r"\.(\.#B)" )
115+ REP_BARE_AT = re .compile (r"(?<!\w)@(?![.\[\w])" )
116+
# Safe expression evaluation: allowed AST node types for filter expressions.
# _validate_filter_expr rejects any node whose exact type is not a member of
# this set, so anything not listed here (lambdas, comprehensions, keyword
# arguments, starred expressions, ...) is refused before evaluation.
_ALLOWED_AST_NODES = frozenset(
    {
        ast.Expression,
        # Boolean operators
        ast.BoolOp,
        ast.And,
        ast.Or,
        # Binary operators
        ast.BinOp,
        ast.Add,
        ast.Sub,
        ast.Mult,
        ast.Div,
        ast.FloorDiv,
        ast.Mod,
        # MatMult is needed because "=~ /re/" is rewritten to
        # "@ RegexPattern(r're')" before the filter is evaluated.
        ast.MatMult,
        # Unary operators
        ast.UnaryOp,
        ast.Not,
        ast.UAdd,
        ast.USub,
        # Comparisons
        ast.Compare,
        ast.Eq,
        ast.NotEq,
        ast.Lt,
        ast.LtE,
        ast.Gt,
        ast.GtE,
        ast.Is,
        ast.IsNot,
        ast.In,
        ast.NotIn,
        # Values and names (names are further restricted to _ALLOWED_NAMES)
        ast.Constant,
        ast.Name,
        # Subscripts
        ast.Subscript,
        ast.Slice,
        # Collections
        ast.List,
        ast.Tuple,
        ast.Dict,
        # Attribute access (validated separately: attributes starting with
        # an underscore are rejected)
        ast.Attribute,
        # Function calls (validated separately: only bare-name calls listed
        # in _ALLOWED_CALLS are accepted)
        ast.Call,
        # Context
        ast.Load,
    }
)
# Identifiers a filter expression may reference; these are the names bound in
# the namespace used by _safe_eval_filter.
_ALLOWED_NAMES = frozenset({"__obj", "len", "RegexPattern"})
# Callables a filter expression may invoke, and only as a plain name (not via
# an attribute or any other expression).
_ALLOWED_CALLS = frozenset({"len", "RegexPattern"})
114171
115172 def __init__ (self , expr : str ):
116173 """Initialize JSONPath with an expression.
@@ -124,23 +181,26 @@ def __init__(self, expr: str):
124181 self .lpath = 0
125182 self .result = []
126183 self .result_type = "VALUE"
127- self .eval_func = eval
184+ self ._custom_eval_func = None
128185
129186 expr = self ._parse_expr (expr )
130187 self .segments = [s for s in expr .split (JSONPath .SEP ) if s ]
131188 self .lpath = len (self .segments )
132189 if logger .isEnabledFor (logging .DEBUG ):
133190 logger .debug (f"segments : { self .segments } " )
134191
135- def parse (self , obj , result_type = "VALUE" , eval_func = eval ):
192+ def parse (self , obj , result_type = "VALUE" , eval_func = None ):
136193 """Parse JSON object using the JSONPath expression.
137194
138195 Args:
139196 obj: JSON object (dict or list) to parse
140197 result_type: Type of result to return
141198 - 'VALUE': Return matched values (default)
142199 - 'PATH': Return JSONPath strings of matched locations
143- eval_func: Custom eval function for filter expressions (default: builtin eval)
200+ eval_func: Custom eval function for filter expressions.
201+ If None (default), uses a safe expression evaluator that
202+ prevents code injection. Pass a custom function only if
203+ you trust the JSONPath expressions being evaluated.
144204
145205 Returns:
146206 List of matched values or paths depending on result_type
@@ -155,7 +215,7 @@ def parse(self, obj, result_type="VALUE", eval_func=eval):
155215 if result_type not in JSONPath .RESULT_TYPE :
156216 raise ValueError (f"result_type must be one of { tuple (JSONPath .RESULT_TYPE .keys ())} " )
157217 self .result_type = result_type
158- self .eval_func = eval_func
218+ self ._custom_eval_func = eval_func
159219
160220 # Reset state for each parse call
161221 self .result = []
@@ -257,6 +317,7 @@ def _put_paren(self, m):
257317
258318 @staticmethod
259319 def _gen_obj (m ):
320+ is_len = m .group (2 ) is not None
260321 content = m .group (1 ) or m .group (2 ) # group 2 is for len()
261322
262323 def repl (m ):
@@ -266,7 +327,10 @@ def repl(m):
266327 return f"['{ g } ']"
267328
268329 content = JSONPath .REP_ATTR_PATH .sub (repl , content )
269- return "__obj" + content
330+ result = "__obj" + content
331+ if is_len :
332+ result = f"len({ result } )"
333+ return result
270334
271335 @staticmethod
272336 def _build_path (path : str , key ) -> str :
@@ -382,6 +446,66 @@ def key_func(t, k):
382446 except TypeError as e :
383447 raise JSONPathTypeError (f"not possible to compare str and int when sorting: { e } " ) from e
384448
@staticmethod
def _validate_filter_expr(expr):
    """Validate that a filter expression only contains safe AST constructs.

    The expression is parsed in ``eval`` mode and every node of the
    resulting tree is checked against the class-level whitelists.

    Args:
        expr: Source text of the (already rewritten) filter expression.

    Raises:
        ValueError: If the expression is not syntactically valid, uses a
            node type outside ``_ALLOWED_AST_NODES``, references a name
            outside ``_ALLOWED_NAMES``, accesses an attribute starting
            with an underscore, or calls anything other than a bare
            ``len`` / ``RegexPattern``.
    """
    # The builtin eval() ignores leading spaces and tabs, but
    # ast.parse(mode="eval") rejects them with IndentationError. Normalise
    # here so a filter such as "?( @.x > 1)" validates the same way it
    # evaluates instead of being refused as a syntax error.
    source = expr.lstrip(" \t") if isinstance(expr, str) else expr
    try:
        tree = ast.parse(source, mode="eval")
    except SyntaxError as e:
        raise ValueError(f"Invalid filter expression syntax: {e}") from e

    for node in ast.walk(tree):
        # Exact-type membership (not isinstance) keeps the whitelist strict:
        # a subclass of an allowed node is not implicitly allowed.
        node_type = type(node)
        if node_type not in JSONPath._ALLOWED_AST_NODES:
            raise ValueError(f"Disallowed expression construct: {node_type.__name__}")
        if node_type is ast.Name and node.id not in JSONPath._ALLOWED_NAMES:
            raise ValueError(f"Disallowed name in filter expression: {node.id}")
        # Blocks dunder/private attribute access such as __class__.
        if node_type is ast.Attribute and node.attr.startswith("_"):
            raise ValueError(f"Disallowed attribute access: {node.attr}")
        if node_type is ast.Call:
            # Only direct calls by name are permitted; method calls and
            # calls on arbitrary expressions are rejected.
            if not (isinstance(node.func, ast.Name) and node.func.id in JSONPath._ALLOWED_CALLS):
                raise ValueError("Only len() and RegexPattern() calls are allowed in filter expressions")
473+
@staticmethod
def _safe_eval_filter(expr, obj):
    """Evaluate a filter expression against ``obj`` in a restricted namespace.

    The expression is first checked by ``_validate_filter_expr`` (the
    primary gate). It is then evaluated with an empty ``__builtins__``
    mapping as a secondary gate, so only the names bound below are
    reachable from the expression.

    Args:
        expr: Source text of the filter expression.
        obj: The current element, exposed to the expression as ``__obj``.

    Returns:
        Whatever the expression evaluates to.

    Raises:
        ValueError: If validation rejects the expression.
    """
    JSONPath._validate_filter_expr(expr)

    # No Python builtins are visible to the expression.
    restricted_globals = {"__builtins__": {}}
    # The only names the expression may resolve.
    filter_locals = {
        "__obj": obj,
        "RegexPattern": RegexPattern,
        "len": len,
    }
    return eval(expr, restricted_globals, filter_locals)  # noqa: S307 — safe: AST-validated, builtins stripped
487+
@staticmethod
def _parse_slice(s):
    """Build a ``slice`` from its textual form.

    Args:
        s: Colon-separated slice text such as '1:3', '::2' or '-1:'.
            Whitespace around each field is ignored; a blank or missing
            field becomes ``None``. Fields beyond the third are ignored.

    Returns:
        The corresponding ``slice(start, stop, step)`` object.

    Raises:
        ValueError: If a non-blank field is not an integer literal.
    """
    # Pad to three fields so start/stop/step can be handled uniformly,
    # then keep only the first three.
    fields = (s.split(":") + ["", "", ""])[:3]

    bounds = []
    for field in fields:
        text = field.strip()
        bounds.append(int(text) if text else None)

    return slice(*bounds)
508+
385509 def _filter (self , obj , i : int , path : str , step : str ):
386510 """Evaluate filter expression and continue trace if condition is true.
387511
@@ -393,7 +517,10 @@ def _filter(self, obj, i: int, path: str, step: str):
393517 """
394518 r = False
395519 try :
396- r = self .eval_func (step , None , {"__obj" : obj , "RegexPattern" : RegexPattern })
520+ if self ._custom_eval_func is not None :
521+ r = self ._custom_eval_func (step , None , {"__obj" : obj , "RegexPattern" : RegexPattern })
522+ else :
523+ r = self ._safe_eval_filter (step , obj )
397524 except Exception :
398525 pass
399526 if r :
@@ -450,8 +577,8 @@ def _trace(self, obj, i: int, path):
450577
451578 # slice
452579 if isinstance (obj , list ) and JSONPath .REP_SLICE_CONTENT .fullmatch (step ):
453- obj = list (enumerate (obj ))
454- vals = self .eval_func ( f"obj[ { step } ]" )
580+ indexed = list (enumerate (obj ))
581+ vals = indexed [ self ._parse_slice ( step )]
455582 for idx , v in vals :
456583 self ._trace (v , i + 1 , f"{ path } [{ idx } ]" )
457584 return
@@ -470,6 +597,10 @@ def _trace(self, obj, i: int, path):
470597 # filter
471598 step = step [2 :- 1 ]
472599 step = JSONPath .REP_FILTER_CONTENT .sub (self ._gen_obj , step )
600+ # Replace bare @ (current element reference) with __obj
601+ # Must happen after REP_FILTER_CONTENT (handles @.x, @[x])
602+ # and before REP_REGEX_PATTERN (introduces @ as matmul operator)
603+ step = JSONPath .REP_BARE_AT .sub ("__obj" , step )
473604
474605 if "=~" in step :
475606 step = JSONPath .REP_REGEX_PATTERN .sub (r"@ RegexPattern(r'\1')" , step )
0 commit comments