From 8771498b380c164a08d86a174032b016418d6955 Mon Sep 17 00:00:00 2001 From: James Prior Date: Thu, 7 Aug 2025 10:46:52 +0100 Subject: [PATCH 1/5] Version 2 WIP [skip ci] --- jsonpath/env.py | 30 +- jsonpath/filter.py | 22 +- jsonpath/lex.py | 96 +-- jsonpath/match.py | 13 + jsonpath/parse.py | 294 ++++---- jsonpath/path.py | 55 +- jsonpath/segments.py | 128 ++++ jsonpath/selectors.py | 657 ++++++----------- jsonpath/stream.py | 15 + jsonpath/token.py | 117 ++-- pyproject.toml | 3 +- tests/test_filter_expression_caching.py | 41 +- tests/test_find_reference.py | 253 +++---- tests/test_lex.py | 818 +++++++++++++++------- tests/test_walk_filter_expression_tree.py | 14 +- 15 files changed, 1348 insertions(+), 1208 deletions(-) create mode 100644 jsonpath/segments.py diff --git a/jsonpath/env.py b/jsonpath/env.py index d951c90..8542b32 100644 --- a/jsonpath/env.py +++ b/jsonpath/env.py @@ -25,9 +25,9 @@ from .filter import UNDEFINED from .filter import VALUE_TYPE_EXPRESSIONS from .filter import FilterExpression +from .filter import FilterQuery from .filter import FunctionExtension from .filter import InfixExpression -from .filter import Path from .fluent_api import Query from .function_extensions import ExpressionType from .function_extensions import FilterFunction @@ -40,8 +40,8 @@ from .path import JSONPath from .stream import TokenStream from .token import TOKEN_EOF -from .token import TOKEN_FAKE_ROOT from .token import TOKEN_INTERSECTION +from .token import TOKEN_PSEUDO_ROOT from .token import TOKEN_UNION from .token import Token @@ -92,7 +92,7 @@ class attributes `root_token`, `self_token` and `filter_context_token`. ## Class attributes Attributes: - fake_root_token (str): The pattern used to select a "fake" root node, one level + pseudo_root_token (str): The pattern used to select a "fake" root node, one level above the real root node. filter_context_token (str): The pattern used to select extra filter context data. Defaults to `"_"`. @@ -117,7 +117,7 @@ class attributes `root_token`, `self_token` and `filter_context_token`. # These should be unescaped strings. `re.escape` will be called # on them automatically when compiling lexer rules. 
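+    # For example (an illustrative sketch, not part of this change, assuming
+    # the environment's public `findall` method), a subclass can rename the
+    # pseudo-root token:
+    #
+    #     class MyEnv(JSONPathEnvironment):
+    #         pseudo_root_token = "@@"
+    #
+    #     MyEnv().findall("@@[0]", {"a": 1})  # selects the root object itself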
- fake_root_token = "^" + pseudo_root_token = "^" filter_context_token = "_" intersection_token = "&" key_token = "#" @@ -180,9 +180,9 @@ def compile(self, path: str) -> Union[JSONPath, CompoundJSONPath]: # noqa: A003 """ tokens = self.lexer.tokenize(path) stream = TokenStream(tokens) - fake_root = stream.current.kind == TOKEN_FAKE_ROOT + pseudo_root = stream.current.kind == TOKEN_PSEUDO_ROOT _path: Union[JSONPath, CompoundJSONPath] = JSONPath( - env=self, selectors=self.parser.parse(stream), fake_root=fake_root + env=self, segments=self.parser.parse(stream), pseudo_root=pseudo_root ) if stream.current.kind != TOKEN_EOF: @@ -197,22 +197,22 @@ def compile(self, path: str) -> Union[JSONPath, CompoundJSONPath]: # noqa: A003 if stream.current.kind == TOKEN_UNION: stream.next_token() - fake_root = stream.current.kind == TOKEN_FAKE_ROOT + pseudo_root = stream.current.kind == TOKEN_PSEUDO_ROOT _path = _path.union( JSONPath( env=self, - selectors=self.parser.parse(stream), - fake_root=fake_root, + segments=self.parser.parse(stream), + pseudo_root=pseudo_root, ) ) elif stream.current.kind == TOKEN_INTERSECTION: stream.next_token() - fake_root = stream.current.kind == TOKEN_FAKE_ROOT + pseudo_root = stream.current.kind == TOKEN_PSEUDO_ROOT _path = _path.intersection( JSONPath( env=self, - selectors=self.parser.parse(stream), - fake_root=fake_root, + segments=self.parser.parse(stream), + pseudo_root=pseudo_root, ) ) else: # pragma: no cover @@ -456,7 +456,7 @@ def check_well_typedness( if typ == ExpressionType.VALUE: if not ( isinstance(arg, VALUE_TYPE_EXPRESSIONS) - or (isinstance(arg, Path) and arg.path.singular_query()) + or (isinstance(arg, FilterQuery) and arg.path.singular_query()) or (self._function_return_type(arg) == ExpressionType.VALUE) ): raise JSONPathTypeError( @@ -464,13 +464,13 @@ def check_well_typedness( token=token, ) elif typ == ExpressionType.LOGICAL: - if not isinstance(arg, (Path, InfixExpression)): + if not isinstance(arg, (FilterQuery, InfixExpression)): raise JSONPathTypeError( f"{token.value}() argument {idx} must be of LogicalType", token=token, ) elif typ == ExpressionType.NODES and not ( - isinstance(arg, Path) + isinstance(arg, FilterQuery) or self._function_return_type(arg) == ExpressionType.NODES ): raise JSONPathTypeError( diff --git a/jsonpath/filter.py b/jsonpath/filter.py index 00cbca4..9452273 100644 --- a/jsonpath/filter.py +++ b/jsonpath/filter.py @@ -23,7 +23,6 @@ from .function_extensions import FilterFunction from .match import NodeList from .selectors import Filter as FilterSelector -from .selectors import ListSelector from .serialize import canonical_string if TYPE_CHECKING: @@ -494,7 +493,7 @@ def set_children(self, children: List[FilterExpression]) -> None: self._expr.set_children(children) -class Path(FilterExpression, ABC): +class FilterQuery(FilterExpression, ABC): """Base expression for all _sub paths_ found in filter expressions.""" __slots__ = ("path",) @@ -504,17 +503,14 @@ def __init__(self, path: JSONPath) -> None: super().__init__() def __eq__(self, other: object) -> bool: - return isinstance(other, Path) and str(self) == str(other) + return isinstance(other, FilterQuery) and str(self) == str(other) def children(self) -> List[FilterExpression]: _children: List[FilterExpression] = [] - for segment in self.path.selectors: - if isinstance(segment, ListSelector): - _children.extend( - selector.expression - for selector in segment.items - if isinstance(selector, FilterSelector) - ) + for segment in self.path.segments: + for selector in 
segment.selectors: + if isinstance(selector, FilterSelector): + _children.append(selector.expression) return _children def set_children(self, children: List[FilterExpression]) -> None: # noqa: ARG002 @@ -522,7 +518,7 @@ def set_children(self, children: List[FilterExpression]) -> None: # noqa: ARG00 return -class SelfPath(Path): +class RelativeFilterQuery(FilterQuery): """A JSONPath starting at the current node.""" __slots__ = () @@ -561,7 +557,7 @@ async def evaluate_async(self, context: FilterContext) -> object: ) -class RootPath(Path): +class RootFilterQuery(FilterQuery): """A JSONPath starting at the root node.""" __slots__ = () @@ -584,7 +580,7 @@ async def evaluate_async(self, context: FilterContext) -> object: ) -class FilterContextPath(Path): +class FilterContextPath(FilterQuery): """A JSONPath starting at the root of any extra context data.""" __slots__ = () diff --git a/jsonpath/lex.py b/jsonpath/lex.py index 4c4422d..837f6dc 100644 --- a/jsonpath/lex.py +++ b/jsonpath/lex.py @@ -10,41 +10,40 @@ from .exceptions import JSONPathSyntaxError from .token import TOKEN_AND -from .token import TOKEN_BARE_PROPERTY +from .token import TOKEN_COLON from .token import TOKEN_COMMA from .token import TOKEN_CONTAINS from .token import TOKEN_DDOT -from .token import TOKEN_DOT_PROPERTY +from .token import TOKEN_DOT from .token import TOKEN_DOUBLE_QUOTE_STRING from .token import TOKEN_EQ -from .token import TOKEN_FAKE_ROOT +from .token import TOKEN_ERROR from .token import TOKEN_FALSE from .token import TOKEN_FILTER from .token import TOKEN_FILTER_CONTEXT from .token import TOKEN_FLOAT -from .token import TOKEN_FUNCTION from .token import TOKEN_GE from .token import TOKEN_GT -from .token import TOKEN_ILLEGAL from .token import TOKEN_IN from .token import TOKEN_INT from .token import TOKEN_INTERSECTION from .token import TOKEN_KEY from .token import TOKEN_KEYS +from .token import TOKEN_KEYS_FILTER +from .token import TOKEN_LBRACKET from .token import TOKEN_LE from .token import TOKEN_LG -from .token import TOKEN_LIST_SLICE -from .token import TOKEN_LIST_START from .token import TOKEN_LPAREN from .token import TOKEN_LT from .token import TOKEN_MISSING +from .token import TOKEN_NAME from .token import TOKEN_NE from .token import TOKEN_NIL from .token import TOKEN_NONE from .token import TOKEN_NOT from .token import TOKEN_NULL from .token import TOKEN_OR -from .token import TOKEN_PROPERTY +from .token import TOKEN_PSEUDO_ROOT from .token import TOKEN_RBRACKET from .token import TOKEN_RE from .token import TOKEN_RE_FLAGS @@ -53,13 +52,10 @@ from .token import TOKEN_RPAREN from .token import TOKEN_SELF from .token import TOKEN_SINGLE_QUOTE_STRING -from .token import TOKEN_SKIP -from .token import TOKEN_SLICE_START -from .token import TOKEN_SLICE_STEP -from .token import TOKEN_SLICE_STOP from .token import TOKEN_TRUE from .token import TOKEN_UNDEFINED from .token import TOKEN_UNION +from .token import TOKEN_WHITESPACE from .token import TOKEN_WILD from .token import Token @@ -86,8 +82,9 @@ class attributes. Then setting `lexer_class` on a `JSONPathEnvironment`. """ key_pattern = r"[\u0080-\uFFFFa-zA-Z_][\u0080-\uFFFFa-zA-Z0-9_-]*" + name_pattern = key_pattern # XXX: - # `not` or ! + # ! or `not` logical_not_pattern = r"(?:not\b)|!" # && or `and` @@ -102,28 +99,17 @@ def __init__(self, *, env: JSONPathEnvironment) -> None: self.double_quote_pattern = r'"(?P(?:(?!(?(?:(?!(?{self.key_pattern})" - - self.slice_list_pattern = ( - r"(?P\-?\d*)\s*" - r":\s*(?P\-?\d*)\s*" - r"(?::\s*(?P\-?\d*))?" 
- ) - + # TODO: separate re literal tokens # /pattern/ or /pattern/flags self.re_pattern = r"/(?P.+?)/(?P[aims]*)" - # func( - self.function_pattern = r"(?P[a-z][a-z_0-9]+)\(\s*" - self.rules = self.compile_rules() def compile_rules(self) -> Pattern[str]: """Prepare regular expression rules.""" env_tokens = [ (TOKEN_ROOT, self.env.root_token), - (TOKEN_FAKE_ROOT, self.env.fake_root_token), + (TOKEN_PSEUDO_ROOT, self.env.pseudo_root_token), (TOKEN_SELF, self.env.self_token), (TOKEN_KEY, self.env.key_token), (TOKEN_UNION, self.env.union_token), @@ -136,12 +122,10 @@ def compile_rules(self) -> Pattern[str]: (TOKEN_DOUBLE_QUOTE_STRING, self.double_quote_pattern), (TOKEN_SINGLE_QUOTE_STRING, self.single_quote_pattern), (TOKEN_RE_PATTERN, self.re_pattern), - (TOKEN_LIST_SLICE, self.slice_list_pattern), - (TOKEN_FUNCTION, self.function_pattern), - (TOKEN_DOT_PROPERTY, self.dot_property_pattern), (TOKEN_FLOAT, r"-?\d+\.\d*(?:[eE][+-]?\d+)?"), (TOKEN_INT, r"-?\d+(?P[eE][+\-]?\d+)?\b"), (TOKEN_DDOT, r"\.\."), + (TOKEN_DOT, r"\."), (TOKEN_AND, self.logical_and_pattern), (TOKEN_OR, self.logical_or_pattern), *[ @@ -153,6 +137,7 @@ def compile_rules(self) -> Pattern[str]: ], (TOKEN_WILD, r"\*"), (TOKEN_FILTER, r"\?"), + (TOKEN_KEYS_FILTER, r"~\?"), # TODO: get from env (TOKEN_IN, r"in\b"), (TOKEN_TRUE, r"[Tt]rue\b"), (TOKEN_FALSE, r"[Ff]alse\b"), @@ -162,9 +147,10 @@ def compile_rules(self) -> Pattern[str]: (TOKEN_CONTAINS, r"contains\b"), (TOKEN_UNDEFINED, r"undefined\b"), (TOKEN_MISSING, r"missing\b"), - (TOKEN_LIST_START, r"\["), + (TOKEN_LBRACKET, r"\["), (TOKEN_RBRACKET, r"]"), (TOKEN_COMMA, r","), + (TOKEN_COLON, r":"), (TOKEN_EQ, r"=="), (TOKEN_NE, r"!="), (TOKEN_LG, r"<>"), @@ -173,12 +159,12 @@ def compile_rules(self) -> Pattern[str]: (TOKEN_RE, r"=~"), (TOKEN_LT, r"<"), (TOKEN_GT, r">"), - (TOKEN_NOT, self.logical_not_pattern), - (TOKEN_BARE_PROPERTY, self.key_pattern), + (TOKEN_NOT, self.logical_not_pattern), # Must go after "!=" + (TOKEN_NAME, self.key_pattern), # Must go after reserved words (TOKEN_LPAREN, r"\("), (TOKEN_RPAREN, r"\)"), - (TOKEN_SKIP, r"[ \n\t\r\.]+"), - (TOKEN_ILLEGAL, r"."), + (TOKEN_WHITESPACE, r"[ \n\t\r]+"), + (TOKEN_ERROR, r"."), ] return re.compile( @@ -194,35 +180,7 @@ def tokenize(self, path: str) -> Iterator[Token]: # noqa PLR0912 kind = match.lastgroup assert kind is not None - if kind == TOKEN_DOT_PROPERTY: - yield _token( - kind=TOKEN_PROPERTY, - value=match.group("G_PROP"), - index=match.start("G_PROP"), - ) - elif kind == TOKEN_BARE_PROPERTY: - yield _token( - kind=TOKEN_BARE_PROPERTY, - value=match.group(), - index=match.start(), - ) - elif kind == TOKEN_LIST_SLICE: - yield _token( - kind=TOKEN_SLICE_START, - value=match.group("G_LSLICE_START"), - index=match.start("G_LSLICE_START"), - ) - yield _token( - kind=TOKEN_SLICE_STOP, - value=match.group("G_LSLICE_STOP"), - index=match.start("G_LSLICE_STOP"), - ) - yield _token( - kind=TOKEN_SLICE_STEP, - value=match.group("G_LSLICE_STEP") or "", - index=match.start("G_LSLICE_STEP"), - ) - elif kind == TOKEN_DOUBLE_QUOTE_STRING: + if kind == TOKEN_DOUBLE_QUOTE_STRING: yield _token( kind=TOKEN_DOUBLE_QUOTE_STRING, value=match.group("G_DQUOTE"), @@ -264,19 +222,11 @@ def tokenize(self, path: str) -> Iterator[Token]: # noqa PLR0912 value=match.group(), index=match.start(), ) - elif kind == TOKEN_FUNCTION: - yield _token( - kind=TOKEN_FUNCTION, - value=match.group("G_FUNC"), - index=match.start("G_FUNC"), - ) - elif kind == TOKEN_SKIP: - continue - elif kind == TOKEN_ILLEGAL: + elif kind == TOKEN_ERROR: raise 
JSONPathSyntaxError( f"unexpected token {match.group()!r}", token=_token( - TOKEN_ILLEGAL, + TOKEN_ERROR, value=match.group(), index=match.start(), ), diff --git a/jsonpath/match.py b/jsonpath/match.py index dea2fee..964dff4 100644 --- a/jsonpath/match.py +++ b/jsonpath/match.py @@ -11,6 +11,7 @@ from typing import Union from .pointer import JSONPointer +from .serialize import canonical_string FilterContextVars = Mapping[str, Any] PathPart = Union[int, str] @@ -69,6 +70,18 @@ def add_child(self, *children: JSONPathMatch) -> None: """Append one or more children to this match.""" self.children.extend(children) + def new_child(self, obj: object, key: Union[int, str]) -> JSONPathMatch: + """Return a new JSONPathMatch instance with this instance as its parent.""" + return self.__class__( + filter_context=self.filter_context(), + obj=obj, + parent=self, + parts=self.parts + (key,), + path=self.path + + f"[{canonical_string(key) if isinstance(key, str) else key}]", + root=self.root, + ) + def filter_context(self) -> FilterContextVars: """Return filter context data for this match.""" return self._filter_context diff --git a/jsonpath/parse.py b/jsonpath/parse.py index eaef7fc..82ccea5 100644 --- a/jsonpath/parse.py +++ b/jsonpath/parse.py @@ -8,9 +8,9 @@ from typing import Callable from typing import Dict from typing import Iterable +from typing import Iterator from typing import List from typing import Optional -from typing import Union from jsonpath.function_extensions.filter_function import ExpressionType from jsonpath.function_extensions.filter_function import FilterFunction @@ -25,6 +25,7 @@ from .filter import BooleanExpression from .filter import FilterContextPath from .filter import FilterExpression +from .filter import FilterQuery from .filter import FloatLiteral from .filter import FunctionExtension from .filter import InfixExpression @@ -32,31 +33,31 @@ from .filter import ListLiteral from .filter import Literal from .filter import Nil -from .filter import Path from .filter import PrefixExpression from .filter import RegexLiteral -from .filter import RootPath -from .filter import SelfPath +from .filter import RelativeFilterQuery +from .filter import RootFilterQuery from .filter import StringLiteral from .path import JSONPath +from .segments import JSONPathChildSegment +from .segments import JSONPathRecursiveDescentSegment +from .segments import JSONPathSegment from .selectors import Filter from .selectors import IndexSelector from .selectors import JSONPathSelector from .selectors import KeysSelector -from .selectors import ListSelector from .selectors import PropertySelector -from .selectors import RecursiveDescentSelector from .selectors import SliceSelector from .selectors import WildSelector from .token import TOKEN_AND -from .token import TOKEN_BARE_PROPERTY +from .token import TOKEN_COLON from .token import TOKEN_COMMA from .token import TOKEN_CONTAINS from .token import TOKEN_DDOT +from .token import TOKEN_DOT from .token import TOKEN_DOUBLE_QUOTE_STRING from .token import TOKEN_EOF from .token import TOKEN_EQ -from .token import TOKEN_FAKE_ROOT from .token import TOKEN_FALSE from .token import TOKEN_FILTER from .token import TOKEN_FILTER_CONTEXT @@ -69,19 +70,20 @@ from .token import TOKEN_INTERSECTION from .token import TOKEN_KEY from .token import TOKEN_KEYS +from .token import TOKEN_LBRACKET from .token import TOKEN_LE from .token import TOKEN_LG -from .token import TOKEN_LIST_START from .token import TOKEN_LPAREN from .token import TOKEN_LT from .token import TOKEN_MISSING 
+from .token import TOKEN_NAME from .token import TOKEN_NE from .token import TOKEN_NIL from .token import TOKEN_NONE from .token import TOKEN_NOT from .token import TOKEN_NULL from .token import TOKEN_OR -from .token import TOKEN_PROPERTY +from .token import TOKEN_PSEUDO_ROOT from .token import TOKEN_RBRACKET from .token import TOKEN_RE from .token import TOKEN_RE_FLAGS @@ -90,9 +92,6 @@ from .token import TOKEN_RPAREN from .token import TOKEN_SELF from .token import TOKEN_SINGLE_QUOTE_STRING -from .token import TOKEN_SLICE_START -from .token import TOKEN_SLICE_STEP -from .token import TOKEN_SLICE_STOP from .token import TOKEN_TRUE from .token import TOKEN_UNDEFINED from .token import TOKEN_UNION @@ -145,7 +144,6 @@ class Parser: """A JSONPath parser bound to a JSONPathEnvironment.""" PRECEDENCE_LOWEST = 1 - PRECEDENCE_LOGICALRIGHT = 2 PRECEDENCE_LOGICAL_OR = 3 PRECEDENCE_LOGICAL_AND = 4 PRECEDENCE_RELATIONAL = 5 @@ -236,14 +234,13 @@ def __init__(self, *, env: JSONPathEnvironment) -> None: self.token_map: Dict[str, Callable[[TokenStream], FilterExpression]] = { TOKEN_DOUBLE_QUOTE_STRING: self.parse_string_literal, - TOKEN_FAKE_ROOT: self.parse_root_path, + TOKEN_PSEUDO_ROOT: self.parse_root_path, TOKEN_FALSE: self.parse_boolean, TOKEN_FILTER_CONTEXT: self.parse_filter_context_path, TOKEN_FLOAT: self.parse_float_literal, TOKEN_FUNCTION: self.parse_function_extension, TOKEN_INT: self.parse_integer_literal, TOKEN_KEY: self.parse_current_key, - TOKEN_LIST_START: self.parse_list_literal, TOKEN_LPAREN: self.parse_grouped_expression, TOKEN_MISSING: self.parse_undefined, TOKEN_NIL: self.parse_nil, @@ -274,7 +271,7 @@ def __init__(self, *, env: JSONPathEnvironment) -> None: str, Callable[[TokenStream], FilterExpression] ] = { TOKEN_DOUBLE_QUOTE_STRING: self.parse_string_literal, - TOKEN_FAKE_ROOT: self.parse_root_path, + TOKEN_PSEUDO_ROOT: self.parse_root_path, TOKEN_FALSE: self.parse_boolean, TOKEN_FILTER_CONTEXT: self.parse_filter_context_path, TOKEN_FLOAT: self.parse_float_literal, @@ -290,10 +287,11 @@ def __init__(self, *, env: JSONPathEnvironment) -> None: TOKEN_TRUE: self.parse_boolean, } - def parse(self, stream: TokenStream) -> Iterable[JSONPathSelector]: + def parse(self, stream: TokenStream) -> Iterator[JSONPathSegment]: """Parse a JSONPath from a stream of tokens.""" - if stream.current.kind in {TOKEN_ROOT, TOKEN_FAKE_ROOT}: + if stream.current.kind in {TOKEN_ROOT, TOKEN_PSEUDO_ROOT}: stream.next_token() + yield from self.parse_path(stream, in_filter=False) if stream.current.kind not in (TOKEN_EOF, TOKEN_INTERSECTION, TOKEN_UNION): @@ -307,37 +305,34 @@ def parse_path( stream: TokenStream, *, in_filter: bool = False, - ) -> Iterable[JSONPathSelector]: + ) -> Iterable[JSONPathSegment]: """Parse a top-level JSONPath, or one that is nested in a filter.""" while True: - if stream.current.kind in (TOKEN_PROPERTY, TOKEN_BARE_PROPERTY): - yield PropertySelector( - env=self.env, - token=stream.current, - name=stream.current.value, - shorthand=True, - ) - elif stream.current.kind == TOKEN_SLICE_START: - yield self.parse_slice(stream) - elif stream.current.kind == TOKEN_WILD: - yield WildSelector( - env=self.env, - token=stream.current, - shorthand=True, + if stream.current.kind == TOKEN_DDOT: + token = stream.next_token() + selectors = self.parse_selectors(stream) + if not selectors: + raise JSONPathSyntaxError( + "missing selector for recursive descent segment", + token=stream.current, + ) + yield JSONPathRecursiveDescentSegment( + env=self.env, token=token, selectors=selectors ) - elif 
stream.current.kind == TOKEN_KEYS: - yield KeysSelector( - env=self.env, - token=stream.current, - shorthand=True, + elif ( + stream.skip(TOKEN_DOT) + and stream.current.kind + in { + TOKEN_NAME, + TOKEN_WILD, + TOKEN_KEYS, + } + ) or stream.current.kind == TOKEN_LBRACKET: + token = stream.current + selectors = self.parse_selectors(stream) + yield JSONPathChildSegment( + env=self.env, token=token, selectors=selectors ) - elif stream.current.kind == TOKEN_DDOT: - yield RecursiveDescentSelector( - env=self.env, - token=stream.current, - ) - elif stream.current.kind == TOKEN_LIST_START: - yield self.parse_selector_list(stream) else: if in_filter: stream.push(stream.current) @@ -345,95 +340,112 @@ def parse_path( stream.next_token() + def parse_selectors(self, stream: TokenStream) -> tuple[JSONPathSelector, ...]: + if stream.current.kind == TOKEN_NAME: + return ( + PropertySelector( + env=self.env, + token=stream.current, + name=stream.current.value, + shorthand=True, + ), + ) + + if stream.current.kind == TOKEN_WILD: + return (WildSelector(env=self.env, token=stream.current, shorthand=True),) + + if stream.current.kind == TOKEN_KEYS: + return ( + KeysSelector( + env=self.env, + token=stream.current, + shorthand=True, + ), + ) + + if stream.current.kind == TOKEN_LBRACKET: + return tuple(self.parse_bracketed_selection(stream)) + + return () + def parse_slice(self, stream: TokenStream) -> SliceSelector: """Parse a slice JSONPath expression from a stream of tokens.""" - start_token = stream.next_token() - stream.expect(TOKEN_SLICE_STOP) - stop_token = stream.next_token() - stream.expect(TOKEN_SLICE_STEP) - step_token = stream.current - - if not start_token.value: - start: Optional[int] = None - else: - start = int(start_token.value) - - if not stop_token.value: - stop: Optional[int] = None - else: - stop = int(stop_token.value) - - if not step_token.value: - step: Optional[int] = None - else: - step = int(step_token.value) + tok = stream.current + start: Optional[int] = None + stop: Optional[int] = None + step: Optional[int] = None + + def _maybe_index(token: Token) -> bool: + if token.kind == TOKEN_INT: + if len(token.value) > 1 and token.value.startswith(("0", "-0")): + raise JSONPathSyntaxError( + f"invalid index {token.value!r}", token=token + ) + return True + return False + + # 1: or : + if _maybe_index(stream.current): + start = int(stream.current.value) + stream.next_token() + + stream.expect(TOKEN_COLON) + stream.next_token() + + # 1 or 1: or : or ? + if _maybe_index(stream.current): + stop = int(stream.current.value) + stream.next_token() + if stream.current.kind == TOKEN_COLON: + stream.next_token() + elif stream.current.kind == TOKEN_COLON: + stream.expect(TOKEN_COLON) + stream.next_token() + + # 1 or ? 
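+        # Only an optional step index remains at this point. Any non-integer
+        # token ends the slice and is pushed back onto the stream below, for
+        # the enclosing bracketed selection to consume.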
+ if _maybe_index(stream.current): + step = int(stream.current.value) + stream.next_token() + + stream.push(stream.current) return SliceSelector( env=self.env, - token=start_token, + token=tok, start=start, stop=stop, step=step, ) - def parse_selector_list(self, stream: TokenStream) -> ListSelector: # noqa: PLR0912 - """Parse a comma separated list JSONPath selectors from a stream of tokens.""" - tok = stream.next_token() - list_items: List[ - Union[ - IndexSelector, - KeysSelector, - PropertySelector, - SliceSelector, - WildSelector, - Filter, - ] - ] = [] + def parse_bracketed_selection(self, stream: TokenStream) -> List[JSONPathSelector]: # noqa: PLR0912 + """Parse a comma separated list of JSONPath selectors.""" + tok = stream.next_token() # Skip LBRACKET + selectors: List[JSONPathSelector] = [] while stream.current.kind != TOKEN_RBRACKET: if stream.current.kind == TOKEN_INT: - if ( - len(stream.current.value) > 1 - and stream.current.value.startswith("0") - ) or stream.current.value.startswith("-0"): - raise JSONPathSyntaxError( - "leading zero in index selector", token=stream.current + if stream.peek.kind == TOKEN_COLON: + selectors.append(self.parse_slice(stream)) + else: + if ( + len(stream.current.value) > 1 + and stream.current.value.startswith("0") + ) or stream.current.value.startswith("-0"): + raise JSONPathSyntaxError( + "leading zero in index selector", token=stream.current + ) + selectors.append( + IndexSelector( + env=self.env, + token=stream.current, + index=int(stream.current.value), + ) ) - list_items.append( - IndexSelector( - env=self.env, - token=stream.current, - index=int(stream.current.value), - ) - ) - elif stream.current.kind == TOKEN_BARE_PROPERTY: - list_items.append( - PropertySelector( - env=self.env, - token=stream.current, - name=stream.current.value, - shorthand=False, - ), - ) - elif stream.current.kind == TOKEN_KEYS: - list_items.append( - KeysSelector( - env=self.env, - token=stream.current, - shorthand=False, - ) - ) elif stream.current.kind in ( TOKEN_DOUBLE_QUOTE_STRING, TOKEN_SINGLE_QUOTE_STRING, ): - if self.RE_INVALID_NAME_SELECTOR.search(stream.current.value): - raise JSONPathSyntaxError( - f"invalid name selector {stream.current.value!r}", - token=stream.current, - ) - - list_items.append( + selectors.append( PropertySelector( env=self.env, token=stream.current, @@ -441,10 +453,10 @@ def parse_selector_list(self, stream: TokenStream) -> ListSelector: # noqa: PLR shorthand=False, ), ) - elif stream.current.kind == TOKEN_SLICE_START: - list_items.append(self.parse_slice(stream)) + elif stream.current.kind == TOKEN_COLON: + selectors.append(self.parse_slice(stream)) elif stream.current.kind == TOKEN_WILD: - list_items.append( + selectors.append( WildSelector( env=self.env, token=stream.current, @@ -452,7 +464,7 @@ def parse_selector_list(self, stream: TokenStream) -> ListSelector: # noqa: PLR ) ) elif stream.current.kind == TOKEN_FILTER: - list_items.append(self.parse_filter(stream)) + selectors.append(self.parse_filter_selector(stream)) elif stream.current.kind == TOKEN_EOF: raise JSONPathSyntaxError( "unexpected end of query", token=stream.current @@ -470,26 +482,20 @@ def parse_selector_list(self, stream: TokenStream) -> ListSelector: # noqa: PLR ) if stream.peek.kind != TOKEN_RBRACKET: - # TODO: error message .. 
expected a comma or logical operator stream.expect_peek(TOKEN_COMMA) stream.next_token() - - if stream.peek.kind == TOKEN_RBRACKET: - raise JSONPathSyntaxError( - "unexpected trailing comma", - token=stream.peek, - ) + stream.expect_peek_not(TOKEN_RBRACKET, "unexpected trailing comma") stream.next_token() - if not list_items: + if not selectors: raise JSONPathSyntaxError("empty bracketed segment", token=tok) - return ListSelector(env=self.env, token=tok, items=list_items) + return selectors - def parse_filter(self, stream: TokenStream) -> Filter: + def parse_filter_selector(self, stream: TokenStream) -> Filter: tok = stream.next_token() - expr = self.parse_filter_selector(stream) + expr = self.parse_filter_expression(stream) if self.env.well_typed and isinstance(expr, FunctionExtension): func = self.env.function_extensions.get(expr.name) @@ -537,7 +543,9 @@ def parse_prefix_expression(self, stream: TokenStream) -> FilterExpression: assert tok.kind == TOKEN_NOT return PrefixExpression( operator="!", - right=self.parse_filter_selector(stream, precedence=self.PRECEDENCE_PREFIX), + right=self.parse_filter_expression( + stream, precedence=self.PRECEDENCE_PREFIX + ), ) def parse_infix_expression( @@ -545,7 +553,7 @@ def parse_infix_expression( ) -> FilterExpression: tok = stream.next_token() precedence = self.PRECEDENCES.get(tok.kind, self.PRECEDENCE_LOWEST) - right = self.parse_filter_selector(stream, precedence) + right = self.parse_filter_expression(stream, precedence) operator = self.BINARY_OPERATORS[tok.kind] if self.env.well_typed and operator in self.COMPARISON_OPERATORS: @@ -570,7 +578,7 @@ def parse_infix_expression( def parse_grouped_expression(self, stream: TokenStream) -> FilterExpression: stream.next_token() - expr = self.parse_filter_selector(stream) + expr = self.parse_filter_expression(stream) stream.next_token() while stream.current.kind != TOKEN_RPAREN: @@ -592,18 +600,18 @@ def parse_grouped_expression(self, stream: TokenStream) -> FilterExpression: def parse_root_path(self, stream: TokenStream) -> FilterExpression: root = stream.next_token() - return RootPath( + return RootFilterQuery( JSONPath( env=self.env, - selectors=self.parse_path(stream, in_filter=True), - fake_root=root.kind == TOKEN_FAKE_ROOT, + segments=self.parse_path(stream, in_filter=True), + pseudo_root=root.kind == TOKEN_PSEUDO_ROOT, ) ) def parse_self_path(self, stream: TokenStream) -> FilterExpression: stream.next_token() - return SelfPath( - JSONPath(env=self.env, selectors=self.parse_path(stream, in_filter=True)) + return RelativeFilterQuery( + JSONPath(env=self.env, segments=self.parse_path(stream, in_filter=True)) ) def parse_current_key(self, _: TokenStream) -> FilterExpression: @@ -612,7 +620,7 @@ def parse_current_key(self, _: TokenStream) -> FilterExpression: def parse_filter_context_path(self, stream: TokenStream) -> FilterExpression: stream.next_token() return FilterContextPath( - JSONPath(env=self.env, selectors=self.parse_path(stream, in_filter=True)) + JSONPath(env=self.env, segments=self.parse_path(stream, in_filter=True)) ) def parse_regex(self, stream: TokenStream) -> FilterExpression: @@ -680,7 +688,7 @@ def parse_function_extension(self, stream: TokenStream) -> FilterExpression: self.env.validate_function_extension_signature(tok, function_arguments), ) - def parse_filter_selector( + def parse_filter_expression( self, stream: TokenStream, precedence: int = PRECEDENCE_LOWEST ) -> FilterExpression: try: @@ -728,7 +736,7 @@ def _decode_string_literal(self, token: Token) -> str: def 
_raise_for_non_comparable_function(
        self, expr: FilterExpression, token: Token
    ) -> None:
-        if isinstance(expr, Path) and not expr.path.singular_query():
+        if isinstance(expr, FilterQuery) and not expr.path.singular_query():
             raise JSONPathTypeError("non-singular query is not comparable", token=token)
 
         if isinstance(expr, FunctionExtension):
diff --git a/jsonpath/path.py b/jsonpath/path.py
index 9cf3d98..2b9cf70 100644
--- a/jsonpath/path.py
+++ b/jsonpath/path.py
@@ -18,15 +18,15 @@
 from jsonpath.fluent_api import Query
 from jsonpath.match import FilterContextVars
 from jsonpath.match import JSONPathMatch
+from jsonpath.segments import JSONPathRecursiveDescentSegment
 from jsonpath.selectors import IndexSelector
-from jsonpath.selectors import ListSelector
 from jsonpath.selectors import PropertySelector
 
 if TYPE_CHECKING:
     from io import IOBase
 
     from .env import JSONPathEnvironment
-    from .selectors import JSONPathSelector
+    from .segments import JSONPathSegment
 
 
 class JSONPath:
@@ -34,9 +34,9 @@ class JSONPath:
 
     Arguments:
         env: The `JSONPathEnvironment` this path is bound to.
-        selectors: An iterable of `JSONPathSelector` objects, as generated by
+        segments: An iterable of `JSONPathSegment` instances, as generated by
             a `Parser`.
-        fake_root: Indicates if target JSON values should be wrapped in a single-
+        pseudo_root: Indicates if target JSON values should be wrapped in a single-
             element array, so as to make the target root value selectable.
 
     Attributes:
-        selectors: The `JSONPathSelector` instances that make up this path.
+        segments: The `JSONPathSegment` instances that make up this path.
     """
 
-    __slots__ = ("env", "fake_root", "selectors")
+    __slots__ = ("env", "pseudo_root", "segments")
 
     def __init__(
         self,
         *,
         env: JSONPathEnvironment,
-        selectors: Iterable[JSONPathSelector],
-        fake_root: bool = False,
+        segments: Iterable[JSONPathSegment],
+        pseudo_root: bool = False,
     ) -> None:
         self.env = env
-        self.selectors = tuple(selectors)
-        self.fake_root = fake_root
+        self.segments = tuple(segments)
+        self.pseudo_root = pseudo_root
 
     def __str__(self) -> str:
-        return self.env.root_token + "".join(
-            str(selector) for selector in self.selectors
-        )
+        return self.env.root_token + "".join(str(segment) for segment in self.segments)
 
     def __eq__(self, __value: object) -> bool:
-        return isinstance(__value, JSONPath) and self.selectors == __value.selectors
+        return isinstance(__value, JSONPath) and self.segments == __value.segments
 
     def __hash__(self) -> int:
-        return hash(self.selectors)
+        return hash(self.segments)
 
     def findall(
         self,
@@ -128,7 +126,7 @@ def finditer(
         matches: Iterable[JSONPathMatch] = [
             JSONPathMatch(
                 filter_context=filter_context or {},
-                obj=[_data] if self.fake_root else _data,
+                obj=[_data] if self.pseudo_root else _data,
                 parent=None,
                 path=self.env.root_token,
                 parts=(),
@@ -136,8 +134,8 @@
             )
         ]
 
-        for selector in self.selectors:
-            matches = selector.resolve(matches)
+        for segment in self.segments:
+            matches = segment.resolve(matches)
 
         return matches
 
@@ -167,7 +165,7 @@ async def finditer_async(
         async def root_iter() -> AsyncIterable[JSONPathMatch]:
             yield self.env.match_class(
                 filter_context=filter_context or {},
-                obj=[_data] if self.fake_root else _data,
+                obj=[_data] if self.pseudo_root else _data,
                 parent=None,
                 path=self.env.root_token,
                 parts=(),
@@ -176,8 +174,8 @@
 
         matches: AsyncIterable[JSONPathMatch] = root_iter()
 
-        for selector in self.selectors:
-            matches = selector.resolve_async(matches)
+        for segment in self.segments:
+            matches = segment.resolve_async(matches)
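+        # Each segment wraps the async iterator produced by the previous
+        # segment, so the whole pipeline stays lazy: no nodes are resolved
+        # until the caller starts iterating.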
        return matches
 
 
@@ -237,20 +235,21 @@ def query(
 
     def empty(self) -> bool:
-        """Return `True` if this path has no selectors."""
-        return not bool(self.selectors)
+        """Return `True` if this path has no segments."""
+        return not bool(self.segments)
 
     def singular_query(self) -> bool:
         """Return `True` if this JSONPath query is a singular query."""
-        for selector in self.selectors:
-            if isinstance(selector, (PropertySelector, IndexSelector)):
-                continue
-            if (
-                isinstance(selector, ListSelector)
-                and len(selector.items) == 1
-                and isinstance(selector.items[0], (PropertySelector, IndexSelector))
+        for segment in self.segments:
+            if isinstance(segment, JSONPathRecursiveDescentSegment):
+                return False
+
+            if len(segment.selectors) == 1 and isinstance(
+                segment.selectors[0], (PropertySelector, IndexSelector)
             ):
                 continue
+
             return False
+
         return True
diff --git a/jsonpath/segments.py b/jsonpath/segments.py
new file mode 100644
index 0000000..8aeb892
--- /dev/null
+++ b/jsonpath/segments.py
@@ -0,0 +1,128 @@
+"""JSONPath child and descendant segment definitions."""
+
+from __future__ import annotations
+
+from abc import ABC
+from abc import abstractmethod
+from typing import TYPE_CHECKING
+from typing import AsyncIterable
+from typing import Iterable
+from typing import Mapping
+from typing import Sequence
+from typing import Tuple
+
+if TYPE_CHECKING:
+    from .env import JSONPathEnvironment
+    from .match import JSONPathMatch
+    from .selectors import JSONPathSelector
+    from .token import Token
+
+
+class JSONPathSegment(ABC):
+    """Base class for all JSONPath segments."""
+
+    __slots__ = ("env", "token", "selectors")
+
+    def __init__(
+        self,
+        *,
+        env: JSONPathEnvironment,
+        token: Token,
+        selectors: Tuple[JSONPathSelector, ...],
+    ) -> None:
+        self.env = env
+        self.token = token
+        self.selectors = selectors
+
+    @abstractmethod
+    def resolve(self, nodes: Iterable[JSONPathMatch]) -> Iterable[JSONPathMatch]:
+        """Apply this segment to each `JSONPathMatch` in _nodes_."""
+
+    @abstractmethod
+    def resolve_async(
+        self, nodes: AsyncIterable[JSONPathMatch]
+    ) -> AsyncIterable[JSONPathMatch]:
+        """An async version of `resolve`."""
+
+
+class JSONPathChildSegment(JSONPathSegment):
+    """The JSONPath child selection segment."""
+
+    def resolve(self, nodes: Iterable[JSONPathMatch]) -> Iterable[JSONPathMatch]:
+        """Select children of each node in _nodes_."""
+        for node in nodes:
+            for selector in self.selectors:
+                yield from selector.resolve(node)
+
+    async def resolve_async(
+        self, nodes: AsyncIterable[JSONPathMatch]
+    ) -> AsyncIterable[JSONPathMatch]:
+        """An async version of `resolve`."""
+        async for node in nodes:
+            for selector in self.selectors:
+                async for match in selector.resolve_async(node):
+                    yield match
+
+    def __str__(self) -> str:
+        return f"[{', '.join(str(itm) for itm in self.selectors)}]"
+
+    def __eq__(self, __value: object) -> bool:
+        return (
+            isinstance(__value, JSONPathChildSegment)
+            and self.selectors == __value.selectors
+            and self.token == __value.token
+        )
+
+    def __hash__(self) -> int:
+        return hash((self.selectors, self.token))
+
+
+class JSONPathRecursiveDescentSegment(JSONPathSegment):
+    """The JSONPath recursive descent segment."""
+
+    def resolve(self, nodes: Iterable[JSONPathMatch]) -> Iterable[JSONPathMatch]:
+        """Select descendants of each node in _nodes_."""
+        for node in nodes:
+            for _node in self._visit(node):
+                for selector in self.selectors:
+                    yield from selector.resolve(_node)
+
+    async def resolve_async(
+        self, nodes: AsyncIterable[JSONPathMatch]
+    ) -> AsyncIterable[JSONPathMatch]:
+        """An async version of `resolve`."""
`resolve`.""" + async for node in nodes: + for _node in self._visit(node): + for selector in self.selectors: + async for match in selector.resolve_async(_node): + yield match + + def _visit(self, node: JSONPathMatch, depth: int = 1) -> Iterable[JSONPathMatch]: + """Depth-first, pre-order node traversal.""" + # TODO: check for recursion limit + + yield node + + if isinstance(node.obj, Mapping): + for name, val in node.obj.items(): + if isinstance(val, (Mapping, Sequence)): + _node = node.new_child(val, name) + yield from self._visit(_node, depth + 1) + elif isinstance(node.obj, Sequence) and not isinstance(node.obj, str): + for i, item in enumerate(node.obj): + if isinstance(item, (Mapping, Sequence)): + _node = node.new_child(item, i) + yield from self._visit(_node, depth + 1) + + def __str__(self) -> str: + return f"..[{', '.join(str(itm) for itm in self.selectors)}]" + + def __eq__(self, __value: object) -> bool: + return ( + isinstance(__value, JSONPathRecursiveDescentSegment) + and self.selectors == __value.selectors + and self.token == __value.token + ) + + def __hash__(self) -> int: + return hash(("..", self.selectors, self.token)) diff --git a/jsonpath/selectors.py b/jsonpath/selectors.py index 44007e9..89e2490 100644 --- a/jsonpath/selectors.py +++ b/jsonpath/selectors.py @@ -11,9 +11,7 @@ from typing import Any from typing import AsyncIterable from typing import Iterable -from typing import List from typing import Optional -from typing import TypeVar from typing import Union from .exceptions import JSONPathIndexError @@ -39,13 +37,11 @@ def __init__(self, *, env: JSONPathEnvironment, token: Token) -> None: self.token = token @abstractmethod - def resolve(self, matches: Iterable[JSONPathMatch]) -> Iterable[JSONPathMatch]: + def resolve(self, node: JSONPathMatch) -> Iterable[JSONPathMatch]: """Apply the segment/selector to each node in _matches_. Arguments: - matches: Nodes matched by preceding segments/selectors. This is like - a lazy _NodeList_, as described in RFC 9535, but each match carries - more than the node's value and location. + node: A node matched by preceding segments/selectors. 
         Returns:
-            The `JSONPathMatch` instances created by applying this selector to each
+            The `JSONPathMatch` instances created by applying this selector
+            to _node_.
         """
 
     @abstractmethod
-    def resolve_async(
-        self, matches: AsyncIterable[JSONPathMatch]
-    ) -> AsyncIterable[JSONPathMatch]:
+    def resolve_async(self, node: JSONPathMatch) -> AsyncIterable[JSONPathMatch]:
        """An async version of `resolve`."""
 
 
@@ -93,46 +87,29 @@ def __eq__(self, __value: object) -> bool:
 
     def __hash__(self) -> int:
         return hash((self.name, self.token))
 
-    def resolve(self, matches: Iterable[JSONPathMatch]) -> Iterable[JSONPathMatch]:
-        for match in matches:
-            if not isinstance(match.obj, Mapping):
-                continue
-
+    def resolve(self, node: JSONPathMatch) -> Iterable[JSONPathMatch]:
+        if isinstance(node.obj, Mapping):
             with suppress(KeyError):
-                _match = self.env.match_class(
-                    filter_context=match.filter_context(),
-                    obj=self.env.getitem(match.obj, self.name),
-                    parent=match,
-                    parts=match.parts + (self.name,),
-                    path=match.path + f"[{canonical_string(self.name)}]",
-                    root=match.root,
-                )
-                match.add_child(_match)
-                yield _match
-
-    async def resolve_async(
-        self, matches: AsyncIterable[JSONPathMatch]
-    ) -> AsyncIterable[JSONPathMatch]:
-        async for match in matches:
-            if not isinstance(match.obj, Mapping):
-                continue
+                match = node.new_child(self.env.getitem(node.obj, self.name), self.name)
+                node.add_child(match)
+                yield match
 
+    async def resolve_async(self, node: JSONPathMatch) -> AsyncIterable[JSONPathMatch]:
+        if isinstance(node.obj, Mapping):
             with suppress(KeyError):
-                _match = self.env.match_class(
-                    filter_context=match.filter_context(),
-                    obj=await self.env.getitem_async(match.obj, self.name),
-                    parent=match,
-                    parts=match.parts + (self.name,),
-                    path=match.path + f"[{canonical_string(self.name)}]",
-                    root=match.root,
+                match = node.new_child(
+                    await self.env.getitem_async(node.obj, self.name), self.name
                 )
-                match.add_child(_match)
-                yield _match
 
 
 class IndexSelector(JSONPathSelector):
     """Select an element from an array by index.
 
+    XXX: Change to make unquoted keys/properties a "singular path selector"
+    https://github.com/ietf-wg-jsonpath/draft-ietf-jsonpath-base/issues/522
+
     Considering we don't require mapping (JSON object) keys/properties to be
     quoted, and that we support mappings with numeric keys, we also check to
     see if the "index" is a mapping key, which is non-standard.
@@ -172,65 +149,41 @@ def _normalized_index(self, obj: Sequence[object]) -> int:
             return len(obj) + self.index
         return self.index
 
-    def resolve(self, matches: Iterable[JSONPathMatch]) -> Iterable[JSONPathMatch]:
-        for match in matches:
-            if isinstance(match.obj, Mapping):
-                # Try the string representation of the index as a key.
- with suppress(KeyError): - _match = self.env.match_class( - filter_context=match.filter_context(), - obj=self.env.getitem(match.obj, self._as_key), - parent=match, - parts=match.parts + (self._as_key,), - path=f"{match.path}['{self.index}']", - root=match.root, - ) - match.add_child(_match) - yield _match - elif isinstance(match.obj, Sequence) and not isinstance(match.obj, str): - norm_index = self._normalized_index(match.obj) - with suppress(IndexError): - _match = self.env.match_class( - filter_context=match.filter_context(), - obj=self.env.getitem(match.obj, self.index), - parent=match, - parts=match.parts + (norm_index,), - path=match.path + f"[{norm_index}]", - root=match.root, - ) - match.add_child(_match) - yield _match - - async def resolve_async( - self, matches: AsyncIterable[JSONPathMatch] - ) -> AsyncIterable[JSONPathMatch]: - async for match in matches: - if isinstance(match.obj, Mapping): - # Try the string representation of the index as a key. - with suppress(KeyError): - _match = self.env.match_class( - filter_context=match.filter_context(), - obj=await self.env.getitem_async(match.obj, self._as_key), - parent=match, - parts=match.parts + (self._as_key,), - path=f"{match.path}['{self.index}']", - root=match.root, - ) - match.add_child(_match) - yield _match - elif isinstance(match.obj, Sequence) and not isinstance(match.obj, str): - norm_index = self._normalized_index(match.obj) - with suppress(IndexError): - _match = self.env.match_class( - filter_context=match.filter_context(), - obj=await self.env.getitem_async(match.obj, self.index), - parent=match, - parts=match.parts + (norm_index,), - path=match.path + f"[{norm_index}]", - root=match.root, - ) - match.add_child(_match) - yield _match + def resolve(self, node: JSONPathMatch) -> Iterable[JSONPathMatch]: + if isinstance(node.obj, Mapping): + # Try the string representation of the index as a key. + with suppress(KeyError): + match = node.new_child( + self.env.getitem(node.obj, self._as_key), self.index + ) + node.add_child(match) + yield match + elif isinstance(node.obj, Sequence) and not isinstance(node.obj, str): + norm_index = self._normalized_index(node.obj) + with suppress(IndexError): + match = node.new_child( + self.env.getitem(node.obj, self.index), norm_index + ) + node.add_child(match) + yield match + + async def resolve_async(self, node: JSONPathMatch) -> AsyncIterable[JSONPathMatch]: + if isinstance(node.obj, Mapping): + # Try the string representation of the index as a key. 
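+            # Non-standard behavior, as noted in the class docstring's XXX:
+            # mappings with numeric-looking keys are matched here too, whereas
+            # standard JSONPath applies index selectors to arrays only.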
+ with suppress(KeyError): + match = node.new_child( + await self.env.getitem_async(node.obj, self._as_key), self.index + ) + node.add_child(match) + yield match + elif isinstance(node.obj, Sequence) and not isinstance(node.obj, str): + norm_index = self._normalized_index(node.obj) + with suppress(IndexError): + match = node.new_child( + await self.env.getitem_async(node.obj, self.index), norm_index + ) + node.add_child(match) + yield match class KeysSelector(JSONPathSelector): @@ -260,30 +213,26 @@ def __eq__(self, __value: object) -> bool: def __hash__(self) -> int: return hash(self.token) - def _keys(self, match: JSONPathMatch) -> Iterable[JSONPathMatch]: - if isinstance(match.obj, Mapping): - for i, key in enumerate(match.obj.keys()): - _match = self.env.match_class( - filter_context=match.filter_context(), + def _keys(self, node: JSONPathMatch) -> Iterable[JSONPathMatch]: + if isinstance(node.obj, Mapping): + for i, key in enumerate(node.obj.keys()): + match = node.__class__( + filter_context=node.filter_context(), obj=key, - parent=match, - parts=match.parts + (f"{self.env.keys_selector_token}{key}",), - path=f"{match.path}[{self.env.keys_selector_token}][{i}]", - root=match.root, + parent=node, + parts=node.parts + (f"{self.env.keys_selector_token}{key}",), + path=f"{node.path}[{self.env.keys_selector_token}][{i}]", + root=node.root, ) - match.add_child(_match) - yield _match + node.add_child(match) + yield match - def resolve(self, matches: Iterable[JSONPathMatch]) -> Iterable[JSONPathMatch]: - for match in matches: - yield from self._keys(match) + def resolve(self, node: JSONPathMatch) -> Iterable[JSONPathMatch]: + yield from self._keys(node) - async def resolve_async( - self, matches: AsyncIterable[JSONPathMatch] - ) -> AsyncIterable[JSONPathMatch]: - async for match in matches: - for _match in self._keys(match): - yield _match + async def resolve_async(self, node: JSONPathMatch) -> AsyncIterable[JSONPathMatch]: + for match in self._keys(node): + yield match class SliceSelector(JSONPathSelector): @@ -327,47 +276,29 @@ def _check_range(self, *indices: Optional[int]) -> None: ): raise JSONPathIndexError("index out of range", token=self.token) - def resolve(self, matches: Iterable[JSONPathMatch]) -> Iterable[JSONPathMatch]: - for match in matches: - if not isinstance(match.obj, Sequence) or self.slice.step == 0: - continue + def resolve(self, node: JSONPathMatch) -> Iterable[JSONPathMatch]: + if not isinstance(node.obj, Sequence) or self.slice.step == 0: + return - for norm_index, obj in zip( # noqa: B905 - range(*self.slice.indices(len(match.obj))), - self.env.getitem(match.obj, self.slice), - ): - _match = self.env.match_class( - filter_context=match.filter_context(), - obj=obj, - parent=match, - parts=match.parts + (norm_index,), - path=f"{match.path}[{norm_index}]", - root=match.root, - ) - match.add_child(_match) - yield _match - - async def resolve_async( - self, matches: AsyncIterable[JSONPathMatch] - ) -> AsyncIterable[JSONPathMatch]: - async for match in matches: - if not isinstance(match.obj, Sequence) or self.slice.step == 0: - continue - - for norm_index, obj in zip( # noqa: B905 - range(*self.slice.indices(len(match.obj))), - await self.env.getitem_async(match.obj, self.slice), - ): - _match = self.env.match_class( - filter_context=match.filter_context(), - obj=obj, - parent=match, - parts=match.parts + (norm_index,), - path=f"{match.path}[{norm_index}]", - root=match.root, - ) - match.add_child(_match) - yield _match + for norm_index, obj in zip( # noqa: B905 + 
range(*self.slice.indices(len(node.obj))), + self.env.getitem(node.obj, self.slice), + ): + match = node.new_child(obj, norm_index) + node.add_child(match) + yield match + + async def resolve_async(self, node: JSONPathMatch) -> AsyncIterable[JSONPathMatch]: + if not isinstance(node.obj, Sequence) or self.slice.step == 0: + return + + for norm_index, obj in zip( # noqa: B905 + range(*self.slice.indices(len(node.obj))), + await self.env.getitem_async(node.obj, self.slice), + ): + match = node.new_child(obj, norm_index) + node.add_child(match) + yield match class WildSelector(JSONPathSelector): @@ -390,191 +321,31 @@ def __eq__(self, __value: object) -> bool: def __hash__(self) -> int: return hash(self.token) - def resolve(self, matches: Iterable[JSONPathMatch]) -> Iterable[JSONPathMatch]: - for match in matches: - if isinstance(match.obj, str): - continue - if isinstance(match.obj, Mapping): - for key, val in match.obj.items(): - _match = self.env.match_class( - filter_context=match.filter_context(), - obj=val, - parent=match, - parts=match.parts + (key,), - path=match.path + f"[{canonical_string(key)}]", - root=match.root, - ) - match.add_child(_match) - yield _match - elif isinstance(match.obj, Sequence): - for i, val in enumerate(match.obj): - _match = self.env.match_class( - filter_context=match.filter_context(), - obj=val, - parent=match, - parts=match.parts + (i,), - path=f"{match.path}[{i}]", - root=match.root, - ) - match.add_child(_match) - yield _match - - async def resolve_async( - self, matches: AsyncIterable[JSONPathMatch] - ) -> AsyncIterable[JSONPathMatch]: - async for match in matches: - if isinstance(match.obj, Mapping): - for key, val in match.obj.items(): - _match = self.env.match_class( - filter_context=match.filter_context(), - obj=val, - parent=match, - parts=match.parts + (key,), - path=match.path + f"[{canonical_string(key)}]", - root=match.root, - ) - match.add_child(_match) - yield _match - elif isinstance(match.obj, Sequence): - for i, val in enumerate(match.obj): - _match = self.env.match_class( - filter_context=match.filter_context(), - obj=val, - parent=match, - parts=match.parts + (i,), - path=f"{match.path}[{i}]", - root=match.root, - ) - match.add_child(_match) - yield _match - - -class RecursiveDescentSelector(JSONPathSelector): - """A JSONPath selector that visits all nodes recursively. - - NOTE: Strictly this is a "segment", not a "selector". - """ - - def __str__(self) -> str: - return ".." 
- - def __eq__(self, __value: object) -> bool: - return ( - isinstance(__value, RecursiveDescentSelector) - and self.token == __value.token - ) - - def __hash__(self) -> int: - return hash(self.token) - - def _expand(self, match: JSONPathMatch) -> Iterable[JSONPathMatch]: - if isinstance(match.obj, Mapping): - for key, val in match.obj.items(): - if isinstance(val, str): - pass - elif isinstance(val, (Mapping, Sequence)): - _match = self.env.match_class( - filter_context=match.filter_context(), - obj=val, - parent=match, - parts=match.parts + (key,), - path=match.path + f"[{canonical_string(key)}]", - root=match.root, - ) - match.add_child(_match) - yield _match - yield from self._expand(_match) - elif isinstance(match.obj, Sequence) and not isinstance(match.obj, str): - for i, val in enumerate(match.obj): - if isinstance(val, str): - pass - elif isinstance(val, (Mapping, Sequence)): - _match = self.env.match_class( - filter_context=match.filter_context(), - obj=val, - parent=match, - parts=match.parts + (i,), - path=f"{match.path}[{i}]", - root=match.root, - ) - match.add_child(_match) - yield _match - yield from self._expand(_match) - - def resolve(self, matches: Iterable[JSONPathMatch]) -> Iterable[JSONPathMatch]: - for match in matches: - yield match - yield from self._expand(match) - - async def resolve_async( - self, matches: AsyncIterable[JSONPathMatch] - ) -> AsyncIterable[JSONPathMatch]: - async for match in matches: - yield match - for _match in self._expand(match): - yield _match - - -T = TypeVar("T") - - -async def _alist(it: List[T]) -> AsyncIterable[T]: - for item in it: - yield item - - -class ListSelector(JSONPathSelector): - """A bracketed list of selectors, the results of which are concatenated together. - - NOTE: Strictly this is a "segment", not a "selector". 
- """ - - __slots__ = ("items",) - - def __init__( - self, - *, - env: JSONPathEnvironment, - token: Token, - items: List[ - Union[ - SliceSelector, - KeysSelector, - IndexSelector, - PropertySelector, - WildSelector, - Filter, - ] - ], - ) -> None: - super().__init__(env=env, token=token) - self.items = tuple(items) - - def __str__(self) -> str: - return f"[{', '.join(str(itm) for itm in self.items)}]" - - def __eq__(self, __value: object) -> bool: - return ( - isinstance(__value, ListSelector) - and self.items == __value.items - and self.token == __value.token - ) - - def __hash__(self) -> int: - return hash((self.items, self.token)) - - def resolve(self, matches: Iterable[JSONPathMatch]) -> Iterable[JSONPathMatch]: - for match_ in matches: - for item in self.items: - yield from item.resolve([match_]) - - async def resolve_async( - self, matches: AsyncIterable[JSONPathMatch] - ) -> AsyncIterable[JSONPathMatch]: - async for match_ in matches: - for item in self.items: - async for m in item.resolve_async(_alist([match_])): - yield m + def resolve(self, node: JSONPathMatch) -> Iterable[JSONPathMatch]: + if isinstance(node.obj, Mapping): + for key, val in node.obj.items(): + match = node.new_child(val, key) + node.add_child(match) + yield match + + elif isinstance(node.obj, Sequence) and not isinstance(node.obj, str): + for i, val in enumerate(node.obj): + match = node.new_child(val, i) + node.add_child(match) + yield match + + async def resolve_async(self, node: JSONPathMatch) -> AsyncIterable[JSONPathMatch]: + if isinstance(node.obj, Mapping): + for key, val in node.obj.items(): + match = node.new_child(val, key) + node.add_child(match) + yield match + + elif isinstance(node.obj, Sequence) and not isinstance(node.obj, str): + for i, val in enumerate(node.obj): + match = node.new_child(val, i) + node.add_child(match) + yield match class Filter(JSONPathSelector): @@ -607,132 +378,98 @@ def __eq__(self, __value: object) -> bool: def __hash__(self) -> int: return hash((str(self.expression), self.token)) - def resolve( # noqa: PLR0912 - self, matches: Iterable[JSONPathMatch] - ) -> Iterable[JSONPathMatch]: + def resolve(self, node: JSONPathMatch) -> Iterable[JSONPathMatch]: if self.cacheable_nodes and self.env.filter_caching: expr = self.expression.cache_tree() else: expr = self.expression - for match in matches: - if isinstance(match.obj, Mapping): - for key, val in match.obj.items(): - context = FilterContext( - env=self.env, - current=val, - root=match.root, - extra_context=match.filter_context(), - current_key=key, - ) - try: - if expr.evaluate(context): - _match = self.env.match_class( - filter_context=match.filter_context(), - obj=val, - parent=match, - parts=match.parts + (key,), - path=match.path + f"[{canonical_string(key)}]", - root=match.root, - ) - match.add_child(_match) - yield _match - except JSONPathTypeError as err: - if not err.token: - err.token = self.token - raise - - elif isinstance(match.obj, Sequence) and not isinstance(match.obj, str): - for i, obj in enumerate(match.obj): - context = FilterContext( - env=self.env, - current=obj, - root=match.root, - extra_context=match.filter_context(), - current_key=i, - ) - try: - if expr.evaluate(context): - _match = self.env.match_class( - filter_context=match.filter_context(), - obj=obj, - parent=match, - parts=match.parts + (i,), - path=f"{match.path}[{i}]", - root=match.root, - ) - match.add_child(_match) - yield _match - except JSONPathTypeError as err: - if not err.token: - err.token = self.token - raise - - async def 
resolve_async( # noqa: PLR0912 - self, matches: AsyncIterable[JSONPathMatch] - ) -> AsyncIterable[JSONPathMatch]: + if isinstance(node.obj, Mapping): + for key, val in node.obj.items(): + context = FilterContext( + env=self.env, + current=val, + root=node.root, + extra_context=node.filter_context(), + current_key=key, + ) + try: + if expr.evaluate(context): + match = node.new_child(val, key) + node.add_child(match) + yield match + except JSONPathTypeError as err: + if not err.token: + err.token = self.token + raise + + elif isinstance(node.obj, Sequence) and not isinstance(node.obj, str): + for i, obj in enumerate(node.obj): + context = FilterContext( + env=self.env, + current=obj, + root=node.root, + extra_context=node.filter_context(), + current_key=i, + ) + try: + if expr.evaluate(context): + match = node.new_child(obj, i) + node.add_child(match) + yield match + except JSONPathTypeError as err: + if not err.token: + err.token = self.token + raise + + async def resolve_async(self, node: JSONPathMatch) -> AsyncIterable[JSONPathMatch]: if self.cacheable_nodes and self.env.filter_caching: expr = self.expression.cache_tree() else: expr = self.expression - async for match in matches: - if isinstance(match.obj, Mapping): - for key, val in match.obj.items(): - context = FilterContext( - env=self.env, - current=val, - root=match.root, - extra_context=match.filter_context(), - current_key=key, - ) - - try: - result = await expr.evaluate_async(context) - except JSONPathTypeError as err: - if not err.token: - err.token = self.token - raise - - if result: - _match = self.env.match_class( - filter_context=match.filter_context(), - obj=val, - parent=match, - parts=match.parts + (key,), - path=match.path + f"[{canonical_string(key)}]", - root=match.root, - ) - match.add_child(_match) - yield _match - - elif isinstance(match.obj, Sequence) and not isinstance(match.obj, str): - for i, obj in enumerate(match.obj): - context = FilterContext( - env=self.env, - current=obj, - root=match.root, - extra_context=match.filter_context(), - current_key=i, - ) - - try: - result = await expr.evaluate_async(context) - except JSONPathTypeError as err: - if not err.token: - err.token = self.token - raise - if result: - _match = self.env.match_class( - filter_context=match.filter_context(), - obj=obj, - parent=match, - parts=match.parts + (i,), - path=f"{match.path}[{i}]", - root=match.root, - ) - match.add_child(_match) - yield _match + if isinstance(node.obj, Mapping): + for key, val in node.obj.items(): + context = FilterContext( + env=self.env, + current=val, + root=node.root, + extra_context=node.filter_context(), + current_key=key, + ) + + try: + result = await expr.evaluate_async(context) + except JSONPathTypeError as err: + if not err.token: + err.token = self.token + raise + + if result: + match = node.new_child(val, key) + node.add_child(match) + yield match + + elif isinstance(node.obj, Sequence) and not isinstance(node.obj, str): + for i, obj in enumerate(node.obj): + context = FilterContext( + env=self.env, + current=obj, + root=node.root, + extra_context=node.filter_context(), + current_key=i, + ) + + try: + result = await expr.evaluate_async(context) + except JSONPathTypeError as err: + if not err.token: + err.token = self.token + raise + if result: + match = node.new_child(obj, i) + node.add_child(match) + yield match class FilterContext: diff --git a/jsonpath/stream.py b/jsonpath/stream.py index 4a38afb..0a6e052 100644 --- a/jsonpath/stream.py +++ b/jsonpath/stream.py @@ -4,6 +4,7 @@ from collections 
import deque
 from typing import Deque
 from typing import Iterator
+from typing import Optional
 
 from .exceptions import JSONPathSyntaxError
 from .token import TOKEN_EOF
@@ -97,3 +98,17 @@ def expect_peek(self, *typ: str) -> None:
             f"expected {_typ}, found {self.peek.kind!r}",
             token=self.peek,
         )
+
+    def expect_peek_not(self, typ: str, message: str) -> None:
+        """Raise an exception if the next token is of kind _typ_."""
+        if self.peek.kind == typ:
+            raise JSONPathSyntaxError(message, token=self.peek)
+
+    def eat(self, *typ: str) -> Token:
+        self.expect(*typ)
+        return self.next_token()
+
+    def skip(self, *typ: str) -> Optional[Token]:
+        if self.current.kind in typ:
+            return self.next_token()
+        return None
diff --git a/jsonpath/token.py b/jsonpath/token.py
index a2392e3..c9f6592 100644
--- a/jsonpath/token.py
+++ b/jsonpath/token.py
@@ -1,74 +1,69 @@
 """JSONPath tokens."""
+
 import sys
 from typing import Tuple
 
 # Utility tokens
-TOKEN_EOF = sys.intern("EOF")
-TOKEN_ILLEGAL = sys.intern("ILLEGAL")
-TOKEN_SKIP = sys.intern("SKIP")
+TOKEN_EOF = sys.intern("TOKEN_EOF")
+TOKEN_WHITESPACE = sys.intern("TOKEN_WHITESPACE")
+TOKEN_ERROR = sys.intern("TOKEN_ERROR")
 
 # JSONPath expression tokens
-TOKEN_COLON = sys.intern("COLON")
-TOKEN_COMMA = sys.intern("COMMA")
-TOKEN_DDOT = sys.intern("DDOT")
-TOKEN_DOT = sys.intern("DOT")
-TOKEN_DOT_INDEX = sys.intern("DINDEX")
-TOKEN_DOT_PROPERTY = sys.intern("DOT_PROPERTY")
-TOKEN_FILTER = sys.intern("FILTER")
-TOKEN_FAKE_ROOT = sys.intern("FAKE_ROOT")
-TOKEN_KEY = sys.intern("KEY")
-TOKEN_KEYS = sys.intern("KEYS")
-TOKEN_RBRACKET = sys.intern("RBRACKET")
-TOKEN_BARE_PROPERTY = sys.intern("BARE_PROPERTY")
-TOKEN_LIST_SLICE = sys.intern("LSLICE")
-TOKEN_LIST_START = sys.intern("LBRACKET")
-TOKEN_PROPERTY = sys.intern("PROP")
-TOKEN_ROOT = sys.intern("ROOT")
-TOKEN_SLICE_START = sys.intern("SLICE_START")
-TOKEN_SLICE_STEP = sys.intern("SLICE_STEP")
-TOKEN_SLICE_STOP = sys.intern("SLICE_STOP")
-TOKEN_WILD = sys.intern("WILD")
+TOKEN_COLON = sys.intern("TOKEN_COLON")
+TOKEN_COMMA = sys.intern("TOKEN_COMMA")
+TOKEN_DDOT = sys.intern("TOKEN_DDOT")
+TOKEN_DOT = sys.intern("TOKEN_DOT")
+TOKEN_FILTER = sys.intern("TOKEN_FILTER")
+TOKEN_KEY = sys.intern("TOKEN_KEY")
+TOKEN_KEYS = sys.intern("TOKEN_KEYS")
+TOKEN_KEYS_FILTER = sys.intern("TOKEN_KEYS_FILTER")
+TOKEN_LBRACKET = sys.intern("TOKEN_LBRACKET")
+TOKEN_PSEUDO_ROOT = sys.intern("TOKEN_PSEUDO_ROOT")
+TOKEN_RBRACKET = sys.intern("TOKEN_RBRACKET")
+TOKEN_ROOT = sys.intern("TOKEN_ROOT")
+TOKEN_WILD = sys.intern("TOKEN_WILD")
+TOKEN_NAME = sys.intern("TOKEN_NAME")  # An object property/key or a function name
 
 # Filter expression tokens
-TOKEN_AND = sys.intern("AND")
-TOKEN_BLANK = sys.intern("BLANK")
-TOKEN_CONTAINS = sys.intern("CONTAINS")
-TOKEN_FILTER_CONTEXT = sys.intern("FILTER_CONTEXT")
-TOKEN_FUNCTION = sys.intern("FUNCTION")
-TOKEN_EMPTY = sys.intern("EMPTY")
-TOKEN_EQ = sys.intern("EQ")
-TOKEN_FALSE = sys.intern("FALSE")
-TOKEN_FLOAT = sys.intern("FLOAT")
-TOKEN_GE = sys.intern("GE")
-TOKEN_GT = sys.intern("GT")
-TOKEN_IN = sys.intern("IN")
-TOKEN_INT = sys.intern("INT")
-TOKEN_LE = sys.intern("LE")
-TOKEN_LG = sys.intern("LG")
-TOKEN_LPAREN = sys.intern("LPAREN")
-TOKEN_LT = sys.intern("LT")
-TOKEN_NE = sys.intern("NE")
-TOKEN_NIL = sys.intern("NIL")
-TOKEN_NONE = sys.intern("NONE")
-TOKEN_NOT = sys.intern("NOT")
-TOKEN_NULL = sys.intern("NULL")
-TOKEN_OP = sys.intern("OP")
-TOKEN_OR = sys.intern("OR")
-TOKEN_RE = sys.intern("RE")
-TOKEN_RE_FLAGS = sys.intern("RE_FLAGS")
-TOKEN_RE_PATTERN = sys.intern("RE_PATTERN")
-TOKEN_RPAREN = sys.intern("RPAREN") -TOKEN_SELF = sys.intern("SELF") -TOKEN_STRING = sys.intern("STRING") -TOKEN_DOUBLE_QUOTE_STRING = sys.intern("DOUBLE_QUOTE_STRING") -TOKEN_SINGLE_QUOTE_STRING = sys.intern("SINGLE_QUOTE_STRING") -TOKEN_TRUE = sys.intern("TRUE") -TOKEN_UNDEFINED = sys.intern("UNDEFINED") -TOKEN_MISSING = sys.intern("MISSING") +TOKEN_AND = sys.intern("TOKEN_AND") +TOKEN_BLANK = sys.intern("TOKEN_BLANK") +TOKEN_CONTAINS = sys.intern("TOKEN_CONTAINS") +TOKEN_DOUBLE_QUOTE_STRING = sys.intern("TOKEN_DOUBLE_QUOTE_STRING") +TOKEN_EMPTY = sys.intern("TOKEN_EMPTY") +TOKEN_EQ = sys.intern("TOKEN_EQ") +TOKEN_FALSE = sys.intern("TOKEN_FALSE") +TOKEN_FILTER_CONTEXT = sys.intern("TOKEN_FILTER_CONTEXT") +TOKEN_FLOAT = sys.intern("TOKEN_FLOAT") +TOKEN_FUNCTION = sys.intern("TOKEN_FUNCTION") +TOKEN_GE = sys.intern("TOKEN_GE") +TOKEN_GT = sys.intern("TOKEN_GT") +TOKEN_IN = sys.intern("TOKEN_IN") +TOKEN_INT = sys.intern("TOKEN_INT") +TOKEN_LE = sys.intern("TOKEN_LE") +TOKEN_LG = sys.intern("TOKEN_LG") +TOKEN_LPAREN = sys.intern("TOKEN_LPAREN") +TOKEN_LT = sys.intern("TOKEN_LT") +TOKEN_MISSING = sys.intern("TOKEN_MISSING") +TOKEN_NE = sys.intern("TOKEN_NE") +TOKEN_NIL = sys.intern("TOKEN_NIL") +TOKEN_NONE = sys.intern("TOKEN_NONE") +TOKEN_NOT = sys.intern("TOKEN_NOT") +TOKEN_NULL = sys.intern("TOKEN_NULL") +TOKEN_OP = sys.intern("TOKEN_OP") +TOKEN_OR = sys.intern("TOKEN_OR") +TOKEN_RE = sys.intern("TOKEN_RE") +TOKEN_RE_FLAGS = sys.intern("TOKEN_RE_FLAGS") +TOKEN_RE_PATTERN = sys.intern("TOKEN_RE_PATTERN") +TOKEN_RPAREN = sys.intern("TOKEN_RPAREN") +TOKEN_SELF = sys.intern("TOKEN_SELF") +TOKEN_SINGLE_QUOTE_STRING = sys.intern("TOKEN_SINGLE_QUOTE_STRING") +TOKEN_STRING = sys.intern("TOKEN_STRING") +TOKEN_TRUE = sys.intern("TOKEN_TRUE") +TOKEN_UNDEFINED = sys.intern("TOKEN_UNDEFINED") # Extension tokens -TOKEN_UNION = sys.intern("UNION") -TOKEN_INTERSECTION = sys.intern("INTERSECT") +TOKEN_INTERSECTION = sys.intern("TOKEN_INTERSECTION") +TOKEN_UNION = sys.intern("TOKEN_UNION") class Token: @@ -99,7 +94,7 @@ def __init__( def __repr__(self) -> str: # pragma: no cover return ( - f"Token(kind={self.kind!r}, value={self.value!r}, " + f"Token(kind={self.kind}, value={self.value!r}, " f"index={self.index}, path={self.path!r})" ) diff --git a/pyproject.toml b/pyproject.toml index a801434..23268f8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -87,7 +87,8 @@ omit = ["jsonpath/__about__.py", "tests/compliance.py", "tests/consensus.py"] exclude_lines = ["no cov", "if __name__ == .__main__.:", "if TYPE_CHECKING:"] [tool.mypy] -files = "jsonpath" +files = ["jsonpath", "tests"] +exclude = ["tests/nts"] python_version = "3.11" disallow_subclassing_any = true disallow_untyped_calls = true diff --git a/tests/test_filter_expression_caching.py b/tests/test_filter_expression_caching.py index 9d1b3cf..31534c2 100644 --- a/tests/test_filter_expression_caching.py +++ b/tests/test_filter_expression_caching.py @@ -1,4 +1,5 @@ """Filter expression caching test cases.""" + from unittest import mock from jsonpath import JSONPath @@ -9,10 +10,10 @@ from jsonpath.filter import FilterExpression from jsonpath.filter import InfixExpression from jsonpath.filter import IntegerLiteral -from jsonpath.filter import RootPath -from jsonpath.filter import SelfPath +from jsonpath.filter import RelativeFilterQuery +from jsonpath.filter import RootFilterQuery +from jsonpath.segments import JSONPathChildSegment from jsonpath.selectors import Filter as FilterSelector -from jsonpath.selectors import ListSelector def 
test_cache_root_path() -> None: @@ -20,9 +21,9 @@ def test_cache_root_path() -> None: env = JSONPathEnvironment() path = env.compile("$.some[?@.a < $.thing].a") assert isinstance(path, JSONPath) - selection_list = path.selectors[1] - assert isinstance(selection_list, ListSelector) - filter_selector = selection_list.items[0] + segment = path.segments[1] + assert isinstance(segment, JSONPathChildSegment) + filter_selector = segment.selectors[0] assert isinstance(filter_selector, FilterSelector) assert filter_selector.cacheable_nodes is True @@ -31,17 +32,17 @@ def test_cache_root_path() -> None: assert isinstance(expr, BooleanExpression) expr = expr.expression assert isinstance(expr, InfixExpression) - assert isinstance(expr.left, SelfPath) - assert isinstance(expr.right, RootPath) + assert isinstance(expr.left, RelativeFilterQuery) + assert isinstance(expr.right, RootFilterQuery) # A caching copy of the original expression tree. expr = filter_selector.expression.cache_tree() assert isinstance(expr, BooleanExpression) expr = expr.expression assert isinstance(expr, InfixExpression) - assert isinstance(expr.left, SelfPath) + assert isinstance(expr.left, RelativeFilterQuery) assert isinstance(expr.right, CachingFilterExpression) - assert isinstance(expr.right._expr, RootPath) # noqa: SLF001 + assert isinstance(expr.right._expr, RootFilterQuery) # noqa: SLF001 def test_root_path_cache() -> None: @@ -75,9 +76,9 @@ def test_cache_context_path() -> None: env = JSONPathEnvironment() path = env.compile("$.some[?_.thing > @.a].a") assert isinstance(path, JSONPath) - selection_list = path.selectors[1] - assert isinstance(selection_list, ListSelector) - filter_selector = selection_list.items[0] + segment = path.segments[1] + assert isinstance(segment, JSONPathChildSegment) + filter_selector = segment.selectors[0] assert isinstance(filter_selector, FilterSelector) assert filter_selector.cacheable_nodes is True @@ -87,7 +88,7 @@ def test_cache_context_path() -> None: expr = expr.expression assert isinstance(expr, InfixExpression) assert isinstance(expr.left, FilterContextPath) - assert isinstance(expr.right, SelfPath) + assert isinstance(expr.right, RelativeFilterQuery) # A caching copy of the original expression tree. 
expr = filter_selector.expression.cache_tree() @@ -96,7 +97,7 @@ def test_cache_context_path() -> None: assert isinstance(expr, InfixExpression) assert isinstance(expr.left, CachingFilterExpression) assert isinstance(expr.left._expr, FilterContextPath) # noqa: SLF001 - assert isinstance(expr.right, SelfPath) + assert isinstance(expr.right, RelativeFilterQuery) def test_context_path_cache() -> None: @@ -146,9 +147,9 @@ def test_uncacheable_filter() -> None: env = JSONPathEnvironment(filter_caching=True) path = env.compile("$.some[?@.a > 2 and @.b < 4].a") assert isinstance(path, JSONPath) - selection_list = path.selectors[1] - assert isinstance(selection_list, ListSelector) - filter_selector = selection_list.items[0] + segment = path.segments[1] + assert isinstance(segment, JSONPathChildSegment) + filter_selector = segment.selectors[0] assert isinstance(filter_selector, FilterSelector) assert filter_selector.cacheable_nodes is False @@ -159,7 +160,7 @@ def test_uncacheable_filter() -> None: assert isinstance(expr, InfixExpression) assert isinstance(expr.left, InfixExpression) assert isinstance(expr.right, InfixExpression) - assert isinstance(expr.left.left, SelfPath) + assert isinstance(expr.left.left, RelativeFilterQuery) assert isinstance(expr.left.right, IntegerLiteral) - assert isinstance(expr.right.left, SelfPath) + assert isinstance(expr.right.left, RelativeFilterQuery) assert isinstance(expr.right.right, IntegerLiteral) diff --git a/tests/test_find_reference.py b/tests/test_find_reference.py index cbc7bf0..83a050d 100644 --- a/tests/test_find_reference.py +++ b/tests/test_find_reference.py @@ -2,6 +2,7 @@ See https://goessner.net/articles/JsonPath/ """ + import asyncio import dataclasses import operator @@ -220,132 +221,132 @@ class Case: }, ], ), - Case( - description="root descent", - path="$..", - data=REFERENCE_DATA, - want=[ - { - "store": { - "book": [ - { - "category": "reference", - "author": "Nigel Rees", - "title": "Sayings of the Century", - "price": 8.95, - }, - { - "category": "fiction", - "author": "Evelyn Waugh", - "title": "Sword of Honour", - "price": 12.99, - }, - { - "category": "fiction", - "author": "Herman Melville", - "title": "Moby Dick", - "isbn": "0-553-21311-3", - "price": 8.99, - }, - { - "category": "fiction", - "author": "J. R. R. Tolkien", - "title": "The Lord of the Rings", - "isbn": "0-395-19395-8", - "price": 22.99, - }, - ], - "bicycle": {"color": "red", "price": 19.95}, - } - }, - { - "book": [ - { - "category": "reference", - "author": "Nigel Rees", - "title": "Sayings of the Century", - "price": 8.95, - }, - { - "category": "fiction", - "author": "Evelyn Waugh", - "title": "Sword of Honour", - "price": 12.99, - }, - { - "category": "fiction", - "author": "Herman Melville", - "title": "Moby Dick", - "isbn": "0-553-21311-3", - "price": 8.99, - }, - { - "category": "fiction", - "author": "J. R. R. Tolkien", - "title": "The Lord of the Rings", - "isbn": "0-395-19395-8", - "price": 22.99, - }, - ], - "bicycle": {"color": "red", "price": 19.95}, - }, - [ - { - "category": "reference", - "author": "Nigel Rees", - "title": "Sayings of the Century", - "price": 8.95, - }, - { - "category": "fiction", - "author": "Evelyn Waugh", - "title": "Sword of Honour", - "price": 12.99, - }, - { - "category": "fiction", - "author": "Herman Melville", - "title": "Moby Dick", - "isbn": "0-553-21311-3", - "price": 8.99, - }, - { - "category": "fiction", - "author": "J. R. R. 
Tolkien", - "title": "The Lord of the Rings", - "isbn": "0-395-19395-8", - "price": 22.99, - }, - ], - { - "category": "reference", - "author": "Nigel Rees", - "title": "Sayings of the Century", - "price": 8.95, - }, - { - "category": "fiction", - "author": "Evelyn Waugh", - "title": "Sword of Honour", - "price": 12.99, - }, - { - "category": "fiction", - "author": "Herman Melville", - "title": "Moby Dick", - "isbn": "0-553-21311-3", - "price": 8.99, - }, - { - "category": "fiction", - "author": "J. R. R. Tolkien", - "title": "The Lord of the Rings", - "isbn": "0-395-19395-8", - "price": 22.99, - }, - {"color": "red", "price": 19.95}, - ], - ), + # Case( + # description="root descent", + # path="$..", + # data=REFERENCE_DATA, + # want=[ + # { + # "store": { + # "book": [ + # { + # "category": "reference", + # "author": "Nigel Rees", + # "title": "Sayings of the Century", + # "price": 8.95, + # }, + # { + # "category": "fiction", + # "author": "Evelyn Waugh", + # "title": "Sword of Honour", + # "price": 12.99, + # }, + # { + # "category": "fiction", + # "author": "Herman Melville", + # "title": "Moby Dick", + # "isbn": "0-553-21311-3", + # "price": 8.99, + # }, + # { + # "category": "fiction", + # "author": "J. R. R. Tolkien", + # "title": "The Lord of the Rings", + # "isbn": "0-395-19395-8", + # "price": 22.99, + # }, + # ], + # "bicycle": {"color": "red", "price": 19.95}, + # } + # }, + # { + # "book": [ + # { + # "category": "reference", + # "author": "Nigel Rees", + # "title": "Sayings of the Century", + # "price": 8.95, + # }, + # { + # "category": "fiction", + # "author": "Evelyn Waugh", + # "title": "Sword of Honour", + # "price": 12.99, + # }, + # { + # "category": "fiction", + # "author": "Herman Melville", + # "title": "Moby Dick", + # "isbn": "0-553-21311-3", + # "price": 8.99, + # }, + # { + # "category": "fiction", + # "author": "J. R. R. Tolkien", + # "title": "The Lord of the Rings", + # "isbn": "0-395-19395-8", + # "price": 22.99, + # }, + # ], + # "bicycle": {"color": "red", "price": 19.95}, + # }, + # [ + # { + # "category": "reference", + # "author": "Nigel Rees", + # "title": "Sayings of the Century", + # "price": 8.95, + # }, + # { + # "category": "fiction", + # "author": "Evelyn Waugh", + # "title": "Sword of Honour", + # "price": 12.99, + # }, + # { + # "category": "fiction", + # "author": "Herman Melville", + # "title": "Moby Dick", + # "isbn": "0-553-21311-3", + # "price": 8.99, + # }, + # { + # "category": "fiction", + # "author": "J. R. R. Tolkien", + # "title": "The Lord of the Rings", + # "isbn": "0-395-19395-8", + # "price": 22.99, + # }, + # ], + # { + # "category": "reference", + # "author": "Nigel Rees", + # "title": "Sayings of the Century", + # "price": 8.95, + # }, + # { + # "category": "fiction", + # "author": "Evelyn Waugh", + # "title": "Sword of Honour", + # "price": 12.99, + # }, + # { + # "category": "fiction", + # "author": "Herman Melville", + # "title": "Moby Dick", + # "isbn": "0-553-21311-3", + # "price": 8.99, + # }, + # { + # "category": "fiction", + # "author": "J. R. R. 
Tolkien", + # "title": "The Lord of the Rings", + # "isbn": "0-395-19395-8", + # "price": 22.99, + # }, + # {"color": "red", "price": 19.95}, + # ], + # ), Case( description="(reference) all elements", path="$..*", diff --git a/tests/test_lex.py b/tests/test_lex.py index 14727ac..b3335c8 100644 --- a/tests/test_lex.py +++ b/tests/test_lex.py @@ -7,28 +7,28 @@ from jsonpath import JSONPathEnvironment from jsonpath.exceptions import JSONPathSyntaxError from jsonpath.token import TOKEN_AND -from jsonpath.token import TOKEN_BARE_PROPERTY +from jsonpath.token import TOKEN_COLON from jsonpath.token import TOKEN_COMMA from jsonpath.token import TOKEN_DDOT +from jsonpath.token import TOKEN_DOT from jsonpath.token import TOKEN_DOUBLE_QUOTE_STRING from jsonpath.token import TOKEN_EQ -from jsonpath.token import TOKEN_FAKE_ROOT from jsonpath.token import TOKEN_FALSE from jsonpath.token import TOKEN_FILTER from jsonpath.token import TOKEN_FLOAT -from jsonpath.token import TOKEN_FUNCTION from jsonpath.token import TOKEN_GT from jsonpath.token import TOKEN_IN from jsonpath.token import TOKEN_INT from jsonpath.token import TOKEN_INTERSECTION from jsonpath.token import TOKEN_KEYS -from jsonpath.token import TOKEN_LIST_START +from jsonpath.token import TOKEN_LBRACKET from jsonpath.token import TOKEN_LPAREN from jsonpath.token import TOKEN_LT +from jsonpath.token import TOKEN_NAME from jsonpath.token import TOKEN_NIL from jsonpath.token import TOKEN_NOT from jsonpath.token import TOKEN_OR -from jsonpath.token import TOKEN_PROPERTY +from jsonpath.token import TOKEN_PSEUDO_ROOT from jsonpath.token import TOKEN_RBRACKET from jsonpath.token import TOKEN_RE from jsonpath.token import TOKEN_RE_FLAGS @@ -37,11 +37,9 @@ from jsonpath.token import TOKEN_RPAREN from jsonpath.token import TOKEN_SELF from jsonpath.token import TOKEN_SINGLE_QUOTE_STRING -from jsonpath.token import TOKEN_SLICE_START -from jsonpath.token import TOKEN_SLICE_STEP -from jsonpath.token import TOKEN_SLICE_STOP from jsonpath.token import TOKEN_TRUE from jsonpath.token import TOKEN_UNION +from jsonpath.token import TOKEN_WHITESPACE from jsonpath.token import TOKEN_WILD from jsonpath.token import Token @@ -57,33 +55,33 @@ class Case: Case( description="just root", path="$", - want=[ - Token(kind=TOKEN_ROOT, value="$", index=0, path="$"), - ], + want=[Token(kind=TOKEN_ROOT, value="$", index=0, path="$")], ), Case( - description="just fake root", + description="just pseudo-root", path="^", - want=[ - Token(kind=TOKEN_FAKE_ROOT, value="^", index=0, path="^"), - ], + want=[Token(kind=TOKEN_PSEUDO_ROOT, value="^", index=0, path="^")], ), Case( description="root dot property", path="$.some.thing", want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path="$.some.thing"), - Token(kind=TOKEN_PROPERTY, value="some", index=2, path="$.some.thing"), - Token(kind=TOKEN_PROPERTY, value="thing", index=7, path="$.some.thing"), + Token(kind=TOKEN_DOT, value=".", index=1, path="$.some.thing"), + Token(kind=TOKEN_NAME, value="some", index=2, path="$.some.thing"), + Token(kind=TOKEN_DOT, value=".", index=6, path="$.some.thing"), + Token(kind=TOKEN_NAME, value="thing", index=7, path="$.some.thing"), ], ), Case( - description="fake root dot property", + description="pseudo root dot property", path="^.some.thing", want=[ - Token(kind=TOKEN_FAKE_ROOT, value="^", index=0, path="^.some.thing"), - Token(kind=TOKEN_PROPERTY, value="some", index=2, path="^.some.thing"), - Token(kind=TOKEN_PROPERTY, value="thing", index=7, path="^.some.thing"), + Token(kind=TOKEN_PSEUDO_ROOT, 
value="^", index=0, path="^.some.thing"), + Token(kind=TOKEN_DOT, value=".", index=1, path="^.some.thing"), + Token(kind=TOKEN_NAME, value="some", index=2, path="^.some.thing"), + Token(kind=TOKEN_DOT, value=".", index=6, path="^.some.thing"), + Token(kind=TOKEN_NAME, value="thing", index=7, path="^.some.thing"), ], ), Case( @@ -91,15 +89,11 @@ class Case: path="$[some][thing]", want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path="$[some][thing]"), - Token(kind=TOKEN_LIST_START, value="[", index=1, path="$[some][thing]"), - Token( - kind=TOKEN_BARE_PROPERTY, value="some", index=2, path="$[some][thing]" - ), + Token(kind=TOKEN_LBRACKET, value="[", index=1, path="$[some][thing]"), + Token(kind=TOKEN_NAME, value="some", index=2, path="$[some][thing]"), Token(kind=TOKEN_RBRACKET, value="]", index=6, path="$[some][thing]"), - Token(kind=TOKEN_LIST_START, value="[", index=7, path="$[some][thing]"), - Token( - kind=TOKEN_BARE_PROPERTY, value="thing", index=8, path="$[some][thing]" - ), + Token(kind=TOKEN_LBRACKET, value="[", index=7, path="$[some][thing]"), + Token(kind=TOKEN_NAME, value="thing", index=8, path="$[some][thing]"), Token(kind=TOKEN_RBRACKET, value="]", index=13, path="$[some][thing]"), ], ), @@ -108,7 +102,7 @@ class Case: path='$["some"]', want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path='$["some"]'), - Token(kind=TOKEN_LIST_START, value="[", index=1, path='$["some"]'), + Token(kind=TOKEN_LBRACKET, value="[", index=1, path='$["some"]'), Token( kind=TOKEN_DOUBLE_QUOTE_STRING, value="some", index=3, path='$["some"]' ), @@ -120,7 +114,7 @@ class Case: path="$['some']", want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path="$['some']"), - Token(kind=TOKEN_LIST_START, value="[", index=1, path="$['some']"), + Token(kind=TOKEN_LBRACKET, value="[", index=1, path="$['some']"), Token( kind=TOKEN_SINGLE_QUOTE_STRING, value="some", index=3, path="$['some']" ), @@ -132,15 +126,12 @@ class Case: path="$.[some][thing]", want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path="$.[some][thing]"), - Token(kind=TOKEN_LIST_START, value="[", index=2, path="$.[some][thing]"), - Token( - kind=TOKEN_BARE_PROPERTY, value="some", index=3, path="$.[some][thing]" - ), + Token(kind=TOKEN_DOT, value=".", index=1, path="$.[some][thing]"), + Token(kind=TOKEN_LBRACKET, value="[", index=2, path="$.[some][thing]"), + Token(kind=TOKEN_NAME, value="some", index=3, path="$.[some][thing]"), Token(kind=TOKEN_RBRACKET, value="]", index=7, path="$.[some][thing]"), - Token(kind=TOKEN_LIST_START, value="[", index=8, path="$.[some][thing]"), - Token( - kind=TOKEN_BARE_PROPERTY, value="thing", index=9, path="$.[some][thing]" - ), + Token(kind=TOKEN_LBRACKET, value="[", index=8, path="$.[some][thing]"), + Token(kind=TOKEN_NAME, value="thing", index=9, path="$.[some][thing]"), Token(kind=TOKEN_RBRACKET, value="]", index=14, path="$.[some][thing]"), ], ), @@ -149,7 +140,7 @@ class Case: path="$[1]", want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path="$[1]"), - Token(kind=TOKEN_LIST_START, value="[", index=1, path="$[1]"), + Token(kind=TOKEN_LBRACKET, value="[", index=1, path="$[1]"), Token(kind=TOKEN_INT, value="1", index=2, path="$[1]"), Token(kind=TOKEN_RBRACKET, value="]", index=3, path="$[1]"), ], @@ -159,7 +150,8 @@ class Case: path="$.[1]", want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path="$.[1]"), - Token(kind=TOKEN_LIST_START, value="[", index=2, path="$.[1]"), + Token(kind=TOKEN_DOT, value=".", index=1, path="$.[1]"), + Token(kind=TOKEN_LBRACKET, value="[", index=2, path="$.[1]"), Token(kind=TOKEN_INT, 
value="1", index=3, path="$.[1]"), Token(kind=TOKEN_RBRACKET, value="]", index=4, path="$.[1]"), ], @@ -168,10 +160,8 @@ class Case: description="empty slice", path="[:]", want=[ - Token(kind=TOKEN_LIST_START, value="[", index=0, path="[:]"), - Token(kind=TOKEN_SLICE_START, value="", index=1, path="[:]"), - Token(kind=TOKEN_SLICE_STOP, value="", index=2, path="[:]"), - Token(kind=TOKEN_SLICE_STEP, value="", index=-1, path="[:]"), + Token(kind=TOKEN_LBRACKET, value="[", index=0, path="[:]"), + Token(kind=TOKEN_COLON, value=":", index=1, path="[:]"), Token(kind=TOKEN_RBRACKET, value="]", index=2, path="[:]"), ], ), @@ -179,10 +169,9 @@ class Case: description="empty slice empty step", path="[::]", want=[ - Token(kind=TOKEN_LIST_START, value="[", index=0, path="[::]"), - Token(kind=TOKEN_SLICE_START, value="", index=1, path="[::]"), - Token(kind=TOKEN_SLICE_STOP, value="", index=2, path="[::]"), - Token(kind=TOKEN_SLICE_STEP, value="", index=3, path="[::]"), + Token(kind=TOKEN_LBRACKET, value="[", index=0, path="[::]"), + Token(kind=TOKEN_COLON, value=":", index=1, path="[::]"), + Token(kind=TOKEN_COLON, value=":", index=2, path="[::]"), Token(kind=TOKEN_RBRACKET, value="]", index=3, path="[::]"), ], ), @@ -190,10 +179,9 @@ class Case: description="slice empty stop", path="[1:]", want=[ - Token(kind=TOKEN_LIST_START, value="[", index=0, path="[1:]"), - Token(kind=TOKEN_SLICE_START, value="1", index=1, path="[1:]"), - Token(kind=TOKEN_SLICE_STOP, value="", index=3, path="[1:]"), - Token(kind=TOKEN_SLICE_STEP, value="", index=-1, path="[1:]"), + Token(kind=TOKEN_LBRACKET, value="[", index=0, path="[1:]"), + Token(kind=TOKEN_INT, value="1", index=1, path="[1:]"), + Token(kind=TOKEN_COLON, value=":", index=2, path="[1:]"), Token(kind=TOKEN_RBRACKET, value="]", index=3, path="[1:]"), ], ), @@ -201,10 +189,9 @@ class Case: description="slice empty start", path="[:-1]", want=[ - Token(kind=TOKEN_LIST_START, value="[", index=0, path="[:-1]"), - Token(kind=TOKEN_SLICE_START, value="", index=1, path="[:-1]"), - Token(kind=TOKEN_SLICE_STOP, value="-1", index=2, path="[:-1]"), - Token(kind=TOKEN_SLICE_STEP, value="", index=-1, path="[:-1]"), + Token(kind=TOKEN_LBRACKET, value="[", index=0, path="[:-1]"), + Token(kind=TOKEN_COLON, value=":", index=1, path="[:-1]"), + Token(kind=TOKEN_INT, value="-1", index=2, path="[:-1]"), Token(kind=TOKEN_RBRACKET, value="]", index=4, path="[:-1]"), ], ), @@ -212,10 +199,10 @@ class Case: description="slice start and stop", path="[1:7]", want=[ - Token(kind=TOKEN_LIST_START, value="[", index=0, path="[1:7]"), - Token(kind=TOKEN_SLICE_START, value="1", index=1, path="[1:7]"), - Token(kind=TOKEN_SLICE_STOP, value="7", index=3, path="[1:7]"), - Token(kind=TOKEN_SLICE_STEP, value="", index=-1, path="[1:7]"), + Token(kind=TOKEN_LBRACKET, value="[", index=0, path="[1:7]"), + Token(kind=TOKEN_INT, value="1", index=1, path="[1:7]"), + Token(kind=TOKEN_COLON, value=":", index=2, path="[1:7]"), + Token(kind=TOKEN_INT, value="7", index=3, path="[1:7]"), Token(kind=TOKEN_RBRACKET, value="]", index=4, path="[1:7]"), ], ), @@ -223,10 +210,12 @@ class Case: description="slice start, stop and step", path="[1:7:2]", want=[ - Token(kind=TOKEN_LIST_START, value="[", index=0, path="[1:7:2]"), - Token(kind=TOKEN_SLICE_START, value="1", index=1, path="[1:7:2]"), - Token(kind=TOKEN_SLICE_STOP, value="7", index=3, path="[1:7:2]"), - Token(kind=TOKEN_SLICE_STEP, value="2", index=5, path="[1:7:2]"), + Token(kind=TOKEN_LBRACKET, value="[", index=0, path="[1:7:2]"), + Token(kind=TOKEN_INT, 
value="1", index=1, path="[1:7:2]"), + Token(kind=TOKEN_COLON, value=":", index=2, path="[1:7:2]"), + Token(kind=TOKEN_INT, value="7", index=3, path="[1:7:2]"), + Token(kind=TOKEN_COLON, value=":", index=4, path="[1:7:2]"), + Token(kind=TOKEN_INT, value="2", index=5, path="[1:7:2]"), Token(kind=TOKEN_RBRACKET, value="]", index=6, path="[1:7:2]"), ], ), @@ -235,6 +224,7 @@ class Case: path="$.*", want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path="$.*"), + Token(kind=TOKEN_DOT, value=".", index=1, path="$.*"), Token(kind=TOKEN_WILD, value="*", index=2, path="$.*"), ], ), @@ -243,7 +233,7 @@ class Case: path="$[*]", want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path="$[*]"), - Token(kind=TOKEN_LIST_START, value="[", index=1, path="$[*]"), + Token(kind=TOKEN_LBRACKET, value="[", index=1, path="$[*]"), Token(kind=TOKEN_WILD, value="*", index=2, path="$[*]"), Token(kind=TOKEN_RBRACKET, value="]", index=3, path="$[*]"), ], @@ -253,7 +243,8 @@ class Case: path="$.[*]", want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path="$.[*]"), - Token(kind=TOKEN_LIST_START, value="[", index=2, path="$.[*]"), + Token(kind=TOKEN_DOT, value=".", index=1, path="$.[*]"), + Token(kind=TOKEN_LBRACKET, value="[", index=2, path="$.[*]"), Token(kind=TOKEN_WILD, value="*", index=3, path="$.[*]"), Token(kind=TOKEN_RBRACKET, value="]", index=4, path="$.[*]"), ], @@ -272,7 +263,7 @@ class Case: want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path="$..thing"), Token(kind=TOKEN_DDOT, value="..", index=1, path="$..thing"), - Token(kind=TOKEN_BARE_PROPERTY, value="thing", index=3, path="$..thing"), + Token(kind=TOKEN_NAME, value="thing", index=3, path="$..thing"), ], ), Case( @@ -281,7 +272,8 @@ class Case: want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path="$...thing"), Token(kind=TOKEN_DDOT, value="..", index=1, path="$...thing"), - Token(kind=TOKEN_PROPERTY, value="thing", index=4, path="$...thing"), + Token(kind=TOKEN_DOT, value=".", index=3, path="$...thing"), + Token(kind=TOKEN_NAME, value="thing", index=4, path="$...thing"), ], ), Case( @@ -289,7 +281,7 @@ class Case: path="$[1,4,5]", want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path="$[1,4,5]"), - Token(kind=TOKEN_LIST_START, value="[", index=1, path="$[1,4,5]"), + Token(kind=TOKEN_LBRACKET, value="[", index=1, path="$[1,4,5]"), Token(kind=TOKEN_INT, value="1", index=2, path="$[1,4,5]"), Token(kind=TOKEN_COMMA, value=",", index=3, path="$[1,4,5]"), Token(kind=TOKEN_INT, value="4", index=4, path="$[1,4,5]"), @@ -303,12 +295,12 @@ class Case: path="$[1,4:9]", want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path="$[1,4:9]"), - Token(kind=TOKEN_LIST_START, value="[", index=1, path="$[1,4:9]"), + Token(kind=TOKEN_LBRACKET, value="[", index=1, path="$[1,4:9]"), Token(kind=TOKEN_INT, value="1", index=2, path="$[1,4:9]"), Token(kind=TOKEN_COMMA, value=",", index=3, path="$[1,4:9]"), - Token(kind=TOKEN_SLICE_START, value="4", index=4, path="$[1,4:9]"), - Token(kind=TOKEN_SLICE_STOP, value="9", index=6, path="$[1,4:9]"), - Token(kind=TOKEN_SLICE_STEP, value="", index=-1, path="$[1,4:9]"), + Token(kind=TOKEN_INT, value="4", index=4, path="$[1,4:9]"), + Token(kind=TOKEN_COLON, value=":", index=5, path="$[1,4:9]"), + Token(kind=TOKEN_INT, value="9", index=6, path="$[1,4:9]"), Token(kind=TOKEN_RBRACKET, value="]", index=7, path="$[1,4:9]"), ], ), @@ -317,14 +309,10 @@ class Case: path="$[some,thing]", want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path="$[some,thing]"), - Token(kind=TOKEN_LIST_START, value="[", index=1, path="$[some,thing]"), - Token( - 
kind=TOKEN_BARE_PROPERTY, value="some", index=2, path="$[some,thing]" - ), + Token(kind=TOKEN_LBRACKET, value="[", index=1, path="$[some,thing]"), + Token(kind=TOKEN_NAME, value="some", index=2, path="$[some,thing]"), Token(kind=TOKEN_COMMA, value=",", index=6, path="$[some,thing]"), - Token( - kind=TOKEN_BARE_PROPERTY, value="thing", index=7, path="$[some,thing]" - ), + Token(kind=TOKEN_NAME, value="thing", index=7, path="$[some,thing]"), Token(kind=TOKEN_RBRACKET, value="]", index=12, path="$[some,thing]"), ], ), @@ -333,11 +321,13 @@ class Case: path="$.[?(@.some)]", want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path="$.[?(@.some)]"), - Token(kind=TOKEN_LIST_START, value="[", index=2, path="$.[?(@.some)]"), + Token(kind=TOKEN_DOT, value=".", index=1, path="$.[?(@.some)]"), + Token(kind=TOKEN_LBRACKET, value="[", index=2, path="$.[?(@.some)]"), Token(kind=TOKEN_FILTER, value="?", index=3, path="$.[?(@.some)]"), Token(kind=TOKEN_LPAREN, value="(", index=4, path="$.[?(@.some)]"), Token(kind=TOKEN_SELF, value="@", index=5, path="$.[?(@.some)]"), - Token(kind=TOKEN_PROPERTY, value="some", index=7, path="$.[?(@.some)]"), + Token(kind=TOKEN_DOT, value=".", index=6, path="$.[?(@.some)]"), + Token(kind=TOKEN_NAME, value="some", index=7, path="$.[?(@.some)]"), Token(kind=TOKEN_RPAREN, value=")", index=11, path="$.[?(@.some)]"), Token(kind=TOKEN_RBRACKET, value="]", index=12, path="$.[?(@.some)]"), ], @@ -347,11 +337,13 @@ class Case: path="$.[?($.some)]", want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path="$.[?($.some)]"), - Token(kind=TOKEN_LIST_START, value="[", index=2, path="$.[?($.some)]"), + Token(kind=TOKEN_DOT, value=".", index=1, path="$.[?($.some)]"), + Token(kind=TOKEN_LBRACKET, value="[", index=2, path="$.[?($.some)]"), Token(kind=TOKEN_FILTER, value="?", index=3, path="$.[?($.some)]"), Token(kind=TOKEN_LPAREN, value="(", index=4, path="$.[?($.some)]"), Token(kind=TOKEN_ROOT, value="$", index=5, path="$.[?($.some)]"), - Token(kind=TOKEN_PROPERTY, value="some", index=7, path="$.[?($.some)]"), + Token(kind=TOKEN_DOT, value=".", index=6, path="$.[?($.some)]"), + Token(kind=TOKEN_NAME, value="some", index=7, path="$.[?($.some)]"), Token(kind=TOKEN_RPAREN, value=")", index=11, path="$.[?($.some)]"), Token(kind=TOKEN_RBRACKET, value="]", index=12, path="$.[?($.some)]"), ], @@ -361,11 +353,12 @@ class Case: path="$.[?(@[1])]", want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path="$.[?(@[1])]"), - Token(kind=TOKEN_LIST_START, value="[", index=2, path="$.[?(@[1])]"), + Token(kind=TOKEN_DOT, value=".", index=1, path="$.[?(@[1])]"), + Token(kind=TOKEN_LBRACKET, value="[", index=2, path="$.[?(@[1])]"), Token(kind=TOKEN_FILTER, value="?", index=3, path="$.[?(@[1])]"), Token(kind=TOKEN_LPAREN, value="(", index=4, path="$.[?(@[1])]"), Token(kind=TOKEN_SELF, value="@", index=5, path="$.[?(@[1])]"), - Token(kind=TOKEN_LIST_START, value="[", index=6, path="$.[?(@[1])]"), + Token(kind=TOKEN_LBRACKET, value="[", index=6, path="$.[?(@[1])]"), Token(kind=TOKEN_INT, value="1", index=7, path="$.[?(@[1])]"), Token(kind=TOKEN_RBRACKET, value="]", index=8, path="$.[?(@[1])]"), Token(kind=TOKEN_RPAREN, value=")", index=9, path="$.[?(@[1])]"), @@ -376,43 +369,41 @@ class Case: description="filter self dot property equality with float", path="[?(@.some == 1.1)]", want=[ - Token(kind=TOKEN_LIST_START, value="[", index=0, path="[?(@.some == 1.1)]"), + Token(kind=TOKEN_LBRACKET, value="[", index=0, path="[?(@.some == 1.1)]"), Token(kind=TOKEN_FILTER, value="?", index=1, path="[?(@.some == 1.1)]"), 
Token(kind=TOKEN_LPAREN, value="(", index=2, path="[?(@.some == 1.1)]"), Token(kind=TOKEN_SELF, value="@", index=3, path="[?(@.some == 1.1)]"), + Token(kind=TOKEN_DOT, value=".", index=4, path="[?(@.some == 1.1)]"), + Token(kind=TOKEN_NAME, value="some", index=5, path="[?(@.some == 1.1)]"), + Token(kind=TOKEN_WHITESPACE, value=" ", index=9, path="[?(@.some == 1.1)]"), + Token(kind=TOKEN_EQ, value="==", index=10, path="[?(@.some == 1.1)]"), Token( - kind=TOKEN_PROPERTY, value="some", index=5, path="[?(@.some == 1.1)]" + kind=TOKEN_WHITESPACE, value=" ", index=12, path="[?(@.some == 1.1)]" ), - Token(kind=TOKEN_EQ, value="==", index=10, path="[?(@.some == 1.1)]"), Token(kind=TOKEN_FLOAT, value="1.1", index=13, path="[?(@.some == 1.1)]"), Token(kind=TOKEN_RPAREN, value=")", index=16, path="[?(@.some == 1.1)]"), Token(kind=TOKEN_RBRACKET, value="]", index=17, path="[?(@.some == 1.1)]"), ], ), Case( - description=( - "filter self dot property equality with float in scientific notation" - ), + description="filter self dot property equality float in scientific notation", path="[?(@.some == 1.1e10)]", want=[ Token( - kind=TOKEN_LIST_START, - value="[", - index=0, - path="[?(@.some == 1.1e10)]", + kind=TOKEN_LBRACKET, value="[", index=0, path="[?(@.some == 1.1e10)]" ), Token(kind=TOKEN_FILTER, value="?", index=1, path="[?(@.some == 1.1e10)]"), - Token( - kind=TOKEN_LPAREN, - value="(", - index=2, - path="[?(@.some == 1.1e10)]", - ), + Token(kind=TOKEN_LPAREN, value="(", index=2, path="[?(@.some == 1.1e10)]"), Token(kind=TOKEN_SELF, value="@", index=3, path="[?(@.some == 1.1e10)]"), + Token(kind=TOKEN_DOT, value=".", index=4, path="[?(@.some == 1.1e10)]"), + Token(kind=TOKEN_NAME, value="some", index=5, path="[?(@.some == 1.1e10)]"), Token( - kind=TOKEN_PROPERTY, value="some", index=5, path="[?(@.some == 1.1e10)]" + kind=TOKEN_WHITESPACE, value=" ", index=9, path="[?(@.some == 1.1e10)]" ), Token(kind=TOKEN_EQ, value="==", index=10, path="[?(@.some == 1.1e10)]"), + Token( + kind=TOKEN_WHITESPACE, value=" ", index=12, path="[?(@.some == 1.1e10)]" + ), Token( kind=TOKEN_FLOAT, value="1.1e10", index=13, path="[?(@.some == 1.1e10)]" ), @@ -426,14 +417,16 @@ class Case: description="filter self index equality with float", path="[?(@[1] == 1.1)]", want=[ - Token(kind=TOKEN_LIST_START, value="[", index=0, path="[?(@[1] == 1.1)]"), + Token(kind=TOKEN_LBRACKET, value="[", index=0, path="[?(@[1] == 1.1)]"), Token(kind=TOKEN_FILTER, value="?", index=1, path="[?(@[1] == 1.1)]"), Token(kind=TOKEN_LPAREN, value="(", index=2, path="[?(@[1] == 1.1)]"), Token(kind=TOKEN_SELF, value="@", index=3, path="[?(@[1] == 1.1)]"), - Token(kind=TOKEN_LIST_START, value="[", index=4, path="[?(@[1] == 1.1)]"), + Token(kind=TOKEN_LBRACKET, value="[", index=4, path="[?(@[1] == 1.1)]"), Token(kind=TOKEN_INT, value="1", index=5, path="[?(@[1] == 1.1)]"), Token(kind=TOKEN_RBRACKET, value="]", index=6, path="[?(@[1] == 1.1)]"), + Token(kind=TOKEN_WHITESPACE, value=" ", index=7, path="[?(@[1] == 1.1)]"), Token(kind=TOKEN_EQ, value="==", index=8, path="[?(@[1] == 1.1)]"), + Token(kind=TOKEN_WHITESPACE, value=" ", index=10, path="[?(@[1] == 1.1)]"), Token(kind=TOKEN_FLOAT, value="1.1", index=11, path="[?(@[1] == 1.1)]"), Token(kind=TOKEN_RPAREN, value=")", index=14, path="[?(@[1] == 1.1)]"), Token(kind=TOKEN_RBRACKET, value="]", index=15, path="[?(@[1] == 1.1)]"), @@ -443,12 +436,15 @@ class Case: description="filter self dot property equality with int", path="[?(@.some == 1)]", want=[ - Token(kind=TOKEN_LIST_START, value="[", index=0, 
path="[?(@.some == 1)]"), + Token(kind=TOKEN_LBRACKET, value="[", index=0, path="[?(@.some == 1)]"), Token(kind=TOKEN_FILTER, value="?", index=1, path="[?(@.some == 1)]"), Token(kind=TOKEN_LPAREN, value="(", index=2, path="[?(@.some == 1)]"), Token(kind=TOKEN_SELF, value="@", index=3, path="[?(@.some == 1)]"), - Token(kind=TOKEN_PROPERTY, value="some", index=5, path="[?(@.some == 1)]"), + Token(kind=TOKEN_DOT, value=".", index=4, path="[?(@.some == 1)]"), + Token(kind=TOKEN_NAME, value="some", index=5, path="[?(@.some == 1)]"), + Token(kind=TOKEN_WHITESPACE, value=" ", index=9, path="[?(@.some == 1)]"), Token(kind=TOKEN_EQ, value="==", index=10, path="[?(@.some == 1)]"), + Token(kind=TOKEN_WHITESPACE, value=" ", index=12, path="[?(@.some == 1)]"), Token(kind=TOKEN_INT, value="1", index=13, path="[?(@.some == 1)]"), Token(kind=TOKEN_RPAREN, value=")", index=14, path="[?(@.some == 1)]"), Token(kind=TOKEN_RBRACKET, value="]", index=15, path="[?(@.some == 1)]"), @@ -458,29 +454,19 @@ class Case: description="filter self dot property equality with int in scientific notation", path="[?(@.some == 1e10)]", want=[ - Token( - kind=TOKEN_LIST_START, - value="[", - index=0, - path="[?(@.some == 1e10)]", - ), - Token( - kind=TOKEN_FILTER, - value="?", - index=1, - path="[?(@.some == 1e10)]", - ), - Token( - kind=TOKEN_LPAREN, - value="(", - index=2, - path="[?(@.some == 1e10)]", - ), + Token(kind=TOKEN_LBRACKET, value="[", index=0, path="[?(@.some == 1e10)]"), + Token(kind=TOKEN_FILTER, value="?", index=1, path="[?(@.some == 1e10)]"), + Token(kind=TOKEN_LPAREN, value="(", index=2, path="[?(@.some == 1e10)]"), Token(kind=TOKEN_SELF, value="@", index=3, path="[?(@.some == 1e10)]"), + Token(kind=TOKEN_DOT, value=".", index=4, path="[?(@.some == 1e10)]"), + Token(kind=TOKEN_NAME, value="some", index=5, path="[?(@.some == 1e10)]"), Token( - kind=TOKEN_PROPERTY, value="some", index=5, path="[?(@.some == 1e10)]" + kind=TOKEN_WHITESPACE, value=" ", index=9, path="[?(@.some == 1e10)]" ), Token(kind=TOKEN_EQ, value="==", index=10, path="[?(@.some == 1e10)]"), + Token( + kind=TOKEN_WHITESPACE, value=" ", index=12, path="[?(@.some == 1e10)]" + ), Token(kind=TOKEN_INT, value="1e10", index=13, path="[?(@.some == 1e10)]"), Token(kind=TOKEN_RPAREN, value=")", index=17, path="[?(@.some == 1e10)]"), Token(kind=TOKEN_RBRACKET, value="]", index=18, path="[?(@.some == 1e10)]"), @@ -491,36 +477,37 @@ class Case: path="[?(@.some =~ /foo|bar/i)]", want=[ Token( - kind=TOKEN_LIST_START, + kind=TOKEN_LBRACKET, value="[", index=0, path="[?(@.some =~ /foo|bar/i)]", ), Token( - kind=TOKEN_FILTER, - value="?", - index=1, - path="[?(@.some =~ /foo|bar/i)]", + kind=TOKEN_FILTER, value="?", index=1, path="[?(@.some =~ /foo|bar/i)]" ), Token( - kind=TOKEN_LPAREN, - value="(", - index=2, - path="[?(@.some =~ /foo|bar/i)]", + kind=TOKEN_LPAREN, value="(", index=2, path="[?(@.some =~ /foo|bar/i)]" ), Token( kind=TOKEN_SELF, value="@", index=3, path="[?(@.some =~ /foo|bar/i)]" ), + Token(kind=TOKEN_DOT, value=".", index=4, path="[?(@.some =~ /foo|bar/i)]"), Token( - kind=TOKEN_PROPERTY, - value="some", - index=5, + kind=TOKEN_NAME, value="some", index=5, path="[?(@.some =~ /foo|bar/i)]" + ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=9, path="[?(@.some =~ /foo|bar/i)]", ), Token( - kind=TOKEN_RE, - value="=~", - index=10, + kind=TOKEN_RE, value="=~", index=10, path="[?(@.some =~ /foo|bar/i)]" + ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=12, path="[?(@.some =~ /foo|bar/i)]", ), Token( @@ -536,10 +523,7 @@ class 
Case: path="[?(@.some =~ /foo|bar/i)]", ), Token( - kind=TOKEN_RPAREN, - value=")", - index=23, - path="[?(@.some =~ /foo|bar/i)]", + kind=TOKEN_RPAREN, value=")", index=23, path="[?(@.some =~ /foo|bar/i)]" ), Token( kind=TOKEN_RBRACKET, @@ -554,12 +538,14 @@ class Case: path="$.some | $.thing", want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path="$.some | $.thing"), - Token(kind=TOKEN_PROPERTY, value="some", index=2, path="$.some | $.thing"), + Token(kind=TOKEN_DOT, value=".", index=1, path="$.some | $.thing"), + Token(kind=TOKEN_NAME, value="some", index=2, path="$.some | $.thing"), + Token(kind=TOKEN_WHITESPACE, value=" ", index=6, path="$.some | $.thing"), Token(kind=TOKEN_UNION, value="|", index=7, path="$.some | $.thing"), + Token(kind=TOKEN_WHITESPACE, value=" ", index=8, path="$.some | $.thing"), Token(kind=TOKEN_ROOT, value="$", index=9, path="$.some | $.thing"), - Token( - kind=TOKEN_PROPERTY, value="thing", index=11, path="$.some | $.thing" - ), + Token(kind=TOKEN_DOT, value=".", index=10, path="$.some | $.thing"), + Token(kind=TOKEN_NAME, value="thing", index=11, path="$.some | $.thing"), ], ), Case( @@ -570,31 +556,64 @@ class Case: kind=TOKEN_ROOT, value="$", index=0, path="$.some | $.thing | $.other" ), Token( - kind=TOKEN_PROPERTY, + kind=TOKEN_DOT, value=".", index=1, path="$.some | $.thing | $.other" + ), + Token( + kind=TOKEN_NAME, value="some", index=2, path="$.some | $.thing | $.other", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=6, + path="$.some | $.thing | $.other", + ), Token( kind=TOKEN_UNION, value="|", index=7, path="$.some | $.thing | $.other" ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=8, + path="$.some | $.thing | $.other", + ), Token( kind=TOKEN_ROOT, value="$", index=9, path="$.some | $.thing | $.other" ), Token( - kind=TOKEN_PROPERTY, + kind=TOKEN_DOT, value=".", index=10, path="$.some | $.thing | $.other" + ), + Token( + kind=TOKEN_NAME, value="thing", index=11, path="$.some | $.thing | $.other", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=16, + path="$.some | $.thing | $.other", + ), Token( kind=TOKEN_UNION, value="|", index=17, path="$.some | $.thing | $.other" ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=18, + path="$.some | $.thing | $.other", + ), Token( kind=TOKEN_ROOT, value="$", index=19, path="$.some | $.thing | $.other" ), Token( - kind=TOKEN_PROPERTY, + kind=TOKEN_DOT, value=".", index=20, path="$.some | $.thing | $.other" + ), + Token( + kind=TOKEN_NAME, value="other", index=21, path="$.some | $.thing | $.other", @@ -606,12 +625,14 @@ class Case: path="$.some & $.thing", want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path="$.some & $.thing"), - Token(kind=TOKEN_PROPERTY, value="some", index=2, path="$.some & $.thing"), + Token(kind=TOKEN_DOT, value=".", index=1, path="$.some & $.thing"), + Token(kind=TOKEN_NAME, value="some", index=2, path="$.some & $.thing"), + Token(kind=TOKEN_WHITESPACE, value=" ", index=6, path="$.some & $.thing"), Token(kind=TOKEN_INTERSECTION, value="&", index=7, path="$.some & $.thing"), + Token(kind=TOKEN_WHITESPACE, value=" ", index=8, path="$.some & $.thing"), Token(kind=TOKEN_ROOT, value="$", index=9, path="$.some & $.thing"), - Token( - kind=TOKEN_PROPERTY, value="thing", index=11, path="$.some & $.thing" - ), + Token(kind=TOKEN_DOT, value=".", index=10, path="$.some & $.thing"), + Token(kind=TOKEN_NAME, value="thing", index=11, path="$.some & $.thing"), ], ), Case( @@ -619,7 +640,7 @@ class Case: path="[?(@.some > 1 and @.some < 5)]", want=[ Token( - 
kind=TOKEN_LIST_START, + kind=TOKEN_LBRACKET, value="[", index=0, path="[?(@.some > 1 and @.some < 5)]", @@ -643,29 +664,59 @@ class Case: path="[?(@.some > 1 and @.some < 5)]", ), Token( - kind=TOKEN_PROPERTY, + kind=TOKEN_DOT, + value=".", + index=4, + path="[?(@.some > 1 and @.some < 5)]", + ), + Token( + kind=TOKEN_NAME, value="some", index=5, path="[?(@.some > 1 and @.some < 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=9, + path="[?(@.some > 1 and @.some < 5)]", + ), Token( kind=TOKEN_GT, value=">", index=10, path="[?(@.some > 1 and @.some < 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=11, + path="[?(@.some > 1 and @.some < 5)]", + ), Token( kind=TOKEN_INT, value="1", index=12, path="[?(@.some > 1 and @.some < 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=13, + path="[?(@.some > 1 and @.some < 5)]", + ), Token( kind=TOKEN_AND, value="and", index=14, path="[?(@.some > 1 and @.some < 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=17, + path="[?(@.some > 1 and @.some < 5)]", + ), Token( kind=TOKEN_SELF, value="@", @@ -673,17 +724,35 @@ class Case: path="[?(@.some > 1 and @.some < 5)]", ), Token( - kind=TOKEN_PROPERTY, + kind=TOKEN_DOT, + value=".", + index=19, + path="[?(@.some > 1 and @.some < 5)]", + ), + Token( + kind=TOKEN_NAME, value="some", index=20, path="[?(@.some > 1 and @.some < 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=24, + path="[?(@.some > 1 and @.some < 5)]", + ), Token( kind=TOKEN_LT, value="<", index=25, path="[?(@.some > 1 and @.some < 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=26, + path="[?(@.some > 1 and @.some < 5)]", + ), Token( kind=TOKEN_INT, value="5", @@ -709,7 +778,7 @@ class Case: path="[?(@.some == 1 or @.some == 5)]", want=[ Token( - kind=TOKEN_LIST_START, + kind=TOKEN_LBRACKET, value="[", index=0, path="[?(@.some == 1 or @.some == 5)]", @@ -733,29 +802,59 @@ class Case: path="[?(@.some == 1 or @.some == 5)]", ), Token( - kind=TOKEN_PROPERTY, + kind=TOKEN_DOT, + value=".", + index=4, + path="[?(@.some == 1 or @.some == 5)]", + ), + Token( + kind=TOKEN_NAME, value="some", index=5, path="[?(@.some == 1 or @.some == 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=9, + path="[?(@.some == 1 or @.some == 5)]", + ), Token( kind=TOKEN_EQ, value="==", index=10, path="[?(@.some == 1 or @.some == 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=12, + path="[?(@.some == 1 or @.some == 5)]", + ), Token( kind=TOKEN_INT, value="1", index=13, path="[?(@.some == 1 or @.some == 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=14, + path="[?(@.some == 1 or @.some == 5)]", + ), Token( kind=TOKEN_OR, value="or", index=15, path="[?(@.some == 1 or @.some == 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=17, + path="[?(@.some == 1 or @.some == 5)]", + ), Token( kind=TOKEN_SELF, value="@", @@ -763,17 +862,35 @@ class Case: path="[?(@.some == 1 or @.some == 5)]", ), Token( - kind=TOKEN_PROPERTY, + kind=TOKEN_DOT, + value=".", + index=19, + path="[?(@.some == 1 or @.some == 5)]", + ), + Token( + kind=TOKEN_NAME, value="some", index=20, path="[?(@.some == 1 or @.some == 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=24, + path="[?(@.some == 1 or @.some == 5)]", + ), Token( kind=TOKEN_EQ, value="==", index=25, path="[?(@.some == 1 or @.some == 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=27, + path="[?(@.some == 1 or @.some == 5)]", + ), Token( kind=TOKEN_INT, value="5", @@ 
-799,7 +916,7 @@ class Case: path="[?(@.some == 1 || @.some == 5)]", want=[ Token( - kind=TOKEN_LIST_START, + kind=TOKEN_LBRACKET, value="[", index=0, path="[?(@.some == 1 || @.some == 5)]", @@ -823,29 +940,59 @@ class Case: path="[?(@.some == 1 || @.some == 5)]", ), Token( - kind=TOKEN_PROPERTY, + kind=TOKEN_DOT, + value=".", + index=4, + path="[?(@.some == 1 || @.some == 5)]", + ), + Token( + kind=TOKEN_NAME, value="some", index=5, path="[?(@.some == 1 || @.some == 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=9, + path="[?(@.some == 1 || @.some == 5)]", + ), Token( kind=TOKEN_EQ, value="==", index=10, path="[?(@.some == 1 || @.some == 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=12, + path="[?(@.some == 1 || @.some == 5)]", + ), Token( kind=TOKEN_INT, value="1", index=13, path="[?(@.some == 1 || @.some == 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=14, + path="[?(@.some == 1 || @.some == 5)]", + ), Token( kind=TOKEN_OR, value="||", index=15, path="[?(@.some == 1 || @.some == 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=17, + path="[?(@.some == 1 || @.some == 5)]", + ), Token( kind=TOKEN_SELF, value="@", @@ -853,17 +1000,35 @@ class Case: path="[?(@.some == 1 || @.some == 5)]", ), Token( - kind=TOKEN_PROPERTY, + kind=TOKEN_DOT, + value=".", + index=19, + path="[?(@.some == 1 || @.some == 5)]", + ), + Token( + kind=TOKEN_NAME, value="some", index=20, path="[?(@.some == 1 || @.some == 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=24, + path="[?(@.some == 1 || @.some == 5)]", + ), Token( kind=TOKEN_EQ, value="==", index=25, path="[?(@.some == 1 || @.some == 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=27, + path="[?(@.some == 1 || @.some == 5)]", + ), Token( kind=TOKEN_INT, value="5", @@ -889,33 +1054,34 @@ class Case: path="[?(@.thing in [1, '1'])]", want=[ Token( - kind=TOKEN_LIST_START, - value="[", - index=0, - path="[?(@.thing in [1, '1'])]", + kind=TOKEN_LBRACKET, value="[", index=0, path="[?(@.thing in [1, '1'])]" ), Token( - kind=TOKEN_FILTER, - value="?", - index=1, - path="[?(@.thing in [1, '1'])]", + kind=TOKEN_FILTER, value="?", index=1, path="[?(@.thing in [1, '1'])]" ), Token( - kind=TOKEN_LPAREN, - value="(", - index=2, - path="[?(@.thing in [1, '1'])]", + kind=TOKEN_LPAREN, value="(", index=2, path="[?(@.thing in [1, '1'])]" ), Token(kind=TOKEN_SELF, value="@", index=3, path="[?(@.thing in [1, '1'])]"), + Token(kind=TOKEN_DOT, value=".", index=4, path="[?(@.thing in [1, '1'])]"), Token( - kind=TOKEN_PROPERTY, - value="thing", - index=5, + kind=TOKEN_NAME, value="thing", index=5, path="[?(@.thing in [1, '1'])]" + ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=10, path="[?(@.thing in [1, '1'])]", ), Token(kind=TOKEN_IN, value="in", index=11, path="[?(@.thing in [1, '1'])]"), Token( - kind=TOKEN_LIST_START, + kind=TOKEN_WHITESPACE, + value=" ", + index=13, + path="[?(@.thing in [1, '1'])]", + ), + Token( + kind=TOKEN_LBRACKET, value="[", index=14, path="[?(@.thing in [1, '1'])]", @@ -924,6 +1090,12 @@ class Case: Token( kind=TOKEN_COMMA, value=",", index=16, path="[?(@.thing in [1, '1'])]" ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=17, + path="[?(@.thing in [1, '1'])]", + ), Token( kind=TOKEN_SINGLE_QUOTE_STRING, value="1", @@ -937,10 +1109,7 @@ class Case: path="[?(@.thing in [1, '1'])]", ), Token( - kind=TOKEN_RPAREN, - value=")", - index=22, - path="[?(@.thing in [1, '1'])]", + kind=TOKEN_RPAREN, value=")", index=22, path="[?(@.thing in 
[1, '1'])]" ), Token( kind=TOKEN_RBRACKET, @@ -955,7 +1124,7 @@ class Case: path="[?(@.some == 1 or not @.some < 5)]", want=[ Token( - kind=TOKEN_LIST_START, + kind=TOKEN_LBRACKET, value="[", index=0, path="[?(@.some == 1 or not @.some < 5)]", @@ -979,35 +1148,71 @@ class Case: path="[?(@.some == 1 or not @.some < 5)]", ), Token( - kind=TOKEN_PROPERTY, + kind=TOKEN_DOT, + value=".", + index=4, + path="[?(@.some == 1 or not @.some < 5)]", + ), + Token( + kind=TOKEN_NAME, value="some", index=5, path="[?(@.some == 1 or not @.some < 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=9, + path="[?(@.some == 1 or not @.some < 5)]", + ), Token( kind=TOKEN_EQ, value="==", index=10, path="[?(@.some == 1 or not @.some < 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=12, + path="[?(@.some == 1 or not @.some < 5)]", + ), Token( kind=TOKEN_INT, value="1", index=13, path="[?(@.some == 1 or not @.some < 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=14, + path="[?(@.some == 1 or not @.some < 5)]", + ), Token( kind=TOKEN_OR, value="or", index=15, path="[?(@.some == 1 or not @.some < 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=17, + path="[?(@.some == 1 or not @.some < 5)]", + ), Token( kind=TOKEN_NOT, value="not", index=18, path="[?(@.some == 1 or not @.some < 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=21, + path="[?(@.some == 1 or not @.some < 5)]", + ), Token( kind=TOKEN_SELF, value="@", @@ -1015,17 +1220,35 @@ class Case: path="[?(@.some == 1 or not @.some < 5)]", ), Token( - kind=TOKEN_PROPERTY, + kind=TOKEN_DOT, + value=".", + index=23, + path="[?(@.some == 1 or not @.some < 5)]", + ), + Token( + kind=TOKEN_NAME, value="some", index=24, path="[?(@.some == 1 or not @.some < 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=28, + path="[?(@.some == 1 or not @.some < 5)]", + ), Token( kind=TOKEN_LT, value="<", index=29, path="[?(@.some == 1 or not @.some < 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=30, + path="[?(@.some == 1 or not @.some < 5)]", + ), Token( kind=TOKEN_INT, value="5", @@ -1051,7 +1274,7 @@ class Case: path="[?(@.some == 1 or !@.some < 5)]", want=[ Token( - kind=TOKEN_LIST_START, + kind=TOKEN_LBRACKET, value="[", index=0, path="[?(@.some == 1 or !@.some < 5)]", @@ -1075,29 +1298,59 @@ class Case: path="[?(@.some == 1 or !@.some < 5)]", ), Token( - kind=TOKEN_PROPERTY, + kind=TOKEN_DOT, + value=".", + index=4, + path="[?(@.some == 1 or !@.some < 5)]", + ), + Token( + kind=TOKEN_NAME, value="some", index=5, path="[?(@.some == 1 or !@.some < 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=9, + path="[?(@.some == 1 or !@.some < 5)]", + ), Token( kind=TOKEN_EQ, value="==", index=10, path="[?(@.some == 1 or !@.some < 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=12, + path="[?(@.some == 1 or !@.some < 5)]", + ), Token( kind=TOKEN_INT, value="1", index=13, path="[?(@.some == 1 or !@.some < 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=14, + path="[?(@.some == 1 or !@.some < 5)]", + ), Token( kind=TOKEN_OR, value="or", index=15, path="[?(@.some == 1 or !@.some < 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=17, + path="[?(@.some == 1 or !@.some < 5)]", + ), Token( kind=TOKEN_NOT, value="!", @@ -1111,17 +1364,35 @@ class Case: path="[?(@.some == 1 or !@.some < 5)]", ), Token( - kind=TOKEN_PROPERTY, + kind=TOKEN_DOT, + value=".", + index=20, + path="[?(@.some == 1 or !@.some < 5)]", + ), + Token( + 
kind=TOKEN_NAME, value="some", index=21, path="[?(@.some == 1 or !@.some < 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=25, + path="[?(@.some == 1 or !@.some < 5)]", + ), Token( kind=TOKEN_LT, value="<", index=26, path="[?(@.some == 1 or !@.some < 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=27, + path="[?(@.some == 1 or !@.some < 5)]", + ), Token( kind=TOKEN_INT, value="5", @@ -1146,26 +1417,15 @@ class Case: description="filter true and false", path="[?(true == false)]", want=[ - Token( - kind=TOKEN_LIST_START, - value="[", - index=0, - path="[?(true == false)]", - ), - Token( - kind=TOKEN_FILTER, - value="?", - index=1, - path="[?(true == false)]", - ), - Token( - kind=TOKEN_LPAREN, - value="(", - index=2, - path="[?(true == false)]", - ), + Token(kind=TOKEN_LBRACKET, value="[", index=0, path="[?(true == false)]"), + Token(kind=TOKEN_FILTER, value="?", index=1, path="[?(true == false)]"), + Token(kind=TOKEN_LPAREN, value="(", index=2, path="[?(true == false)]"), Token(kind=TOKEN_TRUE, value="true", index=3, path="[?(true == false)]"), + Token(kind=TOKEN_WHITESPACE, value=" ", index=7, path="[?(true == false)]"), Token(kind=TOKEN_EQ, value="==", index=8, path="[?(true == false)]"), + Token( + kind=TOKEN_WHITESPACE, value=" ", index=10, path="[?(true == false)]" + ), Token(kind=TOKEN_FALSE, value="false", index=11, path="[?(true == false)]"), Token(kind=TOKEN_RPAREN, value=")", index=16, path="[?(true == false)]"), Token(kind=TOKEN_RBRACKET, value="]", index=17, path="[?(true == false)]"), @@ -1176,7 +1436,7 @@ class Case: path="[?(nil == none && nil == null)]", want=[ Token( - kind=TOKEN_LIST_START, + kind=TOKEN_LBRACKET, value="[", index=0, path="[?(nil == none && nil == null)]", @@ -1199,36 +1459,72 @@ class Case: index=3, path="[?(nil == none && nil == null)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=6, + path="[?(nil == none && nil == null)]", + ), Token( kind=TOKEN_EQ, value="==", index=7, path="[?(nil == none && nil == null)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=9, + path="[?(nil == none && nil == null)]", + ), Token( kind=TOKEN_NIL, value="none", index=10, path="[?(nil == none && nil == null)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=14, + path="[?(nil == none && nil == null)]", + ), Token( kind=TOKEN_AND, value="&&", index=15, path="[?(nil == none && nil == null)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=17, + path="[?(nil == none && nil == null)]", + ), Token( kind=TOKEN_NIL, value="nil", index=18, path="[?(nil == none && nil == null)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=21, + path="[?(nil == none && nil == null)]", + ), Token( kind=TOKEN_EQ, value="==", index=22, path="[?(nil == none && nil == null)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=24, + path="[?(nil == none && nil == null)]", + ), Token( kind=TOKEN_NIL, value="null", @@ -1254,7 +1550,7 @@ class Case: path="$['some', 'thing']", want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path="$['some', 'thing']"), - Token(kind=TOKEN_LIST_START, value="[", index=1, path="$['some', 'thing']"), + Token(kind=TOKEN_LBRACKET, value="[", index=1, path="$['some', 'thing']"), Token( kind=TOKEN_SINGLE_QUOTE_STRING, value="some", @@ -1262,6 +1558,7 @@ class Case: path="$['some', 'thing']", ), Token(kind=TOKEN_COMMA, value=",", index=8, path="$['some', 'thing']"), + Token(kind=TOKEN_WHITESPACE, value=" ", index=9, path="$['some', 'thing']"), Token( kind=TOKEN_SINGLE_QUOTE_STRING, 
value="thing", @@ -1282,13 +1579,19 @@ class Case: path="$.some[?(length(@.thing) < 2)]", ), Token( - kind=TOKEN_PROPERTY, + kind=TOKEN_DOT, + value=".", + index=1, + path="$.some[?(length(@.thing) < 2)]", + ), + Token( + kind=TOKEN_NAME, value="some", index=2, path="$.some[?(length(@.thing) < 2)]", ), Token( - kind=TOKEN_LIST_START, + kind=TOKEN_LBRACKET, value="[", index=6, path="$.some[?(length(@.thing) < 2)]", @@ -1306,11 +1609,17 @@ class Case: path="$.some[?(length(@.thing) < 2)]", ), Token( - kind=TOKEN_FUNCTION, + kind=TOKEN_NAME, value="length", index=9, path="$.some[?(length(@.thing) < 2)]", ), + Token( + kind=TOKEN_LPAREN, + value="(", + index=15, + path="$.some[?(length(@.thing) < 2)]", + ), Token( kind=TOKEN_SELF, value="@", @@ -1318,7 +1627,13 @@ class Case: path="$.some[?(length(@.thing) < 2)]", ), Token( - kind=TOKEN_PROPERTY, + kind=TOKEN_DOT, + value=".", + index=17, + path="$.some[?(length(@.thing) < 2)]", + ), + Token( + kind=TOKEN_NAME, value="thing", index=18, path="$.some[?(length(@.thing) < 2)]", @@ -1329,12 +1644,24 @@ class Case: index=23, path="$.some[?(length(@.thing) < 2)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=24, + path="$.some[?(length(@.thing) < 2)]", + ), Token( kind=TOKEN_LT, value="<", index=25, path="$.some[?(length(@.thing) < 2)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=26, + path="$.some[?(length(@.thing) < 2)]", + ), Token( kind=TOKEN_INT, value="2", @@ -1360,7 +1687,9 @@ class Case: path="$.thing.~", want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path="$.thing.~"), - Token(kind=TOKEN_PROPERTY, value="thing", index=2, path="$.thing.~"), + Token(kind=TOKEN_DOT, value=".", index=1, path="$.thing.~"), + Token(kind=TOKEN_NAME, value="thing", index=2, path="$.thing.~"), + Token(kind=TOKEN_DOT, value=".", index=7, path="$.thing.~"), Token(kind=TOKEN_KEYS, value="~", index=8, path="$.thing.~"), ], ), @@ -1369,8 +1698,9 @@ class Case: path="$.thing[~]", want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path="$.thing[~]"), - Token(kind=TOKEN_PROPERTY, value="thing", index=2, path="$.thing[~]"), - Token(kind=TOKEN_LIST_START, value="[", index=7, path="$.thing[~]"), + Token(kind=TOKEN_DOT, value=".", index=1, path="$.thing[~]"), + Token(kind=TOKEN_NAME, value="thing", index=2, path="$.thing[~]"), + Token(kind=TOKEN_LBRACKET, value="[", index=7, path="$.thing[~]"), Token(kind=TOKEN_KEYS, value="~", index=8, path="$.thing[~]"), Token(kind=TOKEN_RBRACKET, value="]", index=9, path="$.thing[~]"), ], @@ -1378,81 +1708,49 @@ class Case: Case( description="implicit root selector, name selector starts with `and`", path="anderson", - want=[ - Token(kind=TOKEN_BARE_PROPERTY, value="anderson", index=0, path="anderson"), - ], + want=[Token(kind=TOKEN_NAME, value="anderson", index=0, path="anderson")], ), Case( description="implicit root selector, name selector starts with `or`", path="order", - want=[ - Token(kind=TOKEN_BARE_PROPERTY, value="order", index=0, path="order"), - ], + want=[Token(kind=TOKEN_NAME, value="order", index=0, path="order")], ), Case( description="implicit root selector, name selector starts with `true`", path="trueblue", - want=[ - Token(kind=TOKEN_BARE_PROPERTY, value="trueblue", index=0, path="trueblue"), - ], + want=[Token(kind=TOKEN_NAME, value="trueblue", index=0, path="trueblue")], ), Case( description="implicit root selector, name selector starts with `false`", path="falsehood", - want=[ - Token( - kind=TOKEN_BARE_PROPERTY, value="falsehood", index=0, path="falsehood" - ), - ], + 
want=[Token(kind=TOKEN_NAME, value="falsehood", index=0, path="falsehood")], ), Case( description="implicit root selector, name selector starts with `not`", path="nottingham", - want=[ - Token( - kind=TOKEN_BARE_PROPERTY, value="nottingham", index=0, path="nottingham" - ), - ], + want=[Token(kind=TOKEN_NAME, value="nottingham", index=0, path="nottingham")], ), Case( description="implicit root selector, name selector starts with `null`", path="nullable", - want=[ - Token(kind=TOKEN_BARE_PROPERTY, value="nullable", index=0, path="nullable"), - ], + want=[Token(kind=TOKEN_NAME, value="nullable", index=0, path="nullable")], ), Case( description="implicit root selector, name selector starts with `none`", path="nonexpert", - want=[ - Token( - kind=TOKEN_BARE_PROPERTY, value="nonexpert", index=0, path="nonexpert" - ), - ], + want=[Token(kind=TOKEN_NAME, value="nonexpert", index=0, path="nonexpert")], ), Case( description="implicit root selector, name selector starts with `undefined`", path="undefinedness", want=[ - Token( - kind=TOKEN_BARE_PROPERTY, - value="undefinedness", - index=0, - path="undefinedness", - ), + Token(kind=TOKEN_NAME, value="undefinedness", index=0, path="undefinedness") ], ), Case( description="implicit root selector, name selector starts with `missing`", path="missingly", - want=[ - Token( - kind=TOKEN_BARE_PROPERTY, - value="missingly", - index=0, - path="missingly", - ), - ], + want=[Token(kind=TOKEN_NAME, value="missingly", index=0, path="missingly")], ), ] diff --git a/tests/test_walk_filter_expression_tree.py b/tests/test_walk_filter_expression_tree.py index b8059ec..0dad737 100644 --- a/tests/test_walk_filter_expression_tree.py +++ b/tests/test_walk_filter_expression_tree.py @@ -1,4 +1,5 @@ """Test that we can traverse filter expression trees.""" + import dataclasses import operator from typing import List @@ -9,7 +10,6 @@ from jsonpath.filter import FilterExpression from jsonpath.filter import walk from jsonpath.selectors import Filter as FilterSelector -from jsonpath.selectors import ListSelector @dataclasses.dataclass @@ -63,13 +63,11 @@ def test_is_volatile(case: Case) -> None: assert isinstance(path, jsonpath.JSONPath) filter_selectors: List[FilterSelector] = [] - for segment in path.selectors: - if isinstance(segment, ListSelector): - filter_selectors.extend( - selector - for selector in segment.items - if isinstance(selector, FilterSelector) - ) + + for segment in path.segments: + for selector in segment.selectors: + if isinstance(selector, FilterSelector): + filter_selectors.append(selector) assert len(filter_selectors) == 1 assert is_volatile(filter_selectors[0].expression) is case.want From 6945983e836ed73603764a921afa4ab0a621db62 Mon Sep 17 00:00:00 2001 From: James Prior Date: Fri, 8 Aug 2025 21:46:17 +0100 Subject: [PATCH 2/5] Rewrite parser WIP [skip ci] --- jsonpath/env.py | 33 +-- jsonpath/lex.py | 37 +++- jsonpath/parse.py | 444 +++++++++++++++++++++------------------ jsonpath/selectors.py | 38 ++-- jsonpath/stream.py | 142 ++++++------- jsonpath/token.py | 3 +- pyproject.toml | 2 +- tests/test_compliance.py | 22 +- tests/test_find.py | 30 +-- tests/test_lex.py | 3 +- 10 files changed, 406 insertions(+), 348 deletions(-) diff --git a/jsonpath/env.py b/jsonpath/env.py index 8542b32..770c614 100644 --- a/jsonpath/env.py +++ b/jsonpath/env.py @@ -92,8 +92,8 @@ class attributes `root_token`, `self_token` and `filter_context_token`. 
## Class attributes Attributes: - pseudo_root_token (str): The pattern used to select a "fake" root node, one level - above the real root node. + pseudo_root_token (str): The pattern used to select a "fake" root node, one + level above the real root node. filter_context_token (str): The pattern used to select extra filter context data. Defaults to `"_"`. intersection_token (str): The pattern used as the intersection operator. @@ -180,24 +180,25 @@ def compile(self, path: str) -> Union[JSONPath, CompoundJSONPath]: # noqa: A003 """ tokens = self.lexer.tokenize(path) stream = TokenStream(tokens) - pseudo_root = stream.current.kind == TOKEN_PSEUDO_ROOT + pseudo_root = stream.current().kind == TOKEN_PSEUDO_ROOT _path: Union[JSONPath, CompoundJSONPath] = JSONPath( env=self, segments=self.parser.parse(stream), pseudo_root=pseudo_root ) - if stream.current.kind != TOKEN_EOF: + # TODO: better! + if stream.current().kind != TOKEN_EOF: _path = CompoundJSONPath(env=self, path=_path) - while stream.current.kind != TOKEN_EOF: - if stream.peek.kind == TOKEN_EOF: + while stream.current().kind != TOKEN_EOF: + if stream.peek().kind == TOKEN_EOF: # trailing union or intersection raise JSONPathSyntaxError( - f"expected a path after {stream.current.value!r}", - token=stream.current, + f"expected a path after {stream.current().value!r}", + token=stream.current(), ) - if stream.current.kind == TOKEN_UNION: - stream.next_token() - pseudo_root = stream.current.kind == TOKEN_PSEUDO_ROOT + if stream.current().kind == TOKEN_UNION: + stream.next() + pseudo_root = stream.current().kind == TOKEN_PSEUDO_ROOT _path = _path.union( JSONPath( env=self, @@ -205,9 +206,9 @@ def compile(self, path: str) -> Union[JSONPath, CompoundJSONPath]: # noqa: A003 pseudo_root=pseudo_root, ) ) - elif stream.current.kind == TOKEN_INTERSECTION: - stream.next_token() - pseudo_root = stream.current.kind == TOKEN_PSEUDO_ROOT + elif stream.current().kind == TOKEN_INTERSECTION: + stream.next() + pseudo_root = stream.current().kind == TOKEN_PSEUDO_ROOT _path = _path.intersection( JSONPath( env=self, @@ -218,8 +219,8 @@ def compile(self, path: str) -> Union[JSONPath, CompoundJSONPath]: # noqa: A003 else: # pragma: no cover # Parser.parse catches this too raise JSONPathSyntaxError( # noqa: TRY003 - f"unexpected token {stream.current.value!r}", - token=stream.current, + f"unexpected token {stream.current().value!r}", + token=stream.current(), ) return _path diff --git a/jsonpath/lex.py b/jsonpath/lex.py index 837f6dc..6583589 100644 --- a/jsonpath/lex.py +++ b/jsonpath/lex.py @@ -15,6 +15,7 @@ from .token import TOKEN_CONTAINS from .token import TOKEN_DDOT from .token import TOKEN_DOT +from .token import TOKEN_DOT_PROPERTY from .token import TOKEN_DOUBLE_QUOTE_STRING from .token import TOKEN_EQ from .token import TOKEN_ERROR @@ -22,6 +23,7 @@ from .token import TOKEN_FILTER from .token import TOKEN_FILTER_CONTEXT from .token import TOKEN_FLOAT +from .token import TOKEN_FUNCTION from .token import TOKEN_GE from .token import TOKEN_GT from .token import TOKEN_IN @@ -82,7 +84,6 @@ class attributes. Then setting `lexer_class` on a `JSONPathEnvironment`. """ key_pattern = r"[\u0080-\uFFFFa-zA-Z_][\u0080-\uFFFFa-zA-Z0-9_-]*" - name_pattern = key_pattern # XXX: # ! or `not` logical_not_pattern = r"(?:not\b)|!" 
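As a rough sketch of what `key_pattern` accepts (illustrative, not part of this
patch): shorthand names must start with a letter, underscore or non-ASCII
character, optionally followed by letters, digits, hyphens and underscores.

    import re

    KEY_PATTERN = r"[\u0080-\uFFFFa-zA-Z_][\u0080-\uFFFFa-zA-Z0-9_-]*"

    assert re.fullmatch(KEY_PATTERN, "thing") is not None
    assert re.fullmatch(KEY_PATTERN, "some-thing") is not None  # hyphens allowed after the first character
    assert re.fullmatch(KEY_PATTERN, "_private") is not None
    assert re.fullmatch(KEY_PATTERN, "café") is not None  # non-ASCII letters allowed
    assert re.fullmatch(KEY_PATTERN, "1thing") is None  # may not start with a digit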
@@ -99,10 +100,15 @@ def __init__(self, *, env: JSONPathEnvironment) -> None:
         self.double_quote_pattern = r'"(?P<G_DQUOTE>(?:(?!(?<!\\)").)*)"'
         self.single_quote_pattern = r"'(?P<G_SQUOTE>(?:(?!(?<!\\)').)*)'"
 
+        # .thing
+        self.dot_property_pattern = rf"(?P<G_DOT>\.)(?P<G_PROP>{self.key_pattern})"
+
         # /pattern/ or /pattern/flags
         self.re_pattern = r"/(?P<G_RE>.+?)/(?P<G_RE_FLAGS>[aims]*)"
 
+        # func(
+        self.function_pattern = r"(?P<G_FUNC>[a-z][a-z_0-9]+)(?P<G_FUNC_PAREN>\()"
+
         self.rules = self.compile_rules()
 
     def compile_rules(self) -> Pattern[str]:
@@ -122,6 +128,7 @@ def compile_rules(self) -> Pattern[str]:
             (TOKEN_DOUBLE_QUOTE_STRING, self.double_quote_pattern),
             (TOKEN_SINGLE_QUOTE_STRING, self.single_quote_pattern),
             (TOKEN_RE_PATTERN, self.re_pattern),
+            (TOKEN_DOT_PROPERTY, self.dot_property_pattern),
             (TOKEN_FLOAT, r"-?\d+\.\d*(?:[eE][+-]?\d+)?"),
             (TOKEN_INT, r"-?\d+(?P<G_EXP>[eE][+\-]?\d+)?\b"),
             (TOKEN_DDOT, r"\.\."),
@@ -160,6 +167,7 @@ def compile_rules(self) -> Pattern[str]:
             (TOKEN_LT, r"<"),
             (TOKEN_GT, r">"),
             (TOKEN_NOT, self.logical_not_pattern),  # Must go after "!="
+            (TOKEN_FUNCTION, self.function_pattern),
             (TOKEN_NAME, self.key_pattern),  # Must go after reserved words
             (TOKEN_LPAREN, r"\("),
             (TOKEN_RPAREN, r"\)"),
@@ -180,7 +188,18 @@ def tokenize(self, path: str) -> Iterator[Token]:  # noqa PLR0912
             kind = match.lastgroup
             assert kind is not None
 
-            if kind == TOKEN_DOUBLE_QUOTE_STRING:
+            if kind == TOKEN_DOT_PROPERTY:
+                yield _token(
+                    kind=TOKEN_DOT,
+                    value=match.group("G_DOT"),
+                    index=match.start("G_DOT"),
+                )
+                yield _token(
+                    kind=TOKEN_NAME,
+                    value=match.group("G_PROP"),
+                    index=match.start("G_PROP"),
+                )
+            elif kind == TOKEN_DOUBLE_QUOTE_STRING:
                 yield _token(
                     kind=TOKEN_DOUBLE_QUOTE_STRING,
                     value=match.group("G_DQUOTE"),
@@ -222,6 +241,18 @@ def tokenize(self, path: str) -> Iterator[Token]:  # noqa PLR0912
                     value=match.group(),
                     index=match.start(),
                 )
+            elif kind == TOKEN_FUNCTION:
+                yield _token(
+                    kind=TOKEN_FUNCTION,
+                    value=match.group("G_FUNC"),
+                    index=match.start("G_FUNC"),
+                )
+
+                yield _token(
+                    kind=TOKEN_LPAREN,
+                    value=match.group("G_FUNC_PAREN"),
+                    index=match.start("G_FUNC_PAREN"),
+                )
             elif kind == TOKEN_ERROR:
                 raise JSONPathSyntaxError(
                     f"unexpected token {match.group()!r}",
diff --git a/jsonpath/parse.py b/jsonpath/parse.py
index 82ccea5..c1bb0d4 100644
--- a/jsonpath/parse.py
+++ b/jsonpath/parse.py
@@ -289,212 +289,240 @@ def __init__(self, *, env: JSONPathEnvironment) -> None:
 
     def parse(self, stream: TokenStream) -> Iterator[JSONPathSegment]:
         """Parse a JSONPath from a stream of tokens."""
-        if stream.current.kind in {TOKEN_ROOT, TOKEN_PSEUDO_ROOT}:
-            stream.next_token()
+        # TODO: Optionally require TOKEN_ROOT
+        if stream.current().kind in {TOKEN_ROOT, TOKEN_PSEUDO_ROOT}:
+            stream.next()
 
-        yield from self.parse_path(stream, in_filter=False)
+        # TODO: Support "bare" paths. Those without a leading dot for shorthand
+        # selectors
 
-        if stream.current.kind not in (TOKEN_EOF, TOKEN_INTERSECTION, TOKEN_UNION):
+        yield from self.parse_path(stream)
+
+        if stream.current().kind not in (TOKEN_EOF, TOKEN_INTERSECTION, TOKEN_UNION):
             raise JSONPathSyntaxError(
-                f"unexpected token {stream.current.value!r}",
-                token=stream.current,
+                f"unexpected token {stream.current().value!r}",
+                token=stream.current(),
             )
 
-    def parse_path(
-        self,
-        stream: TokenStream,
-        *,
-        in_filter: bool = False,
-    ) -> Iterable[JSONPathSegment]:
-        """Parse a top-level JSONPath, or one that is nested in a filter."""
+    def parse_path(self, stream: TokenStream) -> Iterable[JSONPathSegment]:
+        """Parse a JSONPath query string.
+
+        This method assumes the root, current or pseudo root identifier has
+        already been consumed.
+ """ while True: - if stream.current.kind == TOKEN_DDOT: - token = stream.next_token() + stream.skip_whitespace() + if stream.current().kind == TOKEN_DOT: + # Consume the dot. + stream.next() + # Assert that dot is followed by shorthand selector without whitespace. + stream.expect(TOKEN_NAME, TOKEN_WILD, TOKEN_KEYS) + token = stream.current() + selectors = self.parse_selectors(stream) + yield JSONPathChildSegment( + env=self.env, token=token, selectors=selectors + ) + elif stream.current().kind == TOKEN_DDOT: + token = stream.next() selectors = self.parse_selectors(stream) if not selectors: raise JSONPathSyntaxError( "missing selector for recursive descent segment", - token=stream.current, + token=stream.current(), ) yield JSONPathRecursiveDescentSegment( env=self.env, token=token, selectors=selectors ) - elif ( - stream.skip(TOKEN_DOT) - and stream.current.kind - in { - TOKEN_NAME, - TOKEN_WILD, - TOKEN_KEYS, - } - ) or stream.current.kind == TOKEN_LBRACKET: - token = stream.current + elif stream.current().kind == TOKEN_LBRACKET: + token = stream.current() selectors = self.parse_selectors(stream) yield JSONPathChildSegment( env=self.env, token=token, selectors=selectors ) else: - if in_filter: - stream.push(stream.current) break - stream.next_token() - def parse_selectors(self, stream: TokenStream) -> tuple[JSONPathSelector, ...]: - if stream.current.kind == TOKEN_NAME: + token = stream.next() + + if token.kind == TOKEN_NAME: return ( PropertySelector( env=self.env, - token=stream.current, - name=stream.current.value, + token=token, + name=token.value, shorthand=True, ), ) - if stream.current.kind == TOKEN_WILD: - return (WildSelector(env=self.env, token=stream.current, shorthand=True),) + if token.kind == TOKEN_WILD: + return ( + WildSelector( + env=self.env, + token=token, + shorthand=True, + ), + ) - if stream.current.kind == TOKEN_KEYS: + if token.kind == TOKEN_KEYS: return ( KeysSelector( env=self.env, - token=stream.current, + token=token, shorthand=True, ), ) - if stream.current.kind == TOKEN_LBRACKET: + if token.kind == TOKEN_LBRACKET: + stream.pos -= 1 return tuple(self.parse_bracketed_selection(stream)) return () - def parse_slice(self, stream: TokenStream) -> SliceSelector: - """Parse a slice JSONPath expression from a stream of tokens.""" - tok = stream.current - start: Optional[int] = None - stop: Optional[int] = None - step: Optional[int] = None - - def _maybe_index(token: Token) -> bool: - if token.kind == TOKEN_INT: - if len(token.value) > 1 and token.value.startswith(("0", "-0")): - raise JSONPathSyntaxError( - f"invalid index {token.value!r}", token=token - ) - return True - return False - - # 1: or : - if _maybe_index(stream.current): - start = int(stream.current.value) - stream.next_token() - - stream.expect(TOKEN_COLON) - stream.next_token() - - # 1 or 1: or : or ? - if _maybe_index(stream.current): - stop = int(stream.current.value) - stream.next_token() - if stream.current.kind == TOKEN_COLON: - stream.next_token() - elif stream.current.kind == TOKEN_COLON: - stream.expect(TOKEN_COLON) - stream.next_token() - - # 1 or ? 
- if _maybe_index(stream.current): - step = int(stream.current.value) - stream.next_token() - - stream.push(stream.current) - - return SliceSelector( - env=self.env, - token=tok, - start=start, - stop=stop, - step=step, - ) - def parse_bracketed_selection(self, stream: TokenStream) -> List[JSONPathSelector]: # noqa: PLR0912 """Parse a comma separated list of JSONPath selectors.""" - tok = stream.next_token() # Skip LBRACKET + segment_token = stream.eat(TOKEN_LBRACKET) selectors: List[JSONPathSelector] = [] - while stream.current.kind != TOKEN_RBRACKET: - if stream.current.kind == TOKEN_INT: - if stream.peek.kind == TOKEN_COLON: + while True: + stream.skip_whitespace() + token = stream.current() + + if token.kind == TOKEN_RBRACKET: + break + + if token.kind == TOKEN_INT: + if ( + stream.peek().kind == TOKEN_COLON + or stream.peek(2).kind == TOKEN_COLON + ): selectors.append(self.parse_slice(stream)) else: - if ( - len(stream.current.value) > 1 - and stream.current.value.startswith("0") - ) or stream.current.value.startswith("-0"): - raise JSONPathSyntaxError( - "leading zero in index selector", token=stream.current - ) + self._raise_for_leading_zero(token) selectors.append( IndexSelector( env=self.env, - token=stream.current, - index=int(stream.current.value), + token=token, + index=int(token.value), ) ) - elif stream.current.kind in ( + stream.next() + elif token.kind in ( TOKEN_DOUBLE_QUOTE_STRING, TOKEN_SINGLE_QUOTE_STRING, ): selectors.append( PropertySelector( env=self.env, - token=stream.current, - name=self._decode_string_literal(stream.current), + token=token, + name=self._decode_string_literal(token), shorthand=False, ), ) - elif stream.current.kind == TOKEN_COLON: + stream.next() + elif token.kind == TOKEN_COLON: selectors.append(self.parse_slice(stream)) - elif stream.current.kind == TOKEN_WILD: + elif token.kind == TOKEN_WILD: selectors.append( WildSelector( env=self.env, - token=stream.current, + token=token, shorthand=False, ) ) - elif stream.current.kind == TOKEN_FILTER: - selectors.append(self.parse_filter_selector(stream)) - elif stream.current.kind == TOKEN_EOF: - raise JSONPathSyntaxError( - "unexpected end of query", token=stream.current + stream.next() + elif token.kind == TOKEN_KEYS: + selectors.append( + KeysSelector(env=self.env, token=token, shorthand=False) ) + stream.next() + elif token.kind == TOKEN_FILTER: + selectors.append(self.parse_filter_selector(stream)) + elif token.kind == TOKEN_EOF: + raise JSONPathSyntaxError("unexpected end of query", token=token) else: raise JSONPathSyntaxError( - f"unexpected token in bracketed selection {stream.current.kind!r}", - token=stream.current, + f"unexpected token in bracketed selection {token.kind!r}", + token=token, ) - if stream.peek.kind == TOKEN_EOF: - raise JSONPathSyntaxError( - "unexpected end of selector list", - token=stream.current, - ) + # XXX: + # if stream.peek().kind == TOKEN_EOF: + # raise JSONPathSyntaxError( + # "unexpected end of segment", + # token=stream.current(), + # ) - if stream.peek.kind != TOKEN_RBRACKET: - stream.expect_peek(TOKEN_COMMA) - stream.next_token() - stream.expect_peek_not(TOKEN_RBRACKET, "unexpected trailing comma") + stream.skip_whitespace() - stream.next_token() + if stream.current().kind != TOKEN_RBRACKET: + stream.eat(TOKEN_COMMA) + stream.skip_whitespace() + if stream.current().kind == TOKEN_RBRACKET: + raise JSONPathSyntaxError( + "unexpected trailing comma", token=stream.current() + ) + + stream.eat(TOKEN_RBRACKET) if not selectors: - raise JSONPathSyntaxError("empty bracketed 
segment", token=tok) + raise JSONPathSyntaxError("empty bracketed segment", token=segment_token) return selectors + def parse_slice(self, stream: TokenStream) -> SliceSelector: + """Parse a slice JSONPath expression from a stream of tokens.""" + token = stream.current() + start: Optional[int] = None + stop: Optional[int] = None + step: Optional[int] = None + + def _maybe_index(token: Token) -> bool: + if token.kind == TOKEN_INT: + if len(token.value) > 1 and token.value.startswith(("0", "-0")): + raise JSONPathSyntaxError( + f"invalid index {token.value!r}", token=token + ) + return True + return False + + # 1: or : + if _maybe_index(stream.current()): + start = int(stream.current().value) + stream.next() + + stream.skip_whitespace() + stream.expect(TOKEN_COLON) + stream.next() + stream.skip_whitespace() + + # 1 or 1: or : or ? + if _maybe_index(stream.current()): + stop = int(stream.current().value) + stream.next() + stream.skip_whitespace() + if stream.current().kind == TOKEN_COLON: + stream.next() + elif stream.current().kind == TOKEN_COLON: + stream.expect(TOKEN_COLON) + stream.next() + + # 1 or ? + stream.skip_whitespace() + if _maybe_index(stream.current()): + step = int(stream.current().value) + stream.next() + + return SliceSelector( + env=self.env, + token=token, + start=start, + stop=stop, + step=step, + ) + def parse_filter_selector(self, stream: TokenStream) -> Filter: - tok = stream.next_token() + token = stream.eat(TOKEN_FILTER) expr = self.parse_filter_expression(stream) if self.env.well_typed and isinstance(expr, FunctionExtension): @@ -505,42 +533,44 @@ def parse_filter_selector(self, stream: TokenStream) -> Filter: and func.return_type == ExpressionType.VALUE ): raise JSONPathTypeError( - f"result of {expr.name}() must be compared", token=tok + f"result of {expr.name}() must be compared", token=token ) if isinstance(expr, (Literal, Nil)): raise JSONPathSyntaxError( "filter expression literals outside of " "function expressions must be compared", - token=tok, + token=token, ) - return Filter(env=self.env, token=tok, expression=BooleanExpression(expr)) + return Filter(env=self.env, token=token, expression=BooleanExpression(expr)) def parse_boolean(self, stream: TokenStream) -> FilterExpression: - if stream.current.kind == TOKEN_TRUE: + if stream.next().kind == TOKEN_TRUE: return TRUE return FALSE - def parse_nil(self, _: TokenStream) -> FilterExpression: + def parse_nil(self, stream: TokenStream) -> FilterExpression: + stream.next() return NIL - def parse_undefined(self, _: TokenStream) -> FilterExpression: + def parse_undefined(self, stream: TokenStream) -> FilterExpression: + stream.next() return UNDEFINED_LITERAL def parse_string_literal(self, stream: TokenStream) -> FilterExpression: - return StringLiteral(value=self._decode_string_literal(stream.current)) + return StringLiteral(value=self._decode_string_literal(stream.next())) def parse_integer_literal(self, stream: TokenStream) -> FilterExpression: # Convert to float first to handle scientific notation. 
- return IntegerLiteral(value=int(float(stream.current.value))) + return IntegerLiteral(value=int(float(stream.next().value))) def parse_float_literal(self, stream: TokenStream) -> FilterExpression: - return FloatLiteral(value=float(stream.current.value)) + return FloatLiteral(value=float(stream.next().value)) def parse_prefix_expression(self, stream: TokenStream) -> FilterExpression: - tok = stream.next_token() - assert tok.kind == TOKEN_NOT + token = stream.next() + assert token.kind == TOKEN_NOT return PrefixExpression( operator="!", right=self.parse_filter_expression( @@ -551,169 +581,173 @@ def parse_prefix_expression(self, stream: TokenStream) -> FilterExpression: def parse_infix_expression( self, stream: TokenStream, left: FilterExpression ) -> FilterExpression: - tok = stream.next_token() - precedence = self.PRECEDENCES.get(tok.kind, self.PRECEDENCE_LOWEST) + token = stream.next() + precedence = self.PRECEDENCES.get(token.kind, self.PRECEDENCE_LOWEST) right = self.parse_filter_expression(stream, precedence) - operator = self.BINARY_OPERATORS[tok.kind] + operator = self.BINARY_OPERATORS[token.kind] if self.env.well_typed and operator in self.COMPARISON_OPERATORS: - self._raise_for_non_comparable_function(left, tok) - self._raise_for_non_comparable_function(right, tok) + self._raise_for_non_comparable_function(left, token) + self._raise_for_non_comparable_function(right, token) if operator not in self.INFIX_LITERAL_OPERATORS: if isinstance(left, (Literal, Nil)): raise JSONPathSyntaxError( "filter expression literals outside of " "function expressions must be compared", - token=tok, + token=token, ) if isinstance(right, (Literal, Nil)): raise JSONPathSyntaxError( "filter expression literals outside of " "function expressions must be compared", - token=tok, + token=token, ) return InfixExpression(left, operator, right) def parse_grouped_expression(self, stream: TokenStream) -> FilterExpression: - stream.next_token() + stream.eat(TOKEN_LPAREN) expr = self.parse_filter_expression(stream) - stream.next_token() - while stream.current.kind != TOKEN_RPAREN: - if stream.current.kind == TOKEN_EOF: - raise JSONPathSyntaxError( - "unbalanced parentheses", token=stream.current - ) + while stream.current().kind != TOKEN_RPAREN: + token = stream.current() + if token.kind == TOKEN_EOF: + raise JSONPathSyntaxError("unbalanced parentheses", token=token) - if stream.current.kind not in self.BINARY_OPERATORS: + if token.kind not in self.BINARY_OPERATORS: raise JSONPathSyntaxError( - f"expected an expression, found '{stream.current.value}'", - token=stream.current, + f"expected an expression, found '{token.value}'", + token=token, ) expr = self.parse_infix_expression(stream, expr) - stream.expect(TOKEN_RPAREN) + stream.eat(TOKEN_RPAREN) return expr def parse_root_path(self, stream: TokenStream) -> FilterExpression: - root = stream.next_token() + root = stream.next() return RootFilterQuery( JSONPath( env=self.env, - segments=self.parse_path(stream, in_filter=True), + segments=self.parse_path(stream), pseudo_root=root.kind == TOKEN_PSEUDO_ROOT, ) ) def parse_self_path(self, stream: TokenStream) -> FilterExpression: - stream.next_token() + stream.next() return RelativeFilterQuery( - JSONPath(env=self.env, segments=self.parse_path(stream, in_filter=True)) + JSONPath(env=self.env, segments=self.parse_path(stream)) ) - def parse_current_key(self, _: TokenStream) -> FilterExpression: + def parse_current_key(self, stream: TokenStream) -> FilterExpression: + stream.next() return CURRENT_KEY def 
parse_filter_context_path(self, stream: TokenStream) -> FilterExpression: - stream.next_token() + stream.next() return FilterContextPath( - JSONPath(env=self.env, segments=self.parse_path(stream, in_filter=True)) + JSONPath(env=self.env, segments=self.parse_path(stream)) ) def parse_regex(self, stream: TokenStream) -> FilterExpression: - pattern = stream.current.value + pattern = stream.current().value flags = 0 - if stream.peek.kind == TOKEN_RE_FLAGS: - stream.next_token() - for flag in set(stream.current.value): + if stream.peek().kind == TOKEN_RE_FLAGS: + stream.next() + for flag in set(stream.next().value): flags |= self.RE_FLAG_MAP[flag] return RegexLiteral(value=re.compile(pattern, flags)) def parse_list_literal(self, stream: TokenStream) -> FilterExpression: - stream.next_token() + stream.eat(TOKEN_LBRACKET) list_items: List[FilterExpression] = [] - while stream.current.kind != TOKEN_RBRACKET: + while stream.current().kind != TOKEN_RBRACKET: try: - list_items.append(self.list_item_map[stream.current.kind](stream)) + list_items.append(self.list_item_map[stream.current().kind](stream)) except KeyError as err: raise JSONPathSyntaxError( - f"unexpected {stream.current.value!r}", - token=stream.current, + f"unexpected {stream.current().value!r}", + token=stream.current(), ) from err - if stream.peek.kind != TOKEN_RBRACKET: + if stream.peek().kind != TOKEN_RBRACKET: stream.expect_peek(TOKEN_COMMA) - stream.next_token() + stream.next() - stream.next_token() + stream.next() + stream.eat(TOKEN_RBRACKET) return ListLiteral(list_items) def parse_function_extension(self, stream: TokenStream) -> FilterExpression: function_arguments: List[FilterExpression] = [] - tok = stream.next_token() + function_token = stream.next() + stream.eat(TOKEN_LPAREN) + + while True: + stream.skip_whitespace() + token = stream.current() + + if token.kind == TOKEN_RPAREN: + break - while stream.current.kind != TOKEN_RPAREN: try: - func = self.function_argument_map[stream.current.kind] + func = self.function_argument_map[token.kind] except KeyError as err: raise JSONPathSyntaxError( - f"unexpected {stream.current.value!r}", - token=stream.current, + f"unexpected {token.value!r}", token=token ) from err expr = func(stream) + stream.skip_whitespace() - # The argument could be a comparison or logical expression - peek_kind = stream.peek.kind - while peek_kind in self.BINARY_OPERATORS: - stream.next_token() + while stream.current().kind in self.BINARY_OPERATORS: expr = self.parse_infix_expression(stream, expr) - peek_kind = stream.peek.kind function_arguments.append(expr) + stream.skip_whitespace() - if stream.peek.kind != TOKEN_RPAREN: - stream.expect_peek(TOKEN_COMMA) - stream.next_token() + if stream.current().kind != TOKEN_RPAREN: + stream.eat(TOKEN_COMMA) - stream.next_token() + stream.eat(TOKEN_RPAREN) return FunctionExtension( - tok.value, - self.env.validate_function_extension_signature(tok, function_arguments), + function_token.value, + self.env.validate_function_extension_signature( + function_token, function_arguments + ), ) def parse_filter_expression( self, stream: TokenStream, precedence: int = PRECEDENCE_LOWEST ) -> FilterExpression: + stream.skip_whitespace() + token = stream.current() + try: - left = self.token_map[stream.current.kind](stream) + left = self.token_map[token.kind](stream) except KeyError as err: - if stream.current.kind in (TOKEN_EOF, TOKEN_RBRACKET): + if token.kind in (TOKEN_EOF, TOKEN_RBRACKET): msg = "end of expression" else: - msg = repr(stream.current.value) - raise 
JSONPathSyntaxError( - f"unexpected {msg}", token=stream.current - ) from err + msg = repr(token.value) + raise JSONPathSyntaxError(f"unexpected {msg}", token=token) from err while True: - peek_kind = stream.peek.kind + stream.skip_whitespace() + kind = stream.current().kind + if ( - peek_kind in (TOKEN_EOF, TOKEN_RBRACKET) - or self.PRECEDENCES.get(peek_kind, self.PRECEDENCE_LOWEST) < precedence + kind not in self.BINARY_OPERATORS + or self.PRECEDENCES.get(kind, self.PRECEDENCE_LOWEST) < precedence ): break - if peek_kind not in self.BINARY_OPERATORS: - return left - - stream.next_token() left = self.parse_infix_expression(stream, left) return left @@ -748,3 +782,9 @@ def _raise_for_non_comparable_function( raise JSONPathTypeError( f"result of {expr.name}() is not comparable", token ) + + def _raise_for_leading_zero(self, token: Token) -> None: + if ( + len(token.value) > 1 and token.value.startswith("0") + ) or token.value.startswith("-0"): + raise JSONPathSyntaxError("leading zero in index selector", token=token) diff --git a/jsonpath/selectors.py b/jsonpath/selectors.py index 89e2490..d13071b 100644 --- a/jsonpath/selectors.py +++ b/jsonpath/selectors.py @@ -150,15 +150,16 @@ def _normalized_index(self, obj: Sequence[object]) -> int: return self.index def resolve(self, node: JSONPathMatch) -> Iterable[JSONPathMatch]: - if isinstance(node.obj, Mapping): - # Try the string representation of the index as a key. - with suppress(KeyError): - match = node.new_child( - self.env.getitem(node.obj, self._as_key), self.index - ) - node.add_child(match) - yield match - elif isinstance(node.obj, Sequence) and not isinstance(node.obj, str): + # TODO: Optionally try string representation of int + # if isinstance(node.obj, Mapping): + # # Try the string representation of the index as a key. + # with suppress(KeyError): + # match = node.new_child( + # self.env.getitem(node.obj, self._as_key), self.index + # ) + # node.add_child(match) + # yield match + if isinstance(node.obj, Sequence) and not isinstance(node.obj, str): norm_index = self._normalized_index(node.obj) with suppress(IndexError): match = node.new_child( @@ -168,15 +169,16 @@ def resolve(self, node: JSONPathMatch) -> Iterable[JSONPathMatch]: yield match async def resolve_async(self, node: JSONPathMatch) -> AsyncIterable[JSONPathMatch]: - if isinstance(node.obj, Mapping): - # Try the string representation of the index as a key. - with suppress(KeyError): - match = node.new_child( - await self.env.getitem_async(node.obj, self._as_key), self.index - ) - node.add_child(match) - yield match - elif isinstance(node.obj, Sequence) and not isinstance(node.obj, str): + # XXX + # if isinstance(node.obj, Mapping): + # # Try the string representation of the index as a key. 
+ # with suppress(KeyError): + # match = node.new_child( + # await self.env.getitem_async(node.obj, self._as_key), self.index + # ) + # node.add_child(match) + # yield match + if isinstance(node.obj, Sequence) and not isinstance(node.obj, str): norm_index = self._normalized_index(node.obj) with suppress(IndexError): match = node.new_child( diff --git a/jsonpath/stream.py b/jsonpath/stream.py index 0a6e052..775c95f 100644 --- a/jsonpath/stream.py +++ b/jsonpath/stream.py @@ -1,114 +1,94 @@ -# noqa: D100 +"""Step through a stream of tokens.""" + from __future__ import annotations -from collections import deque -from typing import Deque -from typing import Iterator -from typing import Optional +from typing import Iterable from .exceptions import JSONPathSyntaxError from .token import TOKEN_EOF +from .token import TOKEN_WHITESPACE from .token import Token -# ruff: noqa: D102 - class TokenStream: - """Step through or iterate a stream of tokens.""" - - def __init__(self, token_iter: Iterator[Token]): - self.iter = token_iter - self._pushed: Deque[Token] = deque() - self.current = Token("", "", -1, "") - next(self) - - class TokenStreamIterator: - """An iterable token stream.""" - - def __init__(self, stream: TokenStream): - self.stream = stream - - def __iter__(self) -> Iterator[Token]: - return self - - def __next__(self) -> Token: - tok = self.stream.current - if tok.kind is TOKEN_EOF: - self.stream.close() - raise StopIteration - next(self.stream) - return tok - - def __iter__(self) -> Iterator[Token]: - return self.TokenStreamIterator(self) - - def __next__(self) -> Token: - tok = self.current - if self._pushed: - self.current = self._pushed.popleft() - elif self.current.kind is not TOKEN_EOF: - try: - self.current = next(self.iter) - except StopIteration: - self.close() - return tok + """Step through a stream of tokens.""" + + def __init__(self, token_iter: Iterable[Token]): + self.tokens = list(token_iter) + self.pos = 0 + self.eof = Token(TOKEN_EOF, "", -1, self.tokens[0].path) def __str__(self) -> str: # pragma: no cover return f"current: {self.current}\nnext: {self.peek}" - def next_token(self) -> Token: - """Return the next token from the stream.""" - return next(self) - - @property - def peek(self) -> Token: - """Look at the next token.""" - current = next(self) - result = self.current - self.push(current) - return result - - def push(self, tok: Token) -> None: - """Push a token back to the stream.""" - self._pushed.append(self.current) - self.current = tok - - def close(self) -> None: - """Close the stream.""" - self.current = Token(TOKEN_EOF, "", -1, "") + def current(self) -> Token: + """Return the token at the current position in the stream.""" + try: + return self.tokens[self.pos] + except IndexError: + return self.eof + + def next(self) -> Token: + """Return the token at the current position and advance the pointer.""" + try: + token = self.tokens[self.pos] + self.pos += 1 + return token + except IndexError: + return self.eof + + def peek(self, offset: int = 1) -> Token: + """Return the token at current position plus the offset. + + Does not advance the pointer. 
+ """ + try: + return self.tokens[self.pos + offset] + except IndexError: + return self.eof + + def eat(self, kind: str, message: str | None = None) -> Token: + """Assert tge type if the current token and advance the pointer.""" + token = self.next() + if token.kind != kind: + raise JSONPathSyntaxError( + message or f"expected {kind}, found {token.kind!r}", + token=token, + ) + return token def expect(self, *typ: str) -> None: - if self.current.kind not in typ: + """Raise an exception of the current token is not in `typ`.""" + token = self.current() + if token.kind not in typ: if len(typ) == 1: _typ = repr(typ[0]) else: _typ = f"one of {typ!r}" raise JSONPathSyntaxError( - f"expected {_typ}, found {self.current.kind!r}", - token=self.current, + f"expected {_typ}, found {token.kind!r}", + token=token, ) def expect_peek(self, *typ: str) -> None: - if self.peek.kind not in typ: + """Raise an exception of the current token is not in `typ`.""" + token = self.peek() + if token.kind not in typ: if len(typ) == 1: _typ = repr(typ[0]) else: _typ = f"one of {typ!r}" raise JSONPathSyntaxError( - f"expected {_typ}, found {self.peek.kind!r}", - token=self.peek, + f"expected {_typ}, found {token.kind!r}", + token=token, ) def expect_peek_not(self, typ: str, message: str) -> None: """Raise an exception if the next token kind of _typ_.""" - if self.peek.kind == typ: - raise JSONPathSyntaxError(message, token=self.peek) - - def eat(self, *typ: str) -> Token: - self.expect(*typ) - return self.next_token() + if self.peek().kind == typ: + raise JSONPathSyntaxError(message, token=self.peek()) - def skip(self, *typ: str) -> Optional[Token]: - if self.current.kind in typ: - return self.next_token() - return None + def skip_whitespace(self) -> None: + """Skip whitespace.""" + if self.current().kind == TOKEN_WHITESPACE: + self.pos += 1 diff --git a/jsonpath/token.py b/jsonpath/token.py index c9f6592..6650b9c 100644 --- a/jsonpath/token.py +++ b/jsonpath/token.py @@ -22,7 +22,8 @@ TOKEN_RBRACKET = sys.intern("TOKEN_RBRACKET") TOKEN_ROOT = sys.intern("TOKEN_ROOT") TOKEN_WILD = sys.intern("TOKEN_WILD") -TOKEN_NAME = sys.intern("TOKEN_NAME") # An object property/key or a function name +TOKEN_NAME = sys.intern("TOKEN_NAME") +TOKEN_DOT_PROPERTY = sys.intern("TOKEN_DOT_PROPERTY") # Filter expression tokens TOKEN_AND = sys.intern("TOKEN_AND") diff --git a/pyproject.toml b/pyproject.toml index 23268f8..fe30c78 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -88,7 +88,7 @@ exclude_lines = ["no cov", "if __name__ == .__main__.:", "if TYPE_CHECKING:"] [tool.mypy] files = ["jsonpath", "tests"] -exclude = ["tests/nts"] +exclude = ["tests/nts", "tests/cts"] python_version = "3.11" disallow_subclassing_any = true disallow_untyped_calls = true diff --git a/tests/test_compliance.py b/tests/test_compliance.py index 38592cb..6430f07 100644 --- a/tests/test_compliance.py +++ b/tests/test_compliance.py @@ -35,10 +35,10 @@ class Case: SKIP = { - "basic, no leading whitespace": "flexible whitespace policy", + # "basic, no leading whitespace": "flexible whitespace policy", "basic, no trailing whitespace": "flexible whitespace policy", - "basic, bald descendant segment": "almost has a consensus", - "filter, index segment on object, selects nothing": "flexible selector policy", + # "basic, bald descendant segment": "almost has a consensus", + # "filter, index segment on object, selects nothing": "flexible selector policy", "functions, match, dot matcher on \\u2028": "standard library re policy", "functions, match, dot matcher on 
\\u2029": "standard library re policy", "functions, search, dot matcher on \\u2028": "standard library re policy", @@ -76,14 +76,14 @@ class Case: "name selector, double quotes, non-surrogate surrogate": "expected behavior policy", "name selector, double quotes, surrogate supplementary": "expected behavior policy", "name selector, double quotes, supplementary surrogate": "expected behavior policy", - "whitespace, selectors, space between dot and name": "flexible whitespace policy", # noqa: E501 - "whitespace, selectors, newline between dot and name": "flexible whitespace policy", # noqa: E501 - "whitespace, selectors, tab between dot and name": "flexible whitespace policy", # noqa: E501 - "whitespace, selectors, return between dot and name": "flexible whitespace policy", # noqa: E501 - "whitespace, selectors, space between recursive descent and name": "flexible whitespace policy", # noqa: E501 - "whitespace, selectors, newline between recursive descent and name": "flexible whitespace policy", # noqa: E501 - "whitespace, selectors, tab between recursive descent and name": "flexible whitespace policy", # noqa: E501 - "whitespace, selectors, return between recursive descent and name": "flexible whitespace policy", # noqa: E501 + # "whitespace, selectors, space between dot and name": "flexible whitespace policy", # noqa: E501 + # "whitespace, selectors, newline between dot and name": "flexible whitespace policy", # noqa: E501 + # "whitespace, selectors, tab between dot and name": "flexible whitespace policy", # noqa: E501 + # "whitespace, selectors, return between dot and name": "flexible whitespace policy", # noqa: E501 + # "whitespace, selectors, space between recursive descent and name": "flexible whitespace policy", # noqa: E501 + # "whitespace, selectors, newline between recursive descent and name": "flexible whitespace policy", # noqa: E501 + # "whitespace, selectors, tab between recursive descent and name": "flexible whitespace policy", # noqa: E501 + # "whitespace, selectors, return between recursive descent and name": "flexible whitespace policy", # noqa: E501 } diff --git a/tests/test_find.py b/tests/test_find.py index 6059641..ebab04d 100644 --- a/tests/test_find.py +++ b/tests/test_find.py @@ -21,12 +21,14 @@ class Case: TEST_CASES = [ - Case( - description="property key that looks like an index", - path="$[some][0]", - data={"some": {"0": "thing"}}, - want=["thing"], - ), + # XXX: We're replacing bare property names with the "singular path selector" + # https://github.com/ietf-wg-jsonpath/draft-ietf-jsonpath-base/issues/522 + # Case( + # description="property key that looks like an index", + # path="$[some][0]", + # data={"some": {"0": "thing"}}, + # want=["thing"], + # ), Case( description="slice a mapping", path="$.some[0:4]", @@ -58,14 +60,14 @@ class Case: want=[{"foo": 1}, {"foo": 2}], ), Case( - description="select root value using fake root", - path="^[?@some.thing > 7]", + description="select root value using pseudo root", + path="^[?@.some.thing > 7]", data={"some": {"thing": 42}}, want=[{"some": {"thing": 42}}], ), Case( - description="fake root in a filter query", - path="^[?@some.thing > value(^.*.num)]", + description="pseudo root in a filter query", + path="^[?@.some.thing > value(^.*.num)]", data={"some": {"thing": 42}, "num": 7}, want=[{"some": {"thing": 42}, "num": 7}], ), @@ -129,25 +131,25 @@ class Case: ), Case( description="issue 72, orders", - path="orders", + path="$.orders", data={"orders": [1, 2, 3]}, want=[[1, 2, 3]], ), Case( description="issue 72, 
andy", - path="andy", + path="$.andy", data={"andy": [1, 2, 3]}, want=[[1, 2, 3]], ), Case( description="quoted reserved word, and", - path="['and']", + path="$['and']", data={"and": [1, 2, 3]}, want=[[1, 2, 3]], ), Case( description="quoted reserved word, or", - path="['or']", + path="$['or']", data={"or": [1, 2, 3]}, want=[[1, 2, 3]], ), diff --git a/tests/test_lex.py b/tests/test_lex.py index b3335c8..8241a04 100644 --- a/tests/test_lex.py +++ b/tests/test_lex.py @@ -16,6 +16,7 @@ from jsonpath.token import TOKEN_FALSE from jsonpath.token import TOKEN_FILTER from jsonpath.token import TOKEN_FLOAT +from jsonpath.token import TOKEN_FUNCTION from jsonpath.token import TOKEN_GT from jsonpath.token import TOKEN_IN from jsonpath.token import TOKEN_INT @@ -1609,7 +1610,7 @@ class Case: path="$.some[?(length(@.thing) < 2)]", ), Token( - kind=TOKEN_NAME, + kind=TOKEN_FUNCTION, value="length", index=9, path="$.some[?(length(@.thing) < 2)]", From e41ec29696b9ce1cf56bf319841dceffd9a59cbb Mon Sep 17 00:00:00 2001 From: James Prior Date: Sat, 9 Aug 2025 09:28:01 +0100 Subject: [PATCH 3/5] Fix canonical paths, compound paths and list literals --- jsonpath/env.py | 5 +++ jsonpath/parse.py | 53 +++++++++++++++---------- jsonpath/selectors.py | 14 ++----- jsonpath/stream.py | 3 +- tests/test_env.py | 5 ++- tests/test_errors.py | 7 +++- tests/test_filter_expression_caching.py | 4 +- tests/test_parse.py | 18 --------- 8 files changed, 52 insertions(+), 57 deletions(-) diff --git a/jsonpath/env.py b/jsonpath/env.py index 770c614..7719fff 100644 --- a/jsonpath/env.py +++ b/jsonpath/env.py @@ -185,6 +185,9 @@ def compile(self, path: str) -> Union[JSONPath, CompoundJSONPath]: # noqa: A003 env=self, segments=self.parser.parse(stream), pseudo_root=pseudo_root ) + # TODO: Optionally raise for trailing whitespace + stream.skip_whitespace() + # TODO: better! if stream.current().kind != TOKEN_EOF: _path = CompoundJSONPath(env=self, path=_path) @@ -198,6 +201,7 @@ def compile(self, path: str) -> Union[JSONPath, CompoundJSONPath]: # noqa: A003 if stream.current().kind == TOKEN_UNION: stream.next() + stream.skip_whitespace() pseudo_root = stream.current().kind == TOKEN_PSEUDO_ROOT _path = _path.union( JSONPath( @@ -208,6 +212,7 @@ def compile(self, path: str) -> Union[JSONPath, CompoundJSONPath]: # noqa: A003 ) elif stream.current().kind == TOKEN_INTERSECTION: stream.next() + stream.skip_whitespace() pseudo_root = stream.current().kind == TOKEN_PSEUDO_ROOT _path = _path.intersection( JSONPath( diff --git a/jsonpath/parse.py b/jsonpath/parse.py index c1bb0d4..3e08c2a 100644 --- a/jsonpath/parse.py +++ b/jsonpath/parse.py @@ -241,6 +241,7 @@ def __init__(self, *, env: JSONPathEnvironment) -> None: TOKEN_FUNCTION: self.parse_function_extension, TOKEN_INT: self.parse_integer_literal, TOKEN_KEY: self.parse_current_key, + TOKEN_LBRACKET: self.parse_list_literal, TOKEN_LPAREN: self.parse_grouped_expression, TOKEN_MISSING: self.parse_undefined, TOKEN_NIL: self.parse_nil, @@ -293,9 +294,6 @@ def parse(self, stream: TokenStream) -> Iterator[JSONPathSegment]: if stream.current().kind in {TOKEN_ROOT, TOKEN_PSEUDO_ROOT}: stream.next() - # TODO: Support "bare" paths. Those without a leading dot for shorthand - # selectors - yield from self.parse_path(stream) if stream.current().kind not in (TOKEN_EOF, TOKEN_INTERSECTION, TOKEN_UNION): @@ -312,9 +310,9 @@ def parse_path(self, stream: TokenStream) -> Iterable[JSONPathSegment]: """ while True: stream.skip_whitespace() - if stream.current().kind == TOKEN_DOT: - # Consume the dot. 
- stream.next() + _token = stream.current() + if _token.kind == TOKEN_DOT: + stream.eat(TOKEN_DOT) # Assert that dot is followed by shorthand selector without whitespace. stream.expect(TOKEN_NAME, TOKEN_WILD, TOKEN_KEYS) token = stream.current() @@ -322,8 +320,8 @@ def parse_path(self, stream: TokenStream) -> Iterable[JSONPathSegment]: yield JSONPathChildSegment( env=self.env, token=token, selectors=selectors ) - elif stream.current().kind == TOKEN_DDOT: - token = stream.next() + elif _token.kind == TOKEN_DDOT: + token = stream.eat(TOKEN_DDOT) selectors = self.parse_selectors(stream) if not selectors: raise JSONPathSyntaxError( @@ -333,7 +331,14 @@ def parse_path(self, stream: TokenStream) -> Iterable[JSONPathSegment]: yield JSONPathRecursiveDescentSegment( env=self.env, token=token, selectors=selectors ) - elif stream.current().kind == TOKEN_LBRACKET: + elif _token.kind == TOKEN_LBRACKET: + selectors = self.parse_selectors(stream) + yield JSONPathChildSegment( + env=self.env, token=_token, selectors=selectors + ) + elif _token.kind in {TOKEN_NAME, TOKEN_WILD, TOKEN_KEYS}: + # A non-standard "bare" path. One without a leading identifier (`$`, + # `@`, `^` or `_`). token = stream.current() selectors = self.parse_selectors(stream) yield JSONPathChildSegment( @@ -377,6 +382,7 @@ def parse_selectors(self, stream: TokenStream) -> tuple[JSONPathSelector, ...]: stream.pos -= 1 return tuple(self.parse_bracketed_selection(stream)) + stream.pos -= 1 return () def parse_bracketed_selection(self, stream: TokenStream) -> List[JSONPathSelector]: # noqa: PLR0912 @@ -446,15 +452,14 @@ def parse_bracketed_selection(self, stream: TokenStream) -> List[JSONPathSelecto token=token, ) - # XXX: - # if stream.peek().kind == TOKEN_EOF: - # raise JSONPathSyntaxError( - # "unexpected end of segment", - # token=stream.current(), - # ) - stream.skip_whitespace() + if stream.current().kind == TOKEN_EOF: + raise JSONPathSyntaxError( + "unexpected end of segment", + token=stream.current(), + ) + if stream.current().kind != TOKEN_RBRACKET: stream.eat(TOKEN_COMMA) stream.skip_whitespace() @@ -665,7 +670,12 @@ def parse_list_literal(self, stream: TokenStream) -> FilterExpression: stream.eat(TOKEN_LBRACKET) list_items: List[FilterExpression] = [] - while stream.current().kind != TOKEN_RBRACKET: + while True: + stream.skip_whitespace() + + if stream.current().kind == TOKEN_RBRACKET: + break + try: list_items.append(self.list_item_map[stream.current().kind](stream)) except KeyError as err: @@ -674,11 +684,10 @@ def parse_list_literal(self, stream: TokenStream) -> FilterExpression: token=stream.current(), ) from err - if stream.peek().kind != TOKEN_RBRACKET: - stream.expect_peek(TOKEN_COMMA) - stream.next() - - stream.next() + stream.skip_whitespace() + if stream.current().kind != TOKEN_RBRACKET: + stream.eat(TOKEN_COMMA) + stream.skip_whitespace() stream.eat(TOKEN_RBRACKET) return ListLiteral(list_items) diff --git a/jsonpath/selectors.py b/jsonpath/selectors.py index d13071b..362bc9f 100644 --- a/jsonpath/selectors.py +++ b/jsonpath/selectors.py @@ -71,11 +71,7 @@ def __init__( self.shorthand = shorthand def __str__(self) -> str: - return ( - f"[{canonical_string(self.name)}]" - if self.shorthand - else f"{canonical_string(self.name)}" - ) + return canonical_string(self.name) def __eq__(self, __value: object) -> bool: return ( @@ -203,11 +199,7 @@ def __init__( self.shorthand = shorthand def __str__(self) -> str: - return ( - f"[{self.env.keys_selector_token}]" - if self.shorthand - else self.env.keys_selector_token - ) + 
return self.env.keys_selector_token def __eq__(self, __value: object) -> bool: return isinstance(__value, KeysSelector) and self.token == __value.token @@ -315,7 +307,7 @@ def __init__( self.shorthand = shorthand def __str__(self) -> str: - return "[*]" if self.shorthand else "*" + return "*" def __eq__(self, __value: object) -> bool: return isinstance(__value, WildSelector) and self.token == __value.token diff --git a/jsonpath/stream.py b/jsonpath/stream.py index 775c95f..93ddf93 100644 --- a/jsonpath/stream.py +++ b/jsonpath/stream.py @@ -16,7 +16,8 @@ class TokenStream: def __init__(self, token_iter: Iterable[Token]): self.tokens = list(token_iter) self.pos = 0 - self.eof = Token(TOKEN_EOF, "", -1, self.tokens[0].path) + path = self.tokens[0].path if self.tokens else "" + self.eof = Token(TOKEN_EOF, "", -1, path) def __str__(self) -> str: # pragma: no cover return f"current: {self.current}\nnext: {self.peek}" diff --git a/tests/test_env.py b/tests/test_env.py index 5908baa..51b91d8 100644 --- a/tests/test_env.py +++ b/tests/test_env.py @@ -1,4 +1,5 @@ """JSONPathEnvironment API test cases.""" + import asyncio from typing import List @@ -178,7 +179,7 @@ def test_custom_fake_root_identifier_token() -> None: """Test that we can change the non-standard fake root identifier.""" class MyJSONPathEnvironment(JSONPathEnvironment): - fake_root_token = "$$" + pseudo_root_token = "$$" env = MyJSONPathEnvironment() data = {"foo": {"a": 1, "b": 2, "c": 3}} @@ -191,7 +192,7 @@ def test_disable_fake_root_identifier() -> None: """Test that we can disable the non-standard fake root identifier.""" class MyJSONPathEnvironment(JSONPathEnvironment): - fake_root_token = "" + pseudo_root_token = "" env = MyJSONPathEnvironment() with pytest.raises(JSONPathSyntaxError): diff --git a/tests/test_errors.py b/tests/test_errors.py index 1d1f46a..8ff5913 100644 --- a/tests/test_errors.py +++ b/tests/test_errors.py @@ -15,7 +15,7 @@ def env() -> JSONPathEnvironment: def test_unclosed_selection_list(env: JSONPathEnvironment) -> None: - with pytest.raises(JSONPathSyntaxError, match=r"unexpected end of selector list"): + with pytest.raises(JSONPathSyntaxError, match=r"unexpected end of segment"): env.compile("$[1,2") @@ -39,6 +39,11 @@ def test_unbalanced_parens(env: JSONPathEnvironment) -> None: env.compile("$[?((@.foo)]") +def test_root_dot(env: JSONPathEnvironment) -> None: + with pytest.raises(JSONPathSyntaxError): + env.compile("$.") + + class FilterLiteralTestCase(NamedTuple): description: str query: str diff --git a/tests/test_filter_expression_caching.py b/tests/test_filter_expression_caching.py index 31534c2..0ba64ea 100644 --- a/tests/test_filter_expression_caching.py +++ b/tests/test_filter_expression_caching.py @@ -50,7 +50,7 @@ def test_root_path_cache() -> None: env = JSONPathEnvironment(filter_caching=True) data = {"some": [{"a": 1}, {"a": 99}, {"a": 2}, {"a": 3}]} with mock.patch( - "jsonpath.filter.RootPath.evaluate", return_value=10 + "jsonpath.filter.RootFilterQuery.evaluate", return_value=10 ) as mock_root_path: path = env.compile("$.some[?@.a < $.thing].a") rv = path.findall(data) @@ -63,7 +63,7 @@ def test_root_path_no_cache() -> None: env = JSONPathEnvironment(filter_caching=False) data = {"some": [{"a": 1}, {"a": 99}, {"a": 2}, {"a": 3}]} with mock.patch( - "jsonpath.filter.RootPath.evaluate", return_value=10 + "jsonpath.filter.RootFilterQuery.evaluate", return_value=10 ) as mock_root_path: path = env.compile("$.some[?@.a < $.thing].a") rv = path.findall(data) diff --git a/tests/test_parse.py 
b/tests/test_parse.py index 96949a1..8415b74 100644 --- a/tests/test_parse.py +++ b/tests/test_parse.py @@ -16,10 +16,8 @@ class Case: TEST_CASES = [ Case(description="empty", path="", want="$"), Case(description="just root", path="$", want="$"), - Case(description="root dot", path="$.", want="$"), Case(description="implicit root dot property", path=".thing", want="$['thing']"), Case(description="root dot property", path="$.thing", want="$['thing']"), - Case(description="root bracket property", path="$[thing]", want="$['thing']"), Case( description="root double quoted property", path='$["thing"]', want="$['thing']" ), @@ -31,40 +29,24 @@ class Case: path="$['anything{!%']", want="$['anything{!%']", ), - Case(description="root dot bracket property", path="$.[thing]", want="$['thing']"), Case(description="root bracket index", path="$[1]", want="$[1]"), Case(description="root slice", path="$[1:-1]", want="$[1:-1:1]"), - Case(description="root dot slice", path="$.[1:-1]", want="$[1:-1:1]"), Case(description="root slice with step", path="$[1:-1:2]", want="$[1:-1:2]"), Case(description="root slice with empty start", path="$[:-1]", want="$[:-1:1]"), Case(description="root slice with empty stop", path="$[1:]", want="$[1::1]"), Case(description="root dot wild", path="$.*", want="$[*]"), Case(description="root bracket wild", path="$[*]", want="$[*]"), - Case(description="root dot bracket wild", path="$.[*]", want="$[*]"), - Case(description="root descend", path="$..", want="$.."), - Case(description="root dot descend", path="$...", want="$.."), Case(description="root selector list", path="$[1,2]", want="$[1, 2]"), - Case(description="root dot selector list", path="$.[1,2]", want="$[1, 2]"), Case( description="root selector list with slice", path="$[1,5:-1:1]", want="$[1, 5:-1:1]", ), - Case( - description="root selector list with properties", - path="$[some,thing]", - want="$['some', 'thing']", - ), Case( description="root selector list with quoted properties", path="$[\"some\",'thing']", want="$['some', 'thing']", ), - Case( - description="implicit root selector list with mixed selectors", - path='$["some",thing, 1, 2:-2:2]', - want="$['some', 'thing', 1, 2:-2:2]", - ), Case( description="filter self dot property", path="[?(@.thing)]", From dd6bfc8e73802be2c20571d2ad1ed137484e731a Mon Sep 17 00:00:00 2001 From: James Prior Date: Sun, 10 Aug 2025 07:48:29 +0100 Subject: [PATCH 4/5] Remove shorthand arguments to Property, Wild and Keys selectors --- jsonpath/parse.py | 16 ++-------------- jsonpath/selectors.py | 27 +++++---------------------- 2 files changed, 7 insertions(+), 36 deletions(-) diff --git a/jsonpath/parse.py b/jsonpath/parse.py index 3e08c2a..499db46 100644 --- a/jsonpath/parse.py +++ b/jsonpath/parse.py @@ -356,7 +356,6 @@ def parse_selectors(self, stream: TokenStream) -> tuple[JSONPathSelector, ...]: env=self.env, token=token, name=token.value, - shorthand=True, ), ) @@ -365,7 +364,6 @@ def parse_selectors(self, stream: TokenStream) -> tuple[JSONPathSelector, ...]: WildSelector( env=self.env, token=token, - shorthand=True, ), ) @@ -374,7 +372,6 @@ def parse_selectors(self, stream: TokenStream) -> tuple[JSONPathSelector, ...]: KeysSelector( env=self.env, token=token, - shorthand=True, ), ) @@ -422,25 +419,16 @@ def parse_bracketed_selection(self, stream: TokenStream) -> List[JSONPathSelecto env=self.env, token=token, name=self._decode_string_literal(token), - shorthand=False, ), ) stream.next() elif token.kind == TOKEN_COLON: selectors.append(self.parse_slice(stream)) elif token.kind 
From dd6bfc8e73802be2c20571d2ad1ed137484e731a Mon Sep 17 00:00:00 2001
From: James Prior
Date: Sun, 10 Aug 2025 07:48:29 +0100
Subject: [PATCH 4/5] Remove shorthand arguments to Property, Wild and Keys selectors

---
 jsonpath/parse.py     | 16 ++--------------
 jsonpath/selectors.py | 27 +++++----------------------
 2 files changed, 7 insertions(+), 36 deletions(-)

diff --git a/jsonpath/parse.py b/jsonpath/parse.py
index 3e08c2a..499db46 100644
--- a/jsonpath/parse.py
+++ b/jsonpath/parse.py
@@ -356,7 +356,6 @@ def parse_selectors(self, stream: TokenStream) -> tuple[JSONPathSelector, ...]:
                 env=self.env,
                 token=token,
                 name=token.value,
-                shorthand=True,
             ),
         )

@@ -365,7 +364,6 @@ def parse_selectors(self, stream: TokenStream) -> tuple[JSONPathSelector, ...]:
             WildSelector(
                 env=self.env,
                 token=token,
-                shorthand=True,
             ),
         )

@@ -374,7 +372,6 @@ def parse_selectors(self, stream: TokenStream) -> tuple[JSONPathSelector, ...]:
             KeysSelector(
                 env=self.env,
                 token=token,
-                shorthand=True,
             ),
         )

@@ -422,25 +419,16 @@ def parse_bracketed_selection(self, stream: TokenStream) -> List[JSONPathSelecto
                         env=self.env,
                         token=token,
                         name=self._decode_string_literal(token),
-                        shorthand=False,
                     ),
                 )
                 stream.next()
             elif token.kind == TOKEN_COLON:
                 selectors.append(self.parse_slice(stream))
             elif token.kind == TOKEN_WILD:
-                selectors.append(
-                    WildSelector(
-                        env=self.env,
-                        token=token,
-                        shorthand=False,
-                    )
-                )
+                selectors.append(WildSelector(env=self.env, token=token))
                 stream.next()
             elif token.kind == TOKEN_KEYS:
-                selectors.append(
-                    KeysSelector(env=self.env, token=token, shorthand=False)
-                )
+                selectors.append(KeysSelector(env=self.env, token=token))
                 stream.next()
             elif token.kind == TOKEN_FILTER:
                 selectors.append(self.parse_filter_selector(stream))
diff --git a/jsonpath/selectors.py b/jsonpath/selectors.py
index 362bc9f..e3db8c3 100644
--- a/jsonpath/selectors.py
+++ b/jsonpath/selectors.py
@@ -56,19 +56,11 @@ def resolve_async(self, node: JSONPathMatch) -> AsyncIterable[JSONPathMatch]:
 class PropertySelector(JSONPathSelector):
     """A shorthand or bracketed property selector."""

-    __slots__ = ("name", "shorthand")
+    __slots__ = ("name",)

-    def __init__(
-        self,
-        *,
-        env: JSONPathEnvironment,
-        token: Token,
-        name: str,
-        shorthand: bool,
-    ) -> None:
+    def __init__(self, *, env: JSONPathEnvironment, token: Token, name: str) -> None:
         super().__init__(env=env, token=token)
         self.name = name
-        self.shorthand = shorthand

     def __str__(self) -> str:
         return canonical_string(self.name)
@@ -190,13 +182,10 @@ class KeysSelector(JSONPathSelector):
     NOTE: This is a non-standard selector.
     """

-    __slots__ = ("shorthand",)
+    __slots__ = ()

-    def __init__(
-        self, *, env: JSONPathEnvironment, token: Token, shorthand: bool
-    ) -> None:
+    def __init__(self, *, env: JSONPathEnvironment, token: Token) -> None:
         super().__init__(env=env, token=token)
-        self.shorthand = shorthand

     def __str__(self) -> str:
         return self.env.keys_selector_token
@@ -298,13 +287,7 @@ async def resolve_async(self, node: JSONPathMatch) -> AsyncIterable[JSONPathMatc
 class WildSelector(JSONPathSelector):
     """Select all items from a sequence/array or values from a mapping/object."""

-    __slots__ = ("shorthand",)
-
-    def __init__(
-        self, *, env: JSONPathEnvironment, token: Token, shorthand: bool
-    ) -> None:
-        super().__init__(env=env, token=token)
-        self.shorthand = shorthand
+    __slots__ = ()

     def __str__(self) -> str:
         return "*"
From 7cb83c346e1fbce3a2287566d57ccbc7fdff9109 Mon Sep 17 00:00:00 2001
From: James Prior
Date: Mon, 11 Aug 2025 21:08:34 +0100
Subject: [PATCH 5/5] Add "key" and "keys filter" JSONPath selectors

---
 jsonpath/env.py       |   3 +
 jsonpath/lex.py       |  23 ++++++-
 jsonpath/parse.py     |  41 +++++++++---
 jsonpath/selectors.py | 143 +++++++++++++++++++++++++++++++++++++++++-
 jsonpath/token.py     |   2 +
 tests/test_find.py    |   6 ++
 6 files changed, 206 insertions(+), 12 deletions(-)

diff --git a/jsonpath/env.py b/jsonpath/env.py
index 7719fff..3c4d04d 100644
--- a/jsonpath/env.py
+++ b/jsonpath/env.py
@@ -102,6 +102,8 @@ class attributes `root_token`, `self_token` and `filter_context_token`.
             filtering a mapping or sequence. Defaults to `"#"`.
         keys_selector_token (str): The pattern used as the "keys" selector. Defaults
             to `"~"`.
+        keys_filter_token (str): The pattern used as the "keys filter" selector.
+            Defaults to `"~?"`.
         lexer_class: The lexer to use when tokenizing path strings.
         max_int_index (int): The maximum integer allowed when selecting array items by
             index. Defaults to `(2**53) - 1`.
@@ -122,6 +124,7 @@ class attributes `root_token`, `self_token` and `filter_context_token`.
     intersection_token = "&"
     key_token = "#"
     keys_selector_token = "~"
+    keys_filter_token = "~?"
     root_token = "$"
     self_token = "@"
     union_token = "|"
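NOTE (review annotation, not part of the patch): keys_filter_token follows the same
subclass-and-override pattern as the other class attributes, mirroring the
pseudo_root_token tests in tests/test_env.py. A hypothetical customisation:

    from jsonpath import JSONPathEnvironment

    class MyJSONPathEnvironment(JSONPathEnvironment):
        keys_filter_token = "~~?"  # replaces the default "~?"

    env = MyJSONPathEnvironment()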
root_token = "$" self_token = "@" union_token = "|" diff --git a/jsonpath/lex.py b/jsonpath/lex.py index 6583589..3c3dfb0 100644 --- a/jsonpath/lex.py +++ b/jsonpath/lex.py @@ -15,6 +15,7 @@ from .token import TOKEN_CONTAINS from .token import TOKEN_DDOT from .token import TOKEN_DOT +from .token import TOKEN_DOT_KEY_PROPERTY from .token import TOKEN_DOT_PROPERTY from .token import TOKEN_DOUBLE_QUOTE_STRING from .token import TOKEN_EQ @@ -30,6 +31,7 @@ from .token import TOKEN_INT from .token import TOKEN_INTERSECTION from .token import TOKEN_KEY +from .token import TOKEN_KEY_NAME from .token import TOKEN_KEYS from .token import TOKEN_KEYS_FILTER from .token import TOKEN_LBRACKET @@ -103,6 +105,13 @@ def __init__(self, *, env: JSONPathEnvironment) -> None: # .thing self.dot_property_pattern = rf"(?P\.)(?P{self.key_pattern})" + # .~thing + self.dot_key_pattern = ( + r"(?P\.)" + rf"(?P{re.escape(env.keys_selector_token)})" + rf"(?P{self.key_pattern})" + ) + # /pattern/ or /pattern/flags self.re_pattern = r"/(?P.+?)/(?P[aims]*)" @@ -122,12 +131,14 @@ def compile_rules(self) -> Pattern[str]: (TOKEN_INTERSECTION, self.env.intersection_token), (TOKEN_FILTER_CONTEXT, self.env.filter_context_token), (TOKEN_KEYS, self.env.keys_selector_token), + (TOKEN_KEYS_FILTER, self.env.keys_filter_token), ] rules = [ (TOKEN_DOUBLE_QUOTE_STRING, self.double_quote_pattern), (TOKEN_SINGLE_QUOTE_STRING, self.single_quote_pattern), (TOKEN_RE_PATTERN, self.re_pattern), + (TOKEN_DOT_KEY_PROPERTY, self.dot_key_pattern), (TOKEN_DOT_PROPERTY, self.dot_property_pattern), (TOKEN_FLOAT, r"-?\d+\.\d*(?:[eE][+-]?\d+)?"), (TOKEN_INT, r"-?\d+(?P[eE][+\-]?\d+)?\b"), @@ -144,7 +155,6 @@ def compile_rules(self) -> Pattern[str]: ], (TOKEN_WILD, r"\*"), (TOKEN_FILTER, r"\?"), - (TOKEN_KEYS_FILTER, r"~\?"), # TODO: get from env (TOKEN_IN, r"in\b"), (TOKEN_TRUE, r"[Tt]rue\b"), (TOKEN_FALSE, r"[Ff]alse\b"), @@ -199,6 +209,17 @@ def tokenize(self, path: str) -> Iterator[Token]: # noqa PLR0912 value=match.group("G_PROP"), index=match.start("G_PROP"), ) + elif kind == TOKEN_DOT_KEY_PROPERTY: + yield _token( + kind=TOKEN_DOT, + value=match.group("G_DOT_KEY"), + index=match.start("G_DOT_KEY"), + ) + yield _token( + kind=TOKEN_KEY_NAME, + value=match.group("G_PROP_KEY"), + index=match.start("G_PROP_KEY"), + ) elif kind == TOKEN_DOUBLE_QUOTE_STRING: yield _token( kind=TOKEN_DOUBLE_QUOTE_STRING, diff --git a/jsonpath/parse.py b/jsonpath/parse.py index 499db46..a0c6d97 100644 --- a/jsonpath/parse.py +++ b/jsonpath/parse.py @@ -11,6 +11,7 @@ from typing import Iterator from typing import List from typing import Optional +from typing import Union from jsonpath.function_extensions.filter_function import ExpressionType from jsonpath.function_extensions.filter_function import FilterFunction @@ -45,6 +46,8 @@ from .selectors import Filter from .selectors import IndexSelector from .selectors import JSONPathSelector +from .selectors import KeySelector +from .selectors import KeysFilter from .selectors import KeysSelector from .selectors import PropertySelector from .selectors import SliceSelector @@ -69,7 +72,9 @@ from .token import TOKEN_INT from .token import TOKEN_INTERSECTION from .token import TOKEN_KEY +from .token import TOKEN_KEY_NAME from .token import TOKEN_KEYS +from .token import TOKEN_KEYS_FILTER from .token import TOKEN_LBRACKET from .token import TOKEN_LE from .token import TOKEN_LG @@ -314,15 +319,15 @@ def parse_path(self, stream: TokenStream) -> Iterable[JSONPathSegment]: if _token.kind == TOKEN_DOT: stream.eat(TOKEN_DOT) # Assert 
diff --git a/jsonpath/parse.py b/jsonpath/parse.py
index 499db46..a0c6d97 100644
--- a/jsonpath/parse.py
+++ b/jsonpath/parse.py
@@ -11,6 +11,7 @@
 from typing import Iterator
 from typing import List
 from typing import Optional
+from typing import Union

 from jsonpath.function_extensions.filter_function import ExpressionType
 from jsonpath.function_extensions.filter_function import FilterFunction
@@ -45,6 +46,8 @@
 from .selectors import Filter
 from .selectors import IndexSelector
 from .selectors import JSONPathSelector
+from .selectors import KeySelector
+from .selectors import KeysFilter
 from .selectors import KeysSelector
 from .selectors import PropertySelector
 from .selectors import SliceSelector
@@ -69,7 +72,9 @@
 from .token import TOKEN_INT
 from .token import TOKEN_INTERSECTION
 from .token import TOKEN_KEY
+from .token import TOKEN_KEY_NAME
 from .token import TOKEN_KEYS
+from .token import TOKEN_KEYS_FILTER
 from .token import TOKEN_LBRACKET
 from .token import TOKEN_LE
 from .token import TOKEN_LG
@@ -314,15 +319,15 @@ def parse_path(self, stream: TokenStream) -> Iterable[JSONPathSegment]:
             if _token.kind == TOKEN_DOT:
                 stream.eat(TOKEN_DOT)
                 # Assert that dot is followed by shorthand selector without whitespace.
-                stream.expect(TOKEN_NAME, TOKEN_WILD, TOKEN_KEYS)
+                stream.expect(TOKEN_NAME, TOKEN_WILD, TOKEN_KEYS, TOKEN_KEY_NAME)
                 token = stream.current()
-                selectors = self.parse_selectors(stream)
+                selectors = self.parse_selector(stream)
                 yield JSONPathChildSegment(
                     env=self.env, token=token, selectors=selectors
                 )
             elif _token.kind == TOKEN_DDOT:
                 token = stream.eat(TOKEN_DDOT)
-                selectors = self.parse_selectors(stream)
+                selectors = self.parse_selector(stream)
                 if not selectors:
                     raise JSONPathSyntaxError(
                         "missing selector for recursive descent segment",
@@ -332,22 +337,22 @@ def parse_path(self, stream: TokenStream) -> Iterable[JSONPathSegment]:
                     env=self.env, token=token, selectors=selectors
                 )
             elif _token.kind == TOKEN_LBRACKET:
-                selectors = self.parse_selectors(stream)
+                selectors = self.parse_selector(stream)
                 yield JSONPathChildSegment(
                     env=self.env, token=_token, selectors=selectors
                 )
-            elif _token.kind in {TOKEN_NAME, TOKEN_WILD, TOKEN_KEYS}:
+            elif _token.kind in {TOKEN_NAME, TOKEN_WILD, TOKEN_KEYS, TOKEN_KEY_NAME}:
                 # A non-standard "bare" path. One without a leading identifier (`$`,
                 # `@`, `^` or `_`).
                 token = stream.current()
-                selectors = self.parse_selectors(stream)
+                selectors = self.parse_selector(stream)
                 yield JSONPathChildSegment(
                     env=self.env, token=token, selectors=selectors
                 )
             else:
                 break

-    def parse_selectors(self, stream: TokenStream) -> tuple[JSONPathSelector, ...]:
+    def parse_selector(self, stream: TokenStream) -> tuple[JSONPathSelector, ...]:
         token = stream.next()

         if token.kind == TOKEN_NAME:
@@ -359,6 +364,15 @@ def parse_selectors(self, stream: TokenStream) -> tuple[JSONPathSelector, ...]:
                 ),
             )

+        if token.kind == TOKEN_KEY_NAME:
+            return (
+                KeySelector(
+                    env=self.env,
+                    token=token,
+                    key=token.value,
+                ),
+            )
+
         if token.kind == TOKEN_WILD:
             return (
                 WildSelector(
@@ -432,6 +446,8 @@ def parse_bracketed_selection(self, stream: TokenStream) -> List[JSONPathSelecto
                 stream.next()
             elif token.kind == TOKEN_FILTER:
                 selectors.append(self.parse_filter_selector(stream))
+            elif token.kind == TOKEN_KEYS_FILTER:
+                selectors.append(self.parse_filter_selector(stream, keys=True))
             elif token.kind == TOKEN_EOF:
                 raise JSONPathSyntaxError("unexpected end of query", token=token)
             else:
@@ -514,8 +530,10 @@ def _maybe_index(token: Token) -> bool:
             step=step,
         )

-    def parse_filter_selector(self, stream: TokenStream) -> Filter:
-        token = stream.eat(TOKEN_FILTER)
+    def parse_filter_selector(
+        self, stream: TokenStream, *, keys: bool = False
+    ) -> Union[Filter, KeysFilter]:
+        token = stream.next()
         expr = self.parse_filter_expression(stream)

         if self.env.well_typed and isinstance(expr, FunctionExtension):
@@ -536,6 +554,11 @@ def parse_filter_selector(
                 token=token,
             )

+        if keys:
+            return KeysFilter(
+                env=self.env, token=token, expression=BooleanExpression(expr)
+            )
+
         return Filter(env=self.env, token=token, expression=BooleanExpression(expr))

     def parse_boolean(self, stream: TokenStream) -> FilterExpression:
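NOTE (review annotation, not part of the patch): end-to-end, the key selector wired
up above selects the name itself, and only when it exists — see KeySelector.resolve
in the next file. A sketch, assuming the v1 top-level findall API is unchanged:

    import jsonpath

    data = {"a": {"b": 1}, "c": {"d": 2}}
    assert jsonpath.findall("$.a.~b", data) == ["b"]
    assert jsonpath.findall("$.a.~x", data) == []  # missing name, no match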
+ """ + + __slots__ = ("key",) + + def __init__(self, *, env: JSONPathEnvironment, token: Token, key: str) -> None: + super().__init__(env=env, token=token) + self.key = key + + def __str__(self) -> str: + return f"{self.env.keys_selector_token}{canonical_string(self.key)}" + + def __eq__(self, __value: object) -> bool: + return ( + isinstance(__value, KeySelector) + and self.token == __value.token + and self.key == __value.key + ) + + def __hash__(self) -> int: + return hash((self.token, self.key)) + + def resolve(self, node: JSONPathMatch) -> Iterable[JSONPathMatch]: + if isinstance(node.obj, Mapping) and self.key in node.obj: + match = node.__class__( + filter_context=node.filter_context(), + obj=self.key, + parent=node, + parts=node.parts + (f"{self.env.keys_selector_token}{self.key}",), + path=f"{node.path}[{self}]", + root=node.root, + ) + node.add_child(match) + yield match + + async def resolve_async(self, node: JSONPathMatch) -> AsyncIterable[JSONPathMatch]: + for _node in self.resolve(node): + yield _node + + class KeysSelector(JSONPathSelector): """Select mapping/object keys/properties. NOTE: This is a non-standard selector. + + See https://jg-rp.github.io/json-p3/guides/jsonpath-extra#keys-selector """ __slots__ = () @@ -198,13 +245,13 @@ def __hash__(self) -> int: def _keys(self, node: JSONPathMatch) -> Iterable[JSONPathMatch]: if isinstance(node.obj, Mapping): - for i, key in enumerate(node.obj.keys()): + for key in node.obj: match = node.__class__( filter_context=node.filter_context(), obj=key, parent=node, parts=node.parts + (f"{self.env.keys_selector_token}{key}",), - path=f"{node.path}[{self.env.keys_selector_token}][{i}]", + path=f"{node.path}[{self.env.keys_selector_token}{canonical_string(key)}]", root=node.root, ) node.add_child(match) @@ -449,6 +496,98 @@ async def resolve_async(self, node: JSONPathMatch) -> AsyncIterable[JSONPathMatc yield match +class KeysFilter(JSONPathSelector): + """Selects names from an object’s name/value members. + + NOTE: This is a non-standard selector. 
@@ -449,6 +496,98 @@ async def resolve_async(self, node: JSONPathMatch) -> AsyncIterable[JSONPathMatc
             yield match


+class KeysFilter(JSONPathSelector):
+    """Select names from an object's name/value members.
+
+    NOTE: This is a non-standard selector.
+
+    See https://jg-rp.github.io/json-p3/guides/jsonpath-extra#keys-filter-selector
+    """
+
+    __slots__ = ("expression",)
+
+    def __init__(
+        self,
+        *,
+        env: JSONPathEnvironment,
+        token: Token,
+        expression: BooleanExpression,
+    ) -> None:
+        super().__init__(env=env, token=token)
+        self.expression = expression
+
+    def __str__(self) -> str:
+        return f"{self.env.keys_filter_token}{self.expression}"
+
+    def __eq__(self, __value: object) -> bool:
+        return (
+            isinstance(__value, KeysFilter)
+            and self.expression == __value.expression
+            and self.token == __value.token
+        )
+
+    def __hash__(self) -> int:
+        return hash(("~", str(self.expression), self.token))
+
+    def resolve(self, node: JSONPathMatch) -> Iterable[JSONPathMatch]:
+        if isinstance(node.value, Mapping):
+            for key, val in node.value.items():
+                context = FilterContext(
+                    env=self.env,
+                    current=val,
+                    root=node.root,
+                    extra_context=node.filter_context(),
+                    current_key=key,
+                )
+
+                try:
+                    if self.expression.evaluate(context):
+                        match = node.__class__(
+                            filter_context=node.filter_context(),
+                            obj=key,
+                            parent=node,
+                            parts=node.parts
+                            + (f"{self.env.keys_selector_token}{key}",),
+                            path=f"{node.path}[{self.env.keys_selector_token}{canonical_string(key)}]",
+                            root=node.root,
+                        )
+                        node.add_child(match)
+                        yield match
+                except JSONPathTypeError as err:
+                    if not err.token:
+                        err.token = self.token
+                    raise
+
+    async def resolve_async(self, node: JSONPathMatch) -> AsyncIterable[JSONPathMatch]:
+        if isinstance(node.value, Mapping):
+            for key, val in node.value.items():
+                context = FilterContext(
+                    env=self.env,
+                    current=val,
+                    root=node.root,
+                    extra_context=node.filter_context(),
+                    current_key=key,
+                )
+
+                try:
+                    if await self.expression.evaluate_async(context):
+                        match = node.__class__(
+                            filter_context=node.filter_context(),
+                            obj=key,
+                            parent=node,
+                            parts=node.parts
+                            + (f"{self.env.keys_selector_token}{key}",),
+                            path=f"{node.path}[{self.env.keys_selector_token}{canonical_string(key)}]",
+                            root=node.root,
+                        )
+                        node.add_child(match)
+                        yield match
+                except JSONPathTypeError as err:
+                    if not err.token:
+                        err.token = self.token
+                    raise
+
+
 class FilterContext:
     """Contextual information and data for evaluating a filter expression."""

diff --git a/jsonpath/token.py b/jsonpath/token.py
index 6650b9c..e9d39e0 100644
--- a/jsonpath/token.py
+++ b/jsonpath/token.py
@@ -24,6 +24,8 @@
 TOKEN_WILD = sys.intern("TOKEN_WILD")
 TOKEN_NAME = sys.intern("TOKEN_NAME")
 TOKEN_DOT_PROPERTY = sys.intern("TOKEN_DOT_PROPERTY")
+TOKEN_DOT_KEY_PROPERTY = sys.intern("TOKEN_DOT_KEY_PROPERTY")
+TOKEN_KEY_NAME = sys.intern("TOKEN_KEY_NAME")

 # Filter expression tokens
 TOKEN_AND = sys.intern("TOKEN_AND")
diff --git a/tests/test_find.py b/tests/test_find.py
index ebab04d..7c60684 100644
--- a/tests/test_find.py
+++ b/tests/test_find.py
@@ -59,6 +59,12 @@ class Case:
         },
         want=[{"foo": 1}, {"foo": 2}],
     ),
+    Case(
+        description="filter current key, array data",
+        path="$.abc[?(# >= 1)]",
+        data={"abc": [1, 2, 3], "def": [4, 5], "abx": [6], "aby": []},
+        want=[2, 3],
+    ),
     Case(
         description="select root value using pseudo root",
         path="^[?@.some.thing > 7]",