From d3e3aa0336807436397bc7fd28a07aa2ace9d578 Mon Sep 17 00:00:00 2001 From: colinthebomb1 Date: Thu, 23 Oct 2025 20:24:06 -0700 Subject: [PATCH 01/10] imrpove ast --- core/ast/__init__.py | 1 + core/ast/node.py | 20 ++++++++++++++------ core/ast/node_type.py | 1 + 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/core/ast/__init__.py b/core/ast/__init__.py index 9d54b22..b9c822b 100644 --- a/core/ast/__init__.py +++ b/core/ast/__init__.py @@ -43,6 +43,7 @@ 'GroupByNode', 'HavingNode', 'OrderByNode', + 'OrderByItemNode', 'LimitNode', 'OffsetNode', 'QueryNode' diff --git a/core/ast/node.py b/core/ast/node.py index d27584b..bc2627a 100644 --- a/core/ast/node.py +++ b/core/ast/node.py @@ -76,11 +76,12 @@ def __init__(self, _left: Node, _name: str, _right: Optional[Node] = None, **kwa class FunctionNode(Node): """Function call node""" - def __init__(self, _name: str, _args: Optional[List[Node]] = None, **kwargs): + def __init__(self, _name: str, _args: Optional[List[Node]] = None, _alias: Optional[str] = None, **kwargs): if _args is None: _args = [] super().__init__(NodeType.FUNCTION, children=_args, **kwargs) self.name = _name + self.alias = _alias # ============================================================================ @@ -89,20 +90,20 @@ def __init__(self, _name: str, _args: Optional[List[Node]] = None, **kwargs): class SelectNode(Node): """SELECT clause node""" - def __init__(self, _items: Set['Node'], **kwargs): + def __init__(self, _items: List['Node'], **kwargs): super().__init__(NodeType.SELECT, children=_items, **kwargs) # TODO - confine the valid NodeTypes as children of FromNode class FromNode(Node): """FROM clause node""" - def __init__(self, _sources: Set['Node'], **kwargs): + def __init__(self, _sources: List['Node'], **kwargs): super().__init__(NodeType.FROM, children=_sources, **kwargs) class WhereNode(Node): """WHERE clause node""" - def __init__(self, _predicates: Set['Node'], **kwargs): + def __init__(self, _predicates: List['Node'], **kwargs): super().__init__(NodeType.WHERE, children=_predicates, **kwargs) @@ -114,14 +115,21 @@ def __init__(self, _items: List['Node'], **kwargs): class HavingNode(Node): """HAVING clause node""" - def __init__(self, _predicates: Set['Node'], **kwargs): + def __init__(self, _predicates: List['Node'], **kwargs): super().__init__(NodeType.HAVING, children=_predicates, **kwargs) +class OrderByItemNode(Node): + """Single ORDER BY item with sort direction""" + def __init__(self, _column: Node, _sort: str = 'asc', **kwargs): + super().__init__(NodeType.ORDER_BY_ITEM, children=[_column], **kwargs) + self.sort = _sort + class OrderByNode(Node): """ORDER BY clause node""" - def __init__(self, _items: List['Node'], **kwargs): + def __init__(self, _items: List['Node'], _sort: Optional[str] = None, **kwargs): super().__init__(NodeType.ORDER_BY, children=_items, **kwargs) + self.sort = _sort class LimitNode(Node): diff --git a/core/ast/node_type.py b/core/ast/node_type.py index 2bae729..3699a8e 100644 --- a/core/ast/node_type.py +++ b/core/ast/node_type.py @@ -27,6 +27,7 @@ class NodeType(Enum): GROUP_BY = "group_by" HAVING = "having" ORDER_BY = "order_by" + ORDER_BY_ITEM = "order_by_item" LIMIT = "limit" OFFSET = "offset" QUERY = "query" From fe119500f3b4a9f63e17490e4075bba3d1539049 Mon Sep 17 00:00:00 2001 From: colinthebomb1 Date: Thu, 23 Oct 2025 22:29:14 -0700 Subject: [PATCH 02/10] fix documentation --- core/ast/node.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/ast/node.py b/core/ast/node.py index bc2627a..af49643 100644 --- a/core/ast/node.py +++ b/core/ast/node.py @@ -120,7 +120,7 @@ def __init__(self, _predicates: List['Node'], **kwargs): class OrderByItemNode(Node): - """Single ORDER BY item with sort direction""" + """Single ORDER BY item""" def __init__(self, _column: Node, _sort: str = 'asc', **kwargs): super().__init__(NodeType.ORDER_BY_ITEM, children=[_column], **kwargs) self.sort = _sort From a01e507bbfa5b348ce7cc4511b40d9e52b63e38c Mon Sep 17 00:00:00 2001 From: Yihong Yu <116992300+HazelYuAhiru@users.noreply.github.com> Date: Mon, 10 Nov 2025 11:14:09 -0500 Subject: [PATCH 03/10] add node changes from parser --- core/ast/node.py | 115 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 114 insertions(+), 1 deletion(-) diff --git a/core/ast/node.py b/core/ast/node.py index af49643..717b9ab 100644 --- a/core/ast/node.py +++ b/core/ast/node.py @@ -13,6 +13,31 @@ class Node(ABC): def __init__(self, type: NodeType, children: Optional[Set['Node']|List['Node']] = None): self.type = type self.children = children if children is not None else set() + + def __eq__(self, other): + if not isinstance(other, Node): + return False + if self.type != other.type: + return False + if len(self.children) != len(other.children): + return False + # Compare children + if isinstance(self.children, set) and isinstance(other.children, set): + return self.children == other.children + elif isinstance(self.children, list) and isinstance(other.children, list): + return self.children == other.children + else: + return False + + def __hash__(self): + # Make nodes hashable by using their type and a hash of their children + if isinstance(self.children, set): + # For sets, create a deterministic hash by sorting children by their string representation + children_hash = hash(tuple(sorted(self.children, key=lambda x: str(x)))) + else: + # For lists, just hash the tuple directly + children_hash = hash(tuple(self.children)) + return hash((self.type, children_hash)) # ============================================================================ @@ -25,6 +50,16 @@ def __init__(self, _name: str, _alias: Optional[str] = None, **kwargs): super().__init__(NodeType.TABLE, **kwargs) self.name = _name self.alias = _alias + + def __eq__(self, other): + if not isinstance(other, TableNode): + return False + return (super().__eq__(other) and + self.name == other.name and + self.alias == other.alias) + + def __hash__(self): + return hash((super().__hash__(), self.name, self.alias)) # TODO - including query structure arguments (similar to QueryNode) in constructor. @@ -43,6 +78,17 @@ def __init__(self, _name: str, _alias: Optional[str] = None, _parent_alias: Opti self.alias = _alias self.parent_alias = _parent_alias self.parent = _parent + + def __eq__(self, other): + if not isinstance(other, ColumnNode): + return False + return (super().__eq__(other) and + self.name == other.name and + self.alias == other.alias and + self.parent_alias == other.parent_alias) + + def __hash__(self): + return hash((super().__hash__(), self.name, self.alias, self.parent_alias)) class LiteralNode(Node): @@ -51,6 +97,15 @@ def __init__(self, _value: str|int|float|bool|datetime|None, **kwargs): super().__init__(NodeType.LITERAL, **kwargs) self.value = _value + def __eq__(self, other): + if not isinstance(other, LiteralNode): + return False + return (super().__eq__(other) and + self.value == other.value) + + def __hash__(self): + return hash((super().__hash__(), self.value)) + class VarNode(Node): """VarSQL variable node""" @@ -72,6 +127,15 @@ def __init__(self, _left: Node, _name: str, _right: Optional[Node] = None, **kwa children = [_left, _right] if _right else [_left] super().__init__(NodeType.OPERATOR, children=children, **kwargs) self.name = _name + + def __eq__(self, other): + if not isinstance(other, OperatorNode): + return False + return (super().__eq__(other) and + self.name == other.name) + + def __hash__(self): + return hash((super().__hash__(), self.name)) class FunctionNode(Node): @@ -82,7 +146,38 @@ def __init__(self, _name: str, _args: Optional[List[Node]] = None, _alias: Optio super().__init__(NodeType.FUNCTION, children=_args, **kwargs) self.name = _name self.alias = _alias - + + def __eq__(self, other): + if not isinstance(other, FunctionNode): + return False + return (super().__eq__(other) and + self.name == other.name and + self.alias == other.alias) + + def __hash__(self): + return hash((super().__hash__(), self.name, self.alias)) + + +class JoinNode(Node): + """JOIN clause node""" + def __init__(self, _left_table: 'TableNode', _right_table: 'TableNode', _join_type: str = "INNER", _on_condition: Optional['Node'] = None, **kwargs): + children = [_left_table, _right_table] + if _on_condition: + children.append(_on_condition) + super().__init__(NodeType.JOIN, children=children, **kwargs) + self.left_table = _left_table + self.right_table = _right_table + self.join_type = _join_type + self.on_condition = _on_condition + + def __eq__(self, other): + if not isinstance(other, JoinNode): + return False + return (super().__eq__(other) and + self.join_type == other.join_type) + + def __hash__(self): + return hash((super().__hash__(), self.join_type)) # ============================================================================ # Query Structure Nodes @@ -137,6 +232,15 @@ class LimitNode(Node): def __init__(self, _limit: int, **kwargs): super().__init__(NodeType.LIMIT, **kwargs) self.limit = _limit + + def __eq__(self, other): + if not isinstance(other, LimitNode): + return False + return (super().__eq__(other) and + self.limit == other.limit) + + def __hash__(self): + return hash((super().__hash__(), self.limit)) class OffsetNode(Node): @@ -144,6 +248,15 @@ class OffsetNode(Node): def __init__(self, _offset: int, **kwargs): super().__init__(NodeType.OFFSET, **kwargs) self.offset = _offset + + def __eq__(self, other): + if not isinstance(other, OffsetNode): + return False + return (super().__eq__(other) and + self.offset == other.offset) + + def __hash__(self): + return hash((super().__hash__(), self.offset)) class QueryNode(Node): From 214fd762de7472c8b435fd207ec2518044c35e2a Mon Sep 17 00:00:00 2001 From: Yihong Yu <116992300+HazelYuAhiru@users.noreply.github.com> Date: Tue, 11 Nov 2025 20:35:13 -0500 Subject: [PATCH 04/10] update node_type for join --- core/ast/__init__.py | 2 ++ core/ast/node_type.py | 1 + 2 files changed, 3 insertions(+) diff --git a/core/ast/__init__.py b/core/ast/__init__.py index b9c822b..abc61d7 100644 --- a/core/ast/__init__.py +++ b/core/ast/__init__.py @@ -18,6 +18,7 @@ SelectNode, FromNode, WhereNode, + JoinNode, GroupByNode, HavingNode, OrderByNode, @@ -40,6 +41,7 @@ 'SelectNode', 'FromNode', 'WhereNode', + 'JoinNode', 'GroupByNode', 'HavingNode', 'OrderByNode', diff --git a/core/ast/node_type.py b/core/ast/node_type.py index 3699a8e..7f36858 100644 --- a/core/ast/node_type.py +++ b/core/ast/node_type.py @@ -24,6 +24,7 @@ class NodeType(Enum): SELECT = "select" FROM = "from" WHERE = "where" + JOIN = "join" GROUP_BY = "group_by" HAVING = "having" ORDER_BY = "order_by" From 3b32dab61d9c20e786acf2349b2b7431e1e0b70e Mon Sep 17 00:00:00 2001 From: Colin Harrison Date: Wed, 12 Nov 2025 13:47:07 -0800 Subject: [PATCH 05/10] add sort order enum --- core/ast/node.py | 5 +++-- core/ast/sort_order.py | 6 ++++++ 2 files changed, 9 insertions(+), 2 deletions(-) create mode 100644 core/ast/sort_order.py diff --git a/core/ast/node.py b/core/ast/node.py index 717b9ab..dd84a8a 100644 --- a/core/ast/node.py +++ b/core/ast/node.py @@ -3,6 +3,7 @@ from abc import ABC from .node_type import NodeType +from .sort_order import SortOrder # ============================================================================ # Base Node Structure @@ -216,13 +217,13 @@ def __init__(self, _predicates: List['Node'], **kwargs): class OrderByItemNode(Node): """Single ORDER BY item""" - def __init__(self, _column: Node, _sort: str = 'asc', **kwargs): + def __init__(self, _column: Node, _sort: SortOrder = SortOrder.ASC, **kwargs): super().__init__(NodeType.ORDER_BY_ITEM, children=[_column], **kwargs) self.sort = _sort class OrderByNode(Node): """ORDER BY clause node""" - def __init__(self, _items: List['Node'], _sort: Optional[str] = None, **kwargs): + def __init__(self, _items: List['Node'], _sort: Optional[SortOrder] = None, **kwargs): super().__init__(NodeType.ORDER_BY, children=_items, **kwargs) self.sort = _sort diff --git a/core/ast/sort_order.py b/core/ast/sort_order.py new file mode 100644 index 0000000..a2b49ab --- /dev/null +++ b/core/ast/sort_order.py @@ -0,0 +1,6 @@ +from enum import Enum + +class SortOrder(Enum): + """Sort order enum""" + ASC = "ASC" + DESC = "DESC" \ No newline at end of file From cde53cea7d724a650a253b9750790f8cee34731b Mon Sep 17 00:00:00 2001 From: Colin Harrison Date: Wed, 12 Nov 2025 14:05:23 -0800 Subject: [PATCH 06/10] add __eq__ and __hash__ for OrderByItemNode and OrderByNode --- core/ast/node.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/core/ast/node.py b/core/ast/node.py index dd84a8a..ec6fe80 100644 --- a/core/ast/node.py +++ b/core/ast/node.py @@ -221,12 +221,29 @@ def __init__(self, _column: Node, _sort: SortOrder = SortOrder.ASC, **kwargs): super().__init__(NodeType.ORDER_BY_ITEM, children=[_column], **kwargs) self.sort = _sort + def __eq__(self, other): + if not isinstance(other, OrderByItemNode): + return False + return (super().__eq__(other) and + self.sort == other.sort) + + def __hash__(self): + return hash((super().__hash__(), self.sort)) + class OrderByNode(Node): """ORDER BY clause node""" def __init__(self, _items: List['Node'], _sort: Optional[SortOrder] = None, **kwargs): super().__init__(NodeType.ORDER_BY, children=_items, **kwargs) self.sort = _sort + def __eq__(self, other): + if not isinstance(other, OrderByNode): + return False + return (super().__eq__(other) and + self.sort == other.sort) + + def __hash__(self): + return hash((super().__hash__(), self.sort)) class LimitNode(Node): """LIMIT clause node""" From 7ca1f2935d3dacba4c22a9a32c80a5055268b3b0 Mon Sep 17 00:00:00 2001 From: Yihong Yu <116992300+HazelYuAhiru@users.noreply.github.com> Date: Wed, 12 Nov 2025 18:01:30 -0500 Subject: [PATCH 07/10] add enum to join type --- core/ast/node.py | 4 ++-- core/ast/node_type.py | 16 ++++++++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/core/ast/node.py b/core/ast/node.py index ec6fe80..1b4ae0f 100644 --- a/core/ast/node.py +++ b/core/ast/node.py @@ -2,7 +2,7 @@ from typing import List, Set, Optional from abc import ABC -from .node_type import NodeType +from .node_type import NodeType, JoinType from .sort_order import SortOrder # ============================================================================ @@ -161,7 +161,7 @@ def __hash__(self): class JoinNode(Node): """JOIN clause node""" - def __init__(self, _left_table: 'TableNode', _right_table: 'TableNode', _join_type: str = "INNER", _on_condition: Optional['Node'] = None, **kwargs): + def __init__(self, _left_table: 'TableNode', _right_table: 'TableNode', _join_type: JoinType = JoinType.INNER, _on_condition: Optional['Node'] = None, **kwargs): children = [_left_table, _right_table] if _on_condition: children.append(_on_condition) diff --git a/core/ast/node_type.py b/core/ast/node_type.py index 7f36858..e8f9610 100644 --- a/core/ast/node_type.py +++ b/core/ast/node_type.py @@ -32,3 +32,19 @@ class NodeType(Enum): LIMIT = "limit" OFFSET = "offset" QUERY = "query" + +# ============================================================================ +# Join Type Enumeration +# ============================================================================ + +class JoinType(Enum): + """Join type enumeration""" + INNER = "inner" + OUTER = "outer" + LEFT = "left" + RIGHT = "right" + FULL = "full" + CROSS = "cross" + NATURAL = "natural" + SEMI = "semi" + ANTI = "anti" From b9c53cea5c04d1f58b895957e08c893b6ce554f3 Mon Sep 17 00:00:00 2001 From: Colin Harrison Date: Tue, 18 Nov 2025 12:38:20 -0800 Subject: [PATCH 08/10] combine enum files --- core/ast/__init__.py | 2 +- core/ast/{node_type.py => enums.py} | 110 +++++++++++++++------------- core/ast/node.py | 3 +- core/ast/sort_order.py | 6 -- 4 files changed, 62 insertions(+), 59 deletions(-) rename core/ast/{node_type.py => enums.py} (81%) delete mode 100644 core/ast/sort_order.py diff --git a/core/ast/__init__.py b/core/ast/__init__.py index abc61d7..804646a 100644 --- a/core/ast/__init__.py +++ b/core/ast/__init__.py @@ -4,7 +4,7 @@ This module provides the node types and classes for representing SQL query structures. """ -from .node_type import NodeType +from .enums import NodeType from .node import ( Node, TableNode, diff --git a/core/ast/node_type.py b/core/ast/enums.py similarity index 81% rename from core/ast/node_type.py rename to core/ast/enums.py index e8f9610..4a07d54 100644 --- a/core/ast/node_type.py +++ b/core/ast/enums.py @@ -1,50 +1,60 @@ -from enum import Enum - -# ============================================================================ -# Node Type Enumeration -# ============================================================================ - -class NodeType(Enum): - """Node type enumeration""" - - # Operands - TABLE = "table" - SUBQUERY = "subquery" - COLUMN = "column" - LITERAL = "literal" - # VarSQL specific - VAR = "var" - VARSET = "varset" - - # Operators - OPERATOR = "operator" - FUNCTION = "function" - - # Query structure - SELECT = "select" - FROM = "from" - WHERE = "where" - JOIN = "join" - GROUP_BY = "group_by" - HAVING = "having" - ORDER_BY = "order_by" - ORDER_BY_ITEM = "order_by_item" - LIMIT = "limit" - OFFSET = "offset" - QUERY = "query" - -# ============================================================================ -# Join Type Enumeration -# ============================================================================ - -class JoinType(Enum): - """Join type enumeration""" - INNER = "inner" - OUTER = "outer" - LEFT = "left" - RIGHT = "right" - FULL = "full" - CROSS = "cross" - NATURAL = "natural" - SEMI = "semi" - ANTI = "anti" +from enum import Enum + +# ============================================================================ +# Node Type Enumeration +# ============================================================================ + +class NodeType(Enum): + """Node type enumeration""" + + # Operands + TABLE = "table" + SUBQUERY = "subquery" + COLUMN = "column" + LITERAL = "literal" + # VarSQL specific + VAR = "var" + VARSET = "varset" + + # Operators + OPERATOR = "operator" + FUNCTION = "function" + + # Query structure + SELECT = "select" + FROM = "from" + WHERE = "where" + JOIN = "join" + GROUP_BY = "group_by" + HAVING = "having" + ORDER_BY = "order_by" + ORDER_BY_ITEM = "order_by_item" + LIMIT = "limit" + OFFSET = "offset" + QUERY = "query" + +# ============================================================================ +# Join Type Enumeration +# ============================================================================ + +class JoinType(Enum): + """Join type enumeration""" + INNER = "inner" + OUTER = "outer" + LEFT = "left" + RIGHT = "right" + FULL = "full" + CROSS = "cross" + NATURAL = "natural" + SEMI = "semi" + ANTI = "anti" + + +# ============================================================================ +# Sort Order Enumeration +# ============================================================================ + +class SortOrder(Enum): + """Sort order enum""" + ASC = "ASC" + DESC = "DESC" \ No newline at end of file diff --git a/core/ast/node.py b/core/ast/node.py index 1b4ae0f..44f622d 100644 --- a/core/ast/node.py +++ b/core/ast/node.py @@ -2,8 +2,7 @@ from typing import List, Set, Optional from abc import ABC -from .node_type import NodeType, JoinType -from .sort_order import SortOrder +from .enums import NodeType, JoinType, SortOrder # ============================================================================ # Base Node Structure diff --git a/core/ast/sort_order.py b/core/ast/sort_order.py deleted file mode 100644 index a2b49ab..0000000 --- a/core/ast/sort_order.py +++ /dev/null @@ -1,6 +0,0 @@ -from enum import Enum - -class SortOrder(Enum): - """Sort order enum""" - ASC = "ASC" - DESC = "DESC" \ No newline at end of file From 2aae5bd7b7d41485501dc9a770a912d5622f2683 Mon Sep 17 00:00:00 2001 From: Colin Harrison Date: Tue, 18 Nov 2025 12:42:38 -0800 Subject: [PATCH 09/10] simplify OrderByNode --- core/ast/node.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/core/ast/node.py b/core/ast/node.py index 44f622d..befe799 100644 --- a/core/ast/node.py +++ b/core/ast/node.py @@ -231,18 +231,9 @@ def __hash__(self): class OrderByNode(Node): """ORDER BY clause node""" - def __init__(self, _items: List['Node'], _sort: Optional[SortOrder] = None, **kwargs): + def __init__(self, _items: List[OrderByItemNode], **kwargs): super().__init__(NodeType.ORDER_BY, children=_items, **kwargs) - self.sort = _sort - def __eq__(self, other): - if not isinstance(other, OrderByNode): - return False - return (super().__eq__(other) and - self.sort == other.sort) - - def __hash__(self): - return hash((super().__hash__(), self.sort)) class LimitNode(Node): """LIMIT clause node""" From 3f65188232a7c6bff97e94745a8451283d097f84 Mon Sep 17 00:00:00 2001 From: Colin Harrison Date: Tue, 18 Nov 2025 13:52:46 -0800 Subject: [PATCH 10/10] make tests use new enums file --- tests/test_query_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_query_parser.py b/tests/test_query_parser.py index 8b176f9..c3e7b61 100644 --- a/tests/test_query_parser.py +++ b/tests/test_query_parser.py @@ -5,7 +5,7 @@ LiteralNode, OperatorNode, FunctionNode, GroupByNode, HavingNode, OrderByNode, LimitNode, OffsetNode, SubqueryNode, VarNode, VarSetNode ) -from core.ast.node_type import NodeType +from core.ast.enums import NodeType, JoinType, SortOrder from data.queries import get_query parser = QueryParser()