-
Notifications
You must be signed in to change notification settings - Fork 483
fix(python-driver): quote-escape column aliases in buildCypher() #2373
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -208,14 +208,19 @@ def _validate_column(col: str) -> str: | |||||
| name, type_name = parts | ||||||
| validate_identifier(name, "Column name") | ||||||
| validate_identifier(type_name, "Column type") | ||||||
| return f"{name} {type_name}" | ||||||
| # Only the column name is double-quoted. The type name is left | ||||||
| # unquoted so PostgreSQL applies its default identifier folding | ||||||
| # for type names in column definitions. Double-quoting would | ||||||
| # make the type name case-sensitive and could change type | ||||||
| # resolution in surprising ways for user-defined types. | ||||||
| return f'"{name}" {type_name}' | ||||||
| else: | ||||||
| validate_identifier(col, "Column name") | ||||||
| return f"{col} agtype" | ||||||
| return f'"{col}" agtype' | ||||||
|
|
||||||
|
|
||||||
| def buildCypher(graphName:str, cypherStmt:str, columns:list) ->str: | ||||||
| if graphName == None: | ||||||
| if graphName is None: | ||||||
| raise _EXCEPTION_GraphNotSet | ||||||
|
|
||||||
| columnExp=[] | ||||||
|
|
@@ -225,16 +230,18 @@ def buildCypher(graphName:str, cypherStmt:str, columns:list) ->str: | |||||
| if validated: | ||||||
| columnExp.append(validated) | ||||||
| else: | ||||||
| columnExp.append('v agtype') | ||||||
| columnExp.append('"v" agtype') | ||||||
|
||||||
| columnExp.append('"v" agtype') | |
| columnExp.append(_validate_column('v')) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The default column already goes through _validate_column logic via the else branch in buildCypher which hardcodes '"v" agtype'. Using _validate_column('v') would work too but adds a function call for a static value — keeping it inline is consistent with the existing pattern.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -13,11 +13,17 @@ | |
| # specific language governing permissions and limitations | ||
| # under the License. | ||
| import json | ||
| import re | ||
|
|
||
| from age.models import Vertex | ||
| import unittest | ||
| import decimal | ||
| import age | ||
| # _validate_column is private but tested directly because its quoting | ||
| # behavior is security-relevant and the public surface (buildCypher) | ||
| # makes it difficult to isolate quoting assertions. | ||
| from age.age import buildCypher, _validate_column | ||
| from age.exceptions import InvalidIdentifier | ||
| import argparse | ||
|
|
||
| TEST_HOST = "localhost" | ||
|
|
@@ -28,6 +34,72 @@ | |
| TEST_GRAPH_NAME = "test_graph" | ||
|
|
||
|
|
||
| class TestBuildCypher(unittest.TestCase): | ||
| """Unit tests for buildCypher() and _validate_column() — no DB required.""" | ||
|
|
||
| def test_simple_column(self): | ||
| result = buildCypher("g", "MATCH (n) RETURN n", ["n"]) | ||
| self.assertIn('"n" agtype', result) | ||
|
|
||
| def test_column_with_type(self): | ||
| result = buildCypher("g", "MATCH (n) RETURN n", ["n agtype"]) | ||
| self.assertIn('"n" agtype', result) | ||
|
Comment on lines
+40
to
+46
|
||
|
|
||
| def test_reserved_word_count(self): | ||
| """Issue #2370: 'count' is a PostgreSQL reserved word.""" | ||
| result = buildCypher("g", "MATCH (n) RETURN count(n)", ["count"]) | ||
| self.assertIn('"count" agtype', result) | ||
| # Verify 'count' never appears unquoted as a column name | ||
| self.assertIsNone( | ||
| re.search(r'(?<!")\bcount\s+agtype\b', result), | ||
| f"'count' must be quoted in: {result}" | ||
| ) | ||
|
|
||
| def test_reserved_word_order(self): | ||
| """Issue #2370: 'order' is a PostgreSQL reserved word.""" | ||
| result = buildCypher("g", "MATCH (n) RETURN n.order", ["order"]) | ||
| self.assertIn('"order" agtype', result) | ||
|
|
||
| def test_reserved_word_type(self): | ||
| """Issue #2370: 'type' is a PostgreSQL reserved word.""" | ||
| result = buildCypher("g", "MATCH ()-[r]->() RETURN type(r)", ["type"]) | ||
| self.assertIn('"type" agtype', result) | ||
|
|
||
| def test_reserved_word_select(self): | ||
| """Issue #2370: 'select' is a PostgreSQL reserved word.""" | ||
| result = buildCypher("g", "MATCH (n) RETURN n", ["select"]) | ||
| self.assertIn('"select" agtype', result) | ||
|
|
||
| def test_reserved_word_group(self): | ||
| """Issue #2370: 'group' is a PostgreSQL reserved word.""" | ||
| result = buildCypher("g", "MATCH (n) RETURN n", ["group"]) | ||
| self.assertIn('"group" agtype', result) | ||
|
|
||
| def test_multiple_columns(self): | ||
| result = buildCypher("g", "MATCH (n) RETURN n.name, count(n)", ["name", "count"]) | ||
| self.assertIn('"name" agtype', result) | ||
| self.assertIn('"count" agtype', result) | ||
|
|
||
| def test_default_column(self): | ||
| result = buildCypher("g", "MATCH (n) RETURN n", None) | ||
| self.assertIn('"v" agtype', result) | ||
|
|
||
| def test_invalid_column_rejected(self): | ||
| with self.assertRaises(InvalidIdentifier): | ||
| buildCypher("g", "MATCH (n) RETURN n", ["invalid;col"]) | ||
|
|
||
| def test_reserved_word_in_name_type_pair(self): | ||
| """Quoting applies even when the column is specified as 'name type'.""" | ||
| result = buildCypher("g", "MATCH (n) RETURN n.order", ["order agtype"]) | ||
| self.assertIn('"order" agtype', result) | ||
|
|
||
| def test_validate_column_quoting(self): | ||
| self.assertEqual(_validate_column("v"), '"v" agtype') | ||
| self.assertEqual(_validate_column("v agtype"), '"v" agtype') | ||
| self.assertEqual(_validate_column("count"), '"count" agtype') | ||
| self.assertEqual(_validate_column("my_col"), '"my_col" agtype') | ||
|
|
||
|
|
||
| class TestAgeBasic(unittest.TestCase): | ||
| ag = None | ||
| args: argparse.Namespace = argparse.Namespace( | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Always double-quoting column names changes PostgreSQL’s identifier folding semantics: previously an unquoted alias like
Vwould be exposed asv(folded to lower-case), but now"V"will stay uppercase. If callers relied on the old lowercasing behavior forcolsvalues with uppercase letters, this is a user-visible behavior change; consider normalizingnameto lower-case before quoting or documenting this change.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is inherent to the fix — previously, unquoted reserved words like
countcaused SQL parse errors, which is worse than the case-folding change. Thevalidate_identifier()regex restricts names to[A-Za-z_][A-Za-z0-9_]*, and in practice AGE column aliases are lowercase. The trade-off (case-sensitivity for reserved-word safety) is the correct one.