Skip to content

Commit 79e712b

Browse files
fix: improve PostgreSQL SQL function translations
- Fix TIMESTAMPDIFF by replacing CURDATE() first
- Add YEAR(), MONTH(), DAY() function translations
- Add SUM(comparison) → SUM((comparison)::int) for boolean handling
- Reorder translations so simple functions are replaced before complex ones

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 2f61cbd commit 79e712b

File tree

1 file changed

+29
-8
lines changed

1 file changed

+29
-8
lines changed

src/datajoint/adapters/postgres.py

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1046,9 +1046,24 @@ def replace_group_concat(match):
10461046

10471047
expr = re.sub(r"GROUP_CONCAT\s*\((.+?)\)", replace_group_concat, expr, flags=re.IGNORECASE)
10481048

1049+
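# Replace simple functions before complex patterns, so that e.g. CURDATE() nested inside TIMESTAMPDIFF(...) is already the parenthesis-free CURRENT_DATE when the TIMESTAMPDIFF regex runs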
1050+
# CURDATE() → CURRENT_DATE
1051+
expr = re.sub(r"CURDATE\s*\(\s*\)", "CURRENT_DATE", expr, flags=re.IGNORECASE)
1052+
1053+
# NOW() → CURRENT_TIMESTAMP
1054+
expr = re.sub(r"\bNOW\s*\(\s*\)", "CURRENT_TIMESTAMP", expr, flags=re.IGNORECASE)
1055+
1056+
# YEAR(date) → EXTRACT(YEAR FROM date)::int
1057+
expr = re.sub(r"\bYEAR\s*\(\s*([^)]+)\s*\)", r"EXTRACT(YEAR FROM \1)::int", expr, flags=re.IGNORECASE)
1058+
1059+
# MONTH(date) → EXTRACT(MONTH FROM date)::int
1060+
expr = re.sub(r"\bMONTH\s*\(\s*([^)]+)\s*\)", r"EXTRACT(MONTH FROM \1)::int", expr, flags=re.IGNORECASE)
1061+
1062+
# DAY(date) → EXTRACT(DAY FROM date)::int
1063+
expr = re.sub(r"\bDAY\s*\(\s*([^)]+)\s*\)", r"EXTRACT(DAY FROM \1)::int", expr, flags=re.IGNORECASE)
1064+
10491065
# TIMESTAMPDIFF(YEAR, d1, d2) → EXTRACT(YEAR FROM AGE(d2, d1))::int
1050-
# TIMESTAMPDIFF(MONTH, d1, d2) → year*12 + month from AGE
1051-
# TIMESTAMPDIFF(DAY, d1, d2) → (d2::date - d1::date)
1066+
# Use a more robust regex that handles the comma-separated arguments
10521067
def replace_timestampdiff(match):
10531068
unit = match.group(1).upper()
10541069
date1 = match.group(2).strip()
@@ -1060,21 +1075,27 @@ def replace_timestampdiff(match):
10601075
elif unit == "DAY":
10611076
return f"({date2}::date - {date1}::date)"
10621077
else:
1063-
# For other units, fall back to extracting from interval
10641078
return f"EXTRACT({unit} FROM AGE({date2}, {date1}))::int"
10651079

1080+
# Match TIMESTAMPDIFF(unit, date1, date2) with a flat-argument regex
1081+
# date1 must not contain a comma and date2 must not contain ")", so the arguments are limited to identifiers and parenthesis-free expressions such as CURRENT_DATE
10661082
expr = re.sub(
1067-
r"TIMESTAMPDIFF\s*\(\s*(\w+)\s*,\s*(.+?)\s*,\s*(.+?)\s*\)",
1083+
r"TIMESTAMPDIFF\s*\(\s*(\w+)\s*,\s*([^,]+)\s*,\s*([^)]+)\s*\)",
10681084
replace_timestampdiff,
10691085
expr,
10701086
flags=re.IGNORECASE,
10711087
)
10721088

1073-
# CURDATE() → CURRENT_DATE
1074-
expr = re.sub(r"CURDATE\s*\(\s*\)", "CURRENT_DATE", expr, flags=re.IGNORECASE)
1089+
# SUM(expr='value') → SUM((expr='value')::int)
1090+
# A comparison such as sex='F' evaluates to 0/1 in MySQL but to boolean in PostgreSQL, and PostgreSQL's SUM() does not accept boolean, so the comparison is cast to int
1091+
def replace_sum_comparison(match):
1092+
inner = match.group(1).strip()
1093+
# Check if inner contains a comparison operator
1094+
if re.search(r"[=<>!]", inner) and not inner.startswith("("):
1095+
return f"SUM(({inner})::int)"
1096+
return match.group(0) # Return unchanged if no comparison
10751097

1076-
# NOW() → CURRENT_TIMESTAMP (already works but ensure compatibility)
1077-
expr = re.sub(r"\bNOW\s*\(\s*\)", "CURRENT_TIMESTAMP", expr, flags=re.IGNORECASE)
1098+
expr = re.sub(r"\bSUM\s*\(\s*([^)]+)\s*\)", replace_sum_comparison, expr, flags=re.IGNORECASE)
10781099

10791100
return expr
10801101

0 commit comments

Comments
 (0)