From fadab21493e1e40639f0e434bbcbef0bb2130c60 Mon Sep 17 00:00:00 2001 From: Ramiro Aquino Romero Date: Thu, 22 Jan 2026 11:29:09 -0400 Subject: [PATCH 1/4] fix(api): nan is not properly handled for athena connections (#37071) --- superset/dataframe.py | 7 +- tests/unit_tests/dataframe_test.py | 196 ++++++++++++++++++++++++++--- 2 files changed, 184 insertions(+), 19 deletions(-) diff --git a/superset/dataframe.py b/superset/dataframe.py index 5f3c0dc77985..0e7cba0bc3c5 100644 --- a/superset/dataframe.py +++ b/superset/dataframe.py @@ -41,6 +41,9 @@ def df_to_records(dframe: pd.DataFrame) -> list[dict[str, Any]]: """ Convert a DataFrame to a set of records. + NaN values are converted to None for JSON compatibility. + This handles division by zero and other operations that produce NaN. + :param dframe: the DataFrame to convert :returns: a list of dictionaries reflecting each single row of the DataFrame """ @@ -52,6 +55,8 @@ def df_to_records(dframe: pd.DataFrame) -> list[dict[str, Any]]: for record in records: for key in record: - record[key] = _convert_big_integers(record[key]) + record[key] = ( + None if pd.isna(record[key]) else _convert_big_integers(record[key]) + ) return records diff --git a/tests/unit_tests/dataframe_test.py b/tests/unit_tests/dataframe_test.py index 0443bc1461cd..934edea20475 100644 --- a/tests/unit_tests/dataframe_test.py +++ b/tests/unit_tests/dataframe_test.py @@ -17,18 +17,19 @@ # pylint: disable=unused-argument, import-outside-toplevel from datetime import datetime +import numpy as np import pytest from pandas import Timestamp from pandas._libs.tslibs import NaT from superset.dataframe import df_to_records +from superset.db_engine_specs import BaseEngineSpec +from superset.result_set import SupersetResultSet from superset.superset_typing import DbapiDescription +from superset.utils import json as superset_json def test_df_to_records() -> None: - from superset.db_engine_specs import BaseEngineSpec - from superset.result_set import SupersetResultSet - data = [("a1", "b1", "c1"), ("a2", "b2", "c2")] cursor_descr: DbapiDescription = [ (column, "string", None, None, None, None, False) for column in ("a", "b", "c") @@ -43,9 +44,6 @@ def test_df_to_records() -> None: def test_df_to_records_NaT_type() -> None: # noqa: N802 - from superset.db_engine_specs import BaseEngineSpec - from superset.result_set import SupersetResultSet - data = [(NaT,), (Timestamp("2023-01-06 20:50:31.749000+0000", tz="UTC"),)] cursor_descr: DbapiDescription = [ ("date", "timestamp with time zone", None, None, None, None, False) @@ -60,9 +58,6 @@ def test_df_to_records_NaT_type() -> None: # noqa: N802 def test_df_to_records_mixed_emoji_type() -> None: - from superset.db_engine_specs import BaseEngineSpec - from superset.result_set import SupersetResultSet - data = [ ("What's up?", "This is a string text", 1), ("What's up?", "This is a string with an 😍 added", 2), @@ -100,9 +95,6 @@ def test_df_to_records_mixed_emoji_type() -> None: def test_df_to_records_mixed_accent_type() -> None: - from superset.db_engine_specs import BaseEngineSpec - from superset.result_set import SupersetResultSet - data = [ ("What's up?", "This is a string text", 1), ("What's up?", "This is a string with áccent", 2), @@ -140,9 +132,6 @@ def test_df_to_records_mixed_accent_type() -> None: def test_js_max_int() -> None: - from superset.db_engine_specs import BaseEngineSpec - from superset.result_set import SupersetResultSet - data = [(1, 1239162456494753670, "c1"), (2, 100, "c2")] cursor_descr: DbapiDescription = [ ("a", 
"int", None, None, None, None, False), @@ -192,9 +181,6 @@ def test_js_max_int() -> None: ], ) def test_max_pandas_timestamp(input_, expected) -> None: - from superset.db_engine_specs import BaseEngineSpec - from superset.result_set import SupersetResultSet - cursor_descr: DbapiDescription = [ ("a", "datetime", None, None, None, None, False), ("b", "int", None, None, None, None, False), @@ -203,3 +189,177 @@ def test_max_pandas_timestamp(input_, expected) -> None: df = results.to_pandas_df() assert df_to_records(df) == expected + + +def test_df_to_records_with_nan_from_division_by_zero() -> None: + """Test that NaN values from division by zero are converted to None.""" + # Simulate Athena query: select 0.00 / 0.00 as test + data = [(np.nan,), (5.0,), (np.nan,)] + cursor_descr: DbapiDescription = [("test", "double", None, None, None, None, False)] + results = SupersetResultSet(data, cursor_descr, BaseEngineSpec) + df = results.to_pandas_df() + + assert df_to_records(df) == [ + {"test": None}, + {"test": 5.0}, + {"test": None}, + ] + + +def test_df_to_records_with_mixed_nan_and_valid_values() -> None: + """Test that NaN values are properly handled alongside valid numeric data.""" + + # Simulate a query with multiple columns containing NaN values + data = [ + ("row1", 10.5, np.nan, 100), + ("row2", np.nan, 20.3, 200), + ("row3", 30.7, 40.2, np.nan), + ("row4", np.nan, np.nan, np.nan), + ] + cursor_descr: DbapiDescription = [ + ("name", "varchar", None, None, None, None, False), + ("value1", "double", None, None, None, None, False), + ("value2", "double", None, None, None, None, False), + ("value3", "int", None, None, None, None, False), + ] + results = SupersetResultSet(data, cursor_descr, BaseEngineSpec) + df = results.to_pandas_df() + + assert df_to_records(df) == [ + {"name": "row1", "value1": 10.5, "value2": None, "value3": 100}, + {"name": "row2", "value1": None, "value2": 20.3, "value3": 200}, + {"name": "row3", "value1": 30.7, "value2": 40.2, "value3": None}, + {"name": "row4", "value1": None, "value2": None, "value3": None}, + ] + + +def test_df_to_records_with_inf_and_nan() -> None: + """Test that both NaN and infinity values are handled correctly.""" + # Test various edge cases: NaN, positive infinity, negative infinity + data = [ + (np.nan, "division by zero"), + (np.inf, "positive infinity"), + (-np.inf, "negative infinity"), + (0.0, "zero"), + (42.5, "normal value"), + ] + cursor_descr: DbapiDescription = [ + ("result", "double", None, None, None, None, False), + ("description", "varchar", None, None, None, None, False), + ] + results = SupersetResultSet(data, cursor_descr, BaseEngineSpec) + df = results.to_pandas_df() + + records = df_to_records(df) + + # NaN should be converted to None + assert records[0]["result"] is None + assert records[0]["description"] == "division by zero" + + # Infinity values should remain as-is (they're valid JSON) + assert records[1]["result"] == np.inf + assert records[2]["result"] == -np.inf + + # Normal values should remain unchanged + assert records[3]["result"] == 0.0 + assert records[4]["result"] == 42.5 + + +def test_df_to_records_nan_json_serialization() -> None: + """ + Test that NaN values are properly converted to None for JSON serialization. + + Without the pd.isna() check, np.nan values would be passed through to JSON + serialization, which either produces non-spec-compliant output or requires + special handling with ignore_nan flags throughout the codebase. 
+ + This test validates that our fix converts NaN to None for proper JSON + serialization. + """ + # Simulate Athena query: SELECT 0.00 / 0.00 as test + data = [(np.nan,), (5.0,), (np.nan,)] + cursor_descr: DbapiDescription = [("test", "double", None, None, None, None, False)] + results = SupersetResultSet(data, cursor_descr, BaseEngineSpec) + df = results.to_pandas_df() + + # Get records with our fix + records = df_to_records(df) + + # Verify NaN values are converted to None + assert records == [ + {"test": None}, # NaN converted to None + {"test": 5.0}, + {"test": None}, # NaN converted to None + ] + + # This should succeed with valid, spec-compliant JSON + json_output = superset_json.dumps(records) + parsed = superset_json.loads(json_output) + + # Verify JSON serialization works correctly + assert parsed == records + + # Demonstrate what happens WITHOUT the fix + # (simulate the old behavior by directly using to_dict) + records_without_fix = df.to_dict(orient="records") + + # Verify the records contain actual NaN values (not None) + assert np.isnan(records_without_fix[0]["test"]) + assert records_without_fix[1]["test"] == 5.0 + assert np.isnan(records_without_fix[2]["test"]) + + # Demonstrate the actual bug: without the fix, ignore_nan=False raises ValueError + # This is the error users would see without our fix + with pytest.raises( + ValueError, match="Out of range float values are not JSON compliant" + ): + superset_json.dumps(records_without_fix, ignore_nan=False) + + # With ignore_nan=True, it works by converting NaN to null + # But this requires the flag to be set everywhere - our fix eliminates this need + json_with_ignore = superset_json.dumps(records_without_fix, ignore_nan=True) + parsed_with_ignore = superset_json.loads(json_with_ignore) + # The output is the same, but our fix doesn't require the ignore_nan flag + assert parsed_with_ignore[0]["test"] is None + + +def test_df_to_records_with_json_serialization_like_sql_lab() -> None: + """ + Test that mimics the actual SQL Lab serialization flow. + This shows how the fix prevents errors in the real usage path. 
+ """ + # Simulate query with NaN results + data = [ + ("user1", 100.0, np.nan), + ("user2", np.nan, 50.0), + ("user3", 75.0, 25.0), + ] + cursor_descr: DbapiDescription = [ + ("name", "varchar", None, None, None, None, False), + ("value1", "double", None, None, None, None, False), + ("value2", "double", None, None, None, None, False), + ] + results = SupersetResultSet(data, cursor_descr, BaseEngineSpec) + df = results.to_pandas_df() + + # Mimic sql_lab.py:360 - this is where df_to_records is used + records = df_to_records(df) or [] + + # Mimic sql_lab.py:332 - JSON serialization with Superset's custom json.dumps + # This should work without errors + json_str = superset_json.dumps( + records, default=superset_json.json_iso_dttm_ser, ignore_nan=True + ) + + # Verify it's valid JSON and NaN values are properly handled as null + parsed = superset_json.loads(json_str) + assert parsed[0]["value2"] is None # NaN became null + assert parsed[1]["value1"] is None # NaN became null + assert parsed[0]["value1"] == 100.0 + + # Also verify it works without ignore_nan flag (since we convert NaN to None) + json_str_no_flag = superset_json.dumps( + records, default=superset_json.json_iso_dttm_ser, ignore_nan=False + ) + parsed_no_flag = superset_json.loads(json_str_no_flag) + assert parsed_no_flag == parsed # Same result From 9fabd7f997c7e533537cf18c58528da117d67eba Mon Sep 17 00:00:00 2001 From: Evan Rusackas Date: Thu, 22 Jan 2026 09:41:07 -0800 Subject: [PATCH 2/4] docs: show Developer Portal in global navigation (#37313) Co-authored-by: Claude Opus 4.5 --- docs/docusaurus.config.ts | 2 +- docs/src/data/databases.json | 2 +- docs/versions-config.json | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/docusaurus.config.ts b/docs/docusaurus.config.ts index c9b0e8c28c55..320299e7c603 100644 --- a/docs/docusaurus.config.ts +++ b/docs/docusaurus.config.ts @@ -134,7 +134,7 @@ if (!versionsConfig.developer_portal.disabled && !versionsConfig.developer_porta { type: 'doc', docsPluginId: 'developer_portal', - docId: 'extensions/architectural-principles', + docId: 'extensions/overview', label: 'Extensions', }, { diff --git a/docs/src/data/databases.json b/docs/src/data/databases.json index 88eb25777a77..35e91f79a90a 100644 --- a/docs/src/data/databases.json +++ b/docs/src/data/databases.json @@ -1,5 +1,5 @@ { - "generated": "2026-01-19T22:38:23.768Z", + "generated": "2026-01-21T21:46:41.044Z", "statistics": { "totalDatabases": 67, "withDocumentation": 67, diff --git a/docs/versions-config.json b/docs/versions-config.json index 3587ebb9d614..d96bfc498d0f 100644 --- a/docs/versions-config.json +++ b/docs/versions-config.json @@ -22,7 +22,7 @@ }, "developer_portal": { "disabled": false, - "hideFromNav": true, + "hideFromNav": false, "lastVersion": "current", "includeCurrentVersion": true, "onlyIncludeVersions": [ From b6308308419ebb930c35079e4130daf267bf2bf7 Mon Sep 17 00:00:00 2001 From: Gabriel Torres Ruiz Date: Thu, 22 Jan 2026 14:45:59 -0300 Subject: [PATCH 3/4] fix(sqllab): add colorEditorSelection token for visible text selection (#36932) --- .../packages/superset-core/src/ui/theme/types.ts | 7 +++++++ .../src/components/AsyncAceEditor/index.tsx | 3 ++- superset/config.py | 7 +++++++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/superset-frontend/packages/superset-core/src/ui/theme/types.ts b/superset-frontend/packages/superset-core/src/ui/theme/types.ts index 8a2fcd73f67a..03c5e8d45a39 100644 --- a/superset-frontend/packages/superset-core/src/ui/theme/types.ts +++ 
b/superset-frontend/packages/superset-core/src/ui/theme/types.ts @@ -159,6 +159,13 @@ export interface SupersetSpecificTokens { echartsOptionsOverridesByChartType?: { [chartType: string]: any; }; + + // Editor-related + /** + * Background color for code editor text selection. + * Defaults to colorPrimaryBgHover if not specified. + */ + colorEditorSelection?: string; } /** diff --git a/superset-frontend/packages/superset-ui-core/src/components/AsyncAceEditor/index.tsx b/superset-frontend/packages/superset-ui-core/src/components/AsyncAceEditor/index.tsx index 002d65cd070c..56b0e9576a72 100644 --- a/superset-frontend/packages/superset-ui-core/src/components/AsyncAceEditor/index.tsx +++ b/superset-frontend/packages/superset-ui-core/src/components/AsyncAceEditor/index.tsx @@ -292,7 +292,8 @@ export function AsyncAceEditor( } /* Adjust selection color */ .ace_editor .ace_selection { - background-color: ${token.colorPrimaryBgHover} !important; + background-color: ${token.colorEditorSelection ?? + token.colorPrimaryBgHover} !important; } /* Improve active line highlighting */ diff --git a/superset/config.py b/superset/config.py index 4532b88342b8..995a5268f324 100644 --- a/superset/config.py +++ b/superset/config.py @@ -929,6 +929,8 @@ class D3TimeFormat(TypedDict, total=False): "fontWeightNormal": "400", "fontWeightLight": "300", "fontWeightStrong": "500", + # Editor selection color (for SQL Lab text highlighting) + "colorEditorSelection": "#fff5cf", }, "algorithm": "default", } @@ -938,6 +940,11 @@ class D3TimeFormat(TypedDict, total=False): # Set to None to disable dark mode THEME_DARK: Optional[Theme] = { **THEME_DEFAULT, + "token": { + **THEME_DEFAULT["token"], + # Darker selection color for dark mode + "colorEditorSelection": "#5c4d1a", + }, "algorithm": "dark", } From 87bbd54d0aca3b98671e0516e0b2d7a9e66bc3c3 Mon Sep 17 00:00:00 2001 From: Evan Rusackas Date: Thu, 22 Jan 2026 09:50:05 -0800 Subject: [PATCH 4/4] feat(examples): Transpile virtual dataset SQL on import (#37311) Co-authored-by: Claude Opus 4.5 Co-authored-by: Beto Dealmeida Co-authored-by: bito-code-review[bot] <188872107+bito-code-review[bot]@users.noreply.github.com> --- superset/commands/dashboard/export_example.py | 4 + superset/commands/importers/v1/examples.py | 58 +++++ superset/datasets/schemas.py | 2 + superset/sql/parse.py | 21 +- .../commands/importers/v1/examples_test.py | 244 ++++++++++++++++++ .../sql/transpile_to_dialect_test.py | 51 ++++ 6 files changed, 377 insertions(+), 3 deletions(-) create mode 100644 tests/unit_tests/commands/importers/v1/examples_test.py diff --git a/superset/commands/dashboard/export_example.py b/superset/commands/dashboard/export_example.py index 0446e213b335..7924fe0ad4d1 100644 --- a/superset/commands/dashboard/export_example.py +++ b/superset/commands/dashboard/export_example.py @@ -175,6 +175,10 @@ def export_dataset_yaml( "schema": None, # Don't export - use target database's default schema # Preserve SQL for virtual datasets, None for physical (data is in parquet) "sql": dataset.sql if is_preserved_virtual else None, + # Track source database engine for SQL transpilation during import + "source_db_engine": ( + dataset.database.db_engine_spec.engine if is_preserved_virtual else None + ), "params": None, # Don't export - contains stale import metadata "template_params": dataset.template_params, "filter_select_enabled": dataset.filter_select_enabled, diff --git a/superset/commands/importers/v1/examples.py b/superset/commands/importers/v1/examples.py index 99ecab79555c..19fe811044a7 
100644 --- a/superset/commands/importers/v1/examples.py +++ b/superset/commands/importers/v1/examples.py @@ -14,11 +14,13 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +import logging from typing import Any, Optional from marshmallow import Schema from sqlalchemy.exc import MultipleResultsFound +from superset import db from superset.charts.schemas import ImportV1ChartSchema from superset.commands.chart.importers.v1 import ImportChartsCommand from superset.commands.chart.importers.v1.utils import import_chart @@ -41,9 +43,62 @@ from superset.dashboards.schemas import ImportV1DashboardSchema from superset.databases.schemas import ImportV1DatabaseSchema from superset.datasets.schemas import ImportV1DatasetSchema +from superset.exceptions import QueryClauseValidationException +from superset.models.core import Database +from superset.sql.parse import transpile_to_dialect from superset.utils.core import get_example_default_schema from superset.utils.decorators import transaction +logger = logging.getLogger(__name__) + + +def transpile_virtual_dataset_sql(config: dict[str, Any], database_id: int) -> None: + """ + Transpile virtual dataset SQL to the target database dialect. + + This ensures that virtual datasets exported from one database type + (e.g., PostgreSQL) can be loaded into a different database type + (e.g., MySQL, DuckDB, SQLite). + + Args: + config: Dataset configuration dict (modified in place) + database_id: ID of the target database + """ + sql = config.get("sql") + if not sql: + return + + database = db.session.query(Database).get(database_id) + if not database: + logger.warning("Database %s not found, skipping SQL transpilation", database_id) + return + + target_engine = database.db_engine_spec.engine + source_engine = config.get("source_db_engine") + if target_engine == source_engine: + logger.info("Source and target dialects are identical, skipping transpilation") + return + + try: + transpiled_sql = transpile_to_dialect(sql, target_engine, source_engine) + if transpiled_sql != sql: + logger.info( + "Transpiled virtual dataset SQL for '%s' from %s to %s dialect", + config.get("table_name", "unknown"), + source_engine or "generic", + target_engine, + ) + config["sql"] = transpiled_sql + except QueryClauseValidationException as ex: + logger.warning( + "Could not transpile SQL for dataset '%s' from %s to %s: %s. 
" + "Using original SQL which may not be compatible.", + config.get("table_name", "unknown"), + source_engine or "generic", + target_engine, + ex, + ) + class ImportExamplesCommand(ImportModelsCommand): """Import examples""" @@ -119,6 +174,9 @@ def _import( # pylint: disable=too-many-locals, too-many-branches # noqa: C901 if config["schema"] is None: config["schema"] = get_example_default_schema() + # transpile virtual dataset SQL to target database dialect + transpile_virtual_dataset_sql(config, config["database_id"]) + try: dataset = import_dataset( config, diff --git a/superset/datasets/schemas.py b/superset/datasets/schemas.py index 96bc44a9d3cf..1506ef45d167 100644 --- a/superset/datasets/schemas.py +++ b/superset/datasets/schemas.py @@ -322,6 +322,8 @@ def fix_extra(self, data: dict[str, Any], **kwargs: Any) -> dict[str, Any]: schema = fields.String(allow_none=True) catalog = fields.String(allow_none=True) sql = fields.String(allow_none=True) + # Source database engine for SQL transpilation (virtual datasets only) + source_db_engine = fields.String(allow_none=True, load_default=None) params = fields.Dict(allow_none=True) template_params = fields.Dict(allow_none=True) filter_select_enabled = fields.Boolean() diff --git a/superset/sql/parse.py b/superset/sql/parse.py index af72f72e9528..af9a740ec756 100644 --- a/superset/sql/parse.py +++ b/superset/sql/parse.py @@ -1522,9 +1522,21 @@ def sanitize_clause(clause: str, engine: str) -> str: raise QueryClauseValidationException(f"Invalid SQL clause: {clause}") from ex -def transpile_to_dialect(sql: str, target_engine: str) -> str: +def transpile_to_dialect( + sql: str, + target_engine: str, + source_engine: str | None = None, +) -> str: """ - Transpile SQL from "generic SQL" to the target database dialect using SQLGlot. + Transpile SQL from one database dialect to another using SQLGlot. + + Args: + sql: The SQL query to transpile + target_engine: The target database engine (e.g., "mysql", "postgresql") + source_engine: The source database engine. If None, uses generic SQL dialect. + + Returns: + The transpiled SQL string If the target engine is not in SQLGLOT_DIALECTS, returns the SQL as-is. """ @@ -1534,8 +1546,11 @@ def transpile_to_dialect(sql: str, target_engine: str) -> str: if target_dialect is None: return sql + # Get source dialect (default to generic if not specified) + source_dialect = SQLGLOT_DIALECTS.get(source_engine) if source_engine else Dialect + try: - parsed = sqlglot.parse_one(sql, dialect=Dialect) + parsed = sqlglot.parse_one(sql, dialect=source_dialect) return Dialect.get_or_raise(target_dialect).generate( parsed, copy=True, diff --git a/tests/unit_tests/commands/importers/v1/examples_test.py b/tests/unit_tests/commands/importers/v1/examples_test.py new file mode 100644 index 000000000000..1ad6176dc1ca --- /dev/null +++ b/tests/unit_tests/commands/importers/v1/examples_test.py @@ -0,0 +1,244 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Tests for the examples importer, specifically SQL transpilation.""" + +from unittest.mock import MagicMock, patch + +from superset.commands.importers.v1.examples import transpile_virtual_dataset_sql + + +def test_transpile_virtual_dataset_sql_no_sql(): + """Test that configs without SQL are unchanged.""" + config = {"table_name": "my_table", "sql": None} + transpile_virtual_dataset_sql(config, 1) + assert config["sql"] is None + + +def test_transpile_virtual_dataset_sql_empty_sql(): + """Test that configs with empty SQL are unchanged.""" + config = {"table_name": "my_table", "sql": ""} + transpile_virtual_dataset_sql(config, 1) + assert config["sql"] == "" + + +@patch("superset.commands.importers.v1.examples.db") +def test_transpile_virtual_dataset_sql_database_not_found(mock_db): + """Test graceful handling when database is not found.""" + mock_db.session.query.return_value.get.return_value = None + + config = {"table_name": "my_table", "sql": "SELECT * FROM foo"} + original_sql = config["sql"] + + transpile_virtual_dataset_sql(config, 999) + + # SQL should remain unchanged + assert config["sql"] == original_sql + + +@patch("superset.commands.importers.v1.examples.db") +@patch("superset.commands.importers.v1.examples.transpile_to_dialect") +def test_transpile_virtual_dataset_sql_success(mock_transpile, mock_db): + """Test successful SQL transpilation with source engine.""" + mock_database = MagicMock() + mock_database.db_engine_spec.engine = "mysql" + mock_db.session.query.return_value.get.return_value = mock_database + + mock_transpile.return_value = "SELECT * FROM `foo`" + + config = { + "table_name": "my_table", + "sql": "SELECT * FROM foo", + "source_db_engine": "postgresql", + } + transpile_virtual_dataset_sql(config, 1) + + assert config["sql"] == "SELECT * FROM `foo`" + mock_transpile.assert_called_once_with("SELECT * FROM foo", "mysql", "postgresql") + + +@patch("superset.commands.importers.v1.examples.db") +@patch("superset.commands.importers.v1.examples.transpile_to_dialect") +def test_transpile_virtual_dataset_sql_no_source_engine(mock_transpile, mock_db): + """Test transpilation when source_db_engine is not specified (legacy).""" + mock_database = MagicMock() + mock_database.db_engine_spec.engine = "mysql" + mock_db.session.query.return_value.get.return_value = mock_database + + mock_transpile.return_value = "SELECT * FROM `foo`" + + # No source_db_engine - should default to None (generic dialect) + config = {"table_name": "my_table", "sql": "SELECT * FROM foo"} + transpile_virtual_dataset_sql(config, 1) + + assert config["sql"] == "SELECT * FROM `foo`" + mock_transpile.assert_called_once_with("SELECT * FROM foo", "mysql", None) + + +@patch("superset.commands.importers.v1.examples.db") +@patch("superset.commands.importers.v1.examples.transpile_to_dialect") +def test_transpile_virtual_dataset_sql_no_change(mock_transpile, mock_db): + """Test when transpilation returns same SQL (no dialect differences).""" + mock_database = MagicMock() + mock_database.db_engine_spec.engine = "postgresql" + mock_db.session.query.return_value.get.return_value = mock_database + + 
original_sql = "SELECT * FROM foo" + mock_transpile.return_value = original_sql + + config = { + "table_name": "my_table", + "sql": original_sql, + "source_db_engine": "postgresql", + } + transpile_virtual_dataset_sql(config, 1) + + assert config["sql"] == original_sql + + +@patch("superset.commands.importers.v1.examples.db") +@patch("superset.commands.importers.v1.examples.transpile_to_dialect") +def test_transpile_virtual_dataset_sql_error_fallback(mock_transpile, mock_db): + """Test graceful fallback when transpilation fails.""" + from superset.exceptions import QueryClauseValidationException + + mock_database = MagicMock() + mock_database.db_engine_spec.engine = "mysql" + mock_db.session.query.return_value.get.return_value = mock_database + + mock_transpile.side_effect = QueryClauseValidationException("Parse error") + + original_sql = "SELECT SOME_POSTGRES_SPECIFIC_FUNCTION() FROM foo" + config = { + "table_name": "my_table", + "sql": original_sql, + "source_db_engine": "postgresql", + } + + # Should not raise, should keep original SQL + transpile_virtual_dataset_sql(config, 1) + assert config["sql"] == original_sql + + +@patch("superset.commands.importers.v1.examples.db") +@patch("superset.commands.importers.v1.examples.transpile_to_dialect") +def test_transpile_virtual_dataset_sql_postgres_to_duckdb(mock_transpile, mock_db): + """Test transpilation from PostgreSQL to DuckDB.""" + mock_database = MagicMock() + mock_database.db_engine_spec.engine = "duckdb" + mock_db.session.query.return_value.get.return_value = mock_database + + original_sql = """ + SELECT DATE_TRUNC('month', created_at) AS month, COUNT(*) AS cnt + FROM orders WHERE status = 'completed' GROUP BY 1 + """ + transpiled_sql = """ + SELECT DATE_TRUNC('month', created_at) AS month, COUNT(*) AS cnt + FROM orders WHERE status = 'completed' GROUP BY 1 + """ + mock_transpile.return_value = transpiled_sql + + config = { + "table_name": "monthly_orders", + "sql": original_sql, + "source_db_engine": "postgresql", + } + transpile_virtual_dataset_sql(config, 1) + + assert config["sql"] == transpiled_sql + mock_transpile.assert_called_once_with(original_sql, "duckdb", "postgresql") + + +@patch("superset.commands.importers.v1.examples.db") +@patch("superset.commands.importers.v1.examples.transpile_to_dialect") +def test_transpile_virtual_dataset_sql_postgres_to_clickhouse(mock_transpile, mock_db): + """Test transpilation from PostgreSQL to ClickHouse. + + ClickHouse has different syntax for date functions, so this tests + real dialect differences. + """ + mock_database = MagicMock() + mock_database.db_engine_spec.engine = "clickhouse" + mock_db.session.query.return_value.get.return_value = mock_database + + # PostgreSQL syntax + original_sql = "SELECT DATE_TRUNC('month', created_at) AS month FROM orders" + # ClickHouse uses toStartOfMonth instead + transpiled_sql = "SELECT toStartOfMonth(created_at) AS month FROM orders" + mock_transpile.return_value = transpiled_sql + + config = { + "table_name": "monthly_orders", + "sql": original_sql, + "source_db_engine": "postgresql", + } + transpile_virtual_dataset_sql(config, 1) + + assert config["sql"] == transpiled_sql + mock_transpile.assert_called_once_with(original_sql, "clickhouse", "postgresql") + + +@patch("superset.commands.importers.v1.examples.db") +@patch("superset.commands.importers.v1.examples.transpile_to_dialect") +def test_transpile_virtual_dataset_sql_postgres_to_mysql(mock_transpile, mock_db): + """Test transpilation from PostgreSQL to MySQL. 
+ + MySQL uses backticks for identifiers and has different casting syntax. + """ + mock_database = MagicMock() + mock_database.db_engine_spec.engine = "mysql" + mock_db.session.query.return_value.get.return_value = mock_database + + # PostgreSQL syntax with :: casting + original_sql = "SELECT created_at::DATE AS date_only FROM orders" + # MySQL syntax with CAST + transpiled_sql = "SELECT CAST(created_at AS DATE) AS date_only FROM `orders`" + mock_transpile.return_value = transpiled_sql + + config = { + "table_name": "orders_dates", + "sql": original_sql, + "source_db_engine": "postgresql", + } + transpile_virtual_dataset_sql(config, 1) + + assert config["sql"] == transpiled_sql + mock_transpile.assert_called_once_with(original_sql, "mysql", "postgresql") + + +@patch("superset.commands.importers.v1.examples.db") +@patch("superset.commands.importers.v1.examples.transpile_to_dialect") +def test_transpile_virtual_dataset_sql_postgres_to_sqlite(mock_transpile, mock_db): + """Test transpilation from PostgreSQL to SQLite.""" + mock_database = MagicMock() + mock_database.db_engine_spec.engine = "sqlite" + mock_db.session.query.return_value.get.return_value = mock_database + + original_sql = "SELECT * FROM orders WHERE created_at > NOW() - INTERVAL '7 days'" + transpiled_sql = ( + "SELECT * FROM orders WHERE created_at > DATETIME('now', '-7 days')" + ) + mock_transpile.return_value = transpiled_sql + + config = { + "table_name": "recent_orders", + "sql": original_sql, + "source_db_engine": "postgresql", + } + transpile_virtual_dataset_sql(config, 1) + + assert config["sql"] == transpiled_sql + mock_transpile.assert_called_once_with(original_sql, "sqlite", "postgresql") diff --git a/tests/unit_tests/sql/transpile_to_dialect_test.py b/tests/unit_tests/sql/transpile_to_dialect_test.py index 1327b09009cf..5a11e501fada 100644 --- a/tests/unit_tests/sql/transpile_to_dialect_test.py +++ b/tests/unit_tests/sql/transpile_to_dialect_test.py @@ -345,3 +345,54 @@ def test_sqlglot_generation_error_raises_exception() -> None: match="Cannot transpile SQL to postgresql", ): transpile_to_dialect("name = 'test'", "postgresql") + + +# Tests for source_engine parameter +@pytest.mark.parametrize( + ("sql", "source_engine", "target_engine", "expected"), + [ + # PostgreSQL to MySQL - should convert :: casting to CAST() + ( + "SELECT created_at::DATE FROM orders", + "postgresql", + "mysql", + "SELECT CAST(created_at AS DATE) FROM orders", + ), + # Same dialect - should preserve SQL + ( + "SELECT * FROM orders", + "postgresql", + "postgresql", + "SELECT * FROM orders", + ), + # PostgreSQL to DuckDB - DuckDB supports similar syntax (uppercases date part) + ( + "SELECT DATE_TRUNC('month', ts) FROM orders", + "postgresql", + "duckdb", + "SELECT DATE_TRUNC('MONTH', ts) FROM orders", + ), + ], +) +def test_transpile_with_source_engine( + sql: str, source_engine: str, target_engine: str, expected: str +) -> None: + """Test transpilation with explicit source engine.""" + result = transpile_to_dialect(sql, target_engine, source_engine) + assert result == expected + + +def test_transpile_source_engine_none_uses_generic() -> None: + """Test that source_engine=None uses generic dialect (backward compatible).""" + # Simple SQL that doesn't require dialect-specific parsing + result = transpile_to_dialect("SELECT * FROM orders", "postgresql", None) + assert result == "SELECT * FROM orders" + + +def test_transpile_unknown_source_engine_uses_generic() -> None: + """Test that unknown source_engine falls back to generic dialect.""" + # 
Unknown engine should be treated as None (generic) + result = transpile_to_dialect( + "SELECT * FROM orders", "postgresql", "unknown_engine" + ) + assert result == "SELECT * FROM orders"
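
A minimal standalone sketch of the NaN-to-None conversion that PATCH 1/4 adds to df_to_records(), using only numpy and pandas; the "ratio" column and its values are made-up illustration, not part of the patch:

    import numpy as np
    import pandas as pd

    # Same idea as the patched loop in superset/dataframe.py: any cell for
    # which pd.isna() is true becomes None, so it serializes as JSON null
    # instead of a non-spec-compliant bare NaN.
    df = pd.DataFrame({"ratio": [np.nan, 5.0]})
    records = [
        {key: (None if pd.isna(value) else value) for key, value in row.items()}
        for row in df.to_dict(orient="records")
    ]
    assert records == [{"ratio": None}, {"ratio": 5.0}]

Note that pd.isna() is False for positive and negative infinity, which is why test_df_to_records_with_inf_and_nan expects infinities to pass through untouched.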
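
The transpile change in PATCH 4/4 reduces to parsing with the source dialect before generating SQL for the target one. A minimal sketch of that behavior with sqlglot directly — the dialect names here are sqlglot's ("postgres", "mysql"), which Superset maps engine keys onto via SQLGLOT_DIALECTS, and both queries are hypothetical:

    import sqlglot

    # PostgreSQL "::" casts are rewritten into CAST() syntax when generating
    # MySQL, matching the postgres-to-mysql unit test above.
    sql = "SELECT created_at::DATE AS date_only FROM orders"
    print(sqlglot.transpile(sql, read="postgres", write="mysql")[0])
    # SELECT CAST(created_at AS DATE) AS date_only FROM orders

    # Dialect-specific quoting is the other half of the problem: backticks
    # only tokenize as identifier quotes under the mysql dialect, which is
    # why source_db_engine is now recorded at export time.
    sql = "SELECT `from` AS origin FROM `orders`"
    print(sqlglot.transpile(sql, read="mysql", write="postgres")[0])
    # SELECT "from" AS origin FROM "orders"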