From fadab21493e1e40639f0e434bbcbef0bb2130c60 Mon Sep 17 00:00:00 2001 From: Ramiro Aquino Romero Date: Thu, 22 Jan 2026 11:29:09 -0400 Subject: [PATCH 1/4] fix(api): nan is not properly handled for athena connections (#37071) --- superset/dataframe.py | 7 +- tests/unit_tests/dataframe_test.py | 196 ++++++++++++++++++++++++++--- 2 files changed, 184 insertions(+), 19 deletions(-) diff --git a/superset/dataframe.py b/superset/dataframe.py index 5f3c0dc77985..0e7cba0bc3c5 100644 --- a/superset/dataframe.py +++ b/superset/dataframe.py @@ -41,6 +41,9 @@ def df_to_records(dframe: pd.DataFrame) -> list[dict[str, Any]]: """ Convert a DataFrame to a set of records. + NaN values are converted to None for JSON compatibility. + This handles division by zero and other operations that produce NaN. + :param dframe: the DataFrame to convert :returns: a list of dictionaries reflecting each single row of the DataFrame """ @@ -52,6 +55,8 @@ def df_to_records(dframe: pd.DataFrame) -> list[dict[str, Any]]: for record in records: for key in record: - record[key] = _convert_big_integers(record[key]) + record[key] = ( + None if pd.isna(record[key]) else _convert_big_integers(record[key]) + ) return records diff --git a/tests/unit_tests/dataframe_test.py b/tests/unit_tests/dataframe_test.py index 0443bc1461cd..934edea20475 100644 --- a/tests/unit_tests/dataframe_test.py +++ b/tests/unit_tests/dataframe_test.py @@ -17,18 +17,19 @@ # pylint: disable=unused-argument, import-outside-toplevel from datetime import datetime +import numpy as np import pytest from pandas import Timestamp from pandas._libs.tslibs import NaT from superset.dataframe import df_to_records +from superset.db_engine_specs import BaseEngineSpec +from superset.result_set import SupersetResultSet from superset.superset_typing import DbapiDescription +from superset.utils import json as superset_json def test_df_to_records() -> None: - from superset.db_engine_specs import BaseEngineSpec - from superset.result_set import SupersetResultSet - data = [("a1", "b1", "c1"), ("a2", "b2", "c2")] cursor_descr: DbapiDescription = [ (column, "string", None, None, None, None, False) for column in ("a", "b", "c") @@ -43,9 +44,6 @@ def test_df_to_records() -> None: def test_df_to_records_NaT_type() -> None: # noqa: N802 - from superset.db_engine_specs import BaseEngineSpec - from superset.result_set import SupersetResultSet - data = [(NaT,), (Timestamp("2023-01-06 20:50:31.749000+0000", tz="UTC"),)] cursor_descr: DbapiDescription = [ ("date", "timestamp with time zone", None, None, None, None, False) @@ -60,9 +58,6 @@ def test_df_to_records_NaT_type() -> None: # noqa: N802 def test_df_to_records_mixed_emoji_type() -> None: - from superset.db_engine_specs import BaseEngineSpec - from superset.result_set import SupersetResultSet - data = [ ("What's up?", "This is a string text", 1), ("What's up?", "This is a string with an 😍 added", 2), @@ -100,9 +95,6 @@ def test_df_to_records_mixed_emoji_type() -> None: def test_df_to_records_mixed_accent_type() -> None: - from superset.db_engine_specs import BaseEngineSpec - from superset.result_set import SupersetResultSet - data = [ ("What's up?", "This is a string text", 1), ("What's up?", "This is a string with áccent", 2), @@ -140,9 +132,6 @@ def test_df_to_records_mixed_accent_type() -> None: def test_js_max_int() -> None: - from superset.db_engine_specs import BaseEngineSpec - from superset.result_set import SupersetResultSet - data = [(1, 1239162456494753670, "c1"), (2, 100, "c2")] cursor_descr: DbapiDescription = [ ("a", 
"int", None, None, None, None, False), @@ -192,9 +181,6 @@ def test_js_max_int() -> None: ], ) def test_max_pandas_timestamp(input_, expected) -> None: - from superset.db_engine_specs import BaseEngineSpec - from superset.result_set import SupersetResultSet - cursor_descr: DbapiDescription = [ ("a", "datetime", None, None, None, None, False), ("b", "int", None, None, None, None, False), @@ -203,3 +189,177 @@ def test_max_pandas_timestamp(input_, expected) -> None: df = results.to_pandas_df() assert df_to_records(df) == expected + + +def test_df_to_records_with_nan_from_division_by_zero() -> None: + """Test that NaN values from division by zero are converted to None.""" + # Simulate Athena query: select 0.00 / 0.00 as test + data = [(np.nan,), (5.0,), (np.nan,)] + cursor_descr: DbapiDescription = [("test", "double", None, None, None, None, False)] + results = SupersetResultSet(data, cursor_descr, BaseEngineSpec) + df = results.to_pandas_df() + + assert df_to_records(df) == [ + {"test": None}, + {"test": 5.0}, + {"test": None}, + ] + + +def test_df_to_records_with_mixed_nan_and_valid_values() -> None: + """Test that NaN values are properly handled alongside valid numeric data.""" + + # Simulate a query with multiple columns containing NaN values + data = [ + ("row1", 10.5, np.nan, 100), + ("row2", np.nan, 20.3, 200), + ("row3", 30.7, 40.2, np.nan), + ("row4", np.nan, np.nan, np.nan), + ] + cursor_descr: DbapiDescription = [ + ("name", "varchar", None, None, None, None, False), + ("value1", "double", None, None, None, None, False), + ("value2", "double", None, None, None, None, False), + ("value3", "int", None, None, None, None, False), + ] + results = SupersetResultSet(data, cursor_descr, BaseEngineSpec) + df = results.to_pandas_df() + + assert df_to_records(df) == [ + {"name": "row1", "value1": 10.5, "value2": None, "value3": 100}, + {"name": "row2", "value1": None, "value2": 20.3, "value3": 200}, + {"name": "row3", "value1": 30.7, "value2": 40.2, "value3": None}, + {"name": "row4", "value1": None, "value2": None, "value3": None}, + ] + + +def test_df_to_records_with_inf_and_nan() -> None: + """Test that both NaN and infinity values are handled correctly.""" + # Test various edge cases: NaN, positive infinity, negative infinity + data = [ + (np.nan, "division by zero"), + (np.inf, "positive infinity"), + (-np.inf, "negative infinity"), + (0.0, "zero"), + (42.5, "normal value"), + ] + cursor_descr: DbapiDescription = [ + ("result", "double", None, None, None, None, False), + ("description", "varchar", None, None, None, None, False), + ] + results = SupersetResultSet(data, cursor_descr, BaseEngineSpec) + df = results.to_pandas_df() + + records = df_to_records(df) + + # NaN should be converted to None + assert records[0]["result"] is None + assert records[0]["description"] == "division by zero" + + # Infinity values should remain as-is (they're valid JSON) + assert records[1]["result"] == np.inf + assert records[2]["result"] == -np.inf + + # Normal values should remain unchanged + assert records[3]["result"] == 0.0 + assert records[4]["result"] == 42.5 + + +def test_df_to_records_nan_json_serialization() -> None: + """ + Test that NaN values are properly converted to None for JSON serialization. + + Without the pd.isna() check, np.nan values would be passed through to JSON + serialization, which either produces non-spec-compliant output or requires + special handling with ignore_nan flags throughout the codebase. 
+ + This test validates that our fix converts NaN to None for proper JSON + serialization. + """ + # Simulate Athena query: SELECT 0.00 / 0.00 as test + data = [(np.nan,), (5.0,), (np.nan,)] + cursor_descr: DbapiDescription = [("test", "double", None, None, None, None, False)] + results = SupersetResultSet(data, cursor_descr, BaseEngineSpec) + df = results.to_pandas_df() + + # Get records with our fix + records = df_to_records(df) + + # Verify NaN values are converted to None + assert records == [ + {"test": None}, # NaN converted to None + {"test": 5.0}, + {"test": None}, # NaN converted to None + ] + + # This should succeed with valid, spec-compliant JSON + json_output = superset_json.dumps(records) + parsed = superset_json.loads(json_output) + + # Verify JSON serialization works correctly + assert parsed == records + + # Demonstrate what happens WITHOUT the fix + # (simulate the old behavior by directly using to_dict) + records_without_fix = df.to_dict(orient="records") + + # Verify the records contain actual NaN values (not None) + assert np.isnan(records_without_fix[0]["test"]) + assert records_without_fix[1]["test"] == 5.0 + assert np.isnan(records_without_fix[2]["test"]) + + # Demonstrate the actual bug: without the fix, ignore_nan=False raises ValueError + # This is the error users would see without our fix + with pytest.raises( + ValueError, match="Out of range float values are not JSON compliant" + ): + superset_json.dumps(records_without_fix, ignore_nan=False) + + # With ignore_nan=True, it works by converting NaN to null + # But this requires the flag to be set everywhere - our fix eliminates this need + json_with_ignore = superset_json.dumps(records_without_fix, ignore_nan=True) + parsed_with_ignore = superset_json.loads(json_with_ignore) + # The output is the same, but our fix doesn't require the ignore_nan flag + assert parsed_with_ignore[0]["test"] is None + + +def test_df_to_records_with_json_serialization_like_sql_lab() -> None: + """ + Test that mimics the actual SQL Lab serialization flow. + This shows how the fix prevents errors in the real usage path. 
+ """ + # Simulate query with NaN results + data = [ + ("user1", 100.0, np.nan), + ("user2", np.nan, 50.0), + ("user3", 75.0, 25.0), + ] + cursor_descr: DbapiDescription = [ + ("name", "varchar", None, None, None, None, False), + ("value1", "double", None, None, None, None, False), + ("value2", "double", None, None, None, None, False), + ] + results = SupersetResultSet(data, cursor_descr, BaseEngineSpec) + df = results.to_pandas_df() + + # Mimic sql_lab.py:360 - this is where df_to_records is used + records = df_to_records(df) or [] + + # Mimic sql_lab.py:332 - JSON serialization with Superset's custom json.dumps + # This should work without errors + json_str = superset_json.dumps( + records, default=superset_json.json_iso_dttm_ser, ignore_nan=True + ) + + # Verify it's valid JSON and NaN values are properly handled as null + parsed = superset_json.loads(json_str) + assert parsed[0]["value2"] is None # NaN became null + assert parsed[1]["value1"] is None # NaN became null + assert parsed[0]["value1"] == 100.0 + + # Also verify it works without ignore_nan flag (since we convert NaN to None) + json_str_no_flag = superset_json.dumps( + records, default=superset_json.json_iso_dttm_ser, ignore_nan=False + ) + parsed_no_flag = superset_json.loads(json_str_no_flag) + assert parsed_no_flag == parsed # Same result From 9fabd7f997c7e533537cf18c58528da117d67eba Mon Sep 17 00:00:00 2001 From: Evan Rusackas Date: Thu, 22 Jan 2026 09:41:07 -0800 Subject: [PATCH 2/4] docs: show Developer Portal in global navigation (#37313) Co-authored-by: Claude Opus 4.5 --- docs/docusaurus.config.ts | 2 +- docs/src/data/databases.json | 2 +- docs/versions-config.json | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/docusaurus.config.ts b/docs/docusaurus.config.ts index c9b0e8c28c55..320299e7c603 100644 --- a/docs/docusaurus.config.ts +++ b/docs/docusaurus.config.ts @@ -134,7 +134,7 @@ if (!versionsConfig.developer_portal.disabled && !versionsConfig.developer_porta { type: 'doc', docsPluginId: 'developer_portal', - docId: 'extensions/architectural-principles', + docId: 'extensions/overview', label: 'Extensions', }, { diff --git a/docs/src/data/databases.json b/docs/src/data/databases.json index 88eb25777a77..35e91f79a90a 100644 --- a/docs/src/data/databases.json +++ b/docs/src/data/databases.json @@ -1,5 +1,5 @@ { - "generated": "2026-01-19T22:38:23.768Z", + "generated": "2026-01-21T21:46:41.044Z", "statistics": { "totalDatabases": 67, "withDocumentation": 67, diff --git a/docs/versions-config.json b/docs/versions-config.json index 3587ebb9d614..d96bfc498d0f 100644 --- a/docs/versions-config.json +++ b/docs/versions-config.json @@ -22,7 +22,7 @@ }, "developer_portal": { "disabled": false, - "hideFromNav": true, + "hideFromNav": false, "lastVersion": "current", "includeCurrentVersion": true, "onlyIncludeVersions": [ From b6308308419ebb930c35079e4130daf267bf2bf7 Mon Sep 17 00:00:00 2001 From: Gabriel Torres Ruiz Date: Thu, 22 Jan 2026 14:45:59 -0300 Subject: [PATCH 3/4] fix(sqllab): add colorEditorSelection token for visible text selection (#36932) --- .../packages/superset-core/src/ui/theme/types.ts | 7 +++++++ .../src/components/AsyncAceEditor/index.tsx | 3 ++- superset/config.py | 7 +++++++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/superset-frontend/packages/superset-core/src/ui/theme/types.ts b/superset-frontend/packages/superset-core/src/ui/theme/types.ts index 8a2fcd73f67a..03c5e8d45a39 100644 --- a/superset-frontend/packages/superset-core/src/ui/theme/types.ts +++ 
b/superset-frontend/packages/superset-core/src/ui/theme/types.ts @@ -159,6 +159,13 @@ export interface SupersetSpecificTokens { echartsOptionsOverridesByChartType?: { [chartType: string]: any; }; + + // Editor-related + /** + * Background color for code editor text selection. + * Defaults to colorPrimaryBgHover if not specified. + */ + colorEditorSelection?: string; } /** diff --git a/superset-frontend/packages/superset-ui-core/src/components/AsyncAceEditor/index.tsx b/superset-frontend/packages/superset-ui-core/src/components/AsyncAceEditor/index.tsx index 002d65cd070c..56b0e9576a72 100644 --- a/superset-frontend/packages/superset-ui-core/src/components/AsyncAceEditor/index.tsx +++ b/superset-frontend/packages/superset-ui-core/src/components/AsyncAceEditor/index.tsx @@ -292,7 +292,8 @@ export function AsyncAceEditor( } /* Adjust selection color */ .ace_editor .ace_selection { - background-color: ${token.colorPrimaryBgHover} !important; + background-color: ${token.colorEditorSelection ?? + token.colorPrimaryBgHover} !important; } /* Improve active line highlighting */ diff --git a/superset/config.py b/superset/config.py index 4532b88342b8..995a5268f324 100644 --- a/superset/config.py +++ b/superset/config.py @@ -929,6 +929,8 @@ class D3TimeFormat(TypedDict, total=False): "fontWeightNormal": "400", "fontWeightLight": "300", "fontWeightStrong": "500", + # Editor selection color (for SQL Lab text highlighting) + "colorEditorSelection": "#fff5cf", }, "algorithm": "default", } @@ -938,6 +940,11 @@ class D3TimeFormat(TypedDict, total=False): # Set to None to disable dark mode THEME_DARK: Optional[Theme] = { **THEME_DEFAULT, + "token": { + **THEME_DEFAULT["token"], + # Darker selection color for dark mode + "colorEditorSelection": "#5c4d1a", + }, "algorithm": "dark", } From 87bbd54d0aca3b98671e0516e0b2d7a9e66bc3c3 Mon Sep 17 00:00:00 2001 From: Evan Rusackas Date: Thu, 22 Jan 2026 09:50:05 -0800 Subject: [PATCH 4/4] feat(examples): Transpile virtual dataset SQL on import (#37311) Co-authored-by: Claude Opus 4.5 Co-authored-by: Beto Dealmeida Co-authored-by: bito-code-review[bot] <188872107+bito-code-review[bot]@users.noreply.github.com> --- superset/commands/dashboard/export_example.py | 4 + superset/commands/importers/v1/examples.py | 58 +++++ superset/datasets/schemas.py | 2 + superset/sql/parse.py | 21 +- .../commands/importers/v1/examples_test.py | 244 ++++++++++++++++++ .../sql/transpile_to_dialect_test.py | 51 ++++ 6 files changed, 377 insertions(+), 3 deletions(-) create mode 100644 tests/unit_tests/commands/importers/v1/examples_test.py diff --git a/superset/commands/dashboard/export_example.py b/superset/commands/dashboard/export_example.py index 0446e213b335..7924fe0ad4d1 100644 --- a/superset/commands/dashboard/export_example.py +++ b/superset/commands/dashboard/export_example.py @@ -175,6 +175,10 @@ def export_dataset_yaml( "schema": None, # Don't export - use target database's default schema # Preserve SQL for virtual datasets, None for physical (data is in parquet) "sql": dataset.sql if is_preserved_virtual else None, + # Track source database engine for SQL transpilation during import + "source_db_engine": ( + dataset.database.db_engine_spec.engine if is_preserved_virtual else None + ), "params": None, # Don't export - contains stale import metadata "template_params": dataset.template_params, "filter_select_enabled": dataset.filter_select_enabled, diff --git a/superset/commands/importers/v1/examples.py b/superset/commands/importers/v1/examples.py index 99ecab79555c..19fe811044a7 
100644 --- a/superset/commands/importers/v1/examples.py +++ b/superset/commands/importers/v1/examples.py @@ -14,11 +14,13 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +import logging from typing import Any, Optional from marshmallow import Schema from sqlalchemy.exc import MultipleResultsFound +from superset import db from superset.charts.schemas import ImportV1ChartSchema from superset.commands.chart.importers.v1 import ImportChartsCommand from superset.commands.chart.importers.v1.utils import import_chart @@ -41,9 +43,62 @@ from superset.dashboards.schemas import ImportV1DashboardSchema from superset.databases.schemas import ImportV1DatabaseSchema from superset.datasets.schemas import ImportV1DatasetSchema +from superset.exceptions import QueryClauseValidationException +from superset.models.core import Database +from superset.sql.parse import transpile_to_dialect from superset.utils.core import get_example_default_schema from superset.utils.decorators import transaction +logger = logging.getLogger(__name__) + + +def transpile_virtual_dataset_sql(config: dict[str, Any], database_id: int) -> None: + """ + Transpile virtual dataset SQL to the target database dialect. + + This ensures that virtual datasets exported from one database type + (e.g., PostgreSQL) can be loaded into a different database type + (e.g., MySQL, DuckDB, SQLite). + + Args: + config: Dataset configuration dict (modified in place) + database_id: ID of the target database + """ + sql = config.get("sql") + if not sql: + return + + database = db.session.query(Database).get(database_id) + if not database: + logger.warning("Database %s not found, skipping SQL transpilation", database_id) + return + + target_engine = database.db_engine_spec.engine + source_engine = config.get("source_db_engine") + if target_engine == source_engine: + logger.info("Source and target dialects are identical, skipping transpilation") + return + + try: + transpiled_sql = transpile_to_dialect(sql, target_engine, source_engine) + if transpiled_sql != sql: + logger.info( + "Transpiled virtual dataset SQL for '%s' from %s to %s dialect", + config.get("table_name", "unknown"), + source_engine or "generic", + target_engine, + ) + config["sql"] = transpiled_sql + except QueryClauseValidationException as ex: + logger.warning( + "Could not transpile SQL for dataset '%s' from %s to %s: %s. 
" + "Using original SQL which may not be compatible.", + config.get("table_name", "unknown"), + source_engine or "generic", + target_engine, + ex, + ) + class ImportExamplesCommand(ImportModelsCommand): """Import examples""" @@ -119,6 +174,9 @@ def _import( # pylint: disable=too-many-locals, too-many-branches # noqa: C901 if config["schema"] is None: config["schema"] = get_example_default_schema() + # transpile virtual dataset SQL to target database dialect + transpile_virtual_dataset_sql(config, config["database_id"]) + try: dataset = import_dataset( config, diff --git a/superset/datasets/schemas.py b/superset/datasets/schemas.py index 96bc44a9d3cf..1506ef45d167 100644 --- a/superset/datasets/schemas.py +++ b/superset/datasets/schemas.py @@ -322,6 +322,8 @@ def fix_extra(self, data: dict[str, Any], **kwargs: Any) -> dict[str, Any]: schema = fields.String(allow_none=True) catalog = fields.String(allow_none=True) sql = fields.String(allow_none=True) + # Source database engine for SQL transpilation (virtual datasets only) + source_db_engine = fields.String(allow_none=True, load_default=None) params = fields.Dict(allow_none=True) template_params = fields.Dict(allow_none=True) filter_select_enabled = fields.Boolean() diff --git a/superset/sql/parse.py b/superset/sql/parse.py index af72f72e9528..af9a740ec756 100644 --- a/superset/sql/parse.py +++ b/superset/sql/parse.py @@ -1522,9 +1522,21 @@ def sanitize_clause(clause: str, engine: str) -> str: raise QueryClauseValidationException(f"Invalid SQL clause: {clause}") from ex -def transpile_to_dialect(sql: str, target_engine: str) -> str: +def transpile_to_dialect( + sql: str, + target_engine: str, + source_engine: str | None = None, +) -> str: """ - Transpile SQL from "generic SQL" to the target database dialect using SQLGlot. + Transpile SQL from one database dialect to another using SQLGlot. + + Args: + sql: The SQL query to transpile + target_engine: The target database engine (e.g., "mysql", "postgresql") + source_engine: The source database engine. If None, uses generic SQL dialect. + + Returns: + The transpiled SQL string If the target engine is not in SQLGLOT_DIALECTS, returns the SQL as-is. """ @@ -1534,8 +1546,11 @@ def transpile_to_dialect(sql: str, target_engine: str) -> str: if target_dialect is None: return sql + # Get source dialect (default to generic if not specified) + source_dialect = SQLGLOT_DIALECTS.get(source_engine) if source_engine else Dialect + try: - parsed = sqlglot.parse_one(sql, dialect=Dialect) + parsed = sqlglot.parse_one(sql, dialect=source_dialect) return Dialect.get_or_raise(target_dialect).generate( parsed, copy=True, diff --git a/tests/unit_tests/commands/importers/v1/examples_test.py b/tests/unit_tests/commands/importers/v1/examples_test.py new file mode 100644 index 000000000000..1ad6176dc1ca --- /dev/null +++ b/tests/unit_tests/commands/importers/v1/examples_test.py @@ -0,0 +1,244 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Tests for the examples importer, specifically SQL transpilation.""" + +from unittest.mock import MagicMock, patch + +from superset.commands.importers.v1.examples import transpile_virtual_dataset_sql + + +def test_transpile_virtual_dataset_sql_no_sql(): + """Test that configs without SQL are unchanged.""" + config = {"table_name": "my_table", "sql": None} + transpile_virtual_dataset_sql(config, 1) + assert config["sql"] is None + + +def test_transpile_virtual_dataset_sql_empty_sql(): + """Test that configs with empty SQL are unchanged.""" + config = {"table_name": "my_table", "sql": ""} + transpile_virtual_dataset_sql(config, 1) + assert config["sql"] == "" + + +@patch("superset.commands.importers.v1.examples.db") +def test_transpile_virtual_dataset_sql_database_not_found(mock_db): + """Test graceful handling when database is not found.""" + mock_db.session.query.return_value.get.return_value = None + + config = {"table_name": "my_table", "sql": "SELECT * FROM foo"} + original_sql = config["sql"] + + transpile_virtual_dataset_sql(config, 999) + + # SQL should remain unchanged + assert config["sql"] == original_sql + + +@patch("superset.commands.importers.v1.examples.db") +@patch("superset.commands.importers.v1.examples.transpile_to_dialect") +def test_transpile_virtual_dataset_sql_success(mock_transpile, mock_db): + """Test successful SQL transpilation with source engine.""" + mock_database = MagicMock() + mock_database.db_engine_spec.engine = "mysql" + mock_db.session.query.return_value.get.return_value = mock_database + + mock_transpile.return_value = "SELECT * FROM `foo`" + + config = { + "table_name": "my_table", + "sql": "SELECT * FROM foo", + "source_db_engine": "postgresql", + } + transpile_virtual_dataset_sql(config, 1) + + assert config["sql"] == "SELECT * FROM `foo`" + mock_transpile.assert_called_once_with("SELECT * FROM foo", "mysql", "postgresql") + + +@patch("superset.commands.importers.v1.examples.db") +@patch("superset.commands.importers.v1.examples.transpile_to_dialect") +def test_transpile_virtual_dataset_sql_no_source_engine(mock_transpile, mock_db): + """Test transpilation when source_db_engine is not specified (legacy).""" + mock_database = MagicMock() + mock_database.db_engine_spec.engine = "mysql" + mock_db.session.query.return_value.get.return_value = mock_database + + mock_transpile.return_value = "SELECT * FROM `foo`" + + # No source_db_engine - should default to None (generic dialect) + config = {"table_name": "my_table", "sql": "SELECT * FROM foo"} + transpile_virtual_dataset_sql(config, 1) + + assert config["sql"] == "SELECT * FROM `foo`" + mock_transpile.assert_called_once_with("SELECT * FROM foo", "mysql", None) + + +@patch("superset.commands.importers.v1.examples.db") +@patch("superset.commands.importers.v1.examples.transpile_to_dialect") +def test_transpile_virtual_dataset_sql_no_change(mock_transpile, mock_db): + """Test when transpilation returns same SQL (no dialect differences).""" + mock_database = MagicMock() + mock_database.db_engine_spec.engine = "postgresql" + mock_db.session.query.return_value.get.return_value = mock_database + + 
original_sql = "SELECT * FROM foo" + mock_transpile.return_value = original_sql + + config = { + "table_name": "my_table", + "sql": original_sql, + "source_db_engine": "postgresql", + } + transpile_virtual_dataset_sql(config, 1) + + assert config["sql"] == original_sql + + +@patch("superset.commands.importers.v1.examples.db") +@patch("superset.commands.importers.v1.examples.transpile_to_dialect") +def test_transpile_virtual_dataset_sql_error_fallback(mock_transpile, mock_db): + """Test graceful fallback when transpilation fails.""" + from superset.exceptions import QueryClauseValidationException + + mock_database = MagicMock() + mock_database.db_engine_spec.engine = "mysql" + mock_db.session.query.return_value.get.return_value = mock_database + + mock_transpile.side_effect = QueryClauseValidationException("Parse error") + + original_sql = "SELECT SOME_POSTGRES_SPECIFIC_FUNCTION() FROM foo" + config = { + "table_name": "my_table", + "sql": original_sql, + "source_db_engine": "postgresql", + } + + # Should not raise, should keep original SQL + transpile_virtual_dataset_sql(config, 1) + assert config["sql"] == original_sql + + +@patch("superset.commands.importers.v1.examples.db") +@patch("superset.commands.importers.v1.examples.transpile_to_dialect") +def test_transpile_virtual_dataset_sql_postgres_to_duckdb(mock_transpile, mock_db): + """Test transpilation from PostgreSQL to DuckDB.""" + mock_database = MagicMock() + mock_database.db_engine_spec.engine = "duckdb" + mock_db.session.query.return_value.get.return_value = mock_database + + original_sql = """ + SELECT DATE_TRUNC('month', created_at) AS month, COUNT(*) AS cnt + FROM orders WHERE status = 'completed' GROUP BY 1 + """ + transpiled_sql = """ + SELECT DATE_TRUNC('month', created_at) AS month, COUNT(*) AS cnt + FROM orders WHERE status = 'completed' GROUP BY 1 + """ + mock_transpile.return_value = transpiled_sql + + config = { + "table_name": "monthly_orders", + "sql": original_sql, + "source_db_engine": "postgresql", + } + transpile_virtual_dataset_sql(config, 1) + + assert config["sql"] == transpiled_sql + mock_transpile.assert_called_once_with(original_sql, "duckdb", "postgresql") + + +@patch("superset.commands.importers.v1.examples.db") +@patch("superset.commands.importers.v1.examples.transpile_to_dialect") +def test_transpile_virtual_dataset_sql_postgres_to_clickhouse(mock_transpile, mock_db): + """Test transpilation from PostgreSQL to ClickHouse. + + ClickHouse has different syntax for date functions, so this tests + real dialect differences. + """ + mock_database = MagicMock() + mock_database.db_engine_spec.engine = "clickhouse" + mock_db.session.query.return_value.get.return_value = mock_database + + # PostgreSQL syntax + original_sql = "SELECT DATE_TRUNC('month', created_at) AS month FROM orders" + # ClickHouse uses toStartOfMonth instead + transpiled_sql = "SELECT toStartOfMonth(created_at) AS month FROM orders" + mock_transpile.return_value = transpiled_sql + + config = { + "table_name": "monthly_orders", + "sql": original_sql, + "source_db_engine": "postgresql", + } + transpile_virtual_dataset_sql(config, 1) + + assert config["sql"] == transpiled_sql + mock_transpile.assert_called_once_with(original_sql, "clickhouse", "postgresql") + + +@patch("superset.commands.importers.v1.examples.db") +@patch("superset.commands.importers.v1.examples.transpile_to_dialect") +def test_transpile_virtual_dataset_sql_postgres_to_mysql(mock_transpile, mock_db): + """Test transpilation from PostgreSQL to MySQL. 
+ + MySQL uses backticks for identifiers and has different casting syntax. + """ + mock_database = MagicMock() + mock_database.db_engine_spec.engine = "mysql" + mock_db.session.query.return_value.get.return_value = mock_database + + # PostgreSQL syntax with :: casting + original_sql = "SELECT created_at::DATE AS date_only FROM orders" + # MySQL syntax with CAST + transpiled_sql = "SELECT CAST(created_at AS DATE) AS date_only FROM `orders`" + mock_transpile.return_value = transpiled_sql + + config = { + "table_name": "orders_dates", + "sql": original_sql, + "source_db_engine": "postgresql", + } + transpile_virtual_dataset_sql(config, 1) + + assert config["sql"] == transpiled_sql + mock_transpile.assert_called_once_with(original_sql, "mysql", "postgresql") + + +@patch("superset.commands.importers.v1.examples.db") +@patch("superset.commands.importers.v1.examples.transpile_to_dialect") +def test_transpile_virtual_dataset_sql_postgres_to_sqlite(mock_transpile, mock_db): + """Test transpilation from PostgreSQL to SQLite.""" + mock_database = MagicMock() + mock_database.db_engine_spec.engine = "sqlite" + mock_db.session.query.return_value.get.return_value = mock_database + + original_sql = "SELECT * FROM orders WHERE created_at > NOW() - INTERVAL '7 days'" + transpiled_sql = ( + "SELECT * FROM orders WHERE created_at > DATETIME('now', '-7 days')" + ) + mock_transpile.return_value = transpiled_sql + + config = { + "table_name": "recent_orders", + "sql": original_sql, + "source_db_engine": "postgresql", + } + transpile_virtual_dataset_sql(config, 1) + + assert config["sql"] == transpiled_sql + mock_transpile.assert_called_once_with(original_sql, "sqlite", "postgresql") diff --git a/tests/unit_tests/sql/transpile_to_dialect_test.py b/tests/unit_tests/sql/transpile_to_dialect_test.py index 1327b09009cf..5a11e501fada 100644 --- a/tests/unit_tests/sql/transpile_to_dialect_test.py +++ b/tests/unit_tests/sql/transpile_to_dialect_test.py @@ -345,3 +345,54 @@ def test_sqlglot_generation_error_raises_exception() -> None: match="Cannot transpile SQL to postgresql", ): transpile_to_dialect("name = 'test'", "postgresql") + + +# Tests for source_engine parameter +@pytest.mark.parametrize( + ("sql", "source_engine", "target_engine", "expected"), + [ + # PostgreSQL to MySQL - should convert :: casting to CAST() + ( + "SELECT created_at::DATE FROM orders", + "postgresql", + "mysql", + "SELECT CAST(created_at AS DATE) FROM orders", + ), + # Same dialect - should preserve SQL + ( + "SELECT * FROM orders", + "postgresql", + "postgresql", + "SELECT * FROM orders", + ), + # PostgreSQL to DuckDB - DuckDB supports similar syntax (uppercases date part) + ( + "SELECT DATE_TRUNC('month', ts) FROM orders", + "postgresql", + "duckdb", + "SELECT DATE_TRUNC('MONTH', ts) FROM orders", + ), + ], +) +def test_transpile_with_source_engine( + sql: str, source_engine: str, target_engine: str, expected: str +) -> None: + """Test transpilation with explicit source engine.""" + result = transpile_to_dialect(sql, target_engine, source_engine) + assert result == expected + + +def test_transpile_source_engine_none_uses_generic() -> None: + """Test that source_engine=None uses generic dialect (backward compatible).""" + # Simple SQL that doesn't require dialect-specific parsing + result = transpile_to_dialect("SELECT * FROM orders", "postgresql", None) + assert result == "SELECT * FROM orders" + + +def test_transpile_unknown_source_engine_uses_generic() -> None: + """Test that unknown source_engine falls back to generic dialect.""" + # 
Unknown engine should be treated as None (generic) + result = transpile_to_dialect( + "SELECT * FROM orders", "postgresql", "unknown_engine" + ) + assert result == "SELECT * FROM orders"
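
A minimal standalone sketch of the NaN-to-None conversion that PATCH 1/4 adds to df_to_records(), using only numpy and pandas; the "ratio" column and its values are made-up illustration, not part of the patch:

    import numpy as np
    import pandas as pd

    # Same idea as the patched loop in superset/dataframe.py: any cell for
    # which pd.isna() is true becomes None, so it serializes as JSON null
    # instead of a non-spec-compliant bare NaN.
    df = pd.DataFrame({"ratio": [np.nan, 5.0]})
    records = [
        {key: (None if pd.isna(value) else value) for key, value in row.items()}
        for row in df.to_dict(orient="records")
    ]
    assert records == [{"ratio": None}, {"ratio": 5.0}]

Note that pd.isna() is False for positive and negative infinity, which is why test_df_to_records_with_inf_and_nan expects infinities to pass through untouched.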
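
The transpile change in PATCH 4/4 reduces to parsing with the source dialect before generating SQL for the target one. A minimal sketch of that behavior with sqlglot directly — the dialect names here are sqlglot's ("postgres", "mysql"), which Superset maps engine keys onto via SQLGLOT_DIALECTS, and both queries are hypothetical:

    import sqlglot

    # PostgreSQL "::" casts are rewritten into CAST() syntax when generating
    # MySQL, matching the postgres-to-mysql unit test above.
    sql = "SELECT created_at::DATE AS date_only FROM orders"
    print(sqlglot.transpile(sql, read="postgres", write="mysql")[0])
    # SELECT CAST(created_at AS DATE) AS date_only FROM orders

    # Dialect-specific quoting is the other half of the problem: backticks
    # only tokenize as identifier quotes under the mysql dialect, which is
    # why source_db_engine is now recorded at export time.
    sql = "SELECT `from` AS origin FROM `orders`"
    print(sqlglot.transpile(sql, read="mysql", write="postgres")[0])
    # SELECT "from" AS origin FROM "orders"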