From f212e87cd656ef8e3c8c1449437ec1fb27ee338e Mon Sep 17 00:00:00 2001 From: "k.polydorou" Date: Wed, 17 Sep 2025 15:50:12 +0300 Subject: [PATCH 1/6] fix: prevent duplicate extension load Before loading each extension, check if it is already loaded --- duckdb_engine/__init__.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/duckdb_engine/__init__.py b/duckdb_engine/__init__.py index e6b1680e2..1e5e65dd3 100644 --- a/duckdb_engine/__init__.py +++ b/duckdb_engine/__init__.py @@ -301,7 +301,22 @@ def connect(self, *cargs: Any, **cparams: Any) -> "Connection": conn = duckdb.connect(*cargs, **cparams) for extension in preload_extensions: - conn.execute(f"LOAD {extension}") + # skip if already loaded in this connection + row = conn.execute( + "SELECT loaded FROM duckdb_extensions() WHERE extension_name = ?", + [extension] + ).fetchone() + if row and row[0]: # True == already loaded + continue + + try: + conn.execute(f"LOAD {extension}") + except Exception as e: + # tolerate idempotent re-load race if it ever happens + if "already exists" in str(e) or "already registered" in str(e): + pass + else: + raise for filesystem in filesystems: conn.register_filesystem(filesystem) From a5222e6d7fd6cfad1bd32dc32892697f70b2a3c4 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 18 Sep 2025 08:05:44 +0000 Subject: [PATCH 2/6] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- duckdb_engine/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/duckdb_engine/__init__.py b/duckdb_engine/__init__.py index 1e5e65dd3..2649d9c80 100644 --- a/duckdb_engine/__init__.py +++ b/duckdb_engine/__init__.py @@ -304,7 +304,7 @@ def connect(self, *cargs: Any, **cparams: Any) -> "Connection": # skip if already loaded in this connection row = conn.execute( "SELECT loaded FROM duckdb_extensions() WHERE extension_name = ?", - [extension] + [extension], ).fetchone() if row and row[0]: # True == already loaded continue From 1e3dd23427ec990389ba653e59f3bcfd4a95a7d2 Mon Sep 17 00:00:00 2001 From: Yorgos Lourakis Date: Tue, 7 Oct 2025 15:24:57 +0300 Subject: [PATCH 3/6] test(duckdb-engine): add test for ignoring second load of extension --- duckdb_engine/tests/test_basic.py | 67 +++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/duckdb_engine/tests/test_basic.py b/duckdb_engine/tests/test_basic.py index 15fda1fea..defd38e61 100644 --- a/duckdb_engine/tests/test_basic.py +++ b/duckdb_engine/tests/test_basic.py @@ -706,3 +706,70 @@ def test_register_filesystem() -> None: with engine.connect() as conn: duckdb_conn = getattr(conn.connection.dbapi_connection, "_ConnectionWrapper__c") assert duckdb.list_filesystems(connection=duckdb_conn) == ["memory", "file"] + + +def test_skip_load_extension_if_already_loaded(monkeypatch): + """ + Preloading the same extension from two *separate* engines: + - First connection LOADs it successfully. + - Second connection attempts LOAD again; DB says 'already registered'; + new connect() should swallow that benign error and still connect. + """ + real_connect = duckdb.connect + globally_registered = False # simulate process-global registration + first_loads = 0 + second_load_attempts = 0 + + class _Row: + def __init__(self, val): self._val = val + def fetchone(self): return self._val + + def fake_connect(*args, **kwargs): + inner = real_connect(*args, **kwargs) + loaded_here = False # per-connection view + + class Proxy: + def execute(self, query, params=None): + nonlocal globally_registered, loaded_here, first_loads, second_load_attempts + q = str(query).strip().lower() + + # The dialect probes per connection + if q.startswith("select loaded from duckdb_extensions"): + return _Row((loaded_here,)) + + # LOAD path + if q.startswith("load "): + if not globally_registered: + globally_registered = True + loaded_here = True + first_loads += 1 + return _Row(None) + # simulate a benign idempotent 'already registered' error + second_load_attempts += 1 + raise Exception("extension already registered") + + return inner.execute(query, params) + + def register_filesystem(self, fs): + return inner.register_filesystem(fs) + + def __getattr__(self, name): + return getattr(inner, name) + + return Proxy() + + monkeypatch.setattr(duckdb, "connect", fake_connect) + + # Engine 1: first connection loads the extension + eng1 = create_engine("duckdb:///:memory:", connect_args={"preload_extensions": ["httpfs"]}) + with eng1.connect(): + pass + + # Engine 2: second connection sees 'already registered' and should still succeed (new code only) + eng2 = create_engine("duckdb:///:memory:", connect_args={"preload_extensions": ["httpfs"]}) + with eng2.connect(): + pass + + # Sanity: exactly one real load, one idempotent retry + assert first_loads == 1 + assert second_load_attempts == 1 \ No newline at end of file From cc7475711b3e20d66020b97da2b6007a55f83073 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 7 Oct 2025 12:51:58 +0000 Subject: [PATCH 4/6] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- duckdb_engine/tests/test_basic.py | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/duckdb_engine/tests/test_basic.py b/duckdb_engine/tests/test_basic.py index defd38e61..3882d7081 100644 --- a/duckdb_engine/tests/test_basic.py +++ b/duckdb_engine/tests/test_basic.py @@ -721,8 +721,11 @@ def test_skip_load_extension_if_already_loaded(monkeypatch): second_load_attempts = 0 class _Row: - def __init__(self, val): self._val = val - def fetchone(self): return self._val + def __init__(self, val): + self._val = val + + def fetchone(self): + return self._val def fake_connect(*args, **kwargs): inner = real_connect(*args, **kwargs) @@ -730,7 +733,11 @@ def fake_connect(*args, **kwargs): class Proxy: def execute(self, query, params=None): - nonlocal globally_registered, loaded_here, first_loads, second_load_attempts + nonlocal \ + globally_registered, \ + loaded_here, \ + first_loads, \ + second_load_attempts q = str(query).strip().lower() # The dialect probes per connection @@ -761,15 +768,19 @@ def __getattr__(self, name): monkeypatch.setattr(duckdb, "connect", fake_connect) # Engine 1: first connection loads the extension - eng1 = create_engine("duckdb:///:memory:", connect_args={"preload_extensions": ["httpfs"]}) + eng1 = create_engine( + "duckdb:///:memory:", connect_args={"preload_extensions": ["httpfs"]} + ) with eng1.connect(): pass # Engine 2: second connection sees 'already registered' and should still succeed (new code only) - eng2 = create_engine("duckdb:///:memory:", connect_args={"preload_extensions": ["httpfs"]}) + eng2 = create_engine( + "duckdb:///:memory:", connect_args={"preload_extensions": ["httpfs"]} + ) with eng2.connect(): pass # Sanity: exactly one real load, one idempotent retry assert first_loads == 1 - assert second_load_attempts == 1 \ No newline at end of file + assert second_load_attempts == 1 From 70804e0ad620f4032520e36f8d571fa2804b6e13 Mon Sep 17 00:00:00 2001 From: Yorgos Lourakis Date: Tue, 7 Oct 2025 16:23:50 +0300 Subject: [PATCH 5/6] fix(duckdb-engine): fix ci issues --- duckdb_engine/tests/test_basic.py | 73 ++++++++++++++----------------- 1 file changed, 32 insertions(+), 41 deletions(-) diff --git a/duckdb_engine/tests/test_basic.py b/duckdb_engine/tests/test_basic.py index 3882d7081..20c8a88a8 100644 --- a/duckdb_engine/tests/test_basic.py +++ b/duckdb_engine/tests/test_basic.py @@ -9,6 +9,7 @@ import duckdb import fsspec import sqlalchemy +from _pytest.monkeypatch import MonkeyPatch from hypothesis import assume, given, settings from hypothesis.strategies import text as text_strat from packaging.version import Version @@ -112,6 +113,13 @@ class IntervalModel(Base): field = Column(Interval) +class _Row: + def __init__(self, val: Any) -> None: + self._val = val + def fetchone(self) -> Any: + return self._val + + @fixture def session(engine: Engine) -> Session: return sessionmaker(bind=engine)() @@ -708,39 +716,29 @@ def test_register_filesystem() -> None: assert duckdb.list_filesystems(connection=duckdb_conn) == ["memory", "file"] -def test_skip_load_extension_if_already_loaded(monkeypatch): +def test_skip_load_extension_if_already_loaded(monkeypatch: MonkeyPatch) -> None: """ - Preloading the same extension from two *separate* engines: - - First connection LOADs it successfully. - - Second connection attempts LOAD again; DB says 'already registered'; - new connect() should swallow that benign error and still connect. + First engine LOADs successfully. + Second engine attempts LOAD again; DB says 'already registered'. + New connect() swallows that benign error; old connect() bubbles it. """ real_connect = duckdb.connect - globally_registered = False # simulate process-global registration - first_loads = 0 - second_load_attempts = 0 - - class _Row: - def __init__(self, val): - self._val = val + globally_registered: bool = False # simulate process-global registration + first_loads: int = 0 + second_load_attempts: int = 0 - def fetchone(self): - return self._val - - def fake_connect(*args, **kwargs): + def fake_connect(*args: Any, **kwargs: Any) -> Any: inner = real_connect(*args, **kwargs) - loaded_here = False # per-connection view + loaded_here: bool = False # per-connection view class Proxy: - def execute(self, query, params=None): - nonlocal \ - globally_registered, \ - loaded_here, \ - first_loads, \ - second_load_attempts + def __init__(self, inner_conn: Any) -> None: + self._inner = inner_conn + def execute(self, query: Any, params: Optional[Sequence[Any]] = None) -> Any: + nonlocal globally_registered, loaded_here, first_loads, second_load_attempts q = str(query).strip().lower() - # The dialect probes per connection + # Dialect probes per connection if q.startswith("select loaded from duckdb_extensions"): return _Row((loaded_here,)) @@ -751,36 +749,29 @@ def execute(self, query, params=None): loaded_here = True first_loads += 1 return _Row(None) - # simulate a benign idempotent 'already registered' error + # simulate tolerated idempotent error second_load_attempts += 1 raise Exception("extension already registered") - return inner.execute(query, params) + return self._inner.execute(query, params) - def register_filesystem(self, fs): - return inner.register_filesystem(fs) + def register_filesystem(self, fs: Any) -> Any: + return self._inner.register_filesystem(fs) - def __getattr__(self, name): - return getattr(inner, name) + def __getattr__(self, name: str) -> Any: + return getattr(self._inner, name) - return Proxy() + return Proxy(inner) monkeypatch.setattr(duckdb, "connect", fake_connect) - # Engine 1: first connection loads the extension - eng1 = create_engine( - "duckdb:///:memory:", connect_args={"preload_extensions": ["httpfs"]} - ) + eng1 = create_engine("duckdb:///:memory:", connect_args={"preload_extensions": ["httpfs"]}) with eng1.connect(): pass - # Engine 2: second connection sees 'already registered' and should still succeed (new code only) - eng2 = create_engine( - "duckdb:///:memory:", connect_args={"preload_extensions": ["httpfs"]} - ) + eng2 = create_engine("duckdb:///:memory:", connect_args={"preload_extensions": ["httpfs"]}) with eng2.connect(): pass - # Sanity: exactly one real load, one idempotent retry assert first_loads == 1 - assert second_load_attempts == 1 + assert second_load_attempts == 1 \ No newline at end of file From 1dfc505e98312046fc37baa7fd52f5beac86312f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 7 Oct 2025 13:25:13 +0000 Subject: [PATCH 6/6] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- duckdb_engine/tests/test_basic.py | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/duckdb_engine/tests/test_basic.py b/duckdb_engine/tests/test_basic.py index 20c8a88a8..d2d6dfb9e 100644 --- a/duckdb_engine/tests/test_basic.py +++ b/duckdb_engine/tests/test_basic.py @@ -116,6 +116,7 @@ class IntervalModel(Base): class _Row: def __init__(self, val: Any) -> None: self._val = val + def fetchone(self) -> Any: return self._val @@ -734,8 +735,15 @@ def fake_connect(*args: Any, **kwargs: Any) -> Any: class Proxy: def __init__(self, inner_conn: Any) -> None: self._inner = inner_conn - def execute(self, query: Any, params: Optional[Sequence[Any]] = None) -> Any: - nonlocal globally_registered, loaded_here, first_loads, second_load_attempts + + def execute( + self, query: Any, params: Optional[Sequence[Any]] = None + ) -> Any: + nonlocal \ + globally_registered, \ + loaded_here, \ + first_loads, \ + second_load_attempts q = str(query).strip().lower() # Dialect probes per connection @@ -765,13 +773,17 @@ def __getattr__(self, name: str) -> Any: monkeypatch.setattr(duckdb, "connect", fake_connect) - eng1 = create_engine("duckdb:///:memory:", connect_args={"preload_extensions": ["httpfs"]}) + eng1 = create_engine( + "duckdb:///:memory:", connect_args={"preload_extensions": ["httpfs"]} + ) with eng1.connect(): pass - eng2 = create_engine("duckdb:///:memory:", connect_args={"preload_extensions": ["httpfs"]}) + eng2 = create_engine( + "duckdb:///:memory:", connect_args={"preload_extensions": ["httpfs"]} + ) with eng2.connect(): pass assert first_loads == 1 - assert second_load_attempts == 1 \ No newline at end of file + assert second_load_attempts == 1