Skip to content

Commit 207cf95

Browse files
committed
feat: update SessionContext to support dynamic auto-registration of Python objects based on session config
1 parent 38a6e87 commit 207cf95

File tree

3 files changed

+32
-21
lines changed

3 files changed

+32
-21
lines changed

docs/source/user-guide/dataframe/index.rst

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -228,10 +228,10 @@ Core Classes
228228
* :py:meth:`~datafusion.SessionContext.from_pandas` - Create from Pandas DataFrame
229229
* :py:meth:`~datafusion.SessionContext.from_arrow` - Create from Arrow data
230230

231-
``SessionContext`` automatically resolves SQL table names that match
232-
in-scope Python data objects. When ``auto_register_python_objects`` is
233-
enabled (the default), a query such as ``ctx.sql("SELECT * FROM pdf")``
234-
will register a pandas or PyArrow object named ``pdf`` without calling
231+
``SessionContext`` can automatically resolve SQL table names that match
232+
in-scope Python data objects. When automatic lookup is enabled, a query
233+
such as ``ctx.sql("SELECT * FROM pdf")`` will register a pandas or
234+
PyArrow object named ``pdf`` without calling
235235
:py:meth:`~datafusion.SessionContext.from_pandas` or
236236
:py:meth:`~datafusion.SessionContext.from_arrow` explicitly. This requires
237237
the corresponding library (``pandas`` for pandas objects, ``pyarrow`` for
@@ -242,16 +242,18 @@ Core Classes
242242
import pandas as pd
243243
from datafusion import SessionContext
244244
245-
ctx = SessionContext()
245+
ctx = SessionContext(auto_register_python_objects=True)
246246
pdf = pd.DataFrame({"value": [1, 2, 3]})
247247
248248
df = ctx.sql("SELECT SUM(value) AS total FROM pdf")
249249
print(df.to_pandas()) # automatically registers `pdf`
250250
251-
To opt out, either pass ``auto_register_python_objects=False`` when
252-
constructing the session, or call
253-
:py:meth:`~datafusion.SessionContext.set_python_table_lookup` with
254-
``False`` to require explicit registration.
251+
Automatic lookup is disabled by default. Enable it by passing
252+
``auto_register_python_objects=True`` when constructing the session or by
253+
configuring :py:class:`~datafusion.SessionConfig` with
254+
:py:meth:`~datafusion.SessionConfig.with_python_table_lookup`. Use
255+
:py:meth:`~datafusion.SessionContext.set_python_table_lookup` to toggle the
256+
behaviour at runtime.
255257

256258
See: :py:class:`datafusion.SessionContext`
257259

python/datafusion/context.py

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -503,7 +503,7 @@ def __init__(
503503
config: SessionConfig | None = None,
504504
runtime: RuntimeEnvBuilder | None = None,
505505
*,
506-
auto_register_python_objects: bool = True,
506+
auto_register_python_objects: bool | None = None,
507507
) -> None:
508508
"""Main interface for executing queries with DataFusion.
509509
@@ -516,7 +516,10 @@ def __init__(
516516
runtime: Runtime configuration options.
517517
auto_register_python_objects: Automatically register referenced
518518
Python objects (such as pandas or PyArrow data) when ``sql``
519-
queries reference them by name.
519+
queries reference them by name. When omitted, this defaults to
520+
the value configured via
521+
:py:meth:`~datafusion.SessionConfig.with_python_table_lookup`
522+
(``False`` unless explicitly enabled).
520523
521524
Example usage:
522525
@@ -532,6 +535,12 @@ def __init__(
532535
config.config_internal if config is not None else None,
533536
runtime.config_internal if runtime is not None else None,
534537
)
538+
539+
if auto_register_python_objects is None:
540+
auto_register_python_objects = getattr(
541+
config, "_python_table_lookup", False
542+
)
543+
535544
self._auto_python_table_lookup = auto_register_python_objects
536545

537546
def __repr__(self) -> str:
@@ -560,18 +569,18 @@ def enable_url_table(self) -> SessionContext:
560569
obj = klass.__new__(klass)
561570
obj.ctx = self.ctx.enable_url_table()
562571
obj._auto_python_table_lookup = getattr(
563-
self, "_auto_python_table_lookup", True
572+
self, "_auto_python_table_lookup", False
564573
)
565574
return obj
566575

567576
def set_python_table_lookup(self, enabled: bool = True) -> SessionContext:
568577
"""Enable or disable automatic registration of Python objects in SQL.
569578
570579
Args:
571-
enabled: When ``True`` (default), SQL queries automatically attempt
572-
to resolve missing table names by looking up Python objects in
573-
the caller's scope. When ``False``, missing tables will raise an
574-
error unless they have been explicitly registered.
580+
enabled: When ``True`` (the default for this argument), SQL queries automatically attempt to
581+
resolve missing table names by looking up Python objects in the
582+
caller's scope. Use ``False`` to require explicit registration
583+
of any referenced tables.
575584
576585
Returns:
577586
The current :py:class:`SessionContext` instance for chaining.
@@ -653,7 +662,7 @@ def _execute_sql() -> DataFrame:
653662
try:
654663
return _execute_sql()
655664
except Exception as err:
656-
if not getattr(self, "_auto_python_table_lookup", True):
665+
if not getattr(self, "_auto_python_table_lookup", False):
657666
raise
658667

659668
missing_tables = self._extract_missing_table_names(err)

python/tests/test_context.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -738,10 +738,10 @@ def test_sql_with_options_no_statements(ctx):
738738
ctx.sql_with_options(sql, options=options)
739739

740740

741-
def test_sql_auto_register_pandas():
741+
def test_session_config_python_table_lookup_enables_auto_registration():
742742
pd = pytest.importorskip("pandas")
743743

744-
ctx = SessionContext()
744+
ctx = SessionContext(config=SessionConfig().with_python_table_lookup(True))
745745
pdf = pd.DataFrame({"value": [1, 2, 3]})
746746
assert len(pdf) == 3
747747

@@ -750,7 +750,7 @@ def test_sql_auto_register_pandas():
750750

751751

752752
def test_sql_auto_register_arrow():
753-
ctx = SessionContext()
753+
ctx = SessionContext(auto_register_python_objects=True)
754754
arrow_table = pa.table({"value": [1, 2, 3, 4]})
755755
assert arrow_table.num_rows == 4
756756

@@ -761,7 +761,7 @@ def test_sql_auto_register_arrow():
761761
def test_sql_auto_register_disabled():
762762
pd = pytest.importorskip("pandas")
763763

764-
ctx = SessionContext(auto_register_python_objects=False)
764+
ctx = SessionContext()
765765
pdf = pd.DataFrame({"value": [1, 2, 3]})
766766
assert len(pdf) == 3
767767

0 commit comments

Comments
 (0)