1919
2020from __future__ import annotations
2121
22- import inspect
23- import re
2422import warnings
25- import weakref
2623from typing import TYPE_CHECKING , Any , Protocol
2724
2825try :
@@ -104,7 +101,6 @@ def __init__(self, config_options: dict[str, str] | None = None) -> None:
104101 config_options: Configuration options.
105102 """
106103 self .config_internal = SessionConfigInternal (config_options )
107- self ._python_table_lookup = False
108104
109105 def with_create_default_catalog_and_schema (
110106 self , enabled : bool = True
@@ -274,11 +270,6 @@ def with_parquet_pruning(self, enabled: bool = True) -> SessionConfig:
274270 self .config_internal = self .config_internal .with_parquet_pruning (enabled )
275271 return self
276272
277- def with_python_table_lookup (self , enabled : bool = True ) -> SessionConfig :
278- """Enable implicit table lookup for Python objects when running SQL."""
279- self ._python_table_lookup = enabled
280- return self
281-
282273 def set (self , key : str , value : str ) -> SessionConfig :
283274 """Set a configuration option.
284275
@@ -492,8 +483,6 @@ def __init__(
492483 self ,
493484 config : SessionConfig | None = None ,
494485 runtime : RuntimeEnvBuilder | None = None ,
495- * ,
496- auto_register_python_objects : bool | None = None ,
497486 ) -> None :
498487 """Main interface for executing queries with DataFusion.
499488
@@ -504,12 +493,6 @@ def __init__(
504493 Args:
505494 config: Session configuration options.
506495 runtime: Runtime configuration options.
507- auto_register_python_objects: Automatically register referenced
508- Python objects (such as pandas or PyArrow data) when ``sql``
509- queries reference them by name. When omitted, this defaults to
510- the value configured via
511- :py:meth:`~datafusion.SessionConfig.with_python_table_lookup`
512- (``False`` unless explicitly enabled).
513496
514497 Example usage:
515498
@@ -521,22 +504,10 @@ def __init__(
521504 ctx = SessionContext()
522505 df = ctx.read_csv("data.csv")
523506 """
524- self .ctx = SessionContextInternal (
525- config .config_internal if config is not None else None ,
526- runtime .config_internal if runtime is not None else None ,
527- )
528-
529- # Determine the final value for python table lookup
530- if auto_register_python_objects is not None :
531- auto_python_table_lookup = auto_register_python_objects
532- else :
533- # Default to session config value or False if not configured
534- auto_python_table_lookup = getattr (config , "_python_table_lookup" , False )
507+ config = config .config_internal if config is not None else None
508+ runtime = runtime .config_internal if runtime is not None else None
535509
536- self ._auto_python_table_lookup = bool (auto_python_table_lookup )
537- self ._python_table_bindings : dict [
538- str , tuple [weakref .ReferenceType [Any ] | None , int ]
539- ] = {}
510+ self .ctx = SessionContextInternal (config , runtime )
540511
541512 def __repr__ (self ) -> str :
542513 """Print a string representation of the Session Context."""
@@ -563,27 +534,8 @@ def enable_url_table(self) -> SessionContext:
563534 klass = self .__class__
564535 obj = klass .__new__ (klass )
565536 obj .ctx = self .ctx .enable_url_table ()
566- obj ._auto_python_table_lookup = getattr (
567- self , "_auto_python_table_lookup" , False
568- )
569- obj ._python_table_bindings = getattr (self , "_python_table_bindings" , {}).copy ()
570537 return obj
571538
572- def set_python_table_lookup (self , enabled : bool = True ) -> SessionContext :
573- """Enable or disable automatic registration of Python objects in SQL.
574-
575- Args:
576- enabled: When ``True``, SQL queries automatically attempt to
577- resolve missing table names by looking up Python objects in the
578- caller's scope. Use ``False`` to require explicit registration
579- of any referenced tables.
580-
581- Returns:
582- The current :py:class:`SessionContext` instance for chaining.
583- """
584- self ._auto_python_table_lookup = enabled
585- return self
586-
587539 def register_object_store (
588540 self , schema : str , store : Any , host : str | None = None
589541 ) -> None :
@@ -648,34 +600,9 @@ def sql(self, query: str, options: SQLOptions | None = None) -> DataFrame:
648600 Returns:
649601 DataFrame representation of the SQL query.
650602 """
651-
652- def _execute_sql () -> DataFrame :
653- if options is None :
654- return DataFrame (self .ctx .sql (query ))
655- return DataFrame (self .ctx .sql_with_options (query , options .options_internal ))
656-
657- auto_lookup_enabled = getattr (self , "_auto_python_table_lookup" , False )
658-
659- if auto_lookup_enabled :
660- self ._refresh_python_table_bindings ()
661-
662- while True :
663- try :
664- return _execute_sql ()
665- except Exception as err : # noqa: PERF203
666- if not auto_lookup_enabled :
667- raise
668-
669- missing_tables = self ._extract_missing_table_names (err )
670- if not missing_tables :
671- raise
672-
673- registered = self ._register_python_tables (missing_tables )
674- if not registered :
675- raise
676-
677- # Retry to allow registering additional tables referenced in the query.
678- continue
603+ if options is None :
604+ return DataFrame (self .ctx .sql (query ))
605+ return DataFrame (self .ctx .sql_with_options (query , options .options_internal ))
679606
680607 def sql_with_options (self , query : str , options : SQLOptions ) -> DataFrame :
681608 """Create a :py:class:`~datafusion.dataframe.DataFrame` from SQL query text.
@@ -692,138 +619,6 @@ def sql_with_options(self, query: str, options: SQLOptions) -> DataFrame:
692619 """
693620 return self .sql (query , options )
694621
695- @staticmethod
696- def _extract_missing_table_names (err : Exception ) -> list [str ]:
697- def _normalize (names : list [Any ]) -> list [str ]:
698- tables : list [str ] = []
699- for raw_name in names :
700- if not raw_name :
701- continue
702- raw_str = str (raw_name )
703- tables .append (raw_str .rsplit ("." , 1 )[- 1 ])
704- return tables
705-
706- missing_tables = getattr (err , "missing_table_names" , None )
707- if missing_tables is not None :
708- if isinstance (missing_tables , str ):
709- candidates : list [Any ] = [missing_tables ]
710- else :
711- try :
712- candidates = list (missing_tables )
713- except TypeError :
714- candidates = [missing_tables ]
715-
716- return _normalize (candidates )
717-
718- message = str (err )
719- matches = set ()
720- for pattern in (r"table '([^']+)' not found" , r"No table named '([^']+)'" ):
721- matches .update (re .findall (pattern , message ))
722-
723- return _normalize (list (matches ))
724-
725- def _register_python_tables (self , tables : list [str ]) -> bool :
726- registered_any = False
727- for table_name in tables :
728- if not table_name or self .table_exist (table_name ):
729- continue
730-
731- python_obj = self ._lookup_python_object (table_name )
732- if python_obj is None :
733- continue
734-
735- if self ._register_python_object (table_name , python_obj ):
736- registered_any = True
737-
738- return registered_any
739-
740- @staticmethod
741- def _lookup_python_object (name : str ) -> Any | None :
742- frame = inspect .currentframe ()
743- try :
744- frame = frame .f_back if frame is not None else None
745- lower_name = name .lower ()
746-
747- def _match (mapping : dict [str , Any ]) -> Any | None :
748- value = mapping .get (name )
749- if value is not None :
750- return value
751-
752- for key , candidate in mapping .items ():
753- if (
754- isinstance (key , str )
755- and key .lower () == lower_name
756- and candidate is not None
757- ):
758- return candidate
759-
760- return None
761-
762- while frame is not None :
763- for scope in (frame .f_locals , frame .f_globals ):
764- match = _match (scope )
765- if match is not None :
766- return match
767- frame = frame .f_back
768- finally :
769- del frame
770- return None
771-
772- def _refresh_python_table_bindings (self ) -> None :
773- bindings = getattr (self , "_python_table_bindings" , {})
774- for table_name , (obj_ref , cached_id ) in list (bindings .items ()):
775- cached_obj = obj_ref () if obj_ref is not None else None
776- current_obj = self ._lookup_python_object (table_name )
777- weakref_dead = obj_ref is not None and cached_obj is None
778- id_mismatch = current_obj is not None and id (current_obj ) != cached_id
779-
780- if not (weakref_dead or id_mismatch ):
781- continue
782-
783- self .deregister_table (table_name )
784-
785- if current_obj is None :
786- bindings .pop (table_name , None )
787- continue
788-
789- if self ._register_python_object (table_name , current_obj ):
790- continue
791-
792- bindings .pop (table_name , None )
793-
794- def _register_python_object (self , name : str , obj : Any ) -> bool :
795- registered = False
796-
797- if isinstance (obj , DataFrame ):
798- self .register_view (name , obj )
799- registered = True
800- elif (
801- obj .__class__ .__module__ .startswith ("polars." )
802- and obj .__class__ .__name__ == "DataFrame"
803- ):
804- self .from_polars (obj , name = name )
805- registered = True
806- elif (
807- obj .__class__ .__module__ .startswith ("pandas." )
808- and obj .__class__ .__name__ == "DataFrame"
809- ):
810- self .from_pandas (obj , name = name )
811- registered = True
812- elif isinstance (obj , (pa .Table , pa .RecordBatch , pa .RecordBatchReader )) or (
813- hasattr (obj , "__arrow_c_stream__" ) or hasattr (obj , "__arrow_c_array__" )
814- ):
815- self .from_arrow (obj , name = name )
816- registered = True
817-
818- if registered :
819- try :
820- reference : weakref .ReferenceType [Any ] | None = weakref .ref (obj )
821- except TypeError :
822- reference = None
823- self ._python_table_bindings [name ] = (reference , id (obj ))
824-
825- return registered
826-
827622 def create_dataframe (
828623 self ,
829624 partitions : list [list [pa .RecordBatch ]],
@@ -961,7 +756,6 @@ def register_table(self, name: str, table: Table) -> None:
961756 def deregister_table (self , name : str ) -> None :
962757 """Remove a table from the session."""
963758 self .ctx .deregister_table (name )
964- self ._python_table_bindings .pop (name , None )
965759
966760 def catalog_names (self ) -> set [str ]:
967761 """Returns the list of catalogs in this context."""
0 commit comments