From ed4e617c6186940ad6a0840ddabceca509613db3 Mon Sep 17 00:00:00 2001 From: Ben Constable Date: Tue, 21 Jan 2025 20:07:16 +0000 Subject: [PATCH 1/9] Update docs and postgresql connection --- text_2_sql/__init__.py | 0 text_2_sql/data_dictionary/README.md | 3 ++- .../src/text_2_sql_core/connectors/postgresql_sql.py | 7 ++++++- .../src/text_2_sql_core/data_dictionary/cli.py | 11 +++++++++++ .../data_dictionary/data_dictionary_creator.py | 12 +++++------- 5 files changed, 24 insertions(+), 9 deletions(-) delete mode 100644 text_2_sql/__init__.py diff --git a/text_2_sql/__init__.py b/text_2_sql/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/text_2_sql/data_dictionary/README.md b/text_2_sql/data_dictionary/README.md index 92dd92f0..3c9728b7 100644 --- a/text_2_sql/data_dictionary/README.md +++ b/text_2_sql/data_dictionary/README.md @@ -224,7 +224,8 @@ If there is no pre-built script for your database engine, take one of the above ## Running -1. Create your `.env` file based on the provided sample `.env.example`. Place this file in the same place as the `.env.example`. +**Execute all these commands in the `text_2_sql` directory.** +1. Create your `.env` file based on the provided sample `text_2_sql/.env.example`. Place this file in the same place in `text_2_sql/.env`. 2. Package and install the `text_2_sql_core` library. See [build](https://docs.astral.sh/uv/concepts/projects/build/) if you want to build as a wheel and install on an agent. Or you can run from within a `uv` environment and skip packaging. - Install the optional dependencies if you need a database connector other than TSQL. `uv sync --extra ` 3. 
Run `data_dictionary ` diff --git a/text_2_sql/text_2_sql_core/src/text_2_sql_core/connectors/postgresql_sql.py b/text_2_sql/text_2_sql_core/src/text_2_sql_core/connectors/postgresql_sql.py index f1060417..00f4a13d 100644 --- a/text_2_sql/text_2_sql_core/src/text_2_sql_core/connectors/postgresql_sql.py +++ b/text_2_sql/text_2_sql_core/src/text_2_sql_core/connectors/postgresql_sql.py @@ -16,6 +16,11 @@ def __init__(self): self.database_engine = DatabaseEngine.POSTGRESQL + @property + def engine_specific_rules(self) -> str: + """Get the engine specific rules.""" + return "" + @property def engine_specific_fields(self) -> list[str]: """Get the engine specific fields.""" @@ -64,7 +69,7 @@ async def query_execution( connection_string = os.environ["Text2Sql__DatabaseConnectionString"] # Establish an asynchronous connection to the PostgreSQL database - async with psycopg.AsyncConnection.connect(connection_string) as conn: + async with await psycopg.AsyncConnection.connect(connection_string) as conn: # Create an asynchronous cursor async with conn.cursor() as cursor: await cursor.execute(sql_query) diff --git a/text_2_sql/text_2_sql_core/src/text_2_sql_core/data_dictionary/cli.py b/text_2_sql/text_2_sql_core/src/text_2_sql_core/data_dictionary/cli.py index 586f4660..9c858ae0 100644 --- a/text_2_sql/text_2_sql_core/src/text_2_sql_core/data_dictionary/cli.py +++ b/text_2_sql/text_2_sql_core/src/text_2_sql_core/data_dictionary/cli.py @@ -5,6 +5,7 @@ import logging import typer from rich import print as rich_print +from tenacity import RetryError logging.basicConfig(level=logging.INFO) @@ -112,8 +113,18 @@ def create( try: asyncio.run(data_dictionary_creator.create_data_dictionary()) + except RetryError as e: + # Fetch the actual exception + e = e.last_attempt.exception() + logging.error(e) + rich_print("Text2SQL Data Dictionary Creator Failed ❌") + + rich_print(f"Error Messages: {e}") + + raise typer.Exit(code=1) except Exception as e: logging.error(e) + rich_print("Text2SQL 
Data Dictionary Creator Failed ❌") rich_print(f"Error Messages: {e}") diff --git a/text_2_sql/text_2_sql_core/src/text_2_sql_core/data_dictionary/data_dictionary_creator.py b/text_2_sql/text_2_sql_core/src/text_2_sql_core/data_dictionary/data_dictionary_creator.py index e61c1381..890d5e97 100644 --- a/text_2_sql/text_2_sql_core/src/text_2_sql_core/data_dictionary/data_dictionary_creator.py +++ b/text_2_sql/text_2_sql_core/src/text_2_sql_core/data_dictionary/data_dictionary_creator.py @@ -21,7 +21,7 @@ class ForeignKeyRelationship(BaseModel): column: str = Field(..., alias="Column") foreign_column: str = Field(..., alias="ForeignColumn") - model_config = ConfigDict(populate_by_name=True, arbitrary_types_allowed=True) + model_config = ConfigDict(populate_by_name=True) class EntityRelationship(BaseModel): @@ -39,7 +39,7 @@ class EntityRelationship(BaseModel): foreign_database: Optional[str] = Field(default=None, alias="ForeignDatabase") foreign_catalog: Optional[str] = Field(default=None, alias="ForeignCatalog") - model_config = ConfigDict(populate_by_name=True, arbitrary_types_allowed=True) + model_config = ConfigDict(populate_by_name=True) def pivot(self): """A method to pivot the entity relationship.""" @@ -128,12 +128,10 @@ class ColumnItem(BaseModel): name: str = Field(..., alias="Name") data_type: str = Field(..., alias="DataType") definition: Optional[str] = Field(..., alias="Definition") - distinct_values: Optional[list[any]] = Field( - None, alias="DistinctValues", exclude=True - ) - sample_values: Optional[list[any]] = Field(None, alias="SampleValues") + distinct_values: Optional[list] = Field(None, alias="DistinctValues", exclude=True) + sample_values: Optional[list] = Field(None, alias="SampleValues") - model_config = ConfigDict(populate_by_name=True, arbitrary_types_allowed=True) + model_config = ConfigDict(populate_by_name=True) def value_store_entry( self, entity, distinct_value, excluded_fields_for_database_engine From 
04825d4d3a06f5fe53bab80919563af928abb6d6 Mon Sep 17 00:00:00 2001 From: Ben Constable Date: Tue, 21 Jan 2025 20:20:02 +0000 Subject: [PATCH 2/9] Update old code --- text_2_sql/.env.example | 11 +++++++---- .../autogen/evaluate_autogen_text2sql.ipynb | 4 ++-- .../parallel_query_solving_agent.py | 8 ++++---- .../inner_autogen_text_2_sql.py | 14 ++++++-------- .../connectors/postgresql_sql.py | 2 +- .../connectors/snowflake_sql.py | 2 +- .../text_2_sql_core/connectors/sqlite_sql.py | 18 ++++++++++++------ .../src/text_2_sql_core/connectors/tsql_sql.py | 2 +- .../postgresql_data_dictionary_creator.py | 2 +- .../snowflake_data_dictionary_creator.py | 2 +- .../tsql_data_dictionary_creator.py | 2 +- 11 files changed, 37 insertions(+), 30 deletions(-) diff --git a/text_2_sql/.env.example b/text_2_sql/.env.example index f08cf6dd..69e624ef 100644 --- a/text_2_sql/.env.example +++ b/text_2_sql/.env.example @@ -17,17 +17,20 @@ AIService__AzureSearchOptions__Text2SqlQueryCache__Index= AIService__AzureSearchOptions__Text2SqlColumnValueStore__Index= -# All SQL Engine specific connection details -Text2Sql__DatabaseName= +# TSQL +Text2Sql__Tsql__ConnectionString= +Text2Sql__Database= -# TSQL or PostgreSQL Specific Connection Details -Text2Sql__DatabaseConnectionString= +# PostgreSQL Specific Connection Details +Text2Sql__Postgresql__ConnectionString= +Text2Sql__Postgresql__Database= # Snowflake Specific Connection Details Text2Sql__Snowflake__User= Text2Sql__Snowflake__Password= Text2Sql__Snowflake__Account= Text2Sql__Snowflake__Warehouse= +Text2Sql__Snowflake__Database= # Databricks Specific Connection Details Text2Sql__Databricks__Catalog= diff --git a/text_2_sql/autogen/evaluate_autogen_text2sql.ipynb b/text_2_sql/autogen/evaluate_autogen_text2sql.ipynb index 2b38a091..16c8c164 100644 --- a/text_2_sql/autogen/evaluate_autogen_text2sql.ipynb +++ b/text_2_sql/autogen/evaluate_autogen_text2sql.ipynb @@ -215,8 +215,8 @@ " \n", " # Update database connection string for current 
database\n", " db_path = DATABASE_DIR / db_id / f\"{db_id}.sqlite\"\n", - " os.environ[\"Text2Sql__DatabaseConnectionString\"] = str(db_path)\n", - " os.environ[\"Text2Sql__DatabaseName\"] = db_id\n", + " os.environ[\"Text2Sql__Tsql__ConnectionString\"] = str(db_path)\n", + " os.environ[\"Text2Sql__Database\"] = db_id\n", " \n", " sql = await generate_sql(question)\n", " predictions.append(f\"{sql}\\t{db_id}\")\n", diff --git a/text_2_sql/autogen/src/autogen_text_2_sql/custom_agents/parallel_query_solving_agent.py b/text_2_sql/autogen/src/autogen_text_2_sql/custom_agents/parallel_query_solving_agent.py index e343729e..3aec8f9f 100644 --- a/text_2_sql/autogen/src/autogen_text_2_sql/custom_agents/parallel_query_solving_agent.py +++ b/text_2_sql/autogen/src/autogen_text_2_sql/custom_agents/parallel_query_solving_agent.py @@ -219,12 +219,12 @@ async def consume_inner_messages_from_agentic_flow( # Add database connection info to injected parameters query_params = injected_parameters.copy() if injected_parameters else {} - if "Text2Sql__DatabaseConnectionString" in os.environ: + if "Text2Sql__Tsql__ConnectionString" in os.environ: query_params["database_connection_string"] = os.environ[ - "Text2Sql__DatabaseConnectionString" + "Text2Sql__Tsql__ConnectionString" ] - if "Text2Sql__DatabaseName" in os.environ: - query_params["database_name"] = os.environ["Text2Sql__DatabaseName"] + if "Text2Sql__Database" in os.environ: + query_params["database_name"] = os.environ["Text2Sql__Database"] # Launch tasks for each sub-query inner_solving_generators.append( diff --git a/text_2_sql/autogen/src/autogen_text_2_sql/inner_autogen_text_2_sql.py b/text_2_sql/autogen/src/autogen_text_2_sql/inner_autogen_text_2_sql.py index a83000d9..c08983fd 100644 --- a/text_2_sql/autogen/src/autogen_text_2_sql/inner_autogen_text_2_sql.py +++ b/text_2_sql/autogen/src/autogen_text_2_sql/inner_autogen_text_2_sql.py @@ -45,27 +45,25 @@ def __init__(self, **kwargs: dict): self.set_mode() # Store original 
environment variables - self.original_db_conn = os.environ.get("Text2Sql__DatabaseConnectionString") - self.original_db_name = os.environ.get("Text2Sql__DatabaseName") + self.original_db_conn = os.environ.get("Text2Sql__Tsql__ConnectionString") + self.original_db_name = os.environ.get("Text2Sql__Database") def _update_environment(self, injected_parameters: dict = None): """Update environment variables with injected parameters.""" if injected_parameters: if "database_connection_string" in injected_parameters: - os.environ["Text2Sql__DatabaseConnectionString"] = injected_parameters[ + os.environ["Text2Sql__Tsql__ConnectionString"] = injected_parameters[ "database_connection_string" ] if "database_name" in injected_parameters: - os.environ["Text2Sql__DatabaseName"] = injected_parameters[ - "database_name" - ] + os.environ["Text2Sql__Database"] = injected_parameters["database_name"] def _restore_environment(self): """Restore original environment variables.""" if self.original_db_conn: - os.environ["Text2Sql__DatabaseConnectionString"] = self.original_db_conn + os.environ["Text2Sql__Tsql__ConnectionString"] = self.original_db_conn if self.original_db_name: - os.environ["Text2Sql__DatabaseName"] = self.original_db_name + os.environ["Text2Sql__Database"] = self.original_db_name def set_mode(self): """Set the mode of the plugin based on the environment variables.""" diff --git a/text_2_sql/text_2_sql_core/src/text_2_sql_core/connectors/postgresql_sql.py b/text_2_sql/text_2_sql_core/src/text_2_sql_core/connectors/postgresql_sql.py index 00f4a13d..4192e8d1 100644 --- a/text_2_sql/text_2_sql_core/src/text_2_sql_core/connectors/postgresql_sql.py +++ b/text_2_sql/text_2_sql_core/src/text_2_sql_core/connectors/postgresql_sql.py @@ -66,7 +66,7 @@ async def query_execution( """ logging.info(f"Running query: {sql_query}") results = [] - connection_string = os.environ["Text2Sql__DatabaseConnectionString"] + connection_string = os.environ["Text2Sql__Postgresql__ConnectionString"] # 
Establish an asynchronous connection to the PostgreSQL database async with await psycopg.AsyncConnection.connect(connection_string) as conn: diff --git a/text_2_sql/text_2_sql_core/src/text_2_sql_core/connectors/snowflake_sql.py b/text_2_sql/text_2_sql_core/src/text_2_sql_core/connectors/snowflake_sql.py index 5f40627e..8de8743c 100644 --- a/text_2_sql/text_2_sql_core/src/text_2_sql_core/connectors/snowflake_sql.py +++ b/text_2_sql/text_2_sql_core/src/text_2_sql_core/connectors/snowflake_sql.py @@ -100,7 +100,7 @@ async def query_execution( password=os.environ["Text2Sql__Snowflake__Password"], account=os.environ["Text2Sql__Snowflake__Account"], warehouse=os.environ["Text2Sql__Snowflake__Warehouse"], - database=os.environ["Text2Sql__DatabaseName"], + database=os.environ["Text2Sql__Database"], ) try: diff --git a/text_2_sql/text_2_sql_core/src/text_2_sql_core/connectors/sqlite_sql.py b/text_2_sql/text_2_sql_core/src/text_2_sql_core/connectors/sqlite_sql.py index 16548cd4..5e35df62 100644 --- a/text_2_sql/text_2_sql_core/src/text_2_sql_core/connectors/sqlite_sql.py +++ b/text_2_sql/text_2_sql_core/src/text_2_sql_core/connectors/sqlite_sql.py @@ -63,7 +63,7 @@ async def query_execution( Returns: List of dictionaries containing query results. 
""" - db_file = os.environ["Text2Sql__DatabaseConnectionString"] + db_file = os.environ["Text2Sql__Tsql__ConnectionString"] if not os.path.exists(db_file): raise FileNotFoundError(f"Database file not found: {db_file}") @@ -127,7 +127,9 @@ def find_matching_tables(self, text: str, table_names: list[str]) -> list[int]: List of matching table indices """ matches = [] - logging.info(f"Looking for tables matching '{text}' in tables: {table_names}") + logging.info( + "Looking for tables matching '%s' in tables: %s", text, table_names + ) # First try exact matches for idx, name in enumerate(table_names): @@ -144,7 +146,9 @@ def find_matching_tables(self, text: str, table_names: list[str]) -> list[int]: for idx, name in enumerate(table_names): table_terms = set(re.split(r"[_\s]+", name.lower())) if search_terms & table_terms: # If there's any overlap in terms - logging.info(f"Found partial match: '{name}' with terms {table_terms}") + logging.info( + "Found partial match: '%s' with terms %s", name, table_terms + ) matches.append(idx) return matches @@ -181,7 +185,7 @@ async def get_entity_schemas( spider_schemas = json.load(f) # Get current database name from path - db_path = os.environ["Text2Sql__DatabaseConnectionString"] + db_path = os.environ["Text2Sql__Tsql__ConnectionString"] db_name = os.path.splitext(os.path.basename(db_path))[0] logging.info(f"Looking for schemas in database: {db_name}") @@ -196,7 +200,7 @@ async def get_entity_schemas( if not db_schema: raise ValueError(f"Schema not found for database: {db_name}") - logging.info(f"Looking for tables matching '{text}' in database '{db_name}'") + logging.info("Looking for tables matching '%s' in database '%s'", text, db_name) logging.info(f"Available tables: {db_schema['table_names']}") # Find all matching tables using flexible matching @@ -228,7 +232,9 @@ async def get_entity_schemas( } schemas.append(schema) logging.info( - f"Added schema for table '{db_schema['table_names'][table_idx]}': {schema}" + "Added schema 
for table '%s': %s", + db_schema["table_names"][table_idx], + schema, ) if as_json: diff --git a/text_2_sql/text_2_sql_core/src/text_2_sql_core/connectors/tsql_sql.py b/text_2_sql/text_2_sql_core/src/text_2_sql_core/connectors/tsql_sql.py index 3cf6bcd5..adca1f8c 100644 --- a/text_2_sql/text_2_sql_core/src/text_2_sql_core/connectors/tsql_sql.py +++ b/text_2_sql/text_2_sql_core/src/text_2_sql_core/connectors/tsql_sql.py @@ -86,7 +86,7 @@ async def query_execution( """ logging.info(f"Running query: {sql_query}") results = [] - connection_string = os.environ["Text2Sql__DatabaseConnectionString"] + connection_string = os.environ["Text2Sql__Tsql__ConnectionString"] async with await aioodbc.connect(dsn=connection_string) as sql_db_client: async with sql_db_client.cursor() as cursor: await cursor.execute(sql_query) diff --git a/text_2_sql/text_2_sql_core/src/text_2_sql_core/data_dictionary/postgresql_data_dictionary_creator.py b/text_2_sql/text_2_sql_core/src/text_2_sql_core/data_dictionary/postgresql_data_dictionary_creator.py index d5116594..a99b56fb 100644 --- a/text_2_sql/text_2_sql_core/src/text_2_sql_core/data_dictionary/postgresql_data_dictionary_creator.py +++ b/text_2_sql/text_2_sql_core/src/text_2_sql_core/data_dictionary/postgresql_data_dictionary_creator.py @@ -16,7 +16,7 @@ def __init__(self, **kwargs): excluded_schemas = ["information_schema", "pg_catalog"] super().__init__(excluded_schemas=excluded_schemas, **kwargs) - self.database = os.environ["Text2Sql__DatabaseName"] + self.database = os.environ["Text2Sql__Postgresql__Database"] self.database_engine = DatabaseEngine.POSTGRESQL self.sql_connector = PostgresqlSqlConnector() diff --git a/text_2_sql/text_2_sql_core/src/text_2_sql_core/data_dictionary/snowflake_data_dictionary_creator.py b/text_2_sql/text_2_sql_core/src/text_2_sql_core/data_dictionary/snowflake_data_dictionary_creator.py index fc5a1ded..b4570eee 100644 --- 
a/text_2_sql/text_2_sql_core/src/text_2_sql_core/data_dictionary/snowflake_data_dictionary_creator.py +++ b/text_2_sql/text_2_sql_core/src/text_2_sql_core/data_dictionary/snowflake_data_dictionary_creator.py @@ -18,7 +18,7 @@ def __init__(self, **kwargs): excluded_schemas = ["INFORMATION_SCHEMA"] super().__init__(excluded_schemas=excluded_schemas, **kwargs) - self.database = os.environ["Text2Sql__DatabaseName"] + self.database = os.environ["Text2Sql__Snowflake__Database"] self.warehouse = os.environ["Text2Sql__Snowflake__Warehouse"] self.database_engine = DatabaseEngine.SNOWFLAKE diff --git a/text_2_sql/text_2_sql_core/src/text_2_sql_core/data_dictionary/tsql_data_dictionary_creator.py b/text_2_sql/text_2_sql_core/src/text_2_sql_core/data_dictionary/tsql_data_dictionary_creator.py index 2d360851..7557f4b7 100644 --- a/text_2_sql/text_2_sql_core/src/text_2_sql_core/data_dictionary/tsql_data_dictionary_creator.py +++ b/text_2_sql/text_2_sql_core/src/text_2_sql_core/data_dictionary/tsql_data_dictionary_creator.py @@ -21,7 +21,7 @@ def __init__(self, **kwargs): """ excluded_schemas = ["dbo", "sys"] super().__init__(excluded_schemas=excluded_schemas, **kwargs) - self.database = os.environ["Text2Sql__DatabaseName"] + self.database = os.environ["Text2Sql__Tsql__Database"] self.database_engine = DatabaseEngine.TSQL From c99db0c0315ee83030efb160bbb5850d8f98cdf6 Mon Sep 17 00:00:00 2001 From: Ben Constable Date: Tue, 21 Jan 2025 20:21:47 +0000 Subject: [PATCH 3/9] Update --- text_2_sql/.env.example | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/text_2_sql/.env.example b/text_2_sql/.env.example index 69e624ef..1fd0e83c 100644 --- a/text_2_sql/.env.example +++ b/text_2_sql/.env.example @@ -19,7 +19,7 @@ AIService__AzureSearchOptions__Text2SqlColumnValueStore__Index= -Text2Sql__Database= +Text2Sql__Tsql__Database= # PostgreSQL Specific Connection Details Text2Sql__Postgresql__ConnectionString= From e48539387df88621e85bd91e4f8f414009a5dd81 Mon Sep 17 00:00:00 
2001 From: Ben Constable Date: Tue, 21 Jan 2025 20:26:54 +0000 Subject: [PATCH 4/9] Update envs --- .../custom_agents/parallel_query_solving_agent.py | 4 ++-- .../src/autogen_text_2_sql/inner_autogen_text_2_sql.py | 8 +++++--- .../src/text_2_sql_core/connectors/snowflake_sql.py | 2 +- .../src/text_2_sql_core/utils/environment.py | 2 +- 4 files changed, 9 insertions(+), 7 deletions(-) diff --git a/text_2_sql/autogen/src/autogen_text_2_sql/custom_agents/parallel_query_solving_agent.py b/text_2_sql/autogen/src/autogen_text_2_sql/custom_agents/parallel_query_solving_agent.py index 3aec8f9f..bbd77583 100644 --- a/text_2_sql/autogen/src/autogen_text_2_sql/custom_agents/parallel_query_solving_agent.py +++ b/text_2_sql/autogen/src/autogen_text_2_sql/custom_agents/parallel_query_solving_agent.py @@ -223,8 +223,8 @@ async def consume_inner_messages_from_agentic_flow( query_params["database_connection_string"] = os.environ[ "Text2Sql__Tsql__ConnectionString" ] - if "Text2Sql__Database" in os.environ: - query_params["database_name"] = os.environ["Text2Sql__Database"] + if "Text2Sql__Tsql__Database" in os.environ: + query_params["database_name"] = os.environ["Text2Sql__Tsql__Database"] # Launch tasks for each sub-query inner_solving_generators.append( diff --git a/text_2_sql/autogen/src/autogen_text_2_sql/inner_autogen_text_2_sql.py b/text_2_sql/autogen/src/autogen_text_2_sql/inner_autogen_text_2_sql.py index c08983fd..454ec332 100644 --- a/text_2_sql/autogen/src/autogen_text_2_sql/inner_autogen_text_2_sql.py +++ b/text_2_sql/autogen/src/autogen_text_2_sql/inner_autogen_text_2_sql.py @@ -46,7 +46,7 @@ def __init__(self, **kwargs: dict): # Store original environment variables self.original_db_conn = os.environ.get("Text2Sql__Tsql__ConnectionString") - self.original_db_name = os.environ.get("Text2Sql__Database") + self.original_db_name = os.environ.get("Text2Sql__Tsql__Database") def _update_environment(self, injected_parameters: dict = None): """Update environment variables with 
injected parameters.""" @@ -56,14 +56,16 @@ def _update_environment(self, injected_parameters: dict = None): "database_connection_string" ] if "database_name" in injected_parameters: - os.environ["Text2Sql__Database"] = injected_parameters["database_name"] + os.environ["Text2Sql__Tsql__Database"] = injected_parameters[ + "database_name" + ] def _restore_environment(self): """Restore original environment variables.""" if self.original_db_conn: os.environ["Text2Sql__Tsql__ConnectionString"] = self.original_db_conn if self.original_db_name: - os.environ["Text2Sql__Database"] = self.original_db_name + os.environ["Text2Sql__Tsql__Database"] = self.original_db_name def set_mode(self): """Set the mode of the plugin based on the environment variables.""" diff --git a/text_2_sql/text_2_sql_core/src/text_2_sql_core/connectors/snowflake_sql.py b/text_2_sql/text_2_sql_core/src/text_2_sql_core/connectors/snowflake_sql.py index 8de8743c..49d7a436 100644 --- a/text_2_sql/text_2_sql_core/src/text_2_sql_core/connectors/snowflake_sql.py +++ b/text_2_sql/text_2_sql_core/src/text_2_sql_core/connectors/snowflake_sql.py @@ -100,7 +100,7 @@ async def query_execution( password=os.environ["Text2Sql__Snowflake__Password"], account=os.environ["Text2Sql__Snowflake__Account"], warehouse=os.environ["Text2Sql__Snowflake__Warehouse"], - database=os.environ["Text2Sql__Database"], + database=os.environ["Text2Sql__Snowflake__Database"], ) try: diff --git a/text_2_sql/text_2_sql_core/src/text_2_sql_core/utils/environment.py b/text_2_sql/text_2_sql_core/src/text_2_sql_core/utils/environment.py index 232254ee..c43edecd 100644 --- a/text_2_sql/text_2_sql_core/src/text_2_sql_core/utils/environment.py +++ b/text_2_sql/text_2_sql_core/src/text_2_sql_core/utils/environment.py @@ -18,7 +18,7 @@ def get_identity_type() -> IdentityType: Returns: IdentityType: The identity type """ - identity = os.environ.get("IdentityType") + identity = os.environ["IdentityType"] if identity == "user_assigned": return 
IdentityType.USER_ASSIGNED From 9e0c18a77852930089ea8096ca088d318b3b3673 Mon Sep 17 00:00:00 2001 From: Ben Constable Date: Tue, 21 Jan 2025 20:30:17 +0000 Subject: [PATCH 5/9] Update example --- text_2_sql/.env.example | 3 +++ 1 file changed, 3 insertions(+) diff --git a/text_2_sql/.env.example b/text_2_sql/.env.example index 1fd0e83c..b8b13509 100644 --- a/text_2_sql/.env.example +++ b/text_2_sql/.env.example @@ -1,5 +1,8 @@ # Environment variables for Text2SQL IdentityType= # system_assigned or user_assigned or key +Text2Sql__UseQueryCache= # True or False +Text2Sql__PreRunQueryCache= # True or False +Text2Sql__UseColumnValueStore= # True or False # Open AI Connection Details OpenAI__CompletionDeployment= From 6ce126d720f8d157dff4ba47ed734e6b0a4e0a93 Mon Sep 17 00:00:00 2001 From: Ben Constable Date: Tue, 21 Jan 2025 20:31:54 +0000 Subject: [PATCH 6/9] Update README --- text_2_sql/GETTING_STARTED.md | 2 ++ text_2_sql/data_dictionary/README.md | 3 +++ 2 files changed, 5 insertions(+) diff --git a/text_2_sql/GETTING_STARTED.md b/text_2_sql/GETTING_STARTED.md index 9acaea55..140a7afd 100644 --- a/text_2_sql/GETTING_STARTED.md +++ b/text_2_sql/GETTING_STARTED.md @@ -1,5 +1,7 @@ # Getting Started with Agentic Text2SQL Component +**Execute all these commands in the `text_2_sql` directory.** + To get started, perform the following steps: 1. Setup Azure OpenAI in your subscription with **gpt-4o-mini** & an embedding model, alongside a SQL Server sample database, AI Search and a storage account. diff --git a/text_2_sql/data_dictionary/README.md b/text_2_sql/data_dictionary/README.md index 3c9728b7..01641727 100644 --- a/text_2_sql/data_dictionary/README.md +++ b/text_2_sql/data_dictionary/README.md @@ -225,6 +225,9 @@ If there is no pre-built script for your database engine, take one of the above ## Running **Execute all these commands in the `text_2_sql` directory.** + +To generate a data dictionary, perform the following steps: + 1. 
Create your `.env` file based on the provided sample `text_2_sql/.env.example`. Place this file in the same place in `text_2_sql/.env`. 2. Package and install the `text_2_sql_core` library. See [build](https://docs.astral.sh/uv/concepts/projects/build/) if you want to build as a wheel and install on an agent. Or you can run from within a `uv` environment and skip packaging. - Install the optional dependencies if you need a database connector other than TSQL. `uv sync --extra ` From 85f11176efb88945a406b56de60cac4449d78bd1 Mon Sep 17 00:00:00 2001 From: Ben Constable Date: Tue, 21 Jan 2025 20:36:02 +0000 Subject: [PATCH 7/9] Update README --- text_2_sql/GETTING_STARTED.md | 10 ++++++++-- text_2_sql/data_dictionary/README.md | 2 +- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/text_2_sql/GETTING_STARTED.md b/text_2_sql/GETTING_STARTED.md index 140a7afd..760fbce1 100644 --- a/text_2_sql/GETTING_STARTED.md +++ b/text_2_sql/GETTING_STARTED.md @@ -1,6 +1,6 @@ # Getting Started with Agentic Text2SQL Component -**Execute all these commands in the `text_2_sql` directory.** +**Execute the following commands in the `text_2_sql` directory.** To get started, perform the following steps: @@ -12,4 +12,10 @@ To get started, perform the following steps: 4. Create your `.env` file based on the provided sample `.env.example`. Place this file in the same place as the `.env.example`. 5. Generate a data dictionary for your target server using the instructions in the **Running** section of the `data_dictionary/README.md`. 6. Upload these generated data dictionaries files to the relevant containers in your storage account. Wait for them to be automatically indexed with the included skillsets. -7. Navigate to `autogen` directory to view the AutoGen implementation. Follow the steps in `Iteration 5 - Agentic Vector Based Text2SQL.ipynb` to get started. + +**Execute the following commands in the `autogen` directory.** + +7. 
Run `uv sync` within the text_2_sql directory to install dependencies. + - Install the optional dependencies if you need a database connector other than TSQL. `uv sync --extra ` + - See the supported connectors in `text_2_sql_core/src/text_2_sql_core/connectors`. +8. Navigate to `autogen` directory to view the AutoGen implementation. Follow the steps in `Iteration 5 - Agentic Vector Based Text2SQL.ipynb` to get started. diff --git a/text_2_sql/data_dictionary/README.md b/text_2_sql/data_dictionary/README.md index 01641727..2c7585ba 100644 --- a/text_2_sql/data_dictionary/README.md +++ b/text_2_sql/data_dictionary/README.md @@ -224,7 +224,7 @@ If there is no pre-built script for your database engine, take one of the above ## Running -**Execute all these commands in the `text_2_sql` directory.** +**Execute the following commands in the `text_2_sql` directory.** To generate a data dictionary, perform the following steps: From 9955524f1b682cda8243d0dfb1eae3df2c5d3db1 Mon Sep 17 00:00:00 2001 From: Ben Constable Date: Tue, 21 Jan 2025 20:41:04 +0000 Subject: [PATCH 8/9] Update example --- text_2_sql/.env.example | 2 ++ 1 file changed, 2 insertions(+) diff --git a/text_2_sql/.env.example b/text_2_sql/.env.example index b8b13509..21c358fb 100644 --- a/text_2_sql/.env.example +++ b/text_2_sql/.env.example @@ -1,5 +1,7 @@ # Environment variables for Text2SQL IdentityType= # system_assigned or user_assigned or key + +Text2Sql__DatabaseEngine= # TSQL or PostgreSQL or Snowflake or Databricks Text2Sql__UseQueryCache= # True or False Text2Sql__PreRunQueryCache= # True or False Text2Sql__UseColumnValueStore= # True or False From baf116cb5bfe54920e6ca3db306830768ac2da65 Mon Sep 17 00:00:00 2001 From: Ben Constable Date: Tue, 21 Jan 2025 20:45:00 +0000 Subject: [PATCH 9/9] Update text2sql --- deploy_ai_search/README.md | 6 +++--- text_2_sql/GETTING_STARTED.md | 25 +++++++++++++++---------- text_2_sql/data_dictionary/README.md | 4 ++-- 3 files changed, 20 insertions(+), 15 
deletions(-) diff --git a/deploy_ai_search/README.md b/deploy_ai_search/README.md index 5df7adb5..bd3ebb1c 100644 --- a/deploy_ai_search/README.md +++ b/deploy_ai_search/README.md @@ -1,8 +1,8 @@ # AI Search Indexing Pre-built Index Setup -The associated scripts in this portion of the repository contains pre-built scripts to deploy the skillset with Azure Document Intelligence. +The associated scripts in this portion of the repository contains pre-built scripts to deploy the skillsets needed for both Text2SQL and Image Processing. -## Steps for Rag Documents Index Deployment (For Unstructured RAG) +## Steps for Rag Documents Index Deployment (For Image Processing) 1. Update `.env` file with the associated values. Not all values are required dependent on whether you are using System / User Assigned Identities or a Key based authentication. 2. Adjust `rag_documents.py` with any changes to the index / indexer. The `get_skills()` method implements the skills pipeline. Make any adjustments here in the skills needed to enrich the data source. @@ -13,7 +13,7 @@ The associated scripts in this portion of the repository contains pre-built scri - `rebuild`. Whether to delete and rebuild the index. - `suffix`. Optional parameter that will apply a suffix onto the deployed index and indexer. This is useful if you want deploy a test version, before overwriting the main version. -## Steps for Text2SQL Index Deployment (For Structured RAG) +## Steps for Text2SQL Index Deployment (For Text2SQL) ### Schema Store Index diff --git a/text_2_sql/GETTING_STARTED.md b/text_2_sql/GETTING_STARTED.md index 760fbce1..43382382 100644 --- a/text_2_sql/GETTING_STARTED.md +++ b/text_2_sql/GETTING_STARTED.md @@ -1,21 +1,26 @@ # Getting Started with Agentic Text2SQL Component -**Execute the following commands in the `text_2_sql` directory.** - To get started, perform the following steps: +**Execute the following commands in the `deploy_ai_search` directory:** + 1. 
Setup Azure OpenAI in your subscription with **gpt-4o-mini** & an embedding model, alongside a SQL Server sample database, AI Search and a storage account. -2. Clone this repository and deploy the AI Search text2sql indexes from `deploy_ai_search`. -3. Run `uv sync` within the text_2_sql directory to install dependencies. +2. Create your `.env` file based on the provided sample `deploy_ai_search/.env.example`. Place this file in the same place in `deploy_ai_search/.env`. +3. Clone this repository and deploy the AI Search text2sql indexes from `deploy_ai_search`. See the instructions in the **Steps for Text2SQL Index Deployment (For Text2SQL)** section of the `deploy_ai_search/README.md`. + +**Execute the following commands in the `text_2_sql_core` directory:** + +4. Create your `.env` file based on the provided sample `text_2_sql/.env.example`. Place this file in the same place in `text_2_sql/.env`. +5. Run `uv sync` within the text_2_sql directory to install dependencies. - Install the optional dependencies if you need a database connector other than TSQL. `uv sync --extra <DATABASE ENGINE>` - See the supported connectors in `text_2_sql_core/src/text_2_sql_core/connectors`. -4. Create your `.env` file based on the provided sample `.env.example`. Place this file in the same place as the `.env.example`. -5. Generate a data dictionary for your target server using the instructions in the **Running** section of the `data_dictionary/README.md`. -6. Upload these generated data dictionaries files to the relevant containers in your storage account. Wait for them to be automatically indexed with the included skillsets. +6. Create your `.env` file based on the provided sample `text_2_sql/.env.example`. Place this file in the same place in `text_2_sql/.env`. +7. Generate a data dictionary for your target server using the instructions in the **Running** section of the `data_dictionary/README.md`. +8. 
Upload these generated data dictionaries files to the relevant containers in your storage account. Wait for them to be automatically indexed with the included skillsets. -**Execute the following commands in the `autogen` directory.** +**Execute the following commands in the `autogen` directory:** -7. Run `uv sync` within the text_2_sql directory to install dependencies. +9. Run `uv sync` within the text_2_sql directory to install dependencies. - Install the optional dependencies if you need a database connector other than TSQL. `uv sync --extra <DATABASE ENGINE>` - See the supported connectors in `text_2_sql_core/src/text_2_sql_core/connectors`. -8. Navigate to `autogen` directory to view the AutoGen implementation. Follow the steps in `Iteration 5 - Agentic Vector Based Text2SQL.ipynb` to get started. +10. Navigate to `autogen` directory to view the AutoGen implementation. Follow the steps in `Iteration 5 - Agentic Vector Based Text2SQL.ipynb` to get started. diff --git a/text_2_sql/data_dictionary/README.md b/text_2_sql/data_dictionary/README.md index 2c7585ba..d2002aeb 100644 --- a/text_2_sql/data_dictionary/README.md +++ b/text_2_sql/data_dictionary/README.md @@ -224,11 +224,11 @@ If there is no pre-built script for your database engine, take one of the above ## Running -**Execute the following commands in the `text_2_sql` directory.** - To generate a data dictionary, perform the following steps: 1. Create your `.env` file based on the provided sample `text_2_sql/.env.example`. Place this file in the same place in `text_2_sql/.env`. + +**Execute the following commands in the `text_2_sql_core` directory:** 2. Package and install the `text_2_sql_core` library. See [build](https://docs.astral.sh/uv/concepts/projects/build/) if you want to build as a wheel and install on an agent. Or you can run from within a `uv` environment and skip packaging. - Install the optional dependencies if you need a database connector other than TSQL. `uv sync --extra <DATABASE ENGINE>` 3. Run `data_dictionary <DATABASE ENGINE>`