From 60cd76b109f54eb0897bbb19973c58158cab5944 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 4 Dec 2025 03:08:31 +0000 Subject: [PATCH 1/3] Initial plan From 443fab64cd77ff04e75e90be926084cba9b38b26 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 4 Dec 2025 03:39:22 +0000 Subject: [PATCH 2/3] Add include-vector-fields-by-default property to DataSource configuration - Add IncludeVectorFieldsByDefault property to DataSource record (default: false) - Add UserProvidedIncludeVectorFieldsByDefault flag for serialization control - Update DataSourceConverterFactory to read/write the new property - Update JSON schema with include-vector-fields-by-default property and validation - Add --data-source.include-vector-fields-by-default CLI option to ConfigureOptions - Update ConfigGenerator to handle the new CLI option with MSSQL validation - Add validation in RuntimeConfigValidator for non-MSSQL database types - Add unit tests for the new CLI configure option Co-authored-by: JerryNixon <1749983+JerryNixon@users.noreply.github.com> --- schemas/dab.draft.schema.json | 22 +++++ src/Cli.Tests/ConfigureOptionsTests.cs | 94 +++++++++++++++++++ src/Cli/Commands/ConfigureOptions.cs | 5 + src/Cli/ConfigGenerator.cs | 20 +++- .../Converters/DataSourceConverterFactory.cs | 59 +++++++++++- src/Config/ObjectModel/DataSource.cs | 13 ++- .../Configurations/RuntimeConfigValidator.cs | 9 ++ 7 files changed, 215 insertions(+), 7 deletions(-) diff --git a/schemas/dab.draft.schema.json b/schemas/dab.draft.schema.json index 80cfd953ad..6bba6c6408 100644 --- a/schemas/dab.draft.schema.json +++ b/schemas/dab.draft.schema.json @@ -61,6 +61,11 @@ "maximum": 2147483647 } } + }, + "include-vector-fields-by-default": { + "type": "boolean", + "description": "When false (default), vector-type columns are omitted from results. 
Only applicable to mssql database type.", + "default": false } }, "allOf": [ @@ -140,6 +145,23 @@ } } } + }, + { + "if": { + "properties": { + "include-vector-fields-by-default": { + "const": true + } + }, + "required": ["include-vector-fields-by-default"] + }, + "then": { + "properties": { + "database-type": { + "const": "mssql" + } + } + } } ], "required": ["database-type", "connection-string"] diff --git a/src/Cli.Tests/ConfigureOptionsTests.cs b/src/Cli.Tests/ConfigureOptionsTests.cs index 073f349a67..33f036181d 100644 --- a/src/Cli.Tests/ConfigureOptionsTests.cs +++ b/src/Cli.Tests/ConfigureOptionsTests.cs @@ -926,6 +926,100 @@ public void TestFailureWhenAddingSetSessionContextToMySQLDatabase() Assert.IsFalse(isSuccess); } + /// + /// Tests that running "dab configure --data-source.include-vector-fields-by-default true" on a MSSQL config + /// correctly updates the include-vector-fields-by-default property to true. + /// + [TestMethod] + public void TestAddIncludeVectorFieldsByDefaultForMSSQL() + { + // Arrange + SetupFileSystemWithInitialConfig(INITIAL_CONFIG); + + // Act: Attempts to add include-vector-fields-by-default option + ConfigureOptions options = new( + dataSourceIncludeVectorFieldsByDefault: true, + config: TEST_RUNTIME_CONFIG_FILE + ); + bool isSuccess = TryConfigureSettings(options, _runtimeConfigLoader!, _fileSystem!); + + // Assert: Validate the include-vector-fields-by-default is added + Assert.IsTrue(isSuccess); + string updatedConfig = _fileSystem!.File.ReadAllText(TEST_RUNTIME_CONFIG_FILE); + Assert.IsTrue(RuntimeConfigLoader.TryParseConfig(updatedConfig, out RuntimeConfig? config)); + Assert.IsNotNull(config.DataSource); + Assert.IsTrue(config.DataSource.IncludeVectorFieldsByDefault); + } + + /// + /// Tests that running "dab configure --data-source.include-vector-fields-by-default false" on a MSSQL config + /// correctly updates the include-vector-fields-by-default property to false. 
+ /// + [TestMethod] + public void TestSetIncludeVectorFieldsByDefaultToFalseForMSSQL() + { + // Arrange + SetupFileSystemWithInitialConfig(INITIAL_CONFIG); + + // Act: Attempts to set include-vector-fields-by-default to false + ConfigureOptions options = new( + dataSourceIncludeVectorFieldsByDefault: false, + config: TEST_RUNTIME_CONFIG_FILE + ); + bool isSuccess = TryConfigureSettings(options, _runtimeConfigLoader!, _fileSystem!); + + // Assert: Validate the include-vector-fields-by-default is set to false + Assert.IsTrue(isSuccess); + string updatedConfig = _fileSystem!.File.ReadAllText(TEST_RUNTIME_CONFIG_FILE); + Assert.IsTrue(RuntimeConfigLoader.TryParseConfig(updatedConfig, out RuntimeConfig? config)); + Assert.IsNotNull(config.DataSource); + Assert.IsFalse(config.DataSource.IncludeVectorFieldsByDefault); + } + + /// + /// Tests that running "dab configure --data-source.include-vector-fields-by-default true" on a non-MSSQL config + /// (e.g., MySQL) fails with an error as this option is only applicable for MSSQL. + /// + [TestMethod] + public void TestFailureWhenAddingIncludeVectorFieldsByDefaultToMySQLDatabase() + { + // Arrange + SetupFileSystemWithInitialConfig(INITIAL_CONFIG); + + // Act: Attempts to set include-vector-fields-by-default on MySQL database type + ConfigureOptions options = new( + dataSourceDatabaseType: "mysql", + dataSourceIncludeVectorFieldsByDefault: true, + config: TEST_RUNTIME_CONFIG_FILE + ); + bool isSuccess = TryConfigureSettings(options, _runtimeConfigLoader!, _fileSystem!); + + // Assert + Assert.IsFalse(isSuccess); + } + + /// + /// Tests that running "dab configure --data-source.include-vector-fields-by-default true" on a PostgreSQL config + /// fails with an error as this option is only applicable for MSSQL. 
+ /// + [TestMethod] + public void TestFailureWhenAddingIncludeVectorFieldsByDefaultToPostgreSQLDatabase() + { + // Arrange + SetupFileSystemWithInitialConfig(INITIAL_CONFIG); + + // Act: Attempts to set include-vector-fields-by-default on PostgreSQL database type + ConfigureOptions options = new( + dataSourceDatabaseType: "postgresql", + dataSourceIncludeVectorFieldsByDefault: true, + config: TEST_RUNTIME_CONFIG_FILE + ); + bool isSuccess = TryConfigureSettings(options, _runtimeConfigLoader!, _fileSystem!); + + // Assert + Assert.IsFalse(isSuccess); + } + /// /// Sets up the mock file system with an initial configuration file. /// This method adds a config file to the mock file system and verifies its existence. diff --git a/src/Cli/Commands/ConfigureOptions.cs b/src/Cli/Commands/ConfigureOptions.cs index 60cb12c3f8..60623b678b 100644 --- a/src/Cli/Commands/ConfigureOptions.cs +++ b/src/Cli/Commands/ConfigureOptions.cs @@ -28,6 +28,7 @@ public ConfigureOptions( string? dataSourceOptionsContainer = null, string? dataSourceOptionsSchema = null, bool? dataSourceOptionsSetSessionContext = null, + bool? dataSourceIncludeVectorFieldsByDefault = null, int? depthLimit = null, bool? runtimeGraphQLEnabled = null, string? runtimeGraphQLPath = null, @@ -80,6 +81,7 @@ public ConfigureOptions( DataSourceOptionsContainer = dataSourceOptionsContainer; DataSourceOptionsSchema = dataSourceOptionsSchema; DataSourceOptionsSetSessionContext = dataSourceOptionsSetSessionContext; + DataSourceIncludeVectorFieldsByDefault = dataSourceIncludeVectorFieldsByDefault; // GraphQL DepthLimit = depthLimit; RuntimeGraphQLEnabled = runtimeGraphQLEnabled; @@ -150,6 +152,9 @@ public ConfigureOptions( [Option("data-source.options.set-session-context", Required = false, HelpText = "Enable session context. Allowed values: true (default), false.")] public bool? 
DataSourceOptionsSetSessionContext { get; } + [Option("data-source.include-vector-fields-by-default", Required = false, HelpText = "Include vector-type columns in results by default. Only applicable to MSSQL. Default: false (boolean).")] + public bool? DataSourceIncludeVectorFieldsByDefault { get; } + [Option("runtime.graphql.depth-limit", Required = false, HelpText = "Max allowed depth of the nested query. Allowed values: (0,2147483647] inclusive. Default is infinity. Use -1 to remove limit.")] public int? DepthLimit { get; } diff --git a/src/Cli/ConfigGenerator.cs b/src/Cli/ConfigGenerator.cs index 7c35335089..9c4fb41238 100644 --- a/src/Cli/ConfigGenerator.cs +++ b/src/Cli/ConfigGenerator.cs @@ -630,6 +630,8 @@ private static bool TryUpdateConfiguredDataSourceOptions( DatabaseType dbType = runtimeConfig.DataSource.DatabaseType; string dataSourceConnectionString = runtimeConfig.DataSource.ConnectionString; DatasourceHealthCheckConfig? datasourceHealthCheckConfig = runtimeConfig.DataSource.Health; + bool includeVectorFieldsByDefault = runtimeConfig.DataSource.IncludeVectorFieldsByDefault; + bool userProvidedIncludeVectorFieldsByDefault = runtimeConfig.DataSource.UserProvidedIncludeVectorFieldsByDefault; if (options.DataSourceDatabaseType is not null) { @@ -671,8 +673,24 @@ private static bool TryUpdateConfiguredDataSourceOptions( dbOptions.Add(namingPolicy.ConvertName(nameof(MsSqlOptions.SetSessionContext)), options.DataSourceOptionsSetSessionContext.Value); } + if (options.DataSourceIncludeVectorFieldsByDefault is not null) + { + if (!DatabaseType.MSSQL.Equals(dbType)) + { + _logger.LogError("include-vector-fields-by-default option is only applicable for MSSQL database type."); + return false; + } + + includeVectorFieldsByDefault = options.DataSourceIncludeVectorFieldsByDefault.Value; + userProvidedIncludeVectorFieldsByDefault = true; + _logger.LogInformation("Updated RuntimeConfig with data-source.include-vector-fields-by-default as '{updatedValue}'", 
includeVectorFieldsByDefault); + } + dbOptions = EnumerableUtilities.IsNullOrEmpty(dbOptions) ? null : dbOptions; - DataSource dataSource = new(dbType, dataSourceConnectionString, dbOptions, datasourceHealthCheckConfig); + DataSource dataSource = new(dbType, dataSourceConnectionString, dbOptions, datasourceHealthCheckConfig, includeVectorFieldsByDefault) + { + UserProvidedIncludeVectorFieldsByDefault = userProvidedIncludeVectorFieldsByDefault + }; runtimeConfig = runtimeConfig with { DataSource = dataSource }; return runtimeConfig != null; diff --git a/src/Config/Converters/DataSourceConverterFactory.cs b/src/Config/Converters/DataSourceConverterFactory.cs index 1788ebf2b4..67b402e309 100644 --- a/src/Config/Converters/DataSourceConverterFactory.cs +++ b/src/Config/Converters/DataSourceConverterFactory.cs @@ -51,12 +51,17 @@ public DataSourceConverter(DeserializationVariableReplacementSettings? replaceme string connectionString = string.Empty; DatasourceHealthCheckConfig? health = null; Dictionary? datasourceOptions = null; + bool includeVectorFieldsByDefault = false; + bool userProvidedIncludeVectorFieldsByDefault = false; while (reader.Read()) { if (reader.TokenType is JsonTokenType.EndObject) { - return new DataSource(databaseType, connectionString, datasourceOptions, health); + return new DataSource(databaseType, connectionString, datasourceOptions, health, includeVectorFieldsByDefault) + { + UserProvidedIncludeVectorFieldsByDefault = userProvidedIncludeVectorFieldsByDefault + }; } if (reader.TokenType is JsonTokenType.PropertyName) @@ -136,6 +141,24 @@ public DataSourceConverter(DeserializationVariableReplacementSettings? 
replaceme datasourceOptions = optionsDict; } + break; + case "include-vector-fields-by-default": + if (reader.TokenType is JsonTokenType.True or JsonTokenType.False) + { + includeVectorFieldsByDefault = reader.GetBoolean(); + userProvidedIncludeVectorFieldsByDefault = true; + } + else if (reader.TokenType is JsonTokenType.String) + { + // Support environment variable replacement + string stringValue = reader.DeserializeString(_replacementSettings)!; + if (bool.TryParse(stringValue, out bool boolValue)) + { + includeVectorFieldsByDefault = boolValue; + userProvidedIncludeVectorFieldsByDefault = true; + } + } + break; default: throw new JsonException($"Unexpected property {propertyName} while deserializing DataSource."); @@ -149,11 +172,37 @@ public DataSourceConverter(DeserializationVariableReplacementSettings? replaceme public override void Write(Utf8JsonWriter writer, DataSource value, JsonSerializerOptions options) { - // Remove the converter so we don't recurse. - JsonSerializerOptions innerOptions = new(options); - innerOptions.Converters.Remove(innerOptions.Converters.First(c => c is DataSourceConverterFactory)); + writer.WriteStartObject(); + + // Always write required properties + writer.WritePropertyName("database-type"); + JsonSerializer.Serialize(writer, value.DatabaseType, options); + + writer.WritePropertyName("connection-string"); + writer.WriteStringValue(value.ConnectionString); + + // Write options if present + if (value.Options is not null && value.Options.Count > 0) + { + writer.WritePropertyName("options"); + JsonSerializer.Serialize(writer, value.Options, options); + } + + // Write health if present + if (value.Health is not null) + { + writer.WritePropertyName("health"); + JsonSerializer.Serialize(writer, value.Health, options); + } + + // Write include-vector-fields-by-default only if user provided it + if (value.UserProvidedIncludeVectorFieldsByDefault) + { + writer.WritePropertyName("include-vector-fields-by-default"); + 
writer.WriteBooleanValue(value.IncludeVectorFieldsByDefault); + } - JsonSerializer.Serialize(writer, value, innerOptions); + writer.WriteEndObject(); } } } diff --git a/src/Config/ObjectModel/DataSource.cs b/src/Config/ObjectModel/DataSource.cs index d1a2456ef9..5e5d9206ec 100644 --- a/src/Config/ObjectModel/DataSource.cs +++ b/src/Config/ObjectModel/DataSource.cs @@ -14,12 +14,23 @@ namespace Azure.DataApiBuilder.Config.ObjectModel; /// Connection string to access the database. /// Custom options for the specific database. If there are no options, this could be null. /// Health check configuration for the datasource. +/// When false (default), vector-type columns are omitted from results. Only applicable to mssql. public record DataSource( DatabaseType DatabaseType, string ConnectionString, Dictionary? Options = null, - DatasourceHealthCheckConfig? Health = null) + DatasourceHealthCheckConfig? Health = null, + bool IncludeVectorFieldsByDefault = false) { + /// + /// Flag which informs CLI and JSON serializer whether to write include-vector-fields-by-default + /// property and value to the runtime config file. + /// When user doesn't provide the property/value, which signals DAB to use the default (false), + /// the DAB CLI should not write the default value to a serialized config. 
+ /// + [JsonIgnore(Condition = JsonIgnoreCondition.Always)] + public bool UserProvidedIncludeVectorFieldsByDefault { get; init; } = false; + [JsonIgnore] public bool IsDatasourceHealthEnabled => Health is null || Health.Enabled; diff --git a/src/Core/Configurations/RuntimeConfigValidator.cs b/src/Core/Configurations/RuntimeConfigValidator.cs index fd8f811c9e..a28fc12dc3 100644 --- a/src/Core/Configurations/RuntimeConfigValidator.cs +++ b/src/Core/Configurations/RuntimeConfigValidator.cs @@ -116,6 +116,15 @@ public void ValidateDataSourceInConfig( statusCode: HttpStatusCode.ServiceUnavailable, subStatusCode: DataApiBuilderException.SubStatusCodes.ErrorInInitialization)); } + + // include-vector-fields-by-default is only valid for mssql database type + if (dataSource.IncludeVectorFieldsByDefault && dataSource.DatabaseType != DatabaseType.MSSQL) + { + HandleOrRecordException(new DataApiBuilderException( + message: "The include-vector-fields-by-default option is only valid for MSSQL database type.", + statusCode: HttpStatusCode.ServiceUnavailable, + subStatusCode: DataApiBuilderException.SubStatusCodes.ConfigValidationError)); + } } ValidateDatabaseType(runtimeConfig, fileSystem, logger); From 7469d5d1dc3ec56077b5f76b9b2e26552b4c4b4e Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 4 Dec 2025 23:24:58 +0000 Subject: [PATCH 3/3] Implement vector field filtering in authorization and metadata - Add IsVectorType property to ColumnDefinition for tracking vector columns - Update MsSqlMetadataProvider to detect vector type columns during schema discovery - Modify AuthorizationResolver.ResolveEntityDefinitionColumns to filter out vector columns when IncludeVectorFieldsByDefault is false - Update SetEntityPermissionMap to pass includeVectorFields setting from DataSource config Co-authored-by: JerryNixon <1749983+JerryNixon@users.noreply.github.com> --- .../DatabasePrimitives/DatabaseObject.cs | 7 ++++ 
.../Authorization/AuthorizationResolver.cs | 35 +++++++++++++++---- .../MsSqlMetadataProvider.cs | 8 +++++ 3 files changed, 44 insertions(+), 6 deletions(-) diff --git a/src/Config/DatabasePrimitives/DatabaseObject.cs b/src/Config/DatabasePrimitives/DatabaseObject.cs index 8636e8c005..e247c3aa97 100644 --- a/src/Config/DatabasePrimitives/DatabaseObject.cs +++ b/src/Config/DatabasePrimitives/DatabaseObject.cs @@ -269,6 +269,13 @@ public class ColumnDefinition public bool IsReadOnly { get; set; } public object? DefaultValue { get; set; } + /// + /// Indicates whether this column is a vector type (SQL Server vector data type). + /// Vector columns can be large, so they are excluded from results unless + /// include-vector-fields-by-default is set to true in the configuration (it defaults to false). + /// + public bool IsVectorType { get; set; } + public ColumnDefinition() { } public ColumnDefinition(Type systemType) diff --git a/src/Core/Authorization/AuthorizationResolver.cs b/src/Core/Authorization/AuthorizationResolver.cs index 0f22b9cd28..9d067d4099 100644 --- a/src/Core/Authorization/AuthorizationResolver.cs +++ b/src/Core/Authorization/AuthorizationResolver.cs @@ -285,6 +285,11 @@ private void SetEntityPermissionMap(RuntimeConfig runtimeConfig) HashSet allowedColumnsForAnonymousRole = new(); string dataSourceName = runtimeConfig.GetDataSourceNameFromEntityName(entityName); ISqlMetadataProvider metadataProvider = _metadataProviderFactory.GetMetadataProvider(dataSourceName); + + // Get the include-vector-fields-by-default setting from the data source config + DataSource dataSource = runtimeConfig.GetDataSourceFromDataSourceName(dataSourceName); + bool includeVectorFields = dataSource.IncludeVectorFieldsByDefault; + foreach (EntityPermission permission in entity.Permissions) { string role = permission.Role; @@ -298,11 +303,11 @@ private void SetEntityPermissionMap(RuntimeConfig runtimeConfig) // Use a HashSet to store all the backing field names // that are accessible to the user. 
HashSet allowedColumns = new(); - IEnumerable allTableColumns = ResolveEntityDefinitionColumns(entityName, metadataProvider); + IEnumerable allTableColumns = ResolveEntityDefinitionColumns(entityName, metadataProvider, includeVectorFields); if (entityAction.Fields is null) { - operationToColumn.Included.UnionWith(ResolveEntityDefinitionColumns(entityName, metadataProvider)); + operationToColumn.Included.UnionWith(ResolveEntityDefinitionColumns(entityName, metadataProvider, includeVectorFields)); } else { @@ -313,7 +318,7 @@ private void SetEntityPermissionMap(RuntimeConfig runtimeConfig) if (entityAction.Fields.Include is null || (entityAction.Fields.Include.Count == 1 && entityAction.Fields.Include.Contains(WILDCARD))) { - operationToColumn.Included.UnionWith(ResolveEntityDefinitionColumns(entityName, metadataProvider)); + operationToColumn.Included.UnionWith(ResolveEntityDefinitionColumns(entityName, metadataProvider, includeVectorFields)); } else { @@ -325,7 +330,7 @@ private void SetEntityPermissionMap(RuntimeConfig runtimeConfig) if (entityAction.Fields.Exclude is null || (entityAction.Fields.Exclude.Count == 1 && entityAction.Fields.Exclude.Contains(WILDCARD))) { - operationToColumn.Excluded.UnionWith(ResolveEntityDefinitionColumns(entityName, metadataProvider)); + operationToColumn.Excluded.UnionWith(ResolveEntityDefinitionColumns(entityName, metadataProvider, includeVectorFields)); } else { @@ -787,8 +792,13 @@ public IEnumerable GetRolesForField(string entityName, string field, Ent /// For CosmosDb_NoSql, read all the column names from schema.gql GraphQL type fields /// /// Used to lookup table definition of specific entity + /// Metadata provider for the entity's data source. + /// When false, vector type columns are excluded from results. /// Collection of columns in table definition. 
- private static IEnumerable ResolveEntityDefinitionColumns(string entityName, ISqlMetadataProvider metadataProvider) + private static IEnumerable ResolveEntityDefinitionColumns( + string entityName, + ISqlMetadataProvider metadataProvider, + bool includeVectorFields = true) { if (metadataProvider.GetDatabaseType() is DatabaseType.CosmosDB_NoSQL) { @@ -797,7 +807,20 @@ private static IEnumerable ResolveEntityDefinitionColumns(string entityN // Table definition is null on stored procedure entities SourceDefinition? sourceDefinition = metadataProvider.GetSourceDefinition(entityName); - return sourceDefinition is null ? new List() : sourceDefinition.Columns.Keys; + if (sourceDefinition is null) + { + return new List(); + } + + // When includeVectorFields is false and this is MSSQL, filter out vector type columns + if (!includeVectorFields && metadataProvider.GetDatabaseType() is DatabaseType.MSSQL) + { + return sourceDefinition.Columns + .Where(kvp => !kvp.Value.IsVectorType) + .Select(kvp => kvp.Key); + } + + return sourceDefinition.Columns.Keys; } /// diff --git a/src/Core/Services/MetadataProviders/MsSqlMetadataProvider.cs b/src/Core/Services/MetadataProviders/MsSqlMetadataProvider.cs index 7d02798427..753fa8799a 100644 --- a/src/Core/Services/MetadataProviders/MsSqlMetadataProvider.cs +++ b/src/Core/Services/MetadataProviders/MsSqlMetadataProvider.cs @@ -113,6 +113,14 @@ protected override void PopulateColumnDefinitionWithHasDefaultAndDbType( columnDefinition.DbType = TypeHelper.GetDbTypeFromSystemType(columnDefinition.SystemType); string sqlDbTypeName = (string)columnInfo["DATA_TYPE"]; + + // Detect vector type columns (SQL Server 2025+) + // Vector types are identified by the "vector" data type name + if (string.Equals(sqlDbTypeName, "vector", StringComparison.OrdinalIgnoreCase)) + { + columnDefinition.IsVectorType = true; + } + if (Enum.TryParse(sqlDbTypeName, ignoreCase: true, out SqlDbType sqlDbType)) { // The DbType enum in .NET does not distinguish between 
VarChar and NVarChar. Both are mapped to DbType.String.