diff --git a/crates/iceberg/src/io/storage_azdls.rs b/crates/iceberg/src/io/storage_azdls.rs index b61ecd9af8..7294896551 100644 --- a/crates/iceberg/src/io/storage_azdls.rs +++ b/crates/iceberg/src/io/storage_azdls.rs @@ -80,7 +80,7 @@ pub(crate) fn azdls_config_parse(mut properties: HashMap) -> Res config.account_key = Some(account_key); } - if let Some(sas_token) = properties.remove(ADLS_SAS_TOKEN) { + if let Some(sas_token) = find_sas_token(&properties, config.account_name.as_deref()) { config.sas_token = Some(sas_token); } @@ -103,6 +103,38 @@ pub(crate) fn azdls_config_parse(mut properties: HashMap) -> Res Ok(config) } +/// Finds the appropriate SAS token from properties based on account name. +/// +/// Strategy: +/// 1. If account name is known, search for keys matching `adls.sas-token.` prefix +/// 2. If not found, fall back to searching for keys matching `adls.sas-token` prefix +/// 3. Return the shortest matching key (least specific) +/// 4. Trim leading '?' from the token if present +fn find_sas_token( + properties: &HashMap, + account_name: Option<&str>, +) -> Option { + // Helper function to search for token with a given prefix + let find_with_prefix = |prefix: &str| { + properties + .iter() + .filter(|(key, _)| key.as_str() == prefix || key.starts_with(&format!("{}.", prefix))) + .min_by_key(|(key, _)| key.len()) + .map(|(_, value)| value.strip_prefix('?').unwrap_or(value).to_string()) + }; + + // Try account-specific prefix first if account name is known, then fall back to base + if let Some(account) = account_name { + let account_prefix = format!("{}.{}", ADLS_SAS_TOKEN, account); + if let Some(token) = find_with_prefix(&account_prefix) { + return Some(token); + } + } + + // Fall back to base prefix (adls.sas-token) + find_with_prefix(ADLS_SAS_TOKEN) +} + /// Builds an OpenDAL operator from the AzdlsConfig and path. /// /// The path is expected to include the scheme in a format like: @@ -331,24 +363,19 @@ fn validate_storage_and_scheme( scheme_str: &str, ) -> Result { let scheme = scheme_str.parse::()?; - match scheme { - AzureStorageScheme::Abfss | AzureStorageScheme::Abfs => { - ensure_data_valid!( - storage_service == "dfs", - "AzureStoragePath: Unexpected storage service for abfs[s]: {}", - storage_service - ); - Ok(scheme) - } - AzureStorageScheme::Wasbs | AzureStorageScheme::Wasb => { - ensure_data_valid!( - storage_service == "blob", - "AzureStoragePath: Unexpected storage service for wasb[s]: {}", - storage_service - ); - Ok(scheme) - } - } + // Azure actually is oblivious to what we use for the scheme here. + // It actually supports both dfs and blob endpoints for all storage kinds. + // We should route those to different OpenDAL operators, but given that we don't + // do that today but map both schemes/endpoints to the same ADLS OpenDAL operator + // (which uses dfs endpoint), we might as well accept wasb URL for dfs endpoint, + // and abfs URL for blob endpoint. Especially since some implementations (e.g. Snowflake) + // always use abfs in URL, regardless of the endpoint. + ensure_data_valid!( + storage_service == "dfs" || storage_service == "blob", + "AzureStoragePath: Unexpected storage service for abfs[s]: {}", + storage_service + ); + Ok(scheme) } #[cfg(test)] @@ -423,6 +450,67 @@ mod tests { ..Default::default() }), ), + ( + "account-specific SAS token with full domain", + HashMap::from([ + ( + super::ADLS_ACCOUNT_NAME.to_string(), + "azteststorage".to_string(), + ), + ( + "adls.sas-token.azteststorage.blob.core.windows.net".to_string(), + "token-full".to_string(), + ), + ( + "adls.sas-token.azteststorage".to_string(), + "token-account".to_string(), + ), + ]), + Some(AzdlsConfig { + account_name: Some("azteststorage".to_string()), + sas_token: Some("token-account".to_string()), // Should pick the shorter one + ..Default::default() + }), + ), + ( + "account-specific SAS token with only full domain", + HashMap::from([ + ( + super::ADLS_ACCOUNT_NAME.to_string(), + "myaccount".to_string(), + ), + ( + "adls.sas-token.myaccount.blob.core.windows.net".to_string(), + "token-specific".to_string(), + ), + ]), + Some(AzdlsConfig { + account_name: Some("myaccount".to_string()), + sas_token: Some("token-specific".to_string()), + ..Default::default() + }), + ), + ( + "SAS token without account name picks shortest", + HashMap::from([ + ( + super::ADLS_SAS_TOKEN.to_string(), + "token-generic".to_string(), + ), + ( + "adls.sas-token.someaccount".to_string(), + "token-account".to_string(), + ), + ( + "adls.sas-token.someaccount.blob.core.windows.net".to_string(), + "token-specific".to_string(), + ), + ]), + Some(AzdlsConfig { + sas_token: Some("token-generic".to_string()), // Should pick the shortest one + ..Default::default() + }), + ), ]; for (name, properties, expected) in test_cases {