Skip to content
126 changes: 107 additions & 19 deletions crates/iceberg/src/io/storage_azdls.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ pub(crate) fn azdls_config_parse(mut properties: HashMap<String, String>) -> Res
config.account_key = Some(account_key);
}

if let Some(sas_token) = properties.remove(ADLS_SAS_TOKEN) {
if let Some(sas_token) = find_sas_token(&properties, config.account_name.as_deref()) {
config.sas_token = Some(sas_token);
}

Expand All @@ -103,6 +103,38 @@ pub(crate) fn azdls_config_parse(mut properties: HashMap<String, String>) -> Res
Ok(config)
}

/// Finds the appropriate SAS token from properties based on account name.
///
/// Strategy:
/// 1. If account name is known, search for keys matching `adls.sas-token.<account_name>` prefix
/// 2. If not found, fall back to searching for keys matching `adls.sas-token` prefix
/// 3. Return the shortest matching key (least specific)
/// 4. Trim leading '?' from the token if present
fn find_sas_token(
properties: &HashMap<String, String>,
account_name: Option<&str>,
) -> Option<String> {
// Helper function to search for token with a given prefix
let find_with_prefix = |prefix: &str| {
properties
.iter()
.filter(|(key, _)| key.as_str() == prefix || key.starts_with(&format!("{}.", prefix)))
.min_by_key(|(key, _)| key.len())
.map(|(_, value)| value.strip_prefix('?').unwrap_or(value).to_string())
};

// Try account-specific prefix first if account name is known, then fall back to base
if let Some(account) = account_name {
let account_prefix = format!("{}.{}", ADLS_SAS_TOKEN, account);
if let Some(token) = find_with_prefix(&account_prefix) {
return Some(token);
}
}

// Fall back to base prefix (adls.sas-token)
find_with_prefix(ADLS_SAS_TOKEN)
}

/// Builds an OpenDAL operator from the AzdlsConfig and path.
///
/// The path is expected to include the scheme in a format like:
Expand Down Expand Up @@ -331,24 +363,19 @@ fn validate_storage_and_scheme(
scheme_str: &str,
) -> Result<AzureStorageScheme> {
let scheme = scheme_str.parse::<AzureStorageScheme>()?;
match scheme {
AzureStorageScheme::Abfss | AzureStorageScheme::Abfs => {
ensure_data_valid!(
storage_service == "dfs",
"AzureStoragePath: Unexpected storage service for abfs[s]: {}",
storage_service
);
Ok(scheme)
}
AzureStorageScheme::Wasbs | AzureStorageScheme::Wasb => {
ensure_data_valid!(
storage_service == "blob",
"AzureStoragePath: Unexpected storage service for wasb[s]: {}",
storage_service
);
Ok(scheme)
}
}
// Azure actually is oblivious to what we use for the scheme here.
// It actually supports both dfs and blob endpoints for all storage kinds.
// We should route those to different OpenDAL operators, but given that we don't
// do that today but map both schemes/endpoints to the same ADLS OpenDAL operator
// (which uses dfs endpoint), we might as well accept wasb URL for dfs endpoint,
// and abfs URL for blob endpoint. Especially since some implementations (e.g. Snowflake)
// always use abfs in URL, regardless of the endpoint.
ensure_data_valid!(
storage_service == "dfs" || storage_service == "blob",
"AzureStoragePath: Unexpected storage service for abfs[s]: {}",
storage_service
);
Ok(scheme)
}

#[cfg(test)]
Expand Down Expand Up @@ -423,6 +450,67 @@ mod tests {
..Default::default()
}),
),
(
"account-specific SAS token with full domain",
HashMap::from([
(
super::ADLS_ACCOUNT_NAME.to_string(),
"azteststorage".to_string(),
),
(
"adls.sas-token.azteststorage.blob.core.windows.net".to_string(),
"token-full".to_string(),
),
(
"adls.sas-token.azteststorage".to_string(),
"token-account".to_string(),
),
]),
Some(AzdlsConfig {
account_name: Some("azteststorage".to_string()),
sas_token: Some("token-account".to_string()), // Should pick the shorter one
..Default::default()
}),
),
(
"account-specific SAS token with only full domain",
HashMap::from([
(
super::ADLS_ACCOUNT_NAME.to_string(),
"myaccount".to_string(),
),
(
"adls.sas-token.myaccount.blob.core.windows.net".to_string(),
"token-specific".to_string(),
),
]),
Some(AzdlsConfig {
account_name: Some("myaccount".to_string()),
sas_token: Some("token-specific".to_string()),
..Default::default()
}),
),
(
"SAS token without account name picks shortest",
HashMap::from([
(
super::ADLS_SAS_TOKEN.to_string(),
"token-generic".to_string(),
),
(
"adls.sas-token.someaccount".to_string(),
"token-account".to_string(),
),
(
"adls.sas-token.someaccount.blob.core.windows.net".to_string(),
"token-specific".to_string(),
),
]),
Some(AzdlsConfig {
sas_token: Some("token-generic".to_string()), // Should pick the shortest one
..Default::default()
}),
),
];

for (name, properties, expected) in test_cases {
Expand Down
Loading