Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
130 changes: 104 additions & 26 deletions src/azure/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -671,36 +671,74 @@ impl MicrosoftAzureBuilder {
self.container_name = Some(validate(parsed.username())?);
self.account_name = Some(validate(a)?);
self.use_fabric_endpoint = true.into();
} else if let Some(a) = host.strip_suffix(".blob.core.windows.net") {
self.container_name = Some(validate(parsed.username())?);
self.account_name = Some(validate(a)?);
} else if let Some(a) = host.strip_suffix(".blob.fabric.microsoft.com") {
self.container_name = Some(validate(parsed.username())?);
self.account_name = Some(validate(a)?);
self.use_fabric_endpoint = true.into();
} else {
return Err(Error::UrlNotRecognised { url: url.into() }.into());
}
}
"https" => match host.split_once('.') {
Some((a, "dfs.core.windows.net")) | Some((a, "blob.core.windows.net")) => {
self.account_name = Some(validate(a)?);
let container = parsed.path_segments().unwrap().next().expect(
"iterator always contains at least one string (which may be empty)",
);
if !container.is_empty() {
self.container_name = Some(validate(container)?);
"https" => {
// "{workspaceid}.z??.(onelake|dfs|blob).fabric.microsoft.com"
Comment thread
SmritiAgrawal04 marked this conversation as resolved.
match host.split_once('.') {

// Workspace-level Private Link detection
Some((workspaceid, rest)) if rest.starts_with('z') && rest.ends_with("fabric.microsoft.com") => {
// rest looks like: "z28.dfs.fabric.microsoft.com" / "z28.blob.fabric.microsoft.com" / etc.
// Account name for WS-PL is two labels: "{wsid}.z{xy}"
let (zone, _) = rest
.split_once('.')
.unwrap_or((rest, ""));

let wsid = validate(workspaceid)?;
let zone = validate(zone)?;

self.account_name = Some(format!("{wsid}.{zone}"));

// Attempt to infer the container name from the URL
let container = parsed.path_segments().unwrap().next()
.expect("iterator always contains at least one string (which may be empty)");

if !container.is_empty() {
self.container_name = Some(validate(container)?);
}

self.use_fabric_endpoint = true.into();
}
}
Some((a, "dfs.fabric.microsoft.com")) | Some((a, "blob.fabric.microsoft.com")) => {
self.account_name = Some(validate(a)?);
// Attempt to infer the container name from the URL
Comment thread
SmritiAgrawal04 marked this conversation as resolved.
// - https://onelake.dfs.fabric.microsoft.com/<workspaceGUID>/<itemGUID>/Files/test.csv
// - https://onelake.dfs.fabric.microsoft.com/<workspace>/<item>.<itemtype>/<path>/<fileName>
//
// See <https://learn.microsoft.com/en-us/fabric/onelake/onelake-access-api>
let workspace = parsed.path_segments().unwrap().next().expect(
"iterator always contains at least one string (which may be empty)",
);
if !workspace.is_empty() {
self.container_name = Some(workspace.to_string())

// Azure Storage public
Some((a, "dfs.core.windows.net")) | Some((a, "blob.core.windows.net")) => {
self.account_name = Some(validate(a)?);

// Attempt to infer the container name from the URL
let container = parsed.path_segments().unwrap().next()
.expect("iterator always contains at least one string (which may be empty)");

if !container.is_empty() {
self.container_name = Some(validate(container)?);
}
}
self.use_fabric_endpoint = true.into();

// Fabric endpoints
Some((a, "dfs.fabric.microsoft.com")) | Some((a, "blob.fabric.microsoft.com")) => {
self.account_name = Some(validate(a)?);

let workspace = parsed.path_segments().unwrap().next()
.expect("iterator always contains at least one string (which may be empty)");

if !workspace.is_empty() {
self.container_name = Some(workspace.to_string());
}

self.use_fabric_endpoint = true.into();
}

_ => return Err(Error::UrlNotRecognised { url: url.into() }.into()),
}
_ => return Err(Error::UrlNotRecognised { url: url.into() }.into()),
},
scheme => {
let scheme = scheme.into();
Expand Down Expand Up @@ -1165,7 +1203,7 @@ mod tests {
assert_eq!(builder.account_name, Some("account".to_string()));
assert_eq!(builder.container_name, None);
assert!(builder.use_fabric_endpoint.get().unwrap());

let mut builder = MicrosoftAzureBuilder::new();
builder
.parse_url("https://account.dfs.fabric.microsoft.com/container")
Expand All @@ -1174,6 +1212,14 @@ mod tests {
assert_eq!(builder.container_name.as_deref(), Some("container"));
assert!(builder.use_fabric_endpoint.get().unwrap());

let mut builder = MicrosoftAzureBuilder::new();
builder
.parse_url("https://onelake.dfs.fabric.microsoft.com/c047b3e3-4e89-407a-98d7-cf9949ae92a3/9f1a2b3c-4d5e-6f70-8a9b-c0d1e2f3a456.lakehouse/Files/tables/sales/data.parquet")
.unwrap();
assert_eq!(builder.account_name, Some("onelake".to_string()));
assert_eq!(builder.container_name.as_deref(), Some("c047b3e3-4e89-407a-98d7-cf9949ae92a3"));
assert!(builder.use_fabric_endpoint.get().unwrap());

let mut builder = MicrosoftAzureBuilder::new();
builder
.parse_url("https://account.blob.fabric.microsoft.com/")
Expand All @@ -1190,14 +1236,46 @@ mod tests {
assert_eq!(builder.container_name.as_deref(), Some("container"));
assert!(builder.use_fabric_endpoint.get().unwrap());

let mut builder = MicrosoftAzureBuilder::new();
builder
.parse_url("https://Ab000000000000000000000000000000.zAb.dfs.fabric.microsoft.com/")
.unwrap();
assert_eq!(builder.account_name, Some("ab000000000000000000000000000000.zab".to_string()));
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the account name really supposed to have the ab000000000000000000000000000000 in it? It seems confusing that the container name is also ab000000000000000000000000000000.

assert_eq!(builder.container_name.as_deref(), None);
assert!(builder.use_fabric_endpoint.get().unwrap());

let mut builder = MicrosoftAzureBuilder::new();
builder
.parse_url("https://ab000000000000000000000000000000.zab.dfs.fabric.microsoft.com/")
.unwrap();
assert_eq!(builder.account_name, Some("ab000000000000000000000000000000.zab".to_string()));
assert_eq!(builder.container_name.as_deref(), None);
assert!(builder.use_fabric_endpoint.get().unwrap());

let mut builder = MicrosoftAzureBuilder::new();
builder
.parse_url("https://c047b3e34e89407a98d7cf9949ae92a3.zc0.blob.fabric.microsoft.com/c047b3e3-4e89-407a-98d7-cf9949ae92a3/9f1a2b3c-4d5e-6f70-8a9b-c0d1e2f3a456/file")
.unwrap();
assert_eq!(builder.account_name, Some("c047b3e34e89407a98d7cf9949ae92a3.zc0".to_string()));
assert_eq!(builder.container_name.as_deref(), Some("c047b3e3-4e89-407a-98d7-cf9949ae92a3"));
assert!(builder.use_fabric_endpoint.get().unwrap());

let mut builder = MicrosoftAzureBuilder::new();
builder
.parse_url("https://c047b3e34e89407a98d7cf9949ae92a3.zc0.onelake.fabric.microsoft.com/c047b3e3-4e89-407a-98d7-cf9949ae92a3/9f1a2b3c-4d5e-6f70-8a9b-c0d1e2f3a456/file")
.unwrap();
assert_eq!(builder.account_name, Some("c047b3e34e89407a98d7cf9949ae92a3.zc0".to_string()));
assert_eq!(builder.container_name.as_deref(), Some("c047b3e3-4e89-407a-98d7-cf9949ae92a3"));
assert!(builder.use_fabric_endpoint.get().unwrap());

let err_cases = [
"mailto://account.blob.core.windows.net/",
"az://blob.mydomain/",
"abfs://container.foo/path",
"abfss://file_system@account.foo.dfs.core.windows.net/",
"abfss://file_system.bar@account.dfs.core.windows.net/",
"https://blob.mydomain/",
"https://blob.foo.dfs.core.windows.net/",
"https://blob.foo.dfs.core.windows.net/"
];
let mut builder = MicrosoftAzureBuilder::new();
for case in err_cases {
Expand Down Expand Up @@ -1256,4 +1334,4 @@ mod tests {
panic!("{key} not propagated as ClientConfigKey");
}
}
}
}