From 1e9506720101616d02fbf00bbaa202979a5a92fd Mon Sep 17 00:00:00 2001
From: Adinata Wijaya
Date: Wed, 6 Aug 2025 11:20:25 +0200
Subject: [PATCH 1/8] - upgrade a few dependencies

---
 Cargo.toml            | 10 +++++-----
 README.md             |  5 +++++
 src/repo_tools/mod.rs |  3 ++-
 3 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index 43136b9..ce42b57 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "lfspull"
-version = "0.3.1"
+version = "0.4.0"
 edition = "2021"
 license = "MIT"
 authors = ["Volume Graphics GmbH"]
@@ -11,9 +11,9 @@ description = "A simple git lfs file pulling implementation in pure rust. Can on
 
 [dependencies]
 clap = { version = "4.1", features = ["derive", "env"] }
-thiserror = "1"
-reqwest = { version="0.11" , features = ["json", "stream"] }
-http = "0.2"
+thiserror = "2"
+reqwest = { version="0.12" , features = ["json", "stream"] }
+http = "1.3"
 serde = {version ="1.0", features=['derive']}
 serde_json = "1.0"
 bytes = "1.4"
@@ -30,7 +30,7 @@ futures-util = "0.3.30"
 tempfile = "3.12"
 
 [dev-dependencies]
-cucumber = "0.19.1"
+cucumber = "0.21"
 tokio = { version = "1", features = ["macros", "rt-multi-thread", "time"] }
 uuid = { version = "1.2", features = ["serde", "v4"] }
 
diff --git a/README.md b/README.md
index 1f5737c..10cdda4 100644
--- a/README.md
+++ b/README.md
@@ -32,6 +32,11 @@ Please see our docs.rs for example code and the gherkin tests for how to check t
 
 ## Changelog
 
+### 0.4.0
+
+- upgrade a few dependencies
+- add retry fetching from git
+
 ### 0.3.1
 
 - fix bug when trying to rename temp file to cache file, but cache file is already created and locked by other parallel job
 
diff --git a/src/repo_tools/mod.rs b/src/repo_tools/mod.rs
index e8eb5d4..4f023d1 100644
--- a/src/repo_tools/mod.rs
+++ b/src/repo_tools/mod.rs
@@ -31,7 +31,7 @@ async fn get_real_repo_root<P: AsRef<Path>>(repo_path: P) -> Result<
     access_token: Option<&str>,
     randomizer_bytes: Option<usize>,
 ) -> Result<(PathBuf, FilePullMode), LFSError> {
+    debug!("version: {}", &metadata.version);
     let cache_dir = get_cache_dir(&repo_root, metadata).await?;
     debug!("cache dir {:?}", &cache_dir);
     let cache_file = cache_dir.join(&metadata.oid);
From c4aeb610eed6203baf70c4ff3f88ccfd83c0ebc2 Mon Sep 17 00:00:00 2001
From: Adinata Wijaya
Date: Thu, 7 Aug 2025 07:36:28 +0200
Subject: [PATCH 2/8] - use reqwest middleware and retry

---
 Cargo.toml                   |  4 +++-
 src/lib.rs                   |  3 +++
 src/repo_tools/primitives.rs | 21 ++++++++++++++++++++-
 3 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index ce42b57..0e1bd6e 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -12,7 +12,9 @@ description = "A simple git lfs file pulling implementation in pure rust. Can on
 [dependencies]
 clap = { version = "4.1", features = ["derive", "env"] }
 thiserror = "2"
-reqwest = { version="0.12" , features = ["json", "stream"] }
+reqwest = { version="0.12" , features = ["stream"] }
+reqwest-retry = { version="0.7" }
+reqwest-middleware = { version="0.4" , features = ["json"] }
 http = "1.3"
 serde = {version ="1.0", features=['derive']}
 serde_json = "1.0"
diff --git a/src/lib.rs b/src/lib.rs
index 46f647d..f244aa0 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -53,6 +53,9 @@ pub mod prelude {
     /// Forward from the `reqwest` package, something failed while executing the fetch
     #[error("Request-error: {0}")]
     RequestError(#[from] reqwest::Error),
+    /// Forward from the `reqwest-middleware` package, something failed while executing the fetch
+    #[error("Request-middleware-error: {0}")]
+    RequestMiddlewareError(#[from] reqwest_middleware::Error),
     /// You tried to pull a non-existing file from the remote
     #[error("Remote file not found: {0}")]
     RemoteFileNotFound(&'static str),
diff --git a/src/repo_tools/primitives.rs b/src/repo_tools/primitives.rs
index 3928c14..ffa5b40 100644
--- a/src/repo_tools/primitives.rs
+++ b/src/repo_tools/primitives.rs
@@ -2,6 +2,8 @@ use crate::prelude::*;
 use futures_util::stream::StreamExt;
 use http::StatusCode;
 use reqwest::Client;
+use reqwest_middleware::ClientBuilder;
+use reqwest_retry::{policies::ExponentialBackoff, Jitter, RetryTransientMiddleware};
 use serde::{Deserialize, Serialize};
 use serde_json::json;
 use sha2::{Digest, Sha256};
@@ -10,6 +12,7 @@ use std::convert::TryInto;
 use std::io::Write;
 use std::path::Path;
 use std::path::PathBuf;
+use std::time::Duration;
 use tempfile::NamedTempFile;
 use tokio::fs;
 use tokio::io::AsyncReadExt;
@@ -126,7 +129,7 @@ pub async fn download_file(
     randomizer_bytes: Option<usize>,
 ) -> Result<NamedTempFile, LFSError> {
     const MEDIA_TYPE: &str = "application/vnd.git-lfs+json";
-    let client = Client::builder().build()?;
+
     assert_eq!(meta_data.hash, Some(Hash::SHA256));
     // we are implementing git-lfs batch API here: https://github.com/git-lfs/git-lfs/blob/main/docs/api/batch.md
     let request = json!({
@@ -137,8 +140,23 @@
         "hash_algo": "sha256"
     });
 
+    let retry_policy = ExponentialBackoff::builder()
+        .retry_bounds(Duration::from_secs(1), Duration::from_secs(10))
+        .base(1)
+        .jitter(Jitter::None)
+        .build_with_max_retries(3);
+
+    debug!("Retry policy: {:?}", retry_policy);
+
+    let client = Client::builder().build()?;
+    let client = ClientBuilder::new(client)
+        // Retry failed requests.
+        .with(RetryTransientMiddleware::new_with_policy(retry_policy))
+        .build();
+
     let request_url = repo_remote_url.to_owned() + "/info/lfs/objects/batch";
     let request_url = url_with_auth(&request_url, access_token)?;
+
     let response = client
         .post(request_url.clone())
         .header("Accept", MEDIA_TYPE)
@@ -146,6 +164,7 @@
         .json(&request)
         .send()
         .await?;
+
     if !response.status().is_success() {
         let status = response.status();
         println!(
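Taken on its own, the middleware stack this patch introduces composes like the sketch below. It only uses APIs already visible in the diff (`ExponentialBackoff`, `Jitter`, `RetryTransientMiddleware`, `ClientBuilder`); the URL is a placeholder, not one of the project's endpoints:

```rust
use std::time::Duration;

use reqwest::Client;
use reqwest_middleware::ClientBuilder;
use reqwest_retry::{policies::ExponentialBackoff, Jitter, RetryTransientMiddleware};

#[tokio::main]
async fn main() -> Result<(), reqwest_middleware::Error> {
    // Same policy as the patch: backoff bounded to 1..10 s, no jitter, 3 retries.
    let retry_policy = ExponentialBackoff::builder()
        .retry_bounds(Duration::from_secs(1), Duration::from_secs(10))
        .base(1)
        .jitter(Jitter::None)
        .build_with_max_retries(3);

    // Wrap a plain reqwest client; transient failures (connect errors, 5xx)
    // are retried transparently before the caller ever sees them.
    let client = ClientBuilder::new(Client::builder().build().unwrap())
        .with(RetryTransientMiddleware::new_with_policy(retry_policy))
        .build();

    // The wrapper keeps the familiar reqwest request-builder API.
    let response = client.get("https://example.com/").send().await?;
    println!("status: {}", response.status());
    Ok(())
}
```

Because the wrapped client returns `reqwest_middleware::Error` rather than `reqwest::Error`, the patch also has to add the `RequestMiddlewareError` variant above; the `?` on `send()` would not compile against the old error type.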
From c72a75c259f55f96ce1f943fdc876851c15b4fa8 Mon Sep 17 00:00:00 2001
From: Adinata Wijaya
Date: Thu, 7 Aug 2025 09:47:47 +0200
Subject: [PATCH 3/8] - add reqwest_tracing

---
 Cargo.toml                   | 3 ++-
 src/repo_tools/primitives.rs | 2 ++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/Cargo.toml b/Cargo.toml
index 0e1bd6e..59591c4 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -13,8 +13,9 @@ description = "A simple git lfs file pulling implementation in pure rust. Can on
 clap = { version = "4.1", features = ["derive", "env"] }
 thiserror = "2"
 reqwest = { version="0.12" , features = ["stream"] }
-reqwest-retry = { version="0.7" }
+reqwest-retry = { version="0.7", features = ["tracing"] }
 reqwest-middleware = { version="0.4" , features = ["json"] }
+reqwest-tracing = "0.5"
 http = "1.3"
 serde = {version ="1.0", features=['derive']}
 serde_json = "1.0"
diff --git a/src/repo_tools/primitives.rs b/src/repo_tools/primitives.rs
index ffa5b40..eb04638 100644
--- a/src/repo_tools/primitives.rs
+++ b/src/repo_tools/primitives.rs
@@ -13,6 +13,7 @@ use std::io::Write;
 use std::path::Path;
 use std::path::PathBuf;
 use std::time::Duration;
+use reqwest_tracing::TracingMiddleware;
 use tempfile::NamedTempFile;
 use tokio::fs;
 use tokio::io::AsyncReadExt;
@@ -150,6 +151,7 @@ pub async fn download_file(
     let client = Client::builder().build()?;
     let client = ClientBuilder::new(client)
+        .with(TracingMiddleware::default())
         // Retry failed requests.
        .with(RetryTransientMiddleware::new_with_policy(retry_policy))
         .build();
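Ordering matters here: `TracingMiddleware` is registered before the retry layer, so the span it opens wraps all retry attempts of a request. A minimal sketch of how those spans become visible; the `tracing_subscriber` setup is an assumption added for illustration (the patch does not add that dependency):

```rust
use reqwest::Client;
use reqwest_middleware::ClientBuilder;
use reqwest_tracing::TracingMiddleware;

#[tokio::main]
async fn main() {
    // Assumed helper dependency: tracing_subscriber renders the spans that
    // reqwest-tracing emits; without a subscriber they are silently dropped.
    tracing_subscriber::fmt()
        .with_max_level(tracing::Level::DEBUG)
        .init();

    let client = ClientBuilder::new(Client::builder().build().unwrap())
        // Opens one HTTP span per outgoing request.
        .with(TracingMiddleware::default())
        .build();

    let _ = client.get("https://example.com/").send().await;
}
```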
From ad9defb540aa75d113fb1eb7692665bcd1356c16 Mon Sep 17 00:00:00 2001
From: Adinata Wijaya
Date: Thu, 7 Aug 2025 10:43:09 +0200
Subject: [PATCH 4/8] - add arguments for max-retry-attempt

---
 README.md                    |  2 +-
 src/main.rs                  | 17 +++++++++++++----
 src/repo_tools/mod.rs        | 29 +++++++++++++++++++++++------
 src/repo_tools/primitives.rs |  7 ++++---
 tests/lfspull.rs             |  4 ++--
 5 files changed, 43 insertions(+), 16 deletions(-)

diff --git a/README.md b/README.md
index 10cdda4..5b6d6bf 100644
--- a/README.md
+++ b/README.md
@@ -35,7 +35,7 @@ Please see our docs.rs for example code and the gherkin tests for how to check t
 ### 0.4.0
 
 - upgrade a few dependencies
-- add retry fetching from git
+- add retry attempts when fetching from git fails
 
 ### 0.3.1
 
diff --git a/src/main.rs b/src/main.rs
index 0e276d0..b5b474d 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -22,6 +22,10 @@ struct Args {
     #[clap(short = 'b', long)]
     random_bytes: Option<usize>,
 
+    /// max number of retry attempts when an http request fails
+    #[clap(short, long, default_value_t = 3)]
+    max_retry: u32,
+
     /// Print debug information
     #[clap(short, long)]
     verbose: bool,
@@ -48,14 +52,19 @@ pub async fn main() -> Result<(), LFSError> {
     let access_token = args.access_token.as_deref();
     if let Some(file) = args.file_to_pull {
         info!("Single file mode: {}", file.to_string_lossy());
-        let result = lfspull::pull_file(file, access_token, args.random_bytes).await?;
+        let result =
+            lfspull::pull_file(file, access_token, args.max_retry, args.random_bytes).await?;
         info!("Result: {}", result);
     }
     if let Some(recurse_pattern) = args.recurse_pattern {
         info!("Glob-recurse mode: {}", &recurse_pattern);
-        let results =
-            lfspull::glob_recurse_pull_directory(&recurse_pattern, access_token, args.random_bytes)
-                .await?;
+        let results = lfspull::glob_recurse_pull_directory(
+            &recurse_pattern,
+            access_token,
+            args.max_retry,
+            args.random_bytes,
+        )
+        .await?;
         info!("Pulling finished! Listing files and sources: ");
         results.into_iter().enumerate().for_each(|(id, (n, r))| {
diff --git a/src/repo_tools/mod.rs b/src/repo_tools/mod.rs
index 4f023d1..9ac54e0 100644
--- a/src/repo_tools/mod.rs
+++ b/src/repo_tools/mod.rs
@@ -99,6 +99,7 @@ async fn get_file_cached<P: AsRef<Path>>(
     repo_root: P,
     metadata: &primitives::MetaData,
     access_token: Option<&str>,
+    max_retry: u32,
     randomizer_bytes: Option<usize>,
 ) -> Result<(PathBuf, FilePullMode), LFSError> {
     debug!("version: {}", &metadata.version);
@@ -119,8 +120,14 @@
         )
     })?;
 
-    let temp_file =
-        primitives::download_file(metadata, &repo_url, access_token, randomizer_bytes).await?;
+    let temp_file = primitives::download_file(
+        metadata,
+        &repo_url,
+        access_token,
+        max_retry,
+        randomizer_bytes,
+    )
+    .await?;
     if cache_file.exists() {
         info!(
             "cache file {:?} is already written from other process",
@@ -160,6 +167,7 @@ pub async fn pull_file<P: AsRef<Path>>(
     lfs_file: P,
     access_token: Option<&str>,
+    max_retry: u32,
     randomizer_bytes: Option<usize>,
 ) -> Result<FilePullMode, LFSError> {
     info!("Pulling file {}", lfs_file.as_ref().to_string_lossy());
@@ -177,8 +185,14 @@
     let repo_root = get_repo_root(&lfs_file).await.map_err(|e| {
         LFSError::DirectoryTraversalError(format!("Could not find git repo root: {:?}", e))
     })?;
-    let (file_name_cached, origin) =
-        get_file_cached(&repo_root, &metadata, access_token, randomizer_bytes).await?;
+    let (file_name_cached, origin) = get_file_cached(
+        &repo_root,
+        &metadata,
+        access_token,
+        max_retry,
+        randomizer_bytes,
+    )
+    .await?;
     info!(
         "Found file (Origin: {:?}), linking to {}",
         origin,
@@ -213,18 +227,21 @@ fn glob_recurse(wildcard_pattern: &str) -> Result<Vec<PathBuf>, LFSError> {
 ///
 /// * `access_token` - the token for Bearer-Auth via HTTPS
 ///
+/// * `max_retry` - max number of retry attempts when an http request fails
+///
 /// * `randomizer bytes` - bytes used to create a randomized named temp file
 ///
 /// # Examples
 ///
 /// Load all .jpg files from all subdirectories
 /// ```no_run
-/// let result = lfspull::glob_recurse_pull_directory("dir/to/pull/**/*.jpg", Some("secret-token"), Some(5));
+/// let result = lfspull::glob_recurse_pull_directory("dir/to/pull/**/*.jpg", Some("secret-token"), 3, Some(5));
/// ```
 ///
 pub async fn glob_recurse_pull_directory(
     wildcard_pattern: &str,
     access_token: Option<&str>,
+    max_retry: u32,
     randomizer_bytes: Option<usize>,
 ) -> Result<Vec<(String, FilePullMode)>, LFSError> {
     let mut result_vec = Vec::new();
@@ -232,7 +249,7 @@
     for path in files {
         result_vec.push((
             path.to_string_lossy().to_string(),
-            pull_file(&path, access_token, randomizer_bytes).await?,
+            pull_file(&path, access_token, max_retry, randomizer_bytes).await?,
         ));
     }
diff --git a/src/repo_tools/primitives.rs b/src/repo_tools/primitives.rs
index eb04638..6f81052 100644
--- a/src/repo_tools/primitives.rs
+++ b/src/repo_tools/primitives.rs
@@ -4,6 +4,7 @@ use http::StatusCode;
 use reqwest::Client;
 use reqwest_middleware::ClientBuilder;
 use reqwest_retry::{policies::ExponentialBackoff, Jitter, RetryTransientMiddleware};
+use reqwest_tracing::TracingMiddleware;
 use serde::{Deserialize, Serialize};
 use serde_json::json;
 use sha2::{Digest, Sha256};
@@ -13,7 +14,6 @@ use std::io::Write;
 use std::path::Path;
 use std::path::PathBuf;
 use std::time::Duration;
-use reqwest_tracing::TracingMiddleware;
 use tempfile::NamedTempFile;
 use tokio::fs;
 use tokio::io::AsyncReadExt;
@@ -127,6 +127,7 @@ pub async fn download_file(
     meta_data: &MetaData,
     repo_remote_url: &str,
     access_token: Option<&str>,
+    max_retry: u32,
     randomizer_bytes: Option<usize>,
 ) -> Result<NamedTempFile, LFSError> {
     const MEDIA_TYPE: &str = "application/vnd.git-lfs+json";
@@ -145,7 +146,7 @@
         .retry_bounds(Duration::from_secs(1), Duration::from_secs(10))
         .base(1)
         .jitter(Jitter::None)
-        .build_with_max_retries(3);
+        .build_with_max_retries(max_retry);
 
     debug!("Retry policy: {:?}", retry_policy);
 
@@ -335,7 +336,7 @@ size 226848"#;
 
     #[tokio::test(flavor = "multi_thread", worker_threads = 1)]
     async fn try_pull_from_demo_repo() {
         let parsed = parse_lfs_string(LFS_TEST_DATA).expect("Could not parse demo-string!");
-        let temp_file = download_file(&parsed, URL, None, None)
+        let temp_file = download_file(&parsed, URL, None, 3, None)
             .await
             .expect("could not download file");
         let temp_size = temp_file
diff --git a/tests/lfspull.rs b/tests/lfspull.rs
index 72c2970..51ebeaf 100644
--- a/tests/lfspull.rs
+++ b/tests/lfspull.rs
@@ -55,7 +55,7 @@ async fn pull_file_step(world: &mut LFSWorld) {
         .clone()
         .join(TEST_LFS_FILE_NAME);
     world.pull_result = Some(
-        lfspull::pull_file(file_path, None, Some(5))
+        lfspull::pull_file(file_path, None, 3, Some(5))
             .await
             .expect("Could not pull file"),
     );
@@ -65,7 +65,7 @@
 async fn pull_directory(world: &mut LFSWorld) {
     let fake_repo = world.current_fake_repo.as_ref().unwrap().to_string_lossy();
     let pattern = format!("{}/**/*", fake_repo);
-    let recurse_pull = lfspull::glob_recurse_pull_directory(&pattern, None, Some(5))
+    let recurse_pull = lfspull::glob_recurse_pull_directory(&pattern, None, 3, Some(5))
         .await
         .expect("Could not pull directory")
         .into_iter()
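With patch 4 applied, the retry budget is part of the public API. A minimal caller sketch against the signatures shown above; the paths and token are placeholders:

```rust
use lfspull::prelude::*;

#[tokio::main]
async fn main() -> Result<(), LFSError> {
    // pull_file(path, access_token, max_retry, randomizer_bytes)
    let mode = lfspull::pull_file("data/model.bin", Some("secret-token"), 3, Some(5)).await?;
    println!("single file pulled: {}", mode);

    // Same retry budget for the glob-recurse variant, mirroring the doc example.
    let results =
        lfspull::glob_recurse_pull_directory("data/**/*.bin", Some("secret-token"), 3, Some(5))
            .await?;
    for (name, mode) in results {
        println!("{name}: {mode:?}");
    }
    Ok(())
}
```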
From 2e53b60b5ec80fe6e76b6d252d6e0b01dd1b9ec2 Mon Sep 17 00:00:00 2001
From: Adinata Wijaya
Date: Tue, 19 Aug 2025 15:08:18 +0200
Subject: [PATCH 5/8] - remove reqwest middleware and related crates.
 - implement our own retry

---
 Cargo.toml                   |  5 +---
 src/lib.rs                   |  6 ++--
 src/repo_tools/primitives.rs | 54 ++++++++++++++++++++----------------
 3 files changed, 34 insertions(+), 31 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index 59591c4..ce42b57 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -12,10 +12,7 @@ description = "A simple git lfs file pulling implementation in pure rust. Can on
 [dependencies]
 clap = { version = "4.1", features = ["derive", "env"] }
 thiserror = "2"
-reqwest = { version="0.12" , features = ["stream"] }
-reqwest-retry = { version="0.7", features = ["tracing"] }
-reqwest-middleware = { version="0.4" , features = ["json"] }
-reqwest-tracing = "0.5"
+reqwest = { version="0.12" , features = ["json", "stream"] }
 http = "1.3"
 serde = {version ="1.0", features=['derive']}
 serde_json = "1.0"
diff --git a/src/lib.rs b/src/lib.rs
index f244aa0..d0ebc87 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -53,9 +53,6 @@ pub mod prelude {
     /// Forward from the `reqwest` package, something failed while executing the fetch
     #[error("Request-error: {0}")]
     RequestError(#[from] reqwest::Error),
-    /// Forward from the `reqwest-middleware` package, something failed while executing the fetch
-    #[error("Request-middleware-error: {0}")]
-    RequestMiddlewareError(#[from] reqwest_middleware::Error),
     /// You tried to pull a non-existing file from the remote
     #[error("Remote file not found: {0}")]
     RemoteFileNotFound(&'static str),
@@ -84,6 +81,9 @@
     /// something failed while creating tempfile
     #[error("TempFile error: {0}")]
     TempFile(String),
+    /// all download attempts have failed
+    #[error("Maximum download attempts reached")]
+    ReachedMaxDownloadAttempt,
     }
 }
 pub use prelude::FilePullMode;
diff --git a/src/repo_tools/primitives.rs b/src/repo_tools/primitives.rs
index 6f81052..a1b5932 100644
--- a/src/repo_tools/primitives.rs
+++ b/src/repo_tools/primitives.rs
@@ -2,9 +2,6 @@ use crate::prelude::*;
 use futures_util::stream::StreamExt;
 use http::StatusCode;
 use reqwest::Client;
-use reqwest_middleware::ClientBuilder;
-use reqwest_retry::{policies::ExponentialBackoff, Jitter, RetryTransientMiddleware};
-use reqwest_tracing::TracingMiddleware;
 use serde::{Deserialize, Serialize};
 use serde_json::json;
 use sha2::{Digest, Sha256};
@@ -17,6 +14,7 @@
 use tempfile::NamedTempFile;
 use tokio::fs;
 use tokio::io::AsyncReadExt;
+use tokio::time::sleep;
 use tracing::{debug, error, info};
 use url::Url;
 use vg_errortools::{fat_io_wrap_tokio, FatIOError};
@@ -123,15 +121,14 @@ fn url_with_auth(url: &str, access_token: Option<&str>) -> Result<
     Ok(url)
 }
 
-pub async fn download_file(
+pub async fn handle_download(
     meta_data: &MetaData,
     repo_remote_url: &str,
     access_token: Option<&str>,
-    max_retry: u32,
     randomizer_bytes: Option<usize>,
 ) -> Result<NamedTempFile, LFSError> {
     const MEDIA_TYPE: &str = "application/vnd.git-lfs+json";
-
+    let client = Client::builder().build()?;
     assert_eq!(meta_data.hash, Some(Hash::SHA256));
     // we are implementing git-lfs batch API here: https://github.com/git-lfs/git-lfs/blob/main/docs/api/batch.md
     let request = json!({
@@ -142,24 +139,8 @@
         "hash_algo": "sha256"
     });
 
-    let retry_policy = ExponentialBackoff::builder()
-        .retry_bounds(Duration::from_secs(1), Duration::from_secs(10))
-        .base(1)
-        .jitter(Jitter::None)
-        .build_with_max_retries(max_retry);
-
-    debug!("Retry policy: {:?}", retry_policy);
-
-    let client = Client::builder().build()?;
-    let client = ClientBuilder::new(client)
-        .with(TracingMiddleware::default())
-        // Retry failed requests.
-        .with(RetryTransientMiddleware::new_with_policy(retry_policy))
-        .build();
-
     let request_url = repo_remote_url.to_owned() + "/info/lfs/objects/batch";
     let request_url = url_with_auth(&request_url, access_token)?;
-
     let response = client
         .post(request_url.clone())
         .header("Accept", MEDIA_TYPE)
@@ -167,10 +148,9 @@
         .json(&request)
         .send()
         .await?;
-
     if !response.status().is_success() {
         let status = response.status();
-        println!(
+        error!(
             "Failed to request git lfs actions with status code {} and body {}",
             status,
             response.text().await?,
@@ -259,6 +239,32 @@
     }
 }
 
+pub async fn download_file(
+    meta_data: &MetaData,
+    repo_remote_url: &str,
+    access_token: Option<&str>,
+    max_retry: u32,
+    randomizer_bytes: Option<usize>,
+) -> Result<NamedTempFile, LFSError> {
+    for attempt in 1..=max_retry {
+        debug!("Download attempt {attempt}");
+        match handle_download(meta_data, repo_remote_url, access_token, randomizer_bytes).await {
+            Ok(tempfile) => {
+                return Ok(tempfile);
+            }
+            Err(e) => {
+                if matches!(e, LFSError::AccessDenied) {
+                    return Err(e);
+                }
+                error!("Download error: {e}. Attempting another download: {attempt}");
+                sleep(Duration::from_secs(1)).await;
+            }
+        }
+    }
+
+    Err(LFSError::ReachedMaxDownloadAttempt)
+}
+
 pub async fn is_lfs_node_file<P: AsRef<Path>>(path: P) -> Result<bool, LFSError> {
     if path.as_ref().is_dir() {
         return Ok(false);
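Patch 5 trades the middleware for an explicit loop, which changes what callers observe: `AccessDenied` short-circuits immediately, any other error is retried after a one-second sleep, and exhaustion surfaces as the new `ReachedMaxDownloadAttempt` variant. A sketch of handling that from caller code (the path is a placeholder):

```rust
use lfspull::prelude::*;

#[tokio::main]
async fn main() {
    match lfspull::pull_file("data/model.bin", None, 3, Some(5)).await {
        Ok(mode) => println!("pulled: {}", mode),
        // Fatal: rejected credentials are never retried.
        Err(LFSError::AccessDenied) => eprintln!("access denied, aborting"),
        // All attempts failed with transient errors.
        Err(LFSError::ReachedMaxDownloadAttempt) => eprintln!("gave up after 3 attempts"),
        Err(other) => eprintln!("error: {other}"),
    }
}
```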
From 3abaa7fe326d91f0b5349e3cc023e2d8f9a3d33d Mon Sep 17 00:00:00 2001
From: "adinata.wijaya"
Date: Fri, 12 Dec 2025 15:00:15 +0100
Subject: [PATCH 6/8] add overridable cache dir / lfs storage dir

---
 src/repo_tools/mod.rs | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/src/repo_tools/mod.rs b/src/repo_tools/mod.rs
index e8eb5d4..95247f2 100644
--- a/src/repo_tools/mod.rs
+++ b/src/repo_tools/mod.rs
@@ -4,6 +4,7 @@ mod primitives;
 use futures_util::TryFutureExt;
 use glob::glob;
 use primitives::get_repo_root;
+use std::env;
 use std::path::{Path, PathBuf};
 use tokio::fs;
 use tracing::{debug, error, info};
@@ -86,13 +87,17 @@
     let oid_1 = &metadata.oid[0..2];
     let oid_2 = &metadata.oid[2..4];
 
-    Ok(get_real_repo_root(repo_root)
-        .await?
-        .join(".git")
-        .join("lfs")
-        .join("objects")
-        .join(oid_1)
-        .join(oid_2))
+    let lfs_object_dir = if let Ok(value) = env::var("LFS_STORAGE_DIR") {
+        debug!("get from env var {}", &value);
+        PathBuf::from(value)
+    } else {
+        get_real_repo_root(repo_root)
+            .await?
+            .join(".git")
+            .join("lfs")
+    };
+
+    Ok(lfs_object_dir.join("objects").join(oid_1).join(oid_2))
 }
 
 async fn get_file_cached<P: AsRef<Path>>(
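The env-var override keeps the git-lfs fan-out layout and only swaps the root directory. A condensed sketch of the lookup the patch implements; the helper name and the paths are illustrative, not part of the crate:

```rust
use std::env;
use std::path::{Path, PathBuf};

/// Condensed form of the patched get_cache_dir: the object root comes from
/// $LFS_STORAGE_DIR when set, otherwise from <repo>/.git/lfs as before.
fn lfs_cache_dir(repo_root: &Path, oid: &str) -> PathBuf {
    let root = match env::var("LFS_STORAGE_DIR") {
        Ok(value) => PathBuf::from(value),
        Err(_) => repo_root.join(".git").join("lfs"),
    };
    // git-lfs fans objects out by the first two byte pairs of the OID.
    root.join("objects").join(&oid[0..2]).join(&oid[2..4])
}

fn main() {
    env::set_var("LFS_STORAGE_DIR", "/tmp/lfs-cache");
    let dir = lfs_cache_dir(Path::new("/repo"), "abcdef012345");
    assert_eq!(dir, PathBuf::from("/tmp/lfs-cache/objects/ab/cd"));
}
```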
From 38053de595dea84b158853a3c965486272a4c57c Mon Sep 17 00:00:00 2001
From: "adinata.wijaya"
Date: Mon, 22 Dec 2025 14:59:24 +0100
Subject: [PATCH 7/8] - add rust-toolchain 1.88
 - read git config for lfs storage path

---
 Cargo.toml                   |  2 +-
 README.md                    |  5 +++
 rust-toolchain.toml          |  3 ++
 src/repo_tools/mod.rs        | 61 +++++++++++++++++++++++++-----------
 src/repo_tools/primitives.rs |  2 +-
 5 files changed, 52 insertions(+), 21 deletions(-)
 create mode 100644 rust-toolchain.toml

diff --git a/Cargo.toml b/Cargo.toml
index ce42b57..c53e70f 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "lfspull"
-version = "0.4.0"
+version = "0.4.1"
 edition = "2021"
 license = "MIT"
 authors = ["Volume Graphics GmbH"]
diff --git a/README.md b/README.md
index 5b6d6bf..21baeeb 100644
--- a/README.md
+++ b/README.md
@@ -32,6 +32,11 @@ Please see our docs.rs for example code and the gherkin tests for how to check t
 
 ## Changelog
 
+### 0.4.1
+
+- add rust-toolchain 1.88
+- read git config for lfs storage path
+
 ### 0.4.0
 
 - upgrade a few dependencies
diff --git a/rust-toolchain.toml b/rust-toolchain.toml
new file mode 100644
index 0000000..7e8a18c
--- /dev/null
+++ b/rust-toolchain.toml
@@ -0,0 +1,3 @@
+[toolchain]
+channel = "1.88.0"
+components = ["clippy"]
diff --git a/src/repo_tools/mod.rs b/src/repo_tools/mod.rs
index 57208b0..eaa3f46 100644
--- a/src/repo_tools/mod.rs
+++ b/src/repo_tools/mod.rs
@@ -4,15 +4,15 @@ mod primitives;
 use futures_util::TryFutureExt;
 use glob::glob;
 use primitives::get_repo_root;
-use std::env;
 use std::path::{Path, PathBuf};
 use tokio::fs;
-use tracing::{debug, error, info};
+use tokio::fs::read_to_string;
+use tracing::{debug, error, info, warn};
 use url::Url;
 use vg_errortools::{fat_io_wrap_tokio, FatIOError};
 
 async fn get_remote_url_from_file(git_file: impl AsRef<Path>) -> Result<String, LFSError> {
-    let file_buffer = fat_io_wrap_tokio(git_file, fs::read_to_string).await?;
+    let file_buffer = fat_io_wrap_tokio(git_file, read_to_string).await?;
     let remote_url = file_buffer
         .lines()
         .find(|&line| line.contains("url"))
@@ -21,7 +21,7 @@ async fn get_remote_url_from_file(git_file: impl AsRef<Path>) -> Result<String, LFSError> {
         .host_str()
         .ok_or(LFSError::InvalidFormat("Url had no valid host"))?;
     let path = input_url.path();
-    Ok(format!("https://{}{}", host, path))
+    Ok(format!("https://{host}{path}"))
 }
 
 async fn get_cache_dir<P: AsRef<Path>>(
@@ -87,17 +87,39 @@
     let oid_1 = &metadata.oid[0..2];
     let oid_2 = &metadata.oid[2..4];
 
-    let lfs_object_dir = if let Ok(value) = env::var("LFS_STORAGE_DIR") {
-        debug!("get from env var {}", &value);
-        PathBuf::from(value)
-    } else {
-        get_real_repo_root(repo_root)
-            .await?
-            .join(".git")
-            .join("lfs")
-    };
+    let mut git_folder = get_real_repo_root(repo_root).await?.join(".git");
+    let config = git_folder.join("config");
+    if config.exists() {
+        debug!("Read git config file in {}", config.to_string_lossy());
+        let config_content = read_to_string(&config).await.unwrap_or_else(|e| {
+            warn!("Could not read git config: {e}");
+            String::new()
+        });
+        let mut config_content = config_content.lines().peekable();
+
+        while config_content.peek().is_some() {
+            let line = config_content.next().unwrap_or_default();
+            let line = line.trim();
+            if line.contains("[lfs]") {
+                while config_content.peek().is_some() {
+                    let next_line = config_content.next().unwrap_or_default();
+                    let next_line = next_line.trim();
+                    if let Some(storage_url) = next_line.strip_prefix("storage = ") {
+                        debug!("Found git lfs storage path: '{storage_url}'");
+                        git_folder = PathBuf::from(storage_url);
+                        break;
+                    }
+                }
+                break;
+            }
+        }
+    }
 
-    Ok(lfs_object_dir.join("objects").join(oid_1).join(oid_2))
+    Ok(git_folder
+        .join("lfs")
+        .join("objects")
+        .join(oid_1)
+        .join(oid_2))
 }
 
 async fn get_file_cached<P: AsRef<Path>>(
@@ -188,7 +210,7 @@ pub async fn pull_file<P: AsRef<Path>>(
     let metadata = primitives::parse_lfs_file(&lfs_file).await?;
     debug!("Downloading file");
     let repo_root = get_repo_root(&lfs_file).await.map_err(|e| {
-        LFSError::DirectoryTraversalError(format!("Could not find git repo root: {:?}", e))
+        LFSError::DirectoryTraversalError(format!("Could not find git repo root: {e:?}"))
     })?;
     let (file_name_cached, origin) = get_file_cached(
         &repo_root,
@@ -214,11 +236,11 @@ fn glob_recurse(wildcard_pattern: &str) -> Result<Vec<PathBuf>, LFSError> {
     let mut return_vec = Vec::new();
     let glob = glob(wildcard_pattern).map_err(|e| {
-        LFSError::DirectoryTraversalError(format!("Could not parse glob pattern: {}", e))
+        LFSError::DirectoryTraversalError(format!("Could not parse glob pattern: {e}"))
     })?;
     for entry in glob {
         return_vec.push(entry.map_err(|e| {
-            LFSError::DirectoryTraversalError(format!("Error in glob result list: {}", e))
+            LFSError::DirectoryTraversalError(format!("Error in glob result list: {e}"))
         })?);
     }
     Ok(return_vec)
 }
@@ -264,6 +286,7 @@ pub async fn glob_recurse_pull_directory(
 #[cfg(test)]
 mod tests {
     use super::*;
+    use crate::repo_tools::primitives::MetaData;
     use tracing::error;
 
     #[tokio::test(flavor = "multi_thread", worker_threads = 1)]
diff --git a/src/repo_tools/primitives.rs b/src/repo_tools/primitives.rs
index a1b5932..f83b564 100644
--- a/src/repo_tools/primitives.rs
+++ b/src/repo_tools/primitives.rs
@@ -158,7 +158,7 @@ pub async fn handle_download(
         return if status == StatusCode::FORBIDDEN || status == StatusCode::UNAUTHORIZED {
             Err(LFSError::AccessDenied)
         } else {
-            Err(LFSError::ResponseNotOkay(format!("{}", status)))
+            Err(LFSError::ResponseNotOkay(format!("{status}")))
         };
     }
     let parsed_result = response.json().await?;
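Patch 7 drops the env var again in favour of the storage path configured in `.git/config`. The scan accepts exactly the `storage = <path>` spelling inside an `[lfs]` section; a self-contained sketch of the same logic on a literal config string (the function name and path are illustrative):

```rust
/// Condensed form of the config scan in get_cache_dir: return the value of
/// the first `storage = ...` entry found after an `[lfs]` section header.
fn find_lfs_storage(config: &str) -> Option<String> {
    let mut lines = config.lines();
    while let Some(line) = lines.next() {
        if line.trim().contains("[lfs]") {
            // Like the patch, take the first `storage = ` entry that follows.
            for next_line in lines.by_ref() {
                if let Some(path) = next_line.trim().strip_prefix("storage = ") {
                    return Some(path.to_string());
                }
            }
            break;
        }
    }
    None
}

fn main() {
    let config = "[core]\n\tbare = false\n[lfs]\n\tstorage = /mnt/shared/lfs\n";
    assert_eq!(find_lfs_storage(config), Some("/mnt/shared/lfs".to_string()));
}
```

Note the simplification shared with the patch: a `storage=/path` entry without spaces around `=`, which git itself accepts, would not match the `storage = ` prefix.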
From e19a5d2c360f5a18a693fceacac2ac21960da1d5 Mon Sep 17 00:00:00 2001
From: "adinata.wijaya"
Date: Wed, 24 Dec 2025 10:09:19 +0100
Subject: [PATCH 8/8] - add timeout

---
 README.md                    |  5 +++
 src/lib.rs                   |  3 ++
 src/main.rs                  | 17 +++++++-
 src/repo_tools/mod.rs        | 10 +++--
 src/repo_tools/primitives.rs | 78 +++++++++++++++++++++++++++++++-----
 tests/lfspull.rs             |  4 +-
 6 files changed, 99 insertions(+), 18 deletions(-)

diff --git a/README.md b/README.md
index 21baeeb..f1b58a7 100644
--- a/README.md
+++ b/README.md
@@ -24,6 +24,10 @@ The CLI is pretty straight forward.
 - e.g. 'lfspull -r "**/*.tgz"' downloads all .tgz files this folder and all subfolders
 - '-b / --random-bytes [RANDOM_BYTES]' for temp file name. See https://docs.rs/tempfile/latest/tempfile/struct.Builder.html#method.rand_bytes
 - '-a / --access-token [TOKEN]' sets the token - can also be set via $ACCESS_TOKEN from env
+- '-m / --max-retry [NUMBER]' max number of download attempts if a request fails
+- '-t / --timeout [NUMBER]' sets the timeout in seconds for the git lfs pull request
+  - When no value is given, the timeout is calculated automatically based on the lfs object size
+  - When 0 is given, there is no timeout
 - '-v' for verbose mode
 
 ## Library API guide
@@ -36,6 +40,7 @@
 
 - add rust-toolchain 1.88
 - read git config for lfs storage path
+- add timeout
 
 ### 0.4.0
diff --git a/src/lib.rs b/src/lib.rs
index d0ebc87..73fb7ea 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -84,6 +84,9 @@
     /// all download attempts have failed
     #[error("Maximum download attempts reached")]
     ReachedMaxDownloadAttempt,
+    /// Timeout error
+    #[error("Download failed due to timeout")]
+    Timeout,
     }
 }
 pub use prelude::FilePullMode;
diff --git a/src/main.rs b/src/main.rs
index b5b474d..5db5a79 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -29,6 +29,12 @@ struct Args {
     /// Print debug information
     #[clap(short, long)]
     verbose: bool,
+
+    /// timeout in seconds for the git lfs pull request
+    /// When no value is given, the timeout is calculated automatically based on the lfs object size
+    /// When 0 is given, there is no timeout
+    #[clap(short, long)]
+    timeout: Option<u64>,
 }
 
 #[tokio::main]
@@ -52,8 +58,14 @@ pub async fn main() -> Result<(), LFSError> {
     let access_token = args.access_token.as_deref();
     if let Some(file) = args.file_to_pull {
         info!("Single file mode: {}", file.to_string_lossy());
-        let result =
-            lfspull::pull_file(file, access_token, args.max_retry, args.random_bytes).await?;
+        let result = lfspull::pull_file(
+            file,
+            access_token,
+            args.max_retry,
+            args.random_bytes,
+            args.timeout,
+        )
+        .await?;
         info!("Result: {}", result);
     }
     if let Some(recurse_pattern) = args.recurse_pattern {
@@ -63,6 +75,7 @@
             access_token,
             args.max_retry,
             args.random_bytes,
+            args.timeout,
         )
         .await?;
         info!("Pulling finished! Listing files and sources: ");
diff --git a/src/repo_tools/mod.rs b/src/repo_tools/mod.rs
index eaa3f46..25c112e 100644
--- a/src/repo_tools/mod.rs
+++ b/src/repo_tools/mod.rs
@@ -128,6 +128,7 @@ async fn get_file_cached<P: AsRef<Path>>(
     access_token: Option<&str>,
     max_retry: u32,
     randomizer_bytes: Option<usize>,
+    timeout: Option<u64>,
 ) -> Result<(PathBuf, FilePullMode), LFSError> {
     debug!("version: {}", &metadata.version);
     let cache_dir = get_cache_dir(&repo_root, metadata).await?;
@@ -153,6 +154,7 @@
         access_token,
         max_retry,
         randomizer_bytes,
+        timeout,
     )
     .await?;
     if cache_file.exists() {
@@ -196,6 +198,7 @@ pub async fn pull_file<P: AsRef<Path>>(
     access_token: Option<&str>,
     max_retry: u32,
     randomizer_bytes: Option<usize>,
+    timeout: Option<u64>,
 ) -> Result<FilePullMode, LFSError> {
     info!("Pulling file {}", lfs_file.as_ref().to_string_lossy());
     if !primitives::is_lfs_node_file(&lfs_file).await? {
@@ -218,6 +221,7 @@
         access_token,
         max_retry,
         randomizer_bytes,
+        timeout,
     )
     .await?;
     info!(
@@ -262,7 +266,7 @@ fn glob_recurse(wildcard_pattern: &str) -> Result<Vec<PathBuf>, LFSError> {
 ///
 /// Load all .jpg files from all subdirectories
 /// ```no_run
-/// let result = lfspull::glob_recurse_pull_directory("dir/to/pull/**/*.jpg", Some("secret-token"), 3, Some(5));
+/// let result = lfspull::glob_recurse_pull_directory("dir/to/pull/**/*.jpg", Some("secret-token"), 3, Some(5), Some(0));
 /// ```
 ///
 pub async fn glob_recurse_pull_directory(
@@ -270,13 +274,14 @@
     access_token: Option<&str>,
     max_retry: u32,
     randomizer_bytes: Option<usize>,
+    timeout: Option<u64>,
 ) -> Result<Vec<(String, FilePullMode)>, LFSError> {
     let mut result_vec = Vec::new();
     let files = glob_recurse(wildcard_pattern)?;
     for path in files {
         result_vec.push((
             path.to_string_lossy().to_string(),
-            pull_file(&path, access_token, max_retry, randomizer_bytes).await?,
+            pull_file(&path, access_token, max_retry, randomizer_bytes, timeout).await?,
         ));
     }
 
@@ -286,7 +291,6 @@ pub async fn glob_recurse_pull_directory(
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::repo_tools::primitives::MetaData;
     use tracing::error;
 
     #[tokio::test(flavor = "multi_thread", worker_threads = 1)]
diff --git a/src/repo_tools/primitives.rs b/src/repo_tools/primitives.rs
index f83b564..bc56b97 100644
--- a/src/repo_tools/primitives.rs
+++ b/src/repo_tools/primitives.rs
@@ -14,7 +14,7 @@
 use tempfile::NamedTempFile;
 use tokio::fs;
 use tokio::io::AsyncReadExt;
-use tokio::time::sleep;
+use tokio::time::{sleep, timeout};
 use tracing::{debug, error, info};
 use url::Url;
 use vg_errortools::{fat_io_wrap_tokio, FatIOError};
@@ -245,26 +245,67 @@ pub async fn download_file(
     access_token: Option<&str>,
     max_retry: u32,
     randomizer_bytes: Option<usize>,
+    connection_timeout: Option<u64>,
 ) -> Result<NamedTempFile, LFSError> {
+    let effective_timeout = get_effective_timeout(connection_timeout, meta_data.size);
     for attempt in 1..=max_retry {
         debug!("Download attempt {attempt}");
-        match handle_download(meta_data, repo_remote_url, access_token, randomizer_bytes).await {
-            Ok(tempfile) => {
-                return Ok(tempfile);
-            }
-            Err(e) => {
-                if matches!(e, LFSError::AccessDenied) {
-                    return Err(e);
+        let download = handle_download(meta_data, repo_remote_url, access_token, randomizer_bytes);
+        let result = if let Some(seconds) = effective_timeout {
+            timeout(Duration::from_secs(seconds), download).await
+        } else {
+            Ok(download.await)
+        };
+
+        match result {
+            Ok(download_result) => match download_result {
+                Ok(tempfile) => {
+                    return Ok(tempfile);
+                }
+                Err(e) => {
+                    if matches!(e, LFSError::AccessDenied) {
+                        return Err(e);
+                    }
+                    error!("Download error: {e}");
                 }
-                error!("Download error: {e}. Attempting another download: {attempt}");
-                sleep(Duration::from_secs(1)).await;
+            },
+            Err(timeout_err) => {
+                error!("Timeout reached: {timeout_err}");
             }
         }
+        sleep(Duration::from_secs(1)).await;
     }
 
     Err(LFSError::ReachedMaxDownloadAttempt)
 }
 
+/// Some(0) => no timeout
+/// Some(x) => x seconds timeout
+/// None => automatic
+fn get_effective_timeout(timeout: Option<u64>, file_size_in_kb: usize) -> Option<u64> {
+    match timeout {
+        Some(0) => {
+            debug!("No timeout");
+            None
+        }
+        Some(val) => {
+            debug!("Set timeout to {val} s");
+            Some(val)
+        }
+        None => {
+            let min_upload_speed_mb_per_sec = 1.0;
+            let min_timeout_secs = 30;
+            let file_size_mb = file_size_in_kb as f64 / (1024.0 * 1024.0);
+            let timeout_secs = (file_size_mb / min_upload_speed_mb_per_sec).ceil() as u64;
+            let timeout_secs = timeout_secs.max(min_timeout_secs);
+
+            debug!("Automatic calculated timeout: {timeout_secs} s");
+
+            Some(timeout_secs)
+        }
+    }
+}
+
 pub async fn is_lfs_node_file<P: AsRef<Path>>(path: P) -> Result<bool, LFSError> {
     if path.as_ref().is_dir() {
         return Ok(false);
@@ -342,7 +383,7 @@ size 226848"#;
 
     #[tokio::test(flavor = "multi_thread", worker_threads = 1)]
     async fn try_pull_from_demo_repo() {
         let parsed = parse_lfs_string(LFS_TEST_DATA).expect("Could not parse demo-string!");
-        let temp_file = download_file(&parsed, URL, None, 3, None)
+        let temp_file = download_file(&parsed, URL, None, 3, None, Some(0))
             .await
             .expect("could not download file");
         let temp_size = temp_file
@@ -376,4 +417,19 @@
             .expect("File was not readable");
         assert!(!result);
     }
+
+    #[tokio::test(flavor = "multi_thread", worker_threads = 1)]
+    async fn test_get_effective_timeout() {
+        let timeout = get_effective_timeout(Some(9), 1000);
+        assert_eq!(timeout, Some(9));
+
+        let timeout = get_effective_timeout(Some(0), 1000);
+        assert_eq!(timeout, None);
+
+        let timeout = get_effective_timeout(None, 1000);
+        assert_eq!(timeout, Some(30));
+
+        let timeout = get_effective_timeout(None, 200000000);
+        assert_eq!(timeout, Some(191));
+    }
 }
diff --git a/tests/lfspull.rs b/tests/lfspull.rs
index 51ebeaf..a3b884b 100644
--- a/tests/lfspull.rs
+++ b/tests/lfspull.rs
@@ -55,7 +55,7 @@ async fn pull_file_step(world: &mut LFSWorld) {
         .clone()
         .join(TEST_LFS_FILE_NAME);
     world.pull_result = Some(
-        lfspull::pull_file(file_path, None, 3, Some(5))
+        lfspull::pull_file(file_path, None, 3, Some(5), Some(0))
             .await
             .expect("Could not pull file"),
     );
@@ -65,7 +65,7 @@
 async fn pull_directory(world: &mut LFSWorld) {
     let fake_repo = world.current_fake_repo.as_ref().unwrap().to_string_lossy();
     let pattern = format!("{}/**/*", fake_repo);
-    let recurse_pull = lfspull::glob_recurse_pull_directory(&pattern, None, 3, Some(5))
+    let recurse_pull = lfspull::glob_recurse_pull_directory(&pattern, None, 3, Some(5), Some(0))
         .await
         .expect("Could not pull directory")
         .into_iter()
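Two details of the final timeout logic are easy to miss. First, `tokio::time::timeout` returns `Result<T, Elapsed>`, so wrapping the download future yields the nested `Result` that the double `match` above unpacks. Second, despite the `file_size_in_kb` parameter name, the `1024.0 * 1024.0` divisor treats `meta_data.size` as a byte count (the `size` field of an LFS pointer is in bytes, compare `size 226848` in the test fixture), which is exactly why `test_get_effective_timeout` expects 191 s for 200000000. A standalone check of that arithmetic:

```rust
fn main() {
    // Automatic branch of get_effective_timeout with the patch's constants:
    // assume a 1 MB/s worst-case link and never go below 30 s.
    let min_upload_speed_mb_per_sec = 1.0_f64;
    let min_timeout_secs = 30_u64;

    // 200_000_000 bytes / (1024 * 1024) is ~190.73 MB, ceil gives 191 s.
    let size = 200_000_000_f64;
    let secs = (size / (1024.0 * 1024.0) / min_upload_speed_mb_per_sec).ceil() as u64;
    assert_eq!(secs.max(min_timeout_secs), 191);

    // 1000 bytes rounds up to 1 s, then the 30 s floor applies.
    let small = 1000_f64;
    let secs = (small / (1024.0 * 1024.0) / min_upload_speed_mb_per_sec).ceil() as u64;
    assert_eq!(secs.max(min_timeout_secs), 30);
}
```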