diff --git a/Cargo.lock b/Cargo.lock index ee4d09c..a2e658d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1676,7 +1676,7 @@ dependencies = [ [[package]] name = "fula-api" -version = "0.5.0" +version = "0.5.2" dependencies = [ "anyhow", "axum", @@ -1704,7 +1704,7 @@ dependencies = [ [[package]] name = "fula-blockstore" -version = "0.5.0" +version = "0.5.2" dependencies = [ "anyhow", "async-trait", @@ -1742,7 +1742,7 @@ dependencies = [ [[package]] name = "fula-cli" -version = "0.5.0" +version = "0.5.2" dependencies = [ "anyhow", "async-trait", @@ -1794,7 +1794,7 @@ dependencies = [ [[package]] name = "fula-client" -version = "0.5.0" +version = "0.5.2" dependencies = [ "anyhow", "async-trait", @@ -1834,7 +1834,7 @@ dependencies = [ [[package]] name = "fula-core" -version = "0.5.0" +version = "0.5.2" dependencies = [ "anyhow", "async-trait", @@ -1869,7 +1869,7 @@ dependencies = [ [[package]] name = "fula-crypto" -version = "0.5.0" +version = "0.5.2" dependencies = [ "aes-gcm", "anyhow", @@ -1913,7 +1913,7 @@ dependencies = [ [[package]] name = "fula-flutter" -version = "0.5.0" +version = "0.5.2" dependencies = [ "anyhow", "async-lock", @@ -1936,7 +1936,7 @@ dependencies = [ [[package]] name = "fula-js" -version = "0.5.0" +version = "0.5.2" dependencies = [ "base64 0.22.1", "bytes", diff --git a/Cargo.toml b/Cargo.toml index 6514987..20521d6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -77,7 +77,7 @@ name = "encrypted_upload_test" path = "examples/encrypted_upload_test.rs" [workspace.package] -version = "0.5.1" +version = "0.5.2" edition = "2021" license = "MIT OR Apache-2.0" repository = "https://github.com/functionland/fula-api" diff --git a/crates/fula-client/src/block_cache.rs b/crates/fula-client/src/block_cache.rs index b2d6595..6348d04 100644 --- a/crates/fula-client/src/block_cache.rs +++ b/crates/fula-client/src/block_cache.rs @@ -90,6 +90,39 @@ const META_USERS_INDEX_CID: &[u8] = b"users_index/cid"; const META_USERS_INDEX_SEQUENCE: &[u8] = b"users_index/sequence"; const META_USERS_INDEX_OBSERVED_AT: &[u8] = b"users_index/observed_at_unix"; +/// Issue #8 fix #2 — list_buckets offline cache. +/// +/// The raw S3 list-buckets XML body stored after each successful +/// master-up call. Served back on `MasterUnreachable` so the SDK has +/// a working offline fallback for the top-level "what buckets exist" +/// query, the same way it does for object reads via the cid-hint +/// path. +/// +/// **Per-JWT scoping (security).** Each user's cached XML lives +/// under its own row keyed by `sha256(access_token)[..16]` (hex). +/// A shared cache file (rare in FxFiles which uses per-app sandbox, +/// but possible on multi-user devices) can hold cached responses +/// for many distinct accounts without one being able to serve +/// another's cached state. JWT rotation does invalidate the cache +/// (new token → new key), which is acceptable — a fresh master-up +/// call re-populates within seconds. +/// +/// FxFiles already implements a similar Dart-side `listBucketsCached` +/// shim — moving it into the SDK means every consumer benefits +/// without re-implementing it. +const META_LIST_BUCKETS_PREFIX: &str = "list_buckets/"; +const META_LIST_BUCKETS_XML_SUFFIX: &str = "/response_xml"; +const META_LIST_BUCKETS_OBSERVED_AT_SUFFIX: &str = "/observed_at_unix"; + +/// Build the per-scope METADATA keys for the list-buckets cache. +/// `scope` is `sha256(jwt)[..16]` hex (32 chars) — see +/// `FulaClient::list_buckets_cache_scope`. +fn list_buckets_keys(scope: &str) -> (Vec, Vec) { + let xml_key = format!("{}{}{}", META_LIST_BUCKETS_PREFIX, scope, META_LIST_BUCKETS_XML_SUFFIX); + let obs_key = format!("{}{}{}", META_LIST_BUCKETS_PREFIX, scope, META_LIST_BUCKETS_OBSERVED_AT_SUFFIX); + (xml_key.into_bytes(), obs_key.into_bytes()) +} + /// Eviction low-watermark: when triggered, free space until usage is at /// or below this fraction of `max_bytes`. 80 % is the industry-standard /// "evict-once-amortize-many-puts" point. @@ -537,6 +570,104 @@ impl BlockCache { ))) } + /// Issue #8 fix #2 — store the raw list-buckets XML body, scoped + /// to the JWT-derived `scope` key, so a subsequent master-down + /// call by the SAME user can serve it offline. + /// + /// `scope` is `sha256(access_token)[..16]` hex — see + /// `FulaClient::list_buckets_cache_scope`. Per-JWT scoping + /// prevents cross-account pollution on shared cache files: user + /// A's cached XML cannot be served to user B even if both share + /// the same redb file. + /// + /// Overwrites on each call (one row per scope). The freshness + /// timestamp is stored alongside so callers can surface staleness. + pub(crate) fn store_list_buckets_xml( + &self, + scope: &str, + xml: &str, + observed_at_unix: u64, + ) -> Result<(), BlockCacheError> { + let (xml_key, obs_key) = list_buckets_keys(scope); + let txn = self.inner.db.begin_write()?; + { + let mut table = txn.open_table(METADATA)?; + + // Prune stale scopes: when JWTs rotate (e.g., every few + // hours), each new token derives a new `scope` key. Without + // pruning, every prior scope's rows linger forever in the + // METADATA table (one user × 24 rotations/day × 1 KB ≈ + // 9 MB/year, never reclaimed since METADATA isn't LRU-evicted). + // + // Policy: keep only the current scope's two rows. Past + // scopes are unreachable anyway (no live JWT can derive + // them), so dropping them loses nothing. + // + // O(n) over METADATA where n is "JWT-rotation history" — + // bounded by usage pattern, but a few-hundred entries is + // negligible. + let prefix = META_LIST_BUCKETS_PREFIX.as_bytes(); + let mut to_remove: Vec> = Vec::new(); + for entry in table.iter()? { + let (k, _) = entry?; + let key_bytes = k.value(); + if key_bytes.starts_with(prefix) + && key_bytes != xml_key.as_slice() + && key_bytes != obs_key.as_slice() + { + to_remove.push(key_bytes.to_vec()); + } + } + for k in to_remove { + table.remove(k.as_slice())?; + } + + table.insert(xml_key.as_slice(), xml.as_bytes())?; + table.insert(obs_key.as_slice(), observed_at_unix.to_be_bytes().as_slice())?; + } + txn.commit()?; + Ok(()) + } + + /// Issue #8 fix #2 — load the previously-cached list-buckets XML + /// body scoped to `scope`. Returns `None` if no entry exists for + /// THIS JWT. Returns the observed-at-unix timestamp alongside so + /// the caller can surface staleness. + pub(crate) fn load_list_buckets_xml( + &self, + scope: &str, + ) -> Result, BlockCacheError> { + let (xml_key, obs_key) = list_buckets_keys(scope); + let read = self.inner.db.begin_read()?; + let table = read.open_table(METADATA)?; + let xml_bytes = match table.get(xml_key.as_slice())? { + Some(v) => v.value().to_vec(), + None => return Ok(None), + }; + let xml = match String::from_utf8(xml_bytes) { + Ok(s) => s, + Err(e) => { + tracing::warn!(error = %e, "list_buckets cache: invalid UTF-8; treating as empty"); + return Ok(None); + } + }; + let observed_at = match table.get(obs_key.as_slice())? { + Some(v) => { + let bytes = v.value(); + if bytes.len() != 8 { + tracing::warn!("list_buckets cache: malformed observed_at length; treating as 0"); + 0 + } else { + let mut buf = [0u8; 8]; + buf.copy_from_slice(bytes); + u64::from_be_bytes(buf) + } + } + None => 0, + }; + Ok(Some((xml, observed_at))) + } + /// Evict LRU entries until `current_bytes <= target_bytes`. Caller /// must hold `evict_lock`. Atomic via a single redb write txn. fn evict_to(&self, target_bytes: u64) -> Result<(), BlockCacheError> { diff --git a/crates/fula-client/src/client.rs b/crates/fula-client/src/client.rs index 829a75b..d5a10e8 100644 --- a/crates/fula-client/src/client.rs +++ b/crates/fula-client/src/client.rs @@ -112,12 +112,33 @@ impl FulaClient { None }; - // GatewayPool requires block_cache as a hard prereq: without - // a cached `(bucket, key) → cid` mapping the fallback path has - // no CID to fetch. If the cache failed to open we silently - // disable gateway fallback too. + // Issue #8 fix #4 — gateway_pool no longer cascades to None + // when block_cache is None. + // + // Old behavior: `gateway_fallback_enabled && block_cache.is_some()`. + // If the redb cache file failed to open (e.g., another + // EncryptedClient instance briefly held the lock during a + // FxFiles reinit), block_cache became None — and gateway_pool + // ALSO became None. The entire offline-fallback path was dead + // for the session, surfaced only by an easy-to-miss `warn!` + // line. + // + // New behavior: `gateway_fallback_enabled` alone gates the + // pool. The cid-hint variant of the offline fallback + // (`try_offline_fallback_with_cid_hint`) holds the CID + // externally (from the just-decrypted parent's `LinkV2` + // pointer or from `page_ref.cid`) — it does NOT need a cache + // to source the CID, only to (best-effort) memoize the + // fetched bytes. Letting the pool survive a cache-open + // failure means walkable-v8 offline reads still work in the + // degraded mode. + // + // The no-hint variant (`try_offline_fallback`) still requires + // a cache to translate `(bucket, key) → cid`; if cache is + // None there, it cleanly returns master_error without + // attempting the race. #[cfg(not(target_arch = "wasm32"))] - let gateway_pool = if config.gateway_fallback_enabled && block_cache.is_some() { + let gateway_pool = if config.gateway_fallback_enabled { let pool = if config.gateway_fallback_urls.is_empty() { GatewayPool::default_pool() } else { @@ -277,12 +298,105 @@ impl FulaClient { // ==================== Bucket Operations ==================== - /// List all buckets + /// List all buckets. + /// + /// Issue #8 fix #2 — when master is reachable, the XML response + /// body is mirrored into the local BlockCache's METADATA table so + /// a subsequent master-down call (DNS error, connection refused, + /// health-gate short-circuit, etc.) can serve from cache instead + /// of propagating the network error. + /// + /// **Per-JWT scoping.** Each user's cached XML lives under a row + /// keyed by `sha256(access_token)[..16]` so a shared cache file + /// cannot leak one account's bucket list to another. JWT rotation + /// invalidates the cache (new token → new scope key); the next + /// master-up call re-populates. + /// + /// **No access token configured (anonymous client).** Caching is + /// disabled — the scope key would be empty and a non-anonymous + /// caller could collide with it. + /// + /// Backward compatibility: master-up behavior is byte-identical + /// to pre-fix. The cache write is best-effort (a redb error logs + /// at `debug` and is dropped). #[instrument(skip(self))] pub async fn list_buckets(&self) -> Result { - let response = self.request("GET", "/", None, None, None).await?; + let response = match self.request("GET", "/", None, None, None).await { + Ok(r) => r, + #[cfg(not(target_arch = "wasm32"))] + Err(e) if is_master_unreachable_error(&e) => { + // Master is unreachable. Try the cached XML body for + // THIS user's scope. + if let (Some(cache), Some(scope)) = (&self.block_cache, self.list_buckets_cache_scope()) { + match cache.load_list_buckets_xml(&scope) { + Ok(Some((xml, _observed_at))) => { + debug!("list_buckets: master unreachable; serving from cache"); + return parse_list_buckets_response(&xml); + } + Ok(None) => { + debug!("list_buckets: master unreachable; no cached entry for this JWT"); + } + Err(cache_err) => { + debug!( + error = %cache_err, + "list_buckets: master unreachable AND cache read failed" + ); + } + } + } + return Err(e); + } + Err(e) => return Err(e), + }; let text = response.text().await?; - parse_list_buckets_response(&text) + let parsed = parse_list_buckets_response(&text)?; + + // Master-up success: mirror the body into the cache for + // future offline reads, scoped to this JWT. Best-effort. + #[cfg(not(target_arch = "wasm32"))] + if let (Some(cache), Some(scope)) = (&self.block_cache, self.list_buckets_cache_scope()) { + let observed_at = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_secs()) + .unwrap_or(0); + if let Err(e) = cache.store_list_buckets_xml(&scope, &text, observed_at) { + debug!( + error = %e, + "list_buckets: cache write failed (best-effort; master fetch already succeeded)" + ); + } + } + + Ok(parsed) + } + + /// Issue #8 fix #2 — derive the per-JWT scope key for the + /// list-buckets cache. + /// + /// `sha256(access_token)[..16]` (32 hex chars). Returns `None` + /// when no access token is configured (anonymous clients don't + /// cache list_buckets responses; they can still serve cached + /// reads via the cid-hint path which has its own integrity story). + /// + /// Security note: the cache key is derived from the bearer token + /// rather than the OAuth-derived stable user_id because the + /// stable id is in `EncryptionConfig` (not on `FulaClient`). + /// JWT rotation does invalidate the cache as a side effect, + /// which is acceptable — a fresh master-up call re-populates in + /// seconds. The threat we're closing is cross-account + /// pollution on shared cache files, which the JWT-bound key + /// prevents whether or not it's rotation-stable. + #[cfg(not(target_arch = "wasm32"))] + fn list_buckets_cache_scope(&self) -> Option { + use sha2::{Digest, Sha256}; + let token = self.config.access_token.as_deref()?; + if token.is_empty() { + return None; + } + let digest = Sha256::digest(token.as_bytes()); + // 16 hex chars = 8 bytes of entropy. Plenty against accidental + // collision; not a security-critical derivation. + Some(hex::encode(&digest[..8])) } /// Create a bucket @@ -379,8 +493,15 @@ impl FulaClient { } } + // Clone `data` for the post-PUT cache warm. `Bytes` is + // refcounted (Arc) so this is O(1). The PUT consumes its + // Bytes; the clone gives us bytes for `cache.put(&cid, &bytes)` + // after we've confirmed the PUT succeeded. + #[cfg(not(target_arch = "wasm32"))] + let body_for_cache = data.clone(); + let response = self.request("PUT", &path, None, Some(headers), Some(data)).await?; - + let etag = response .headers() .get("ETag") @@ -388,6 +509,13 @@ impl FulaClient { .map(|s| s.trim_matches('"').to_string()) .unwrap_or_default(); + // Issue #8 fix #3 — warm the local BLOCKS cache with the + // just-PUT bytes so a subsequent offline read can serve them + // without depending on a follow-up master-up read or on + // public-IPFS-DHT propagation of the freshly-pinned CID. + #[cfg(not(target_arch = "wasm32"))] + self.warm_block_cache_after_put(bucket, key, &etag, &body_for_cache).await; + Ok(PutObjectResult { etag, version_id: None, @@ -429,6 +557,10 @@ impl FulaClient { headers.insert("If-None-Match".to_string(), val); } + // Clone for the post-PUT cache warm (see put_object_with_metadata). + #[cfg(not(target_arch = "wasm32"))] + let body_for_cache = data.clone(); + let response = self.request("PUT", &path, None, Some(headers), Some(data)).await?; let etag = response @@ -438,6 +570,14 @@ impl FulaClient { .map(|s| s.trim_matches('"').to_string()) .unwrap_or_default(); + // Issue #8 fix #3 — warm BLOCKS for manifest pages, dir-index + // commits, and the Phase-2 root commit. These all flow through + // `put_object_with_metadata_conditional` directly (NOT through + // S3BlobBackend), so without warming here the offline path + // cannot serve them after a write. + #[cfg(not(target_arch = "wasm32"))] + self.warm_block_cache_after_put(bucket, key, &etag, &body_for_cache).await; + Ok(PutObjectResult { etag, version_id: None }) } @@ -467,8 +607,11 @@ impl FulaClient { headers.insert("X-Pinning-Service".to_string(), pinning_service.to_string()); headers.insert("X-Pinning-Token".to_string(), pinning_token.to_string()); + #[cfg(not(target_arch = "wasm32"))] + let body_for_cache = data.clone(); + let response = self.request("PUT", &path, None, Some(headers), Some(data)).await?; - + let etag = response .headers() .get("ETag") @@ -476,6 +619,9 @@ impl FulaClient { .map(|s| s.trim_matches('"').to_string()) .unwrap_or_default(); + #[cfg(not(target_arch = "wasm32"))] + self.warm_block_cache_after_put(bucket, key, &etag, &body_for_cache).await; + Ok(PutObjectResult { etag, version_id: None, @@ -499,7 +645,7 @@ impl FulaClient { let data = data.into(); let mut headers = HashMap::new(); - + // Add metadata headers if let Some(meta) = metadata { if let Some(ct) = meta.content_type { @@ -509,13 +655,17 @@ impl FulaClient { headers.insert(format!("x-amz-meta-{}", k), v); } } - + // Add pinning headers headers.insert("X-Pinning-Service".to_string(), pinning_service.to_string()); headers.insert("X-Pinning-Token".to_string(), pinning_token.to_string()); + // Clone for the post-PUT cache warm (see put_object_with_metadata). + #[cfg(not(target_arch = "wasm32"))] + let body_for_cache = data.clone(); + let response = self.request("PUT", &path, None, Some(headers), Some(data)).await?; - + let etag = response .headers() .get("ETag") @@ -523,12 +673,132 @@ impl FulaClient { .map(|s| s.trim_matches('"').to_string()) .unwrap_or_default(); + // Issue #8 fix #3 — warm BLOCKS for chunked uploads and any + // other path that routes through the pinning variant. + #[cfg(not(target_arch = "wasm32"))] + self.warm_block_cache_after_put(bucket, key, &etag, &body_for_cache).await; + Ok(PutObjectResult { etag, version_id: None, }) } + /// Issue #8 fix #3 — best-effort cache warm-up after a successful + /// PUT. + /// + /// Background: every encrypted write the SDK makes is also a + /// content-addressed pin. Pre-fix, those bytes lived only on + /// master (and master's downstream IPFS-cluster pin chain) — + /// the SDK's LOCAL `BlockCache` was populated exclusively by + /// successful master-UP READS. So the user-reported flow + /// "upload → reinit to offline → read" would walk through: + /// `request()` → health-gate `MasterUnreachable` short-circuit + /// → `try_offline_fallback*` → `cache.get(cid)` → MISS (writes + /// never populated BLOCKS) → gateway race → 404 (DHT propagation + /// hadn't reached public gateways yet for the freshly-pinned + /// CID) → propagate `Master unreachable`. See issue #8 for the + /// full trace. + /// + /// Fix: every `put_object_*` method on `FulaClient` (and + /// `S3BlobBackend::put` for HAMT internal nodes) calls this + /// helper after a successful PUT. The helper: + /// + /// 1. No-ops when `walkable_v8_writer_enabled` is off (write + /// semantics stay byte-identical to v0.5). + /// 2. No-ops when `block_cache` is None (offline-fallback + /// infrastructure not configured). + /// 3. Tries to parse the master ETag as a CID. Master v0.4.4+ + /// returns `BLAKE3(body)` as the ETag string. On parse + /// failure (legacy master, malformed response) the warm + /// silently skips — the PUT itself already succeeded. + /// 4. Best-effort `cache.put(&cid, body)` + `cache.record_key_cid`. + /// Failures (e.g., `BlockTooLarge` for a single body bigger + /// than the entire cache budget) log at debug and proceed — + /// the bytes are stored on master either way. + /// + /// Threat model: the helper writes ciphertext-as-given to the + /// local cache. The bytes are content-addressed by the parsed + /// CID; a malicious master that returned a wrong-CID ETag + /// would corrupt the SDK's own KEY_TO_CID mapping but cannot + /// inject foreign bytes via this path (the BLOCKS row is keyed + /// by the CID the SDK *parsed*, not the CID the bytes + /// content-address to). Subsequent offline reads through + /// `try_offline_fallback_with_cid_hint` would re-verify the + /// CID against the bytes via `verify_cid_against_bytes` if + /// they came from a gateway race — but a cache HIT skips that + /// check by design (we trust local cache contents). The + /// upgrade story: when `walkable_v8` is on, the same flag also + /// runs `verify_etag_matches_ciphertext` in `S3BlobBackend::put` + /// before stamping a `LinkV2.cid` — that's the authoritative + /// self-verify. This helper relies on that gate being on for + /// trustworthy cache writes. + #[cfg(not(target_arch = "wasm32"))] + async fn warm_block_cache_after_put( + &self, + bucket: &str, + key: &str, + etag: &str, + body: &Bytes, + ) { + // Walkable-v8 writer disabled → master might not return a + // parseable CID ETag (legacy v0.5 master, multipart upload + // returning an aggregate ETag, etc.). Skip entirely so write + // semantics stay byte-identical to v0.5. + if !self.config.walkable_v8_writer_enabled { + return; + } + let cache = match &self.block_cache { + Some(c) => c, + None => return, + }; + if etag.is_empty() { + return; + } + // SECURITY: Self-verify the master-attested CID against + // `BLAKE3(body)` BEFORE caching. Same gate `S3BlobBackend::put` + // uses for `LinkV2.cid` stamping (see encryption.rs:438-450). + // Without this check, a malicious or buggy master could return + // an arbitrary CID as the ETag and the cache row would be + // keyed by an attacker-influenced value, weakening the trust + // chain that the cid-hint variant of `try_offline_fallback` + // relies on (it skips `verify_cid_against_bytes` on BLOCKS + // hits, deliberately, because BLOCKS contents are SDK-internal). + // + // On mismatch, `verify_etag_matches_ciphertext` returns None + // with a rate-limited per-`(bucket, path)` warn — same + // soft-fail-to-None policy that S3BlobBackend::put already + // uses for its `LinkV2.cid` stamping. + let cid = match crate::walkable_v8::verify_etag_matches_ciphertext( + etag, body, bucket, key, + ) { + Some(c) => c, + None => return, + }; + // Best-effort writes. A BlockTooLarge error (single body + // bigger than the entire cache budget) is the only expected + // failure mode; everything else (redb I/O, lock contention) + // is treated identically — log at debug, proceed. + if let Err(e) = cache.put(&cid, body).await { + debug!( + error = %e, + cid = %cid, + bucket = %bucket, + key = %key, + "warm_block_cache_after_put: BLOCKS put failed (best-effort; PUT already succeeded)" + ); + } + if let Err(e) = cache.record_key_cid(bucket, key, &cid) { + debug!( + error = %e, + cid = %cid, + bucket = %bucket, + key = %key, + "warm_block_cache_after_put: KEY_TO_CID record failed (best-effort)" + ); + } + } + /// Get an object #[instrument(skip(self))] pub async fn get_object(&self, bucket: &str, key: &str) -> Result { @@ -921,41 +1191,51 @@ impl FulaClient { cid: &cid::Cid, master_error: ClientError, ) -> Result { - let (cache, pool) = match (&self.block_cache, &self.gateway_pool) { - (Some(c), Some(p)) => (c.clone(), p.clone()), - _ => return Err(master_error), + // Issue #8 fix #4 — the cid-hint variant works even when + // block_cache is None. The CID is supplied externally (from + // a just-decrypted parent's `LinkV2` pointer or from + // `page_ref.cid`), so we can race the gateway pool directly. + // Cache, when present, is used for BLOCKS short-circuit + post- + // fetch memoization; absent, those become no-ops. + let pool = match &self.gateway_pool { + Some(p) => p.clone(), + None => return Err(master_error), }; + let cache = self.block_cache.clone(); // BLOCKS hit — same short-circuit as warm-cache fallback. A // prior cold-start that raced this exact CID populated the // cache, so a follow-up read serves with no network at all. - if let Ok(Some(bytes)) = cache.get(cid) { - debug!(cid = %cid, "offline fallback (cid-hint): BLOCKS hit"); - // Best-effort warm-cache backfill: if this is the first - // time we've seen this `(bucket, key)`, record the mapping - // so the next read can use the no-hint warm path too. - if let Err(e) = cache.record_key_cid(bucket, key, cid) { - debug!( - error = %e, - "block_cache: record_key_cid failed (best-effort)" - ); + if let Some(c) = &cache { + if let Ok(Some(bytes)) = c.get(cid) { + debug!(cid = %cid, "offline fallback (cid-hint): BLOCKS hit"); + // Best-effort warm-cache backfill: if this is the + // first time we've seen this `(bucket, key)`, record + // the mapping so the next read can use the no-hint + // warm path too. + if let Err(e) = c.record_key_cid(bucket, key, cid) { + debug!( + error = %e, + "block_cache: record_key_cid failed (best-effort)" + ); + } + let observed_at = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_millis() as u64) + .unwrap_or(0); + return Ok(OfflineGetResult { + inner: GetObjectResult { + content_length: bytes.len() as u64, + data: bytes, + etag: cid.to_string(), + content_type: None, + last_modified: None, + metadata: HashMap::new(), + }, + source: ReadSource::LocalCache, + freshness: ReadFreshness::Cached { observed_at }, + }); } - let observed_at = std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .map(|d| d.as_millis() as u64) - .unwrap_or(0); - return Ok(OfflineGetResult { - inner: GetObjectResult { - content_length: bytes.len() as u64, - data: bytes, - etag: cid.to_string(), - content_type: None, - last_modified: None, - metadata: HashMap::new(), - }, - source: ReadSource::LocalCache, - freshness: ReadFreshness::Cached { observed_at }, - }); } // Gateway race using the supplied CID. fetch_verified verifies @@ -969,22 +1249,23 @@ impl FulaClient { gateway = %gateway_url, "offline fallback (cid-hint): gateway race succeeded" ); - // Populate BLOCKS so subsequent reads skip the network. - if let Err(e) = cache.put(cid, &bytes).await { - debug!( - error = %e, - "block_cache: put failed (best-effort)" - ); - } - // Populate KEY_TO_CID so the standard warm-cache path - // (which doesn't have a hint) also serves this object - // on the next read. Best-effort: a redb error doesn't - // break this fetch. - if let Err(e) = cache.record_key_cid(bucket, key, cid) { - debug!( - error = %e, - "block_cache: record_key_cid failed (best-effort)" - ); + // Populate BLOCKS + KEY_TO_CID when a cache is present + // so subsequent reads skip the network. Best-effort — + // a redb error doesn't break this fetch, and an absent + // cache just means we re-race on the next read. + if let Some(c) = &cache { + if let Err(e) = c.put(cid, &bytes).await { + debug!( + error = %e, + "block_cache: put failed (best-effort)" + ); + } + if let Err(e) = c.record_key_cid(bucket, key, cid) { + debug!( + error = %e, + "block_cache: record_key_cid failed (best-effort)" + ); + } } let observed_at = std::time::SystemTime::now() .duration_since(std::time::UNIX_EPOCH) diff --git a/crates/fula-client/src/encryption.rs b/crates/fula-client/src/encryption.rs index 8a88305..7a51bbb 100644 --- a/crates/fula-client/src/encryption.rs +++ b/crates/fula-client/src/encryption.rs @@ -448,6 +448,16 @@ impl BlobBackend for S3BlobBackend { // through to the storage-key path. None }; + // Issue #8 fix #3 cache-warm is handled by the + // helper `FulaClient::warm_block_cache_after_put`, + // which runs inside `inner.put_object` above and + // covers ALL writers (HAMT internal nodes via this + // backend, plus manifest pages / dir-index / root + // commits via the conditional path, plus chunks + // via the pinning path). Putting the warm here + // too would duplicate the redb write — idempotent + // (delta=0 per block_cache::put net-delta logic) + // but wasted I/O. Single source of truth. return Ok(BlobPutResult { cid }); } Err(e) diff --git a/crates/fula-client/src/gateway_fetch.rs b/crates/fula-client/src/gateway_fetch.rs index 6eea4bb..4515ac9 100644 --- a/crates/fula-client/src/gateway_fetch.rs +++ b/crates/fula-client/src/gateway_fetch.rs @@ -80,8 +80,16 @@ pub(crate) const DEFAULT_FETCH_TIMEOUT: Duration = Duration::from_secs(10); /// with the requested CID's `to_string()` form. All six gateways speak /// standard `/ipfs/` URL conventions. pub fn default_gateway_urls() -> Vec { + // Cloudflare's `cloudflare-ipfs.com/ipfs/` gateway was retired — + // DNS no longer resolves (verified 2026-05-10). Keeping it in + // the top-K race burned a slot returning fast errors and starved + // real racers. Drop it. + // + // 5 working gateways in race-priority order (lower index = higher + // base priority). Phase 2.3 races the top-K (default 3) in + // parallel; the dynamic priority system lets a slow gateway drift + // down and recover automatically. vec![ - "https://cloudflare-ipfs.com/ipfs/{cid}".to_string(), "https://dweb.link/ipfs/{cid}".to_string(), "https://ipfs.io/ipfs/{cid}".to_string(), "https://trustless-gateway.link/ipfs/{cid}".to_string(), @@ -612,9 +620,28 @@ mod tests { // ============================================================ #[test] - fn test_default_gateway_urls_list_is_six_entries() { + fn test_default_gateway_urls_list_is_five_entries() { + // Issue #8 fix #1 — Cloudflare's `cloudflare-ipfs.com/ipfs/` + // gateway was retired (DNS no longer resolves, verified + // 2026-05-10). Dropped from the default list; remaining 5 + // racers are all live. let urls = default_gateway_urls(); - assert_eq!(urls.len(), 6); + assert_eq!(urls.len(), 5); + } + + #[test] + fn test_default_gateway_urls_does_not_include_cloudflare() { + // Issue #8 regression guard — the list must never re-include + // `cloudflare-ipfs.com` (DNS dead). Re-adding it would burn + // one of the top-K race slots returning DNS-fail errors. + let urls = default_gateway_urls(); + for url in &urls { + assert!( + !url.contains("cloudflare-ipfs.com"), + "cloudflare-ipfs.com is retired; must not be in default list; found: {}", + url + ); + } } #[test] @@ -634,16 +661,16 @@ mod tests { #[test] fn test_default_gateway_urls_quality_order() { - // Cloudflare is slot 0 (lowest latency, generous rate limits). - // Pinata is the last fallback. Verify the published quality - // order so a reorder is a deliberate change. + // Post-issue-#8 quality order: dweb.link is now slot 0 + // (lowest latency among the surviving gateways). Pinata is + // the last fallback. Verify the published quality order so + // a reorder is a deliberate change. let urls = default_gateway_urls(); - assert!(urls[0].contains("cloudflare-ipfs.com")); - assert!(urls[1].contains("dweb.link")); - assert!(urls[2].contains("ipfs.io")); - assert!(urls[3].contains("trustless-gateway.link")); - assert!(urls[4].contains("4everland.io")); - assert!(urls[5].contains("gateway.pinata.cloud")); + assert!(urls[0].contains("dweb.link")); + assert!(urls[1].contains("ipfs.io")); + assert!(urls[2].contains("trustless-gateway.link")); + assert!(urls[3].contains("4everland.io")); + assert!(urls[4].contains("gateway.pinata.cloud")); } #[test] @@ -738,9 +765,14 @@ mod tests { // ============================================================ #[test] - fn test_default_pool_has_six_gateways() { + fn test_default_pool_has_five_gateways() { + // Issue #8 fix #1 — Cloudflare's gateway was retired, dropping + // the default list from 6 to 5. Race concurrency stays at 3 + // (top-K, default 3) so we still parallel-race the same number + // of slots; we just lost a dead racer that was burning one of + // them. let pool = GatewayPool::default_pool(); - assert_eq!(pool.len(), 6); + assert_eq!(pool.len(), 5); assert_eq!(pool.race_concurrency, 3); } diff --git a/crates/fula-client/src/registry_resolver.rs b/crates/fula-client/src/registry_resolver.rs index c9fa601..d4a3e3b 100644 --- a/crates/fula-client/src/registry_resolver.rs +++ b/crates/fula-client/src/registry_resolver.rs @@ -406,13 +406,18 @@ pub fn decode_user_buckets_index(bytes: &[u8]) -> Result Vec { vec![ - "https://cloudflare-ipfs.com/ipfs/{cid}".into(), "https://dweb.link/ipfs/{cid}".into(), "https://ipfs.io/ipfs/{cid}".into(), "https://trustless-gateway.link/ipfs/{cid}".into(), diff --git a/crates/fula-client/tests/offline_write_warms_blocks_e2e.rs b/crates/fula-client/tests/offline_write_warms_blocks_e2e.rs new file mode 100644 index 0000000..4074d5a --- /dev/null +++ b/crates/fula-client/tests/offline_write_warms_blocks_e2e.rs @@ -0,0 +1,216 @@ +//! Regression test for issue #8 (the load-bearing breaker): writes do not +//! warm the local BLOCKS cache, so the freshly-written bytes are +//! unreadable offline unless an explicit master-up READ later re-fetches +//! the same CID. +//! +//! ## Why this test exists +//! +//! User-reported flow (FxFiles, real device): +//! +//! 1. Online (real S3 endpoint): upload a file → master returns ETag = +//! BLAKE3 raw-codec CID of the encrypted ciphertext. +//! 2. Online: list the bucket → SDK serves from in-memory forest cache +//! (dirty since the upload), NO master fetch, NO BLOCKS populate. +//! 3. Reinit to a bogus endpoint (simulate master-down). +//! 4. Offline: list the bucket → fresh client, empty in-memory state → +//! walks via `try_offline_fallback_with_cid_hint` → `cache.get(cid)` +//! → MISS (writes never populated BLOCKS) → propagates +//! `Master unreachable` error. +//! +//! The pre-fix code path: `S3BlobBackend::put` succeeds with a verified +//! CID but never calls `block_cache.put(&cid, &bytes)`. Subsequent +//! offline reads of that CID find an empty BLOCKS table. +//! +//! The post-fix path: `S3BlobBackend::put` (and equivalent seams in +//! `FulaClient::put_object_*`) call `cache.put(&cid, &bytes)` on every +//! successful PUT when walkable-v8 is enabled, so the just-written +//! bytes are immediately available for offline reads with no +//! dependency on a follow-up master-up read or on public-gateway DHT +//! propagation. +//! +//! ## What this test asserts +//! +//! Two distinct facts, in order: +//! +//! **Phase A — fix-#3 is active at the BlobBackend seam:** after a +//! `S3BlobBackend::put` succeeds against a wiremock master that +//! returns `ETag = BLAKE3(body)` (matching v0.4.4+ master behavior), +//! `block_cache.get(&cid)` returns the just-PUT bytes. Pre-fix this +//! is a MISS; post-fix it is a HIT. +//! +//! **Phase B — fix-#3 makes offline reads succeed:** after Phase A, +//! pointing the same backend at a bogus master URL and calling +//! `S3BlobBackend::get_with_cid_hint(path, Some(&cid))` returns the +//! original bytes WITHOUT any network call to a public IPFS gateway. +//! Pre-fix this returns an error; post-fix it returns the bytes from +//! BLOCKS. +//! +//! The wiremock gateways are configured to 404 every CID — models the +//! real production reality that freshly-pinned CIDs are not yet +//! propagated to public IPFS DHT, so the gateway race can NOT save +//! the offline read. The only thing that can save it is BLOCKS being +//! populated at write time. + +#![cfg(not(target_arch = "wasm32"))] + +use cid::Cid; +use cid::multihash::Multihash; +use fula_client::{Config, FulaClient, S3BlobBackend}; +use fula_crypto::wnfs_hamt::BlobBackend; +use tempfile::TempDir; +use wiremock::matchers::{method, path_regex}; +use wiremock::{Mock, MockServer, Request, Respond, ResponseTemplate}; + +const MULTIHASH_BLAKE3: u64 = 0x1e; +const CODEC_RAW: u64 = 0x55; + +fn blake3_raw_cid(data: &[u8]) -> Cid { + let h = blake3::hash(data); + let mh = Multihash::<64>::wrap(MULTIHASH_BLAKE3, h.as_bytes()) + .expect("blake3 multihash wrap"); + Cid::new_v1(CODEC_RAW, mh) +} + +/// wiremock responder that computes ETag = BLAKE3(body) per PUT. +/// Mirrors v0.4.4+ master behavior — master receives the encrypted +/// ciphertext, hashes it, returns the resulting CID string as the +/// ETag header so the SDK's walkable-v8 self-verify accepts it. +struct EchoCidEtag; + +impl Respond for EchoCidEtag { + fn respond(&self, req: &Request) -> ResponseTemplate { + let cid = blake3_raw_cid(&req.body); + // ETag header convention: master returns the CID string + // (unquoted is fine; the SDK trims quotes). + ResponseTemplate::new(200).insert_header("ETag", cid.to_string().as_str()) + } +} + +fn build_config(master_url: &str, cache_path: &std::path::Path) -> Config { + let mut config = Config::new(master_url); + config.timeout = std::time::Duration::from_secs(10); + config.walkable_v8_writer_enabled = true; + config.health_gate_enabled = true; + config.block_cache_enabled = true; + config.block_cache_path = Some(cache_path.to_path_buf()); + config.block_cache_max_bytes = 256 * 1024 * 1024; + // Gateways point at a URL template that doesn't resolve. + // Models the real production reality: just-pinned CIDs are not + // yet propagated to public IPFS DHT, so the gateway race will + // NOT save the offline read. Only BLOCKS can. + config.gateway_fallback_enabled = true; + config.gateway_fallback_urls = vec![ + // 127.0.0.1:1 always rejects connections — fastest possible + // failure, no DNS or TCP timeout wait. + "http://127.0.0.1:1/ipfs/{cid}".to_string(), + ]; + config.gateway_race_concurrency = 1; + config +} + +/// Issue #8 / fix #3 regression test. +/// +/// Demonstrates that `S3BlobBackend::put` must populate BLOCKS so a +/// subsequent offline read can serve the just-written bytes without +/// requiring either a follow-up master-up read or a public-IPFS-DHT +/// hit on the just-pinned CID. +/// +/// Pre-fix: this test FAILS at the Phase-A assertion (`cache.get(&cid)` +/// returns `Ok(None)` because PUT never wrote to BLOCKS). +/// +/// Post-fix: this test PASSES — both Phase A (cache populated) and +/// Phase B (offline read serves from cache) succeed. +#[tokio::test] +async fn write_warms_blocks_cache_so_offline_read_succeeds() { + // ── Phase A setup: wiremock master with realistic CID-ETag behavior. + let master = MockServer::start().await; + Mock::given(method("PUT")) + .and(path_regex(r"^/test-bucket/.+$")) + .respond_with(EchoCidEtag) + .mount(&master) + .await; + + let tmp = TempDir::new().expect("tempdir"); + let cache_path = tmp.path().join("blocks.redb"); + + let online_client = FulaClient::new(build_config(&master.uri(), &cache_path)) + .expect("build online FulaClient"); + let online_backend = S3BlobBackend::new(online_client.clone(), "test-bucket".to_string()); + + // The "encrypted HAMT node ciphertext" — opaque to this layer. + let body: Vec = b"v7 HAMT node ciphertext for issue-8 regression test".to_vec(); + let path = "__fula_forest_v7_nodes/aabbccddeeff"; + + let put_result = online_backend + .put(path, body.clone()) + .await + .expect("master PUT must succeed against wiremock returning 200 + CID ETag"); + + let cid = put_result + .cid + .expect( + "walkable-v8 self-verify must yield Some(cid) when master returns \ + a parseable CID ETag that matches BLAKE3(body)", + ); + + // Sanity: the CID master returned must match what we'd compute locally. + assert_eq!( + cid, + blake3_raw_cid(&body), + "wiremock master returned a non-BLAKE3(body) CID — test setup wrong" + ); + + // ── Phase A assertion: BLOCKS must have the bytes NOW, with no + // intervening read. THIS IS THE LOAD-BEARING ASSERTION FOR FIX #3. + let cache = online_client + .block_cache() + .expect("block_cache must be configured in this test"); + let cached = cache + .get(&cid) + .expect("redb read must not error") + .expect( + "BLOCKS must contain the just-PUT bytes — if this fails, \ + S3BlobBackend::put (or its FulaClient inner) is not warming \ + the cache after a successful PUT. That is precisely the \ + bug issue #8 reports.", + ); + assert_eq!( + cached.as_ref(), + body.as_slice(), + "cached bytes must byte-match the just-PUT body", + ); + + // ── Phase B setup: a SECOND, fresh client (simulating reinit) with + // a BOGUS master URL but the SAME on-disk cache. + drop(online_backend); + drop(online_client); + + let bogus_master = "http://127.0.0.1:1"; + let offline_client = FulaClient::new(build_config(bogus_master, &cache_path)) + .expect("build offline FulaClient against bogus URL"); + let offline_backend = S3BlobBackend::new(offline_client.clone(), "test-bucket".to_string()); + + // ── Phase B assertion: offline read MUST succeed solely from BLOCKS, + // proving fix #3 closes the user-reported gap. The gateway is + // 127.0.0.1:1 (always-rejects), so a successful return here means + // bytes came from the local cache — exactly what the user needs + // on a device that goes offline after a write. + let offline_bytes = offline_backend + .get_with_cid_hint(path, Some(&cid)) + .await + .expect( + "offline read with cid-hint MUST succeed — bytes are in BLOCKS \ + from Phase A's PUT, and the cid-hint variant of \ + try_offline_fallback short-circuits to a BLOCKS hit before \ + attempting the gateway race. If this fails, fix #3 did not \ + land or the offline-fallback wiring regressed.", + ); + + assert_eq!( + offline_bytes.as_slice(), + body.as_slice(), + "offline bytes must byte-match the originally-PUT body — a \ + mismatch would mean either cache corruption or BLOCKS \ + serving stale data", + ); +} diff --git a/packages/fula_client/ios/fula_client.podspec b/packages/fula_client/ios/fula_client.podspec index 9df9b33..044dd30 100644 --- a/packages/fula_client/ios/fula_client.podspec +++ b/packages/fula_client/ios/fula_client.podspec @@ -6,7 +6,7 @@ Pod::Spec.new do |s| s.name = 'fula_client' - s.version = '0.5.1' + s.version = '0.5.2' s.summary = 'Flutter SDK for Fula decentralized storage' s.description = <<-DESC A Flutter plugin providing client-side encryption, metadata privacy, diff --git a/packages/fula_client/pubspec.yaml b/packages/fula_client/pubspec.yaml index 1bfaa23..dfa5be5 100644 --- a/packages/fula_client/pubspec.yaml +++ b/packages/fula_client/pubspec.yaml @@ -1,6 +1,6 @@ name: fula_client description: Flutter SDK for Fula decentralized storage with client-side encryption, metadata privacy, and secure sharing. -version: 0.5.1 +version: 0.5.2 homepage: https://fx.land repository: https://github.com/functionland/fula-api issue_tracker: https://github.com/functionland/fula-api/issues diff --git a/scripts/watch-images-upload.sh b/scripts/watch-images-upload.sh index 2c55af4..87e3b10 100644 --- a/scripts/watch-images-upload.sh +++ b/scripts/watch-images-upload.sh @@ -1,10 +1,10 @@ #!/usr/bin/env bash # Run on the master, then upload an image from FxFiles. This tails the -# gateway log filtered to lines that pinpoint where Phase 1.2 / v0.5.1 +# gateway log filtered to lines that pinpoint where Phase 1.2 / v0.5.2 # migration is succeeding or failing for the user's `images` bucket: # # * "Populated/updated bucket_lookup_h" — Phase 1.2 ran (good — appears on first-flush AND on key-rotation flushes) -# * "Populated forest_manifest_cid" — v0.5.1 ran (good — should appear on every Phase 2 root commit) +# * "Populated forest_manifest_cid" — v0.5.2 ran (good — should appear on every Phase 2 root commit) # * "populate_bucket_lookup_h failed" # * "populate_forest_manifest_cid failed" # * "Failed to flush bucket"