From 556a9a93e659718bc0d19c590afa1a96d09ca075 Mon Sep 17 00:00:00 2001 From: Michal Rzeszutko Date: Mon, 22 Dec 2025 16:13:29 +0000 Subject: [PATCH] Blob storage documentation --- docs/docs-network/setup/blob_storage.md | 211 ++++++++++++++++++ docs/docs-network/setup/blob_upload.md | 199 +++++++++++++++++ docs/docs-words.txt | 5 + yarn-project/blob-client/README.md | 2 +- .../blob-client/src/archive/config.ts | 2 +- yarn-project/foundation/src/config/env_var.ts | 4 +- 6 files changed, 418 insertions(+), 5 deletions(-) create mode 100644 docs/docs-network/setup/blob_storage.md create mode 100644 docs/docs-network/setup/blob_upload.md diff --git a/docs/docs-network/setup/blob_storage.md b/docs/docs-network/setup/blob_storage.md new file mode 100644 index 000000000000..7cbda51e5f4b --- /dev/null +++ b/docs/docs-network/setup/blob_storage.md @@ -0,0 +1,211 @@ +--- +id: blob_storage +sidebar_position: 4 +title: Blob retrieval +description: Learn how Aztec nodes retrieve blob data for L1 transactions. +--- + +## Overview + +Aztec uses EIP-4844 blobs to publish transaction data to Ethereum Layer 1. Since blob data is only available on L1 for a limited period (~18 days / 4,096 epochs), nodes need reliable ways to store and retrieve blob data for synchronization and historical access. + +Aztec nodes can be configured to retrieve blobs from L1 consensus (beacon nodes), file stores (S3, GCS, R2), and archive services. + +:::tip Automatic Configuration +When using `--network [NETWORK_NAME]`, blob file stores are automatically configured for you. Most users don't need to manually configure blob storage. +::: + +:::warning Override Behavior +Setting the `BLOB_FILE_STORE_URLS` environment variable overrides the file store configuration from the network config. +::: + +## Understanding blob sources + +The blob client can retrieve blobs from multiple sources, tried in order: + +1. **File Store**: Fast retrieval from configured storage (S3, GCS, R2, local files, HTTPS) +2. **L1 Consensus**: Beacon node API to a (semi-)supernode for recent blobs (within ~18 days) +3. **Archive API**: Services like Blobscan for historical blob data + +For near-tip synchronization, the client will retry file stores with backoff to handle eventual consistency when blobs are still being uploaded by other validators. + +### L1 consensus and blob availability + +If your beacon node has access to [supernodes or semi-supernodes](https://ethereum.org/roadmap/fusaka/peerdas/), L1 consensus alone may be sufficient for retrieving blobs within the ~18 day retention period. With the Fusaka upgrade and [PeerDAS (Peer Data Availability Sampling)](https://eips.ethereum.org/EIPS/eip-7594), Ethereum uses erasure coding to split blobs into 128 columns, enabling robust data availability: + +- **Supernodes** (validators with ≥4,096 ETH staked): Custody all 128 columns and all blob data for the full ~18 day retention period. These nodes form the backbone of the network and continuously heal data gaps. +- **Semi-supernodes** (validators with ≥1,824 ETH / 57 validators): Handle at least 64 columns, enabling reconstruction of complete blob data. +- **Regular nodes**: Only download 1/8th of the data (8 of 128 columns) to verify availability. This is **not sufficient** to serve complete blob data. + +:::warning Supernodes +If L1 consensus is your only blob source, your beacon node must be a supernode or semi-supernode (or connected to one) to retrieve complete blobs. A regular node cannot reconstruct full blob data from its partial columns alone. 
+::: + +This means that for recent blobs, configuring `L1_CONSENSUS_HOST_URLS` pointing to a well-connected supernode or semi-supernode may be all you need. However, file stores and archive APIs are still recommended for: +- Faster retrieval (file stores are typically faster than L1 consensus queries) +- Historical access (blobs older than ~18 days are pruned from L1) +- Redundancy (multiple sources improve reliability) + +## Configuring blob sources + +### Environment variables + +Configure blob sources using environment variables: + +| Variable | Description | Example | +|----------|-------------|---------| +| `BLOB_FILE_STORE_URLS` | Comma-separated URLs to read blobs from | `gs://bucket/,s3://bucket/` | +| `L1_CONSENSUS_HOST_URLS` | Beacon node URLs (comma-separated) | `https://beacon.example.com` | +| `L1_CONSENSUS_HOST_API_KEYS` | API keys for beacon nodes | `key1,key2` | +| `L1_CONSENSUS_HOST_API_KEY_HEADERS` | Header names for API keys | `Authorization` | +| `BLOB_ARCHIVE_API_URL` | Archive API URL (e.g., Blobscan) | `https://api.blobscan.com` | +| `BLOB_ALLOW_EMPTY_SOURCES` | Allow no blob sources (default: false) | `false` | + +:::tip +If you want to contribute to the network by hosting a blob file store, see the [Blob upload guide](./blob_upload.md). +::: + +### Supported storage backends + +The blob client supports the same storage backends as snapshots: + +- **Google Cloud Storage** - `gs://bucket-name/path/` +- **Amazon S3** - `s3://bucket-name/path/` +- **Cloudflare R2** - `s3://bucket-name/path/?endpoint=https://[ACCOUNT_ID].r2.cloudflarestorage.com` +- **HTTP/HTTPS** (read-only) - `https://host/path` +- **Local filesystem** - `file:///absolute/path` + +### Storage path format + +Blobs are stored using the following path structure: + +``` +{base_url}/aztec-{l1ChainId}-{rollupVersion}-{rollupAddress}/blobs/{versionedBlobHash}.data +``` + +For example: +``` +gs://my-bucket/aztec-1-1-0x1234abcd.../blobs/0x01abc123...def.data +``` + +## Configuration examples + +### Basic file store configuration + +```bash +# Read blobs from GCS +BLOB_FILE_STORE_URLS=gs://my-snapshots/ +``` + +### Multiple read sources with L1 fallback + +```bash +# Try multiple sources in order +BLOB_FILE_STORE_URLS=gs://primary-bucket/,s3://backup-bucket/ + +# L1 consensus fallback +L1_CONSENSUS_HOST_URLS=https://beacon1.example.com,https://beacon2.example.com + +# Archive fallback for historical blobs +BLOB_ARCHIVE_API_URL=https://api.blobscan.com +``` + +### Cloudflare R2 configuration + +```bash +BLOB_FILE_STORE_URLS=s3://my-bucket/?endpoint=https://[ACCOUNT_ID].r2.cloudflarestorage.com +``` + +Replace `[ACCOUNT_ID]` with your Cloudflare account ID. + +### Local filesystem (for testing) + +```bash +BLOB_FILE_STORE_URLS=file:///data/blobs +``` + +## Authentication + +### Google Cloud Storage + +Set up [Application Default Credentials](https://cloud.google.com/docs/authentication/application-default-credentials): + +```bash +gcloud auth application-default login +``` + +Or use a service account key: + +```bash +export GOOGLE_APPLICATION_CREDENTIALS=/path/to/service-account-key.json +``` + +### Amazon S3 / Cloudflare R2 + +Set AWS credentials as environment variables: + +```bash +export AWS_ACCESS_KEY_ID=your-access-key +export AWS_SECRET_ACCESS_KEY=your-secret-key +``` + +For R2, these credentials come from your Cloudflare R2 API tokens. + +## How blob retrieval works + +When a node needs blobs for a block, the blob client follows this retrieval order: + +### During historical sync +1. 
**File Store** - Quick lookup in configured file stores +2. **L1 Consensus** - Query beacon nodes using slot number +3. **Archive API** - Fall back to Blobscan or similar service + +### During near-tip sync +1. **File Store** - Quick lookup (no retries) +2. **L1 Consensus** - Query beacon nodes +3. **File Store with retries** - Retry with backoff for eventual consistency +4. **Archive API** - Final fallback + +## Troubleshooting + +### No blob sources configured + +**Issue**: Node starts with warning about no blob sources. + +**Solutions**: +- Configure at least one of: `BLOB_FILE_STORE_URLS`, `L1_CONSENSUS_HOST_URLS`, or `BLOB_ARCHIVE_API_URL` +- Set `BLOB_ALLOW_EMPTY_SOURCES=true` only if you understand the implications (node may fail to sync) + +### Blob retrieval fails + +**Issue**: Node cannot retrieve blobs for a block. + +**Solutions**: +- Verify your file store URLs are accessible +- Check L1 consensus host connectivity +- Ensure authentication credentials are configured +- Try using multiple file store URLs for redundancy + +### L1 consensus host errors + +**Issue**: Cannot connect to beacon nodes. + +**Solutions**: +- Verify beacon node URLs are correct and accessible +- Check if API keys are required and correctly configured +- Ensure the beacon node is synced +- Try multiple beacon node URLs for redundancy + +## Best practices + +- **Configure multiple sources**: Use multiple file store URLs and L1 consensus hosts for redundancy +- **Use file stores for production**: File stores provide faster, more reliable blob retrieval than L1 consensus +- **Use archive API for historical access**: Configure `BLOB_ARCHIVE_API_URL` for accessing blobs older than ~18 days. Even with PeerDAS supernodes providing robust data availability, blob data is pruned from L1 after 4,096 epochs. Archive services like [Blobscan](https://blobscan.com/) store historical blob data indefinitely + +## Next Steps + +- Learn how to [host a blob file store](./blob_upload.md) to contribute to the network +- Learn about [using snapshots](./syncing_best_practices.md) for faster node synchronization +- Set up [monitoring](../operation/monitoring.md) to track your node's blob retrieval +- Check the [CLI reference](../reference/cli_reference.md) for additional blob-related options +- Join the [Aztec Discord](https://discord.gg/aztec) for support diff --git a/docs/docs-network/setup/blob_upload.md b/docs/docs-network/setup/blob_upload.md new file mode 100644 index 000000000000..1110637127b8 --- /dev/null +++ b/docs/docs-network/setup/blob_upload.md @@ -0,0 +1,199 @@ +--- +id: blob_upload +sidebar_position: 5 +title: Blob upload +description: Learn how to host a blob file store to contribute to the Aztec network. +--- + +## Overview + +While most nodes only need to retrieve blobs, you can contribute to the network by hosting a blob file store. When configured with an upload URL, your node will automatically upload blobs it retrieves to your file store, making them available for other nodes to download. + +:::note Upload is Optional +Configuring blob upload is optional. You can still download blobs from file stores without uploading them yourself — other network participants (such as sequencers and validators) upload blobs to shared storage, making them available for all nodes to retrieve. 
+::: + +## Prerequisites + +Before configuring blob upload, you should: + +- Have access to cloud storage (Google Cloud Storage, Amazon S3, or Cloudflare R2) with **write permissions** +- Understand the [blob retrieval](./blob_storage.md) configuration + +## Configuring blob upload + +### Environment variable + +Configure blob upload using the following environment variable in your `.env` file: + +| Variable | Description | Example | +|----------|-------------|---------| +| `BLOB_FILE_STORE_UPLOAD_URL` | URL for uploading blobs | `s3://my-bucket/blobs/` | + +### Supported storage backends + +The blob client supports the following storage backends for upload: + +- **Google Cloud Storage** - `gs://bucket-name/path/` +- **Amazon S3** - `s3://bucket-name/path/` +- **Cloudflare R2** - `s3://bucket-name/path/?endpoint=https://[ACCOUNT_ID].r2.cloudflarestorage.com` +- **Local filesystem** - `file:///absolute/path` + +:::warning +HTTPS URLs are read-only and cannot be used for uploads. +::: + +### Storage path format + +Blobs are stored using the following path structure: + +``` +{base_url}/aztec-{l1ChainId}-{rollupVersion}-{rollupAddress}/blobs/{versionedBlobHash}.data +``` + +For example: +``` +gs://my-bucket/aztec-1-1-0x1234abcd.../blobs/0x01abc123...def.data +``` + +## Healthcheck file + +When blob upload is configured, your node uploads a `.healthcheck` file to the storage path on startup and periodically thereafter. Other nodes use this file to verify connectivity to your file store before attempting to download blobs. + +:::warning Exclude from pruning +If you configure lifecycle rules or pruning policies on your storage bucket, ensure the `.healthcheck` file is excluded. Deleting this file will cause connectivity checks to fail on other nodes. +::: + +## Configuration examples + +### Google Cloud Storage + +```bash +BLOB_FILE_STORE_UPLOAD_URL=gs://my-bucket/blobs/ +``` + +### Amazon S3 + +```bash +BLOB_FILE_STORE_UPLOAD_URL=s3://my-bucket/blobs/ +``` + +### Cloudflare R2 + +```bash +BLOB_FILE_STORE_UPLOAD_URL=s3://my-bucket/blobs/?endpoint=https://[ACCOUNT_ID].r2.cloudflarestorage.com +``` + +Replace `[ACCOUNT_ID]` with your Cloudflare account ID. + +### Local filesystem (for testing) + +```bash +BLOB_FILE_STORE_UPLOAD_URL=file:///data/blobs +``` + +## Authentication + +Upload requires write permissions to your storage bucket. + +### Google Cloud Storage + +Set up [Application Default Credentials](https://cloud.google.com/docs/authentication/application-default-credentials): + +```bash +gcloud auth application-default login +``` + +Or use a service account key with write permissions: + +```bash +export GOOGLE_APPLICATION_CREDENTIALS=/path/to/service-account-key.json +``` + +### Amazon S3 / Cloudflare R2 + +Set AWS credentials as environment variables: + +```bash +export AWS_ACCESS_KEY_ID=your-access-key +export AWS_SECRET_ACCESS_KEY=your-secret-key +``` + +For R2, these credentials come from your Cloudflare R2 API tokens. Ensure the token has write permissions. + +## Exposing a public HTTP endpoint + +While you upload blobs using SDK URLs (`gs://`, `s3://`), you should configure a public HTTP endpoint so other nodes can download blobs without needing cloud credentials. This allows anyone to add your file store as a read source using a simple HTTPS URL. + +### Google Cloud Storage + +GCS buckets can be accessed publicly at `https://storage.googleapis.com/BUCKET_NAME/path/to/object`. + +To enable public access: +1. 
Go to your bucket in the [Google Cloud Console](https://console.cloud.google.com/storage/browser) +2. Select the **Permissions** tab +3. Click **Grant Access** +4. Add `allUsers` as a principal with the **Storage Object Viewer** role + +See [Making data public](https://cloud.google.com/storage/docs/access-control/making-data-public) for detailed instructions. + +Once configured, other nodes can use: +```bash +BLOB_FILE_STORE_URLS=https://storage.googleapis.com/my-bucket/blobs/ +``` + +### Amazon S3 + +S3 buckets can be accessed publicly via static website hosting at `http://BUCKET_NAME.s3-website.REGION.amazonaws.com`. + +To enable public access: +1. Go to your bucket in the [AWS S3 Console](https://console.aws.amazon.com/s3/) +2. Disable **Block Public Access** settings +3. Add a bucket policy granting public read access +4. Enable **Static website hosting** in the bucket properties + +See [Hosting a static website on S3](https://docs.aws.amazon.com/AmazonS3/latest/userguide/WebsiteHosting.html) for detailed instructions. + +:::note +S3 website endpoints only support HTTP. For HTTPS, use [CloudFront](https://docs.aws.amazon.com/AmazonS3/latest/userguide/website-hosting-cloudfront-walkthrough.html) as a CDN in front of your bucket. +::: + +### Cloudflare R2 + +R2 buckets can expose a public HTTP endpoint via a custom domain or the managed `r2.dev` subdomain. + +To enable public access: +1. Go to your bucket in the [Cloudflare Dashboard](https://dash.cloudflare.com/) +2. Select **Settings** > **Public Access** +3. Either enable the `r2.dev` subdomain or connect a custom domain + +See [Public buckets](https://developers.cloudflare.com/r2/buckets/public-buckets/) for detailed instructions. + +Once configured, other nodes can use: +```bash +BLOB_FILE_STORE_URLS=https://pub-[ID].r2.dev/ +# or with custom domain: +BLOB_FILE_STORE_URLS=https://blobs.yourdomain.com/ +``` + +:::tip +R2 offers free egress, making it cost-effective for public blob distribution. +::: + +## Troubleshooting + +### Upload fails + +**Issue**: Blobs are not being uploaded to file store. + +**Solutions**: +- Verify `BLOB_FILE_STORE_UPLOAD_URL` is set +- Check write permissions on the storage bucket +- Ensure credentials are configured (AWS/GCP) +- Note: HTTPS URLs are read-only and cannot be used for uploads + +## Next Steps + +- Learn about [blob retrieval](./blob_storage.md) configuration +- Learn about [using snapshots](./syncing_best_practices.md) for faster node synchronization +- Join the [Aztec Discord](https://discord.gg/aztec) for support diff --git a/docs/docs-words.txt b/docs/docs-words.txt index df9c918ecc13..dd375730eecc 100644 --- a/docs/docs-words.txt +++ b/docs/docs-words.txt @@ -371,3 +371,8 @@ notegetteroptions lookback noirfmt postprocesses +blobscan +fusaka +healthcheck +supernode +supernodes diff --git a/yarn-project/blob-client/README.md b/yarn-project/blob-client/README.md index 84ee2cbf7988..9ced547f797c 100644 --- a/yarn-project/blob-client/README.md +++ b/yarn-project/blob-client/README.md @@ -31,7 +31,7 @@ URL for uploading blobs to a file store. **L1 Consensus Host URLs** (`L1_CONSENSUS_HOST_URLS`): Beacon node URLs for fetching recent blobs directly from L1. -**Archive API URL** (`BLOB_SINK_ARCHIVE_API_URL`): +**Archive API URL** (`BLOB_ARCHIVE_API_URL`): Blobscan or similar archive API for historical blob data. 
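+
+For reference, a minimal configuration combining the three source types described above might look like this (the bucket name and host URLs are placeholders, not values shipped with the client):
+
+```bash
+# Primary: read blobs from a file store (placeholder GCS bucket)
+BLOB_FILE_STORE_URLS=gs://example-blob-store/
+# Fallback: fetch recent blobs from a beacon node (placeholder host)
+L1_CONSENSUS_HOST_URLS=https://beacon.example.com
+# Last resort: archive API for blobs already pruned from L1
+BLOB_ARCHIVE_API_URL=https://api.blobscan.com
+```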
### File Store Connectivity Testing diff --git a/yarn-project/blob-client/src/archive/config.ts b/yarn-project/blob-client/src/archive/config.ts index 49b38fd4e46e..62ba1c0aa1ba 100644 --- a/yarn-project/blob-client/src/archive/config.ts +++ b/yarn-project/blob-client/src/archive/config.ts @@ -7,7 +7,7 @@ export type BlobArchiveApiConfig = { export const blobArchiveApiConfigMappings: ConfigMappingsType = { archiveApiUrl: { - env: 'BLOB_SINK_ARCHIVE_API_URL', + env: 'BLOB_ARCHIVE_API_URL', description: 'The URL of the archive API', }, ...pickConfigMappings(l1ReaderConfigMappings, ['l1ChainId']), diff --git a/yarn-project/foundation/src/config/env_var.ts b/yarn-project/foundation/src/config/env_var.ts index 9f113c183dab..ca7b280df6fc 100644 --- a/yarn-project/foundation/src/config/env_var.ts +++ b/yarn-project/foundation/src/config/env_var.ts @@ -21,9 +21,7 @@ export type EnvVar = | 'BB_NUM_IVC_VERIFIERS' | 'BB_IVC_CONCURRENCY' | 'BOOTSTRAP_NODES' - | 'BLOB_SINK_ARCHIVE_API_URL' - | 'BLOB_SINK_PORT' - | 'BLOB_SINK_URL' + | 'BLOB_ARCHIVE_API_URL' | 'BLOB_FILE_STORE_URLS' | 'BLOB_FILE_STORE_UPLOAD_URL' | 'BLOB_HEALTHCHECK_UPLOAD_INTERVAL_MINUTES'