From 556a9a93e659718bc0d19c590afa1a96d09ca075 Mon Sep 17 00:00:00 2001 From: Michal Rzeszutko Date: Mon, 22 Dec 2025 16:13:29 +0000 Subject: [PATCH] Blob storage documentation --- docs/docs-network/setup/blob_storage.md | 211 ++++++++++++++++++ docs/docs-network/setup/blob_upload.md | 199 +++++++++++++++++ docs/docs-words.txt | 5 + yarn-project/blob-client/README.md | 2 +- .../blob-client/src/archive/config.ts | 2 +- yarn-project/foundation/src/config/env_var.ts | 4 +- 6 files changed, 418 insertions(+), 5 deletions(-) create mode 100644 docs/docs-network/setup/blob_storage.md create mode 100644 docs/docs-network/setup/blob_upload.md diff --git a/docs/docs-network/setup/blob_storage.md b/docs/docs-network/setup/blob_storage.md new file mode 100644 index 000000000000..7cbda51e5f4b --- /dev/null +++ b/docs/docs-network/setup/blob_storage.md @@ -0,0 +1,211 @@ +--- +id: blob_storage +sidebar_position: 4 +title: Blob retrieval +description: Learn how Aztec nodes retrieve blob data for L1 transactions. +--- + +## Overview + +Aztec uses EIP-4844 blobs to publish transaction data to Ethereum Layer 1. Since blob data is only available on L1 for a limited period (~18 days / 4,096 epochs), nodes need reliable ways to store and retrieve blob data for synchronization and historical access. + +Aztec nodes can be configured to retrieve blobs from L1 consensus (beacon nodes), file stores (S3, GCS, R2), and archive services. + +:::tip Automatic Configuration +When using `--network [NETWORK_NAME]`, blob file stores are automatically configured for you. Most users don't need to manually configure blob storage. +::: + +:::warning Override Behavior +Setting the `BLOB_FILE_STORE_URLS` environment variable overrides the file store configuration from the network config. +::: + +## Understanding blob sources + +The blob client can retrieve blobs from multiple sources, tried in order: + +1. **File Store**: Fast retrieval from configured storage (S3, GCS, R2, local files, HTTPS) +2. **L1 Consensus**: Beacon node API to a (semi-)supernode for recent blobs (within ~18 days) +3. **Archive API**: Services like Blobscan for historical blob data + +For near-tip synchronization, the client will retry file stores with backoff to handle eventual consistency when blobs are still being uploaded by other validators. + +### L1 consensus and blob availability + +If your beacon node has access to [supernodes or semi-supernodes](https://ethereum.org/roadmap/fusaka/peerdas/), L1 consensus alone may be sufficient for retrieving blobs within the ~18 day retention period. With the Fusaka upgrade and [PeerDAS (Peer Data Availability Sampling)](https://eips.ethereum.org/EIPS/eip-7594), Ethereum uses erasure coding to split blobs into 128 columns, enabling robust data availability: + +- **Supernodes** (validators with ≥4,096 ETH staked): Custody all 128 columns and all blob data for the full ~18 day retention period. These nodes form the backbone of the network and continuously heal data gaps. +- **Semi-supernodes** (validators with ≥1,824 ETH / 57 validators): Handle at least 64 columns, enabling reconstruction of complete blob data. +- **Regular nodes**: Only download 1/8th of the data (8 of 128 columns) to verify availability. This is **not sufficient** to serve complete blob data. + +:::warning Supernodes +If L1 consensus is your only blob source, your beacon node must be a supernode or semi-supernode (or connected to one) to retrieve complete blobs. A regular node cannot reconstruct full blob data from its partial columns alone. 
+::: + +This means that for recent blobs, configuring `L1_CONSENSUS_HOST_URLS` pointing to a well-connected supernode or semi-supernode may be all you need. However, file stores and archive APIs are still recommended for: +- Faster retrieval (file stores are typically faster than L1 consensus queries) +- Historical access (blobs older than ~18 days are pruned from L1) +- Redundancy (multiple sources improve reliability) + +## Configuring blob sources + +### Environment variables + +Configure blob sources using environment variables: + +| Variable | Description | Example | +|----------|-------------|---------| +| `BLOB_FILE_STORE_URLS` | Comma-separated URLs to read blobs from | `gs://bucket/,s3://bucket/` | +| `L1_CONSENSUS_HOST_URLS` | Beacon node URLs (comma-separated) | `https://beacon.example.com` | +| `L1_CONSENSUS_HOST_API_KEYS` | API keys for beacon nodes | `key1,key2` | +| `L1_CONSENSUS_HOST_API_KEY_HEADERS` | Header names for API keys | `Authorization` | +| `BLOB_ARCHIVE_API_URL` | Archive API URL (e.g., Blobscan) | `https://api.blobscan.com` | +| `BLOB_ALLOW_EMPTY_SOURCES` | Allow no blob sources (default: false) | `false` | + +:::tip +If you want to contribute to the network by hosting a blob file store, see the [Blob upload guide](./blob_upload.md). +::: + +### Supported storage backends + +The blob client supports the same storage backends as snapshots: + +- **Google Cloud Storage** - `gs://bucket-name/path/` +- **Amazon S3** - `s3://bucket-name/path/` +- **Cloudflare R2** - `s3://bucket-name/path/?endpoint=https://[ACCOUNT_ID].r2.cloudflarestorage.com` +- **HTTP/HTTPS** (read-only) - `https://host/path` +- **Local filesystem** - `file:///absolute/path` + +### Storage path format + +Blobs are stored using the following path structure: + +``` +{base_url}/aztec-{l1ChainId}-{rollupVersion}-{rollupAddress}/blobs/{versionedBlobHash}.data +``` + +For example: +``` +gs://my-bucket/aztec-1-1-0x1234abcd.../blobs/0x01abc123...def.data +``` + +## Configuration examples + +### Basic file store configuration + +```bash +# Read blobs from GCS +BLOB_FILE_STORE_URLS=gs://my-snapshots/ +``` + +### Multiple read sources with L1 fallback + +```bash +# Try multiple sources in order +BLOB_FILE_STORE_URLS=gs://primary-bucket/,s3://backup-bucket/ + +# L1 consensus fallback +L1_CONSENSUS_HOST_URLS=https://beacon1.example.com,https://beacon2.example.com + +# Archive fallback for historical blobs +BLOB_ARCHIVE_API_URL=https://api.blobscan.com +``` + +### Cloudflare R2 configuration + +```bash +BLOB_FILE_STORE_URLS=s3://my-bucket/?endpoint=https://[ACCOUNT_ID].r2.cloudflarestorage.com +``` + +Replace `[ACCOUNT_ID]` with your Cloudflare account ID. + +### Local filesystem (for testing) + +```bash +BLOB_FILE_STORE_URLS=file:///data/blobs +``` + +## Authentication + +### Google Cloud Storage + +Set up [Application Default Credentials](https://cloud.google.com/docs/authentication/application-default-credentials): + +```bash +gcloud auth application-default login +``` + +Or use a service account key: + +```bash +export GOOGLE_APPLICATION_CREDENTIALS=/path/to/service-account-key.json +``` + +### Amazon S3 / Cloudflare R2 + +Set AWS credentials as environment variables: + +```bash +export AWS_ACCESS_KEY_ID=your-access-key +export AWS_SECRET_ACCESS_KEY=your-secret-key +``` + +For R2, these credentials come from your Cloudflare R2 API tokens. + +## How blob retrieval works + +When a node needs blobs for a block, the blob client follows this retrieval order: + +### During historical sync +1. 
**File Store** - Quick lookup in configured file stores +2. **L1 Consensus** - Query beacon nodes using slot number +3. **Archive API** - Fall back to Blobscan or similar service + +### During near-tip sync +1. **File Store** - Quick lookup (no retries) +2. **L1 Consensus** - Query beacon nodes +3. **File Store with retries** - Retry with backoff for eventual consistency +4. **Archive API** - Final fallback + +## Troubleshooting + +### No blob sources configured + +**Issue**: Node starts with warning about no blob sources. + +**Solutions**: +- Configure at least one of: `BLOB_FILE_STORE_URLS`, `L1_CONSENSUS_HOST_URLS`, or `BLOB_ARCHIVE_API_URL` +- Set `BLOB_ALLOW_EMPTY_SOURCES=true` only if you understand the implications (node may fail to sync) + +### Blob retrieval fails + +**Issue**: Node cannot retrieve blobs for a block. + +**Solutions**: +- Verify your file store URLs are accessible +- Check L1 consensus host connectivity +- Ensure authentication credentials are configured +- Try using multiple file store URLs for redundancy + +### L1 consensus host errors + +**Issue**: Cannot connect to beacon nodes. + +**Solutions**: +- Verify beacon node URLs are correct and accessible +- Check if API keys are required and correctly configured +- Ensure the beacon node is synced +- Try multiple beacon node URLs for redundancy + +## Best practices + +- **Configure multiple sources**: Use multiple file store URLs and L1 consensus hosts for redundancy +- **Use file stores for production**: File stores provide faster, more reliable blob retrieval than L1 consensus +- **Use archive API for historical access**: Configure `BLOB_ARCHIVE_API_URL` for accessing blobs older than ~18 days. Even with PeerDAS supernodes providing robust data availability, blob data is pruned from L1 after 4,096 epochs. Archive services like [Blobscan](https://blobscan.com/) store historical blob data indefinitely + +## Next Steps + +- Learn how to [host a blob file store](./blob_upload.md) to contribute to the network +- Learn about [using snapshots](./syncing_best_practices.md) for faster node synchronization +- Set up [monitoring](../operation/monitoring.md) to track your node's blob retrieval +- Check the [CLI reference](../reference/cli_reference.md) for additional blob-related options +- Join the [Aztec Discord](https://discord.gg/aztec) for support diff --git a/docs/docs-network/setup/blob_upload.md b/docs/docs-network/setup/blob_upload.md new file mode 100644 index 000000000000..1110637127b8 --- /dev/null +++ b/docs/docs-network/setup/blob_upload.md @@ -0,0 +1,199 @@ +--- +id: blob_upload +sidebar_position: 5 +title: Blob upload +description: Learn how to host a blob file store to contribute to the Aztec network. +--- + +## Overview + +While most nodes only need to retrieve blobs, you can contribute to the network by hosting a blob file store. When configured with an upload URL, your node will automatically upload blobs it retrieves to your file store, making them available for other nodes to download. + +:::note Upload is Optional +Configuring blob upload is optional. You can still download blobs from file stores without uploading them yourself — other network participants (such as sequencers and validators) upload blobs to shared storage, making them available for all nodes to retrieve. 
+::: + +## Prerequisites + +Before configuring blob upload, you should: + +- Have access to cloud storage (Google Cloud Storage, Amazon S3, or Cloudflare R2) with **write permissions** +- Understand the [blob retrieval](./blob_storage.md) configuration + +## Configuring blob upload + +### Environment variable + +Configure blob upload using the following environment variable in your `.env` file: + +| Variable | Description | Example | +|----------|-------------|---------| +| `BLOB_FILE_STORE_UPLOAD_URL` | URL for uploading blobs | `s3://my-bucket/blobs/` | + +### Supported storage backends + +The blob client supports the following storage backends for upload: + +- **Google Cloud Storage** - `gs://bucket-name/path/` +- **Amazon S3** - `s3://bucket-name/path/` +- **Cloudflare R2** - `s3://bucket-name/path/?endpoint=https://[ACCOUNT_ID].r2.cloudflarestorage.com` +- **Local filesystem** - `file:///absolute/path` + +:::warning +HTTPS URLs are read-only and cannot be used for uploads. +::: + +### Storage path format + +Blobs are stored using the following path structure: + +``` +{base_url}/aztec-{l1ChainId}-{rollupVersion}-{rollupAddress}/blobs/{versionedBlobHash}.data +``` + +For example: +``` +gs://my-bucket/aztec-1-1-0x1234abcd.../blobs/0x01abc123...def.data +``` + +## Healthcheck file + +When blob upload is configured, your node uploads a `.healthcheck` file to the storage path on startup and periodically thereafter. Other nodes use this file to verify connectivity to your file store before attempting to download blobs. + +:::warning Exclude from pruning +If you configure lifecycle rules or pruning policies on your storage bucket, ensure the `.healthcheck` file is excluded. Deleting this file will cause connectivity checks to fail on other nodes. +::: + +## Configuration examples + +### Google Cloud Storage + +```bash +BLOB_FILE_STORE_UPLOAD_URL=gs://my-bucket/blobs/ +``` + +### Amazon S3 + +```bash +BLOB_FILE_STORE_UPLOAD_URL=s3://my-bucket/blobs/ +``` + +### Cloudflare R2 + +```bash +BLOB_FILE_STORE_UPLOAD_URL=s3://my-bucket/blobs/?endpoint=https://[ACCOUNT_ID].r2.cloudflarestorage.com +``` + +Replace `[ACCOUNT_ID]` with your Cloudflare account ID. + +### Local filesystem (for testing) + +```bash +BLOB_FILE_STORE_UPLOAD_URL=file:///data/blobs +``` + +## Authentication + +Upload requires write permissions to your storage bucket. + +### Google Cloud Storage + +Set up [Application Default Credentials](https://cloud.google.com/docs/authentication/application-default-credentials): + +```bash +gcloud auth application-default login +``` + +Or use a service account key with write permissions: + +```bash +export GOOGLE_APPLICATION_CREDENTIALS=/path/to/service-account-key.json +``` + +### Amazon S3 / Cloudflare R2 + +Set AWS credentials as environment variables: + +```bash +export AWS_ACCESS_KEY_ID=your-access-key +export AWS_SECRET_ACCESS_KEY=your-secret-key +``` + +For R2, these credentials come from your Cloudflare R2 API tokens. Ensure the token has write permissions. + +## Exposing a public HTTP endpoint + +While you upload blobs using SDK URLs (`gs://`, `s3://`), you should configure a public HTTP endpoint so other nodes can download blobs without needing cloud credentials. This allows anyone to add your file store as a read source using a simple HTTPS URL. + +### Google Cloud Storage + +GCS buckets can be accessed publicly at `https://storage.googleapis.com/BUCKET_NAME/path/to/object`. + +To enable public access: +1. 
Go to your bucket in the [Google Cloud Console](https://console.cloud.google.com/storage/browser) +2. Select the **Permissions** tab +3. Click **Grant Access** +4. Add `allUsers` as a principal with the **Storage Object Viewer** role + +See [Making data public](https://cloud.google.com/storage/docs/access-control/making-data-public) for detailed instructions. + +Once configured, other nodes can use: +```bash +BLOB_FILE_STORE_URLS=https://storage.googleapis.com/my-bucket/blobs/ +``` + +### Amazon S3 + +S3 buckets can be accessed publicly via static website hosting at `http://BUCKET_NAME.s3-website.REGION.amazonaws.com`. + +To enable public access: +1. Go to your bucket in the [AWS S3 Console](https://console.aws.amazon.com/s3/) +2. Disable **Block Public Access** settings +3. Add a bucket policy granting public read access +4. Enable **Static website hosting** in the bucket properties + +See [Hosting a static website on S3](https://docs.aws.amazon.com/AmazonS3/latest/userguide/WebsiteHosting.html) for detailed instructions. + +:::note +S3 website endpoints only support HTTP. For HTTPS, use [CloudFront](https://docs.aws.amazon.com/AmazonS3/latest/userguide/website-hosting-cloudfront-walkthrough.html) as a CDN in front of your bucket. +::: + +### Cloudflare R2 + +R2 buckets can expose a public HTTP endpoint via a custom domain or the managed `r2.dev` subdomain. + +To enable public access: +1. Go to your bucket in the [Cloudflare Dashboard](https://dash.cloudflare.com/) +2. Select **Settings** > **Public Access** +3. Either enable the `r2.dev` subdomain or connect a custom domain + +See [Public buckets](https://developers.cloudflare.com/r2/buckets/public-buckets/) for detailed instructions. + +Once configured, other nodes can use: +```bash +BLOB_FILE_STORE_URLS=https://pub-[ID].r2.dev/ +# or with custom domain: +BLOB_FILE_STORE_URLS=https://blobs.yourdomain.com/ +``` + +:::tip +R2 offers free egress, making it cost-effective for public blob distribution. +::: + +## Troubleshooting + +### Upload fails + +**Issue**: Blobs are not being uploaded to file store. + +**Solutions**: +- Verify `BLOB_FILE_STORE_UPLOAD_URL` is set +- Check write permissions on the storage bucket +- Ensure credentials are configured (AWS/GCP) +- Note: HTTPS URLs are read-only and cannot be used for uploads + +## Next Steps + +- Learn about [blob retrieval](./blob_storage.md) configuration +- Learn about [using snapshots](./syncing_best_practices.md) for faster node synchronization +- Join the [Aztec Discord](https://discord.gg/aztec) for support diff --git a/docs/docs-words.txt b/docs/docs-words.txt index df9c918ecc13..dd375730eecc 100644 --- a/docs/docs-words.txt +++ b/docs/docs-words.txt @@ -371,3 +371,8 @@ notegetteroptions lookback noirfmt postprocesses +blobscan +fusaka +healthcheck +supernode +supernodes diff --git a/yarn-project/blob-client/README.md b/yarn-project/blob-client/README.md index 84ee2cbf7988..9ced547f797c 100644 --- a/yarn-project/blob-client/README.md +++ b/yarn-project/blob-client/README.md @@ -31,7 +31,7 @@ URL for uploading blobs to a file store. **L1 Consensus Host URLs** (`L1_CONSENSUS_HOST_URLS`): Beacon node URLs for fetching recent blobs directly from L1. -**Archive API URL** (`BLOB_SINK_ARCHIVE_API_URL`): +**Archive API URL** (`BLOB_ARCHIVE_API_URL`): Blobscan or similar archive API for historical blob data. 
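+
+For reference, a minimal configuration combining the three source types described above might look like this (the bucket name and host URLs are placeholders, not values shipped with the client):
+
+```bash
+# Primary: read blobs from a file store (placeholder GCS bucket)
+BLOB_FILE_STORE_URLS=gs://example-blob-store/
+# Fallback: fetch recent blobs from a beacon node (placeholder host)
+L1_CONSENSUS_HOST_URLS=https://beacon.example.com
+# Last resort: archive API for blobs already pruned from L1
+BLOB_ARCHIVE_API_URL=https://api.blobscan.com
+```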
### File Store Connectivity Testing diff --git a/yarn-project/blob-client/src/archive/config.ts b/yarn-project/blob-client/src/archive/config.ts index 49b38fd4e46e..62ba1c0aa1ba 100644 --- a/yarn-project/blob-client/src/archive/config.ts +++ b/yarn-project/blob-client/src/archive/config.ts @@ -7,7 +7,7 @@ export type BlobArchiveApiConfig = { export const blobArchiveApiConfigMappings: ConfigMappingsType = { archiveApiUrl: { - env: 'BLOB_SINK_ARCHIVE_API_URL', + env: 'BLOB_ARCHIVE_API_URL', description: 'The URL of the archive API', }, ...pickConfigMappings(l1ReaderConfigMappings, ['l1ChainId']), diff --git a/yarn-project/foundation/src/config/env_var.ts b/yarn-project/foundation/src/config/env_var.ts index 9f113c183dab..ca7b280df6fc 100644 --- a/yarn-project/foundation/src/config/env_var.ts +++ b/yarn-project/foundation/src/config/env_var.ts @@ -21,9 +21,7 @@ export type EnvVar = | 'BB_NUM_IVC_VERIFIERS' | 'BB_IVC_CONCURRENCY' | 'BOOTSTRAP_NODES' - | 'BLOB_SINK_ARCHIVE_API_URL' - | 'BLOB_SINK_PORT' - | 'BLOB_SINK_URL' + | 'BLOB_ARCHIVE_API_URL' | 'BLOB_FILE_STORE_URLS' | 'BLOB_FILE_STORE_UPLOAD_URL' | 'BLOB_HEALTHCHECK_UPLOAD_INTERVAL_MINUTES'