From 9ed88681e6d4a3cc9522ab0b7b101c5f290a8448 Mon Sep 17 00:00:00 2001 From: Leo Kettmeir Date: Tue, 10 Mar 2026 01:45:22 +0100 Subject: [PATCH] refactor: switch modules bucket to R2 (#1315) --- api/src/analysis.rs | 2 +- api/src/api/package.rs | 6 +++--- api/src/buckets.rs | 4 +++- api/src/main.rs | 16 +++++++++------- api/src/npm/tarball.rs | 2 +- api/src/publish.rs | 17 ++++++++++------- api/src/tarball.rs | 5 +++-- api/src/util.rs | 5 ++--- lb/local.ts | 8 +++++--- lb/main.ts | 5 ++--- lb/types.ts | 2 +- terraform/buckets.tf | 19 +++++++++++++------ terraform/lb.tf | 8 ++++---- 13 files changed, 57 insertions(+), 42 deletions(-) diff --git a/api/src/analysis.rs b/api/src/analysis.rs index 6e645eaaa..b5983aa04 100644 --- a/api/src/analysis.rs +++ b/api/src/analysis.rs @@ -43,7 +43,6 @@ use tracing::Instrument; use tracing::instrument; use url::Url; -use crate::buckets::BucketWithQueue; use crate::db::DependencyKind; use crate::db::ExportsMap; use crate::db::PackageVersionMeta; @@ -57,6 +56,7 @@ use crate::npm::NpmTarball; use crate::npm::NpmTarballFiles; use crate::npm::NpmTarballOptions; use crate::npm::create_npm_tarball; +use crate::s3::BucketWithQueue; use crate::tarball::PublishError; pub struct PackageAnalysisData { diff --git a/api/src/api/package.rs b/api/src/api/package.rs index b527a1e17..f8bdc5202 100644 --- a/api/src/api/package.rs +++ b/api/src/api/package.rs @@ -55,7 +55,6 @@ use crate::analysis::ModuleParser; use crate::auth::GithubOauth2Client; use crate::auth::access_token; use crate::buckets::Buckets; -use crate::buckets::UploadTaskBody; use crate::db::CreatePackageResult; use crate::db::CreatePublishingTaskResult; use crate::db::Database; @@ -81,6 +80,7 @@ use crate::npm::generate_npm_version_manifest; use crate::orama::OramaClient; use crate::provenance; use crate::publish::publish_task; +use crate::s3::UploadTaskBody; use crate::tarball::bucket_tarball_path; use crate::util; use crate::util::LicenseStore; @@ -1981,7 +1981,7 @@ struct DepTreeLoader { scope: ScopeName, package: PackageName, version: crate::ids::Version, - bucket: crate::buckets::BucketWithQueue, + bucket: crate::s3::BucketWithQueue, exports: Arc>>>, } @@ -2188,7 +2188,7 @@ async fn analyze_deps_tree( scope: ScopeName, package: PackageName, version: crate::ids::Version, - bucket: crate::buckets::BucketWithQueue, + bucket: crate::s3::BucketWithQueue, exports: IndexMap, ) -> Result< IndexMap, diff --git a/api/src/buckets.rs b/api/src/buckets.rs index b5fedb7af..21cebafb0 100644 --- a/api/src/buckets.rs +++ b/api/src/buckets.rs @@ -20,6 +20,7 @@ use crate::task_queue::DynamicBackgroundTaskQueue; use crate::task_queue::RestartableTask; use crate::task_queue::RestartableTaskResult; +#[allow(dead_code)] #[derive(Clone)] pub struct BucketWithQueue { pub bucket: gcp::Bucket, @@ -29,6 +30,7 @@ pub struct BucketWithQueue { list_queue: DynamicBackgroundTaskQueue, } +#[allow(dead_code)] impl BucketWithQueue { pub fn new(bucket: gcp::Bucket) -> Self { Self { @@ -116,7 +118,7 @@ impl BucketWithQueue { #[derive(Clone)] pub struct Buckets { pub publishing_bucket: crate::s3::BucketWithQueue, - pub modules_bucket: BucketWithQueue, + pub modules_bucket: crate::s3::BucketWithQueue, pub docs_bucket: crate::s3::BucketWithQueue, pub npm_bucket: crate::s3::BucketWithQueue, } diff --git a/api/src/main.rs b/api/src/main.rs index 082bae955..3aa6178d3 100644 --- a/api/src/main.rs +++ b/api/src/main.rs @@ -34,7 +34,6 @@ use crate::api::ApiError; use crate::api::PublishQueue; use crate::api::api_router; use crate::auth::GithubOauth2Client; -use crate::buckets::BucketWithQueue; use crate::buckets::Buckets; use crate::config::Config; use crate::db::Database; @@ -184,11 +183,14 @@ async fn main() { ) .unwrap(), ); - let modules_bucket = BucketWithQueue::new(gcp::Bucket::new( - gcp_client.clone(), - config.modules_bucket, - config.gcs_endpoint.clone(), - )); + let modules_bucket = s3::BucketWithQueue::new( + s3::Bucket::new( + config.modules_bucket, + s3_region.clone(), + s3_credentials.clone(), + ) + .unwrap(), + ); let docs_bucket = s3::BucketWithQueue::new( s3::Bucket::new( config.docs_bucket, @@ -202,7 +204,7 @@ async fn main() { ); let buckets = Buckets { publishing_bucket, - modules_bucket: modules_bucket.clone(), + modules_bucket, docs_bucket, npm_bucket, }; diff --git a/api/src/npm/tarball.rs b/api/src/npm/tarball.rs index 9a3096a76..2ca1fbc92 100644 --- a/api/src/npm/tarball.rs +++ b/api/src/npm/tarball.rs @@ -25,7 +25,6 @@ use tar::Header; use tracing::error; use url::Url; -use crate::buckets::BucketWithQueue; use crate::db::DependencyKind; use crate::db::ExportsMap; use crate::ids::PackageName; @@ -33,6 +32,7 @@ use crate::ids::PackagePath; use crate::ids::ScopeName; use crate::ids::ScopedPackageName; use crate::ids::Version; +use crate::s3::BucketWithQueue; use super::NPM_TARBALL_REVISION; use super::emit::transpile_to_dts; diff --git a/api/src/publish.rs b/api/src/publish.rs index 435a143b5..770e12198 100644 --- a/api/src/publish.rs +++ b/api/src/publish.rs @@ -6,7 +6,6 @@ use crate::NpmUrl; use crate::RegistryUrl; use crate::api::ApiError; use crate::buckets::Buckets; -use crate::buckets::UploadTaskBody; use crate::db::Database; use crate::db::DependencyKind; use crate::db::ExportsMap; @@ -28,6 +27,7 @@ use crate::metadata::VersionMetadata; use crate::npm::NPM_TARBALL_REVISION; use crate::npm::generate_npm_version_manifest; use crate::orama::OramaClient; +use crate::s3::UploadTaskBody; use crate::tarball::NpmTarballInfo; use crate::tarball::ProcessTarballOutput; use crate::tarball::process_tarball; @@ -694,28 +694,31 @@ pub mod tests { .buckets .modules_bucket .bucket - .download_resp("@scope/foo/1.2.3/jsr.json") + .bucket + .get_object("@scope/foo/1.2.3/jsr.json") .await .unwrap(); - assert_eq!(response.status(), 200); + assert_eq!(response.status_code(), 200); assert_eq!(response.headers()["content-type"], "application/json"); let response = t .buckets .modules_bucket .bucket - .download_resp("@scope/foo/1.2.3/mod.ts") + .bucket + .get_object("@scope/foo/1.2.3/mod.ts") .await .unwrap(); - assert_eq!(response.status(), 200); + assert_eq!(response.status_code(), 200); assert_eq!(response.headers()["content-type"], "text/typescript"); let response = t .buckets .modules_bucket .bucket - .download_resp("@scope/foo/1.2.3/logo.svg") + .bucket + .get_object("@scope/foo/1.2.3/logo.svg") .await .unwrap(); - assert_eq!(response.status(), 200); + assert_eq!(response.status_code(), 200); assert_eq!(response.headers()["content-type"], "image/svg+xml"); } diff --git a/api/src/tarball.rs b/api/src/tarball.rs index d41f0333a..a3d6a4462 100644 --- a/api/src/tarball.rs +++ b/api/src/tarball.rs @@ -31,7 +31,6 @@ use crate::analysis::PackageAnalysisData; use crate::analysis::PackageAnalysisOutput; use crate::analysis::analyze_package; use crate::buckets::Buckets; -use crate::buckets::UploadTaskBody; use crate::db::Database; use crate::db::ExportsMap; use crate::db::PublishingTask; @@ -50,6 +49,7 @@ use crate::ids::ScopedPackageNameValidateError; use crate::ids::Version; use crate::npm::NPM_TARBALL_REVISION; use crate::s3::S3Error; +use crate::s3::UploadTaskBody; use crate::util::LicenseStore; const MAX_FILE_SIZE: u64 = 20 * 1024 * 1024; // 20 MB @@ -491,7 +491,7 @@ pub async fn process_tarball( }, ) .await - .map_err(PublishError::GcsUploadError) + .map_err(PublishError::S3UploadError) } }) .buffer_unordered(MAX_CONCURRENT_UPLOADS); @@ -530,6 +530,7 @@ pub enum PublishError { #[error("missing tarball")] MissingTarball, + #[allow(dead_code)] #[error("gcs upload error: {0}")] GcsUploadError(GcsError), diff --git a/api/src/util.rs b/api/src/util.rs index 4033adc14..40901ad76 100644 --- a/api/src/util.rs +++ b/api/src/util.rs @@ -491,7 +491,6 @@ pub mod test { use crate::ApiError; use crate::MainRouterOptions; use crate::auth::GithubOauth2Client; - use crate::buckets::BucketWithQueue; use crate::buckets::Buckets; use crate::db::Database; use crate::db::EphemeralDatabase; @@ -501,6 +500,7 @@ pub mod test { use crate::errors_internal::ApiErrorStruct; use crate::gcp::FakeGcsTester; use crate::ids::ScopeDescription; + use crate::s3::BucketWithQueue; use crate::s3::FakeS3Tester; use crate::util::LicenseStore; @@ -566,7 +566,6 @@ pub mod test { .fetch_add(1, std::sync::atomic::Ordering::Relaxed); let ephemeral_database = EphemeralDatabase::create().await; let db = ephemeral_database.database.clone().unwrap(); - let gcs = FakeGcsTester::new(); let s3 = FakeS3Tester::new(); let publishing_name = format!("publishing-{test_id}"); let modules_name = format!("modules-{test_id}"); @@ -574,7 +573,7 @@ pub mod test { let npm_name = format!("npm-{test_id}"); let (publishing_bucket, modules_bucket, docs_bucket, npm_bucket) = tokio::join!( s3.create_bucket(&publishing_name), - gcs.create_bucket(&modules_name), + s3.create_bucket(&modules_name), s3.create_bucket(&docs_name), s3.create_bucket(&npm_name), ); diff --git a/lb/local.ts b/lb/local.ts index bfde32016..06e287686 100755 --- a/lb/local.ts +++ b/lb/local.ts @@ -67,10 +67,12 @@ async function createMinioBucket(name: string) { const bucketCreationInterval = setInterval(async () => { let allBucketsCreated = true; - for (const bucket of [MODULES_BUCKET]) { + for (const bucket of []) { allBucketsCreated &&= await createBucket(bucket); } - for (const bucket of [DOCS_BUCKET, PUBLISHING_BUCKET, NPM_BUCKET]) { + for ( + const bucket of [MODULES_BUCKET, DOCS_BUCKET, PUBLISHING_BUCKET, NPM_BUCKET] + ) { allBucketsCreated &&= await createMinioBucket(bucket); } @@ -197,7 +199,7 @@ function handler(req: Request): Promise { REGISTRY_API_URL, REGISTRY_FRONTEND_URL, GCS_ENDPOINT, - MODULES_BUCKET, + MODULES_BUCKET: new R2BucketShim(MODULES_BUCKET), NPM_BUCKET: new R2BucketShim(NPM_BUCKET), ROOT_DOMAIN, API_DOMAIN, diff --git a/lb/main.ts b/lb/main.ts index c005d5b63..423a6ca9c 100644 --- a/lb/main.ts +++ b/lb/main.ts @@ -1,7 +1,7 @@ // Copyright 2024 the JSR authors. All rights reserved. MIT license. import type { WorkerEnv } from "./types.ts"; -import { proxyToCloudRun, proxyToGCS, proxyToR2 } from "./proxy.ts"; +import { proxyToCloudRun, proxyToR2 } from "./proxy.ts"; import { handleCORSPreflight, isCORSPreflight, @@ -236,9 +236,8 @@ async function handleModuleFileRoute( env: WorkerEnv, ): Promise { const url = new URL(request.url); - const response = await proxyToGCS( + const response = await proxyToR2( request, - env.GCS_ENDPOINT, env.MODULES_BUCKET, ); diff --git a/lb/types.ts b/lb/types.ts index d28182a1e..d09a2dc45 100644 --- a/lb/types.ts +++ b/lb/types.ts @@ -18,7 +18,6 @@ export interface WorkerEnv { REGISTRY_FRONTEND_URL: string; GCS_ENDPOINT?: string; - MODULES_BUCKET: string; ROOT_DOMAIN: string; API_DOMAIN: string; @@ -26,4 +25,5 @@ export interface WorkerEnv { DOWNLOADS?: AnalyticsEngineDataset; NPM_BUCKET: PartialBucket; + MODULES_BUCKET: PartialBucket; } diff --git a/terraform/buckets.tf b/terraform/buckets.tf index fa7e33dc3..75d38f155 100644 --- a/terraform/buckets.tf +++ b/terraform/buckets.tf @@ -23,6 +23,12 @@ resource "google_storage_bucket" "publishing" { force_destroy = true } +resource "cloudflare_r2_bucket" "modules" { + account_id = var.cloudflare_account_id + name = "${var.gcp_project}-modules" + location = "enam" +} + resource "cloudflare_r2_bucket" "publishing" { account_id = var.cloudflare_account_id name = "${var.gcp_project}-publishing" @@ -60,6 +66,7 @@ resource "cloudflare_account_token" "buckets_rw" { { id = "2efd5506f9c8494dacb1fa10a3e7d5b6" }, // Workers R2 Storage Bucket Item Write ] resources = jsonencode({ + "com.cloudflare.edge.r2.bucket.${var.cloudflare_account_id}_default_${cloudflare_r2_bucket.modules.name}" = "*", "com.cloudflare.edge.r2.bucket.${var.cloudflare_account_id}_default_${cloudflare_r2_bucket.publishing.name}" = "*", "com.cloudflare.edge.r2.bucket.${var.cloudflare_account_id}_default_${cloudflare_r2_bucket.docs.name}" = "*", "com.cloudflare.edge.r2.bucket.${var.cloudflare_account_id}_default_${cloudflare_r2_bucket.npm.name}" = "*" @@ -77,8 +84,8 @@ resource "google_service_account" "r2_sippy" { description = "Service account for Cloudflare R2 Sippy to read from GCS buckets" } -resource "google_storage_bucket_iam_member" "r2_sippy_npm_reader" { - bucket = google_storage_bucket.npm.name +resource "google_storage_bucket_iam_member" "r2_sippy_modules_reader" { + bucket = google_storage_bucket.modules.name role = "roles/storage.objectViewer" member = "serviceAccount:${google_service_account.r2_sippy.email}" } @@ -87,9 +94,9 @@ resource "google_service_account_key" "r2_sippy" { service_account_id = google_service_account.r2_sippy.name } -resource "cloudflare_r2_bucket_sippy" "r2_npm_sippy" { +resource "cloudflare_r2_bucket_sippy" "r2_modules_sippy" { account_id = var.cloudflare_account_id - bucket_name = cloudflare_r2_bucket.npm.name + bucket_name = cloudflare_r2_bucket.modules.name destination = { access_key_id = cloudflare_account_token.buckets_rw.id cloud_provider = "r2" @@ -98,11 +105,11 @@ resource "cloudflare_r2_bucket_sippy" "r2_npm_sippy" { source = { client_email = google_service_account.r2_sippy.email private_key = jsondecode(base64decode(google_service_account_key.r2_sippy.private_key)).private_key - bucket = google_storage_bucket.npm.name + bucket = google_storage_bucket.modules.name cloud_provider = "gcs" } - depends_on = [google_storage_bucket_iam_member.r2_sippy_npm_reader] + depends_on = [google_storage_bucket_iam_member.r2_sippy_modules_reader] } resource "google_storage_bucket" "docs" { diff --git a/terraform/lb.tf b/terraform/lb.tf index 695927d10..8268fc832 100644 --- a/terraform/lb.tf +++ b/terraform/lb.tf @@ -26,6 +26,10 @@ resource "cloudflare_workers_script" "jsr_lb" { name = "DOWNLOADS" dataset = local.worker_download_analytics_dataset }, { + type = "r2_bucket" + name = "MODULES_BUCKET" + bucket_name = cloudflare_r2_bucket.modules.name + }, { type = "r2_bucket" name = "NPM_BUCKET" bucket_name = cloudflare_r2_bucket.npm.name @@ -49,10 +53,6 @@ resource "cloudflare_workers_script" "jsr_lb" { type = "secret_text" name = "REGISTRY_FRONTEND_URL" text = google_cloud_run_v2_service.registry_frontend["us-central1"].uri - }, { - type = "secret_text" - name = "MODULES_BUCKET" - text = google_storage_bucket.modules.name } ]