Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .github/workflows/mirror-sync.yml
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ jobs:
run: |
./scripts/mirror-binaries/mirror-binaries \
--runtime=${{ matrix.runtime }} \
--manifest-dir=src/internal/manifest/data \
--r2-endpoint="$R2_ENDPOINT" \
--r2-bucket="$R2_BUCKET" \
--r2-access-key="$R2_ACCESS_KEY" \
Expand Down
88 changes: 19 additions & 69 deletions scripts/mirror-binaries/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,17 +23,6 @@ import (
"github.com/aws/aws-sdk-go-v2/service/s3"
)

// Manifest is the decoded form of a runtime manifest JSON document.
// The document's "versions" object is a two-level map; loadJobs iterates the
// outer key as a version and the inner key as a platform.
type Manifest struct {
// Versions maps version -> platform -> download entry. Values are pointers
// and may be nil; loadJobs skips nil entries.
Versions map[string]map[string]*Download `json:"versions"`
}

// Download represents a single download entry inside a Manifest.
type Download struct {
// URL is the address of the artifact; loadJobs ignores entries whose URL is empty.
URL string `json:"url"`
// SHA256 is the checksum recorded in the manifest, if any. It is omitted
// from encoded JSON when empty.
SHA256 string `json:"sha256,omitempty"`
}

// BinaryMeta represents metadata stored alongside each binary
type BinaryMeta struct {
SHA256 string `json:"sha256"`
Expand All @@ -56,26 +45,25 @@ type MirrorJob struct {

// Stats tracks mirroring statistics
type Stats struct {
Total int64
Skipped int64
Mirrored int64
Failed int64
BytesDown int64
UpstreamChecksum int64
Total int64
Skipped int64
Mirrored int64
Failed int64
BytesDown int64
UpstreamChecksum int64
GeneratedChecksum int64
}

var (
runtimeFlag = flag.String("runtime", "", "Runtime to mirror (node, python, ruby, or all)")
dryRun = flag.Bool("dry-run", false, "Report what would be done without doing it")
syncOnly = flag.Bool("sync-only", false, "Only mirror files not already in R2")
manifestDir = flag.String("manifest-dir", "src/internal/manifest/data", "Directory containing manifest files")
r2Endpoint = flag.String("r2-endpoint", "", "R2 endpoint URL")
r2Bucket = flag.String("r2-bucket", "", "R2 bucket name")
r2AccessKey = flag.String("r2-access-key", "", "R2 access key ID")
r2SecretKey = flag.String("r2-secret-key", "", "R2 secret access key")
workers = flag.Int("workers", 10, "Number of parallel workers")
retries = flag.Int("retries", 3, "Number of retries for failed downloads")
runtimeFlag = flag.String("runtime", "", "Runtime to mirror (node, python, ruby, or all)")
dryRun = flag.Bool("dry-run", false, "Report what would be done without doing it")
syncOnly = flag.Bool("sync-only", false, "Only mirror files not already in R2")
r2Endpoint = flag.String("r2-endpoint", "", "R2 endpoint URL")
r2Bucket = flag.String("r2-bucket", "", "R2 bucket name")
r2AccessKey = flag.String("r2-access-key", "", "R2 access key ID")
r2SecretKey = flag.String("r2-secret-key", "", "R2 secret access key")
workers = flag.Int("workers", 10, "Number of parallel workers")
retries = flag.Int("retries", 3, "Number of retries for failed downloads")
)

func main() {
Expand Down Expand Up @@ -121,13 +109,13 @@ func main() {
}
}

// Collect all jobs
// Collect all jobs from upstream sources
var jobs []MirrorJob
for _, rt := range runtimes {
manifestPath := filepath.Join(*manifestDir, rt+".json")
rtJobs, err := loadJobs(rt, manifestPath)
fmt.Printf("Discovering %s versions from upstream...\n", rt)
rtJobs, err := fetchJobsFromUpstream(rt)
if err != nil {
fmt.Fprintf(os.Stderr, "Error loading manifest for %s: %v\n", rt, err)
fmt.Fprintf(os.Stderr, "Error fetching upstream versions for %s: %v\n", rt, err)
os.Exit(1)
}
jobs = append(jobs, rtJobs...)
Expand Down Expand Up @@ -230,44 +218,6 @@ func listExistingKeys(client *s3.Client) (map[string]bool, error) {
return keys, nil
}

// loadJobs reads the manifest file at manifestPath and expands it into one
// MirrorJob per (version, platform) entry that carries a non-empty URL.
//
// The object key for the binary is "<runtime>/<version>/<platform><ext>",
// where <ext> comes from getExtension applied to the entry's URL; the
// metadata key is "<runtime>/<version>/<platform>.meta.json".
//
// File-read and JSON-decode errors are returned to the caller unchanged.
// Jobs are produced in map iteration order, which is not stable between runs.
func loadJobs(runtime, manifestPath string) ([]MirrorJob, error) {
	raw, readErr := os.ReadFile(manifestPath)
	if readErr != nil {
		return nil, readErr
	}

	var parsed Manifest
	if decodeErr := json.Unmarshal(raw, &parsed); decodeErr != nil {
		return nil, decodeErr
	}

	var result []MirrorJob
	for ver, byPlatform := range parsed.Versions {
		for plat, entry := range byPlatform {
			// Only entries that actually point somewhere can be mirrored.
			if entry != nil && entry.URL != "" {
				suffix := getExtension(entry.URL)
				result = append(result, MirrorJob{
					Runtime:        runtime,
					Version:        ver,
					Platform:       plat,
					URL:            entry.URL,
					UpstreamSHA256: entry.SHA256,
					R2Key:          fmt.Sprintf("%s/%s/%s%s", runtime, ver, plat, suffix),
					MetaKey:        fmt.Sprintf("%s/%s/%s.meta.json", runtime, ver, plat),
				})
			}
		}
	}

	return result, nil
}

func getExtension(url string) string {
// Handle common archive extensions
if strings.HasSuffix(url, ".tar.gz") {
Expand Down
181 changes: 181 additions & 0 deletions scripts/mirror-binaries/node.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
package main

import (
"encoding/json"
"fmt"
"io"
"strings"
)

// nodeIndexURL is the JSON release index that FetchVersions downloads and
// decodes to enumerate Node.js releases.
const nodeIndexURL = "https://nodejs.org/dist/index.json"

// NodeOfficialSource fetches Node.js versions from nodejs.org.
// It has no fields; its methods download the release index and the
// per-release SHASUMS256.txt files through the package-level httpClient.
type NodeOfficialSource struct{}

// Name returns the fixed identifier of this source, "nodejs.org".
func (s *NodeOfficialSource) Name() string {
return "nodejs.org"
}

// nodeIndexEntry represents an entry in nodejs.org/dist/index.json.
// FetchVersions reads only Version and Files; the other fields are decoded
// but not consulted by it.
type nodeIndexEntry struct {
// Version is the release version as listed in the index. FetchVersions uses
// it verbatim in download URLs and strips a leading "v" for storage keys.
Version string `json:"version"`
Date string `json:"date"`
// Files lists the file-type identifiers available for the release
// (for example "linux-x64" or "win-x64-zip"); see mapFileToPlatform.
Files []string `json:"files"`
LTS any `json:"lts"` // Can be string or false
Shasums string `json:"shasums,omitempty"`
}

// nodeShasums maps an archive filename, as listed in a release's
// SHASUMS256.txt, to its SHA256 checksum string.
type nodeShasums map[string]string

// FetchVersions downloads the nodejs.org release index and returns one
// MirrorJob for every (release, supported platform) pair.
//
// File types that mapFileToPlatform does not recognise are skipped. For a
// release that has at least one supported file, SHASUMS256.txt is requested
// once so each job can carry the upstream checksum. That lookup is best
// effort: if it fails, or the archive is not listed, the job is still emitted
// with an empty UpstreamSHA256. Releases without any supported file trigger
// no checksum request at all.
//
// An error is returned only when the index itself cannot be fetched
// (transport failure or non-200 status) or cannot be decoded as JSON.
func (s *NodeOfficialSource) FetchVersions() ([]MirrorJob, error) {
	resp, err := httpClient.Get(nodeIndexURL)
	if err != nil {
		return nil, fmt.Errorf("fetching index: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != 200 {
		return nil, fmt.Errorf("fetching index: HTTP %d", resp.StatusCode)
	}

	var entries []nodeIndexEntry
	if err := json.NewDecoder(resp.Body).Decode(&entries); err != nil {
		return nil, fmt.Errorf("parsing index: %w", err)
	}

	var jobs []MirrorJob

	for _, entry := range entries {
		// Storage keys use the bare version; upstream URLs keep the index form.
		version := strings.TrimPrefix(entry.Version, "v")

		// Checksums are fetched lazily so that releases with nothing to
		// mirror do not cost a network round trip.
		var shasums nodeShasums
		checksumsTried := false

		for _, file := range entry.Files {
			platform, ext := s.mapFileToPlatform(file)
			if platform == "" {
				continue // Skip unsupported file types
			}

			if !checksumsTried {
				checksumsTried = true
				// Older versions may not have SHASUMS256; continue without
				// checksums rather than failing the whole discovery.
				if sums, err := s.fetchShasums(entry.Version); err == nil {
					shasums = sums
				}
			}

			archiveName := s.getArchiveName(entry.Version, file)

			jobs = append(jobs, MirrorJob{
				Runtime:  "node",
				Version:  version,
				Platform: platform,
				URL:      fmt.Sprintf("https://nodejs.org/dist/%s/%s", entry.Version, archiveName),
				// Indexing a nil or incomplete map yields "", meaning "no upstream checksum".
				UpstreamSHA256: shasums[archiveName],
				R2Key:          fmt.Sprintf("node/%s/%s%s", version, platform, ext),
				MetaKey:        fmt.Sprintf("node/%s/%s.meta.json", version, platform),
			})
		}
	}

	return jobs, nil
}

// fetchShasums downloads SHASUMS256.txt for one release and returns a
// filename -> checksum map. version is inserted into the URL verbatim, so it
// must be in the form used under nodejs.org/dist/ (FetchVersions passes the
// index's version string unchanged).
//
// Each useful line holds a checksum followed by a filename. Lines are split
// on any run of whitespace, so the result does not depend on whether one or
// two spaces separate the columns, and a trailing carriage return is
// ignored. A leading "*" on the filename (the binary-mode marker written by
// sha256sum-style tools) is dropped. Blank lines and lines that do not have
// exactly two fields are skipped.
//
// A transport error, a non-200 status, or a failure while reading the body
// yields a nil map and a non-nil error.
func (s *NodeOfficialSource) fetchShasums(version string) (nodeShasums, error) {
	url := fmt.Sprintf("https://nodejs.org/dist/%s/SHASUMS256.txt", version)
	resp, err := httpClient.Get(url)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != 200 {
		return nil, fmt.Errorf("HTTP %d", resp.StatusCode)
	}

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, err
	}

	shasums := make(nodeShasums)
	for _, line := range strings.Split(string(body), "\n") {
		// Fields trims and collapses whitespace, so the filename key never
		// carries a stray leading space that would defeat later lookups.
		fields := strings.Fields(line)
		if len(fields) != 2 {
			continue
		}
		checksum, name := fields[0], strings.TrimPrefix(fields[1], "*")
		shasums[name] = checksum
	}

	return shasums, nil
}

// mapFileToPlatform translates a file-type identifier from the Node.js
// release index into this tool's platform name and the archive extension
// used for that platform.
//
// Node.js names look like linux-x64, darwin-x64 or win-x64-zip; the returned
// platform names look like linux-amd64, darwin-amd64 or windows-amd64.
// Anything not listed below (MSI installers, source tarballs, headers, ...)
// is unsupported and yields two empty strings.
func (s *NodeOfficialSource) mapFileToPlatform(file string) (platform, ext string) {
	const (
		tarball    = ".tar.gz"
		zipArchive = ".zip"
	)

	switch file {
	// Linux
	case "linux-x64":
		platform, ext = "linux-amd64", tarball
	case "linux-arm64":
		platform, ext = "linux-arm64", tarball
	case "linux-armv7l":
		platform, ext = "linux-armv7", tarball

	// macOS
	case "darwin-x64":
		platform, ext = "darwin-amd64", tarball
	case "darwin-arm64":
		platform, ext = "darwin-arm64", tarball

	// Windows
	case "win-x64-zip":
		platform, ext = "windows-amd64", zipArchive
	case "win-arm64-zip":
		platform, ext = "windows-arm64", zipArchive
	case "win-x86-zip":
		platform, ext = "windows-386", zipArchive
	}

	// Unmatched identifiers leave both results at their zero value "".
	return platform, ext
}

// getArchiveName builds the archive filename for a release and a file-type
// identifier from the release index. version is used verbatim in the name.
//
// Linux and macOS identifiers map to "node-<version>-<id>.tar.gz". Windows
// identifiers end in "-zip" in the index; that suffix is not part of the
// filename, so they map to "node-<version>-<id without -zip>.zip".
// Identifiers outside the supported set return "".
func (s *NodeOfficialSource) getArchiveName(version, file string) string {
	var tail string

	switch file {
	case "linux-x64", "linux-arm64", "linux-armv7l", "darwin-x64", "darwin-arm64":
		tail = file + ".tar.gz"
	case "win-x64-zip", "win-arm64-zip", "win-x86-zip":
		// Drop the trailing "-zip" marker before appending the real extension.
		tail = file[:len(file)-len("-zip")] + ".zip"
	default:
		return ""
	}

	return fmt.Sprintf("node-%s-%s", version, tail)
}
Loading