From fd51310fc1028eb13a2f5867c5becb5ed4517209 Mon Sep 17 00:00:00 2001 From: "Calvin A. Allen" Date: Sun, 25 Jan 2026 17:58:34 -0500 Subject: [PATCH] fix(mirror): discover versions from upstream sources Refactor mirror-binaries to fetch version lists directly from upstream sources instead of reading from local manifests. This fixes the chicken-and-egg problem where new releases were never discovered. Upstream sources: - Node.js: nodejs.org/dist/index.json (comprehensive) - Python: python-build-standalone (GitHub releases) - Ruby: rubyinstaller2 (Windows) + ruby-builder (Linux/macOS) Changes: - Add sources.go with UpstreamSource interface and retry logic - Add node.go for Node.js official source - Add python.go for Python standalone builds - Add ruby.go for RubyInstaller and ruby-builder - Update main.go to use upstream discovery - Remove --manifest-dir flag from workflow (no longer needed) Closes #208 --- .github/workflows/mirror-sync.yml | 1 - scripts/mirror-binaries/main.go | 88 +++---------- scripts/mirror-binaries/node.go | 181 +++++++++++++++++++++++++++ scripts/mirror-binaries/python.go | 179 +++++++++++++++++++++++++++ scripts/mirror-binaries/ruby.go | 190 +++++++++++++++++++++++++++++ scripts/mirror-binaries/sources.go | 104 ++++++++++++++++ 6 files changed, 673 insertions(+), 70 deletions(-) create mode 100644 scripts/mirror-binaries/node.go create mode 100644 scripts/mirror-binaries/python.go create mode 100644 scripts/mirror-binaries/ruby.go create mode 100644 scripts/mirror-binaries/sources.go diff --git a/.github/workflows/mirror-sync.yml b/.github/workflows/mirror-sync.yml index 66dc6c0..9ef546a 100644 --- a/.github/workflows/mirror-sync.yml +++ b/.github/workflows/mirror-sync.yml @@ -51,7 +51,6 @@ jobs: run: | ./scripts/mirror-binaries/mirror-binaries \ --runtime=${{ matrix.runtime }} \ - --manifest-dir=src/internal/manifest/data \ --r2-endpoint="$R2_ENDPOINT" \ --r2-bucket="$R2_BUCKET" \ --r2-access-key="$R2_ACCESS_KEY" \ diff --git a/scripts/mirror-binaries/main.go b/scripts/mirror-binaries/main.go index d43b5bc..c6574a2 100644 --- a/scripts/mirror-binaries/main.go +++ b/scripts/mirror-binaries/main.go @@ -23,17 +23,6 @@ import ( "github.com/aws/aws-sdk-go-v2/service/s3" ) -// Manifest represents the structure of a runtime manifest -type Manifest struct { - Versions map[string]map[string]*Download `json:"versions"` -} - -// Download represents a single download entry -type Download struct { - URL string `json:"url"` - SHA256 string `json:"sha256,omitempty"` -} - // BinaryMeta represents metadata stored alongside each binary type BinaryMeta struct { SHA256 string `json:"sha256"` @@ -56,26 +45,25 @@ type MirrorJob struct { // Stats tracks mirroring statistics type Stats struct { - Total int64 - Skipped int64 - Mirrored int64 - Failed int64 - BytesDown int64 - UpstreamChecksum int64 + Total int64 + Skipped int64 + Mirrored int64 + Failed int64 + BytesDown int64 + UpstreamChecksum int64 GeneratedChecksum int64 } var ( - runtimeFlag = flag.String("runtime", "", "Runtime to mirror (node, python, ruby, or all)") - dryRun = flag.Bool("dry-run", false, "Report what would be done without doing it") - syncOnly = flag.Bool("sync-only", false, "Only mirror files not already in R2") - manifestDir = flag.String("manifest-dir", "src/internal/manifest/data", "Directory containing manifest files") - r2Endpoint = flag.String("r2-endpoint", "", "R2 endpoint URL") - r2Bucket = flag.String("r2-bucket", "", "R2 bucket name") - r2AccessKey = flag.String("r2-access-key", "", "R2 access key ID") - r2SecretKey = flag.String("r2-secret-key", "", "R2 secret access key") - workers = flag.Int("workers", 10, "Number of parallel workers") - retries = flag.Int("retries", 3, "Number of retries for failed downloads") + runtimeFlag = flag.String("runtime", "", "Runtime to mirror (node, python, ruby, or all)") + dryRun = flag.Bool("dry-run", false, "Report what would be done without doing it") + syncOnly = flag.Bool("sync-only", false, "Only mirror files not already in R2") + r2Endpoint = flag.String("r2-endpoint", "", "R2 endpoint URL") + r2Bucket = flag.String("r2-bucket", "", "R2 bucket name") + r2AccessKey = flag.String("r2-access-key", "", "R2 access key ID") + r2SecretKey = flag.String("r2-secret-key", "", "R2 secret access key") + workers = flag.Int("workers", 10, "Number of parallel workers") + retries = flag.Int("retries", 3, "Number of retries for failed downloads") ) func main() { @@ -121,13 +109,13 @@ func main() { } } - // Collect all jobs + // Collect all jobs from upstream sources var jobs []MirrorJob for _, rt := range runtimes { - manifestPath := filepath.Join(*manifestDir, rt+".json") - rtJobs, err := loadJobs(rt, manifestPath) + fmt.Printf("Discovering %s versions from upstream...\n", rt) + rtJobs, err := fetchJobsFromUpstream(rt) if err != nil { - fmt.Fprintf(os.Stderr, "Error loading manifest for %s: %v\n", rt, err) + fmt.Fprintf(os.Stderr, "Error fetching upstream versions for %s: %v\n", rt, err) os.Exit(1) } jobs = append(jobs, rtJobs...) @@ -230,44 +218,6 @@ func listExistingKeys(client *s3.Client) (map[string]bool, error) { return keys, nil } -func loadJobs(runtime, manifestPath string) ([]MirrorJob, error) { - data, err := os.ReadFile(manifestPath) - if err != nil { - return nil, err - } - - var manifest Manifest - if err := json.Unmarshal(data, &manifest); err != nil { - return nil, err - } - - var jobs []MirrorJob - for version, platforms := range manifest.Versions { - for platform, dl := range platforms { - if dl == nil || dl.URL == "" { - continue - } - - // Determine file extension from URL - ext := getExtension(dl.URL) - r2Key := fmt.Sprintf("%s/%s/%s%s", runtime, version, platform, ext) - metaKey := fmt.Sprintf("%s/%s/%s.meta.json", runtime, version, platform) - - jobs = append(jobs, MirrorJob{ - Runtime: runtime, - Version: version, - Platform: platform, - URL: dl.URL, - UpstreamSHA256: dl.SHA256, - R2Key: r2Key, - MetaKey: metaKey, - }) - } - } - - return jobs, nil -} - func getExtension(url string) string { // Handle common archive extensions if strings.HasSuffix(url, ".tar.gz") { diff --git a/scripts/mirror-binaries/node.go b/scripts/mirror-binaries/node.go new file mode 100644 index 0000000..75ea070 --- /dev/null +++ b/scripts/mirror-binaries/node.go @@ -0,0 +1,181 @@ +package main + +import ( + "encoding/json" + "fmt" + "io" + "strings" +) + +const nodeIndexURL = "https://nodejs.org/dist/index.json" + +// NodeOfficialSource fetches Node.js versions from nodejs.org +type NodeOfficialSource struct{} + +func (s *NodeOfficialSource) Name() string { + return "nodejs.org" +} + +// nodeIndexEntry represents an entry in nodejs.org/dist/index.json +type nodeIndexEntry struct { + Version string `json:"version"` + Date string `json:"date"` + Files []string `json:"files"` + LTS any `json:"lts"` // Can be string or false + Shasums string `json:"shasums,omitempty"` +} + +// nodeShasums maps filename to SHA256 checksum +type nodeShasums map[string]string + +func (s *NodeOfficialSource) FetchVersions() ([]MirrorJob, error) { + // Fetch index.json + resp, err := httpClient.Get(nodeIndexURL) + if err != nil { + return nil, fmt.Errorf("fetching index: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != 200 { + return nil, fmt.Errorf("fetching index: HTTP %d", resp.StatusCode) + } + + var entries []nodeIndexEntry + if err := json.NewDecoder(resp.Body).Decode(&entries); err != nil { + return nil, fmt.Errorf("parsing index: %w", err) + } + + var jobs []MirrorJob + + for _, entry := range entries { + version := strings.TrimPrefix(entry.Version, "v") + + // Fetch checksums for this version + shasums, err := s.fetchShasums(entry.Version) + if err != nil { + // Older versions may not have SHASUMS256, continue without checksums + shasums = nil + } + + // Map Node.js file types to our platform naming + for _, file := range entry.Files { + platform, ext := s.mapFileToPlatform(file) + if platform == "" { + continue // Skip unsupported file types + } + + archiveName := s.getArchiveName(entry.Version, file) + url := fmt.Sprintf("https://nodejs.org/dist/%s/%s", entry.Version, archiveName) + + var sha256 string + if shasums != nil { + sha256 = shasums[archiveName] + } + + r2Key := fmt.Sprintf("node/%s/%s%s", version, platform, ext) + metaKey := fmt.Sprintf("node/%s/%s.meta.json", version, platform) + + jobs = append(jobs, MirrorJob{ + Runtime: "node", + Version: version, + Platform: platform, + URL: url, + UpstreamSHA256: sha256, + R2Key: r2Key, + MetaKey: metaKey, + }) + } + } + + return jobs, nil +} + +func (s *NodeOfficialSource) fetchShasums(version string) (nodeShasums, error) { + url := fmt.Sprintf("https://nodejs.org/dist/%s/SHASUMS256.txt", version) + resp, err := httpClient.Get(url) + if err != nil { + return nil, err + } + defer resp.Body.Close() + + if resp.StatusCode != 200 { + return nil, fmt.Errorf("HTTP %d", resp.StatusCode) + } + + body, err := io.ReadAll(resp.Body) + if err != nil { + return nil, err + } + + shasums := make(nodeShasums) + for _, line := range strings.Split(string(body), "\n") { + line = strings.TrimSpace(line) + if line == "" { + continue + } + // Format: "checksum filename" (two spaces) + parts := strings.SplitN(line, " ", 2) + if len(parts) == 2 { + shasums[parts[1]] = parts[0] + } + } + + return shasums, nil +} + +func (s *NodeOfficialSource) mapFileToPlatform(file string) (platform, ext string) { + // Node.js file naming: linux-x64, darwin-x64, win-x64, etc. + // We want: linux-amd64, darwin-amd64, windows-amd64, etc. + + switch file { + // Linux + case "linux-x64": + return "linux-amd64", ".tar.gz" + case "linux-arm64": + return "linux-arm64", ".tar.gz" + case "linux-armv7l": + return "linux-armv7", ".tar.gz" + + // macOS + case "darwin-x64": + return "darwin-amd64", ".tar.gz" + case "darwin-arm64": + return "darwin-arm64", ".tar.gz" + + // Windows + case "win-x64-zip": + return "windows-amd64", ".zip" + case "win-arm64-zip": + return "windows-arm64", ".zip" + case "win-x86-zip": + return "windows-386", ".zip" + + default: + // Skip MSI installers, source tarballs, headers, etc. + return "", "" + } +} + +func (s *NodeOfficialSource) getArchiveName(version, file string) string { + // Convert file type to actual archive filename + switch file { + case "linux-x64": + return fmt.Sprintf("node-%s-linux-x64.tar.gz", version) + case "linux-arm64": + return fmt.Sprintf("node-%s-linux-arm64.tar.gz", version) + case "linux-armv7l": + return fmt.Sprintf("node-%s-linux-armv7l.tar.gz", version) + case "darwin-x64": + return fmt.Sprintf("node-%s-darwin-x64.tar.gz", version) + case "darwin-arm64": + return fmt.Sprintf("node-%s-darwin-arm64.tar.gz", version) + case "win-x64-zip": + return fmt.Sprintf("node-%s-win-x64.zip", version) + case "win-arm64-zip": + return fmt.Sprintf("node-%s-win-arm64.zip", version) + case "win-x86-zip": + return fmt.Sprintf("node-%s-win-x86.zip", version) + default: + return "" + } +} diff --git a/scripts/mirror-binaries/python.go b/scripts/mirror-binaries/python.go new file mode 100644 index 0000000..7defcc0 --- /dev/null +++ b/scripts/mirror-binaries/python.go @@ -0,0 +1,179 @@ +package main + +import ( + "encoding/json" + "fmt" + "io" + "regexp" + "strings" +) + +// PythonStandaloneSource fetches Python versions from astral-sh/python-build-standalone +type PythonStandaloneSource struct{} + +func (s *PythonStandaloneSource) Name() string { + return "python-build-standalone" +} + +// githubRelease represents a GitHub release +type githubRelease struct { + TagName string `json:"tag_name"` + Assets []githubAsset `json:"assets"` +} + +// githubAsset represents a GitHub release asset +type githubAsset struct { + Name string `json:"name"` + BrowserDownloadURL string `json:"browser_download_url"` +} + +// pythonStandalonePattern matches filenames like: +// cpython-3.12.0+20231002-x86_64-unknown-linux-gnu-install_only.tar.gz +var pythonStandalonePattern = regexp.MustCompile( + `^cpython-(\d+\.\d+\.\d+)\+\d+-([^-]+-[^-]+-[^-]+(?:-[^-]+)?)-install_only\.(tar\.gz|tar\.zst)$`, +) + +func (s *PythonStandaloneSource) FetchVersions() ([]MirrorJob, error) { + // Fetch releases from GitHub API with retries + url := "https://api.github.com/repos/astral-sh/python-build-standalone/releases?per_page=100" + resp, err := httpGetWithRetry(url, 3) + if err != nil { + return nil, fmt.Errorf("fetching releases: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != 200 { + return nil, fmt.Errorf("fetching releases: HTTP %d", resp.StatusCode) + } + + var releases []githubRelease + if err := json.NewDecoder(resp.Body).Decode(&releases); err != nil { + return nil, fmt.Errorf("parsing releases: %w", err) + } + + var jobs []MirrorJob + seen := make(map[string]bool) + + for _, release := range releases { + // Also fetch SHA256 sums if available + shasums := s.fetchShasums(release) + + for _, asset := range release.Assets { + matches := pythonStandalonePattern.FindStringSubmatch(asset.Name) + if matches == nil { + continue + } + + version := matches[1] + triple := matches[2] + ext := "." + matches[3] + + platform := s.mapTripleToPlatform(triple) + if platform == "" { + continue + } + + // Skip duplicates (prefer first occurrence which is newest release) + key := version + "/" + platform + if seen[key] { + continue + } + seen[key] = true + + r2Key := fmt.Sprintf("python/%s/%s%s", version, platform, ext) + metaKey := fmt.Sprintf("python/%s/%s.meta.json", version, platform) + + jobs = append(jobs, MirrorJob{ + Runtime: "python", + Version: version, + Platform: platform, + URL: asset.BrowserDownloadURL, + UpstreamSHA256: shasums[asset.Name], + R2Key: r2Key, + MetaKey: metaKey, + }) + } + } + + return jobs, nil +} + +func (s *PythonStandaloneSource) fetchShasums(release githubRelease) map[string]string { + shasums := make(map[string]string) + + // Look for SHA256SUMS file in release assets + for _, asset := range release.Assets { + if asset.Name == "SHA256SUMS" { + resp, err := httpClient.Get(asset.BrowserDownloadURL) + if err != nil { + return shasums + } + defer resp.Body.Close() + + body, err := io.ReadAll(resp.Body) + if err != nil { + return shasums + } + + for _, line := range strings.Split(string(body), "\n") { + parts := strings.Fields(line) + if len(parts) >= 2 { + shasums[parts[1]] = parts[0] + } + } + break + } + } + + return shasums +} + +func (s *PythonStandaloneSource) mapTripleToPlatform(triple string) string { + // Map rust-style triples to our platform naming + switch { + // Linux + case strings.Contains(triple, "x86_64") && strings.Contains(triple, "linux"): + return "linux-amd64" + case strings.Contains(triple, "aarch64") && strings.Contains(triple, "linux"): + return "linux-arm64" + + // macOS + case strings.Contains(triple, "x86_64") && strings.Contains(triple, "apple"): + return "darwin-amd64" + case strings.Contains(triple, "aarch64") && strings.Contains(triple, "apple"): + return "darwin-arm64" + + // Windows + case strings.Contains(triple, "x86_64") && strings.Contains(triple, "windows"): + return "windows-amd64" + case strings.Contains(triple, "i686") && strings.Contains(triple, "windows"): + return "windows-386" + + default: + return "" + } +} + +// PythonOfficialSource fetches Python versions from python.org +// This is a fallback for versions not available in python-build-standalone +type PythonOfficialSource struct{} + +func (s *PythonOfficialSource) Name() string { + return "python.org" +} + +func (s *PythonOfficialSource) FetchVersions() ([]MirrorJob, error) { + // Python.org doesn't provide prebuilt binaries for most platforms + // Only Windows installers and source tarballs are available + // For now, we rely primarily on python-build-standalone + // This source can be expanded later if needed + + // The official FTP has a complex structure: + // https://www.python.org/ftp/python/3.12.0/ + // - Python-3.12.0.tar.xz (source) + // - python-3.12.0-amd64.exe (Windows installer - not a portable archive) + // - python-3.12.0-embed-amd64.zip (Windows embeddable - limited use) + + // For now, return empty - python-build-standalone covers our needs + return []MirrorJob{}, nil +} diff --git a/scripts/mirror-binaries/ruby.go b/scripts/mirror-binaries/ruby.go new file mode 100644 index 0000000..84c6e98 --- /dev/null +++ b/scripts/mirror-binaries/ruby.go @@ -0,0 +1,190 @@ +package main + +import ( + "encoding/json" + "fmt" + "regexp" + "strings" +) + +// RubyInstallerSource fetches Ruby versions from rubyinstaller2 (Windows) +type RubyInstallerSource struct{} + +func (s *RubyInstallerSource) Name() string { + return "rubyinstaller2" +} + +// rubyInstallerPattern matches filenames like: +// rubyinstaller-3.2.2-1-x64.7z +var rubyInstallerPattern = regexp.MustCompile( + `^rubyinstaller-(\d+\.\d+\.\d+)-\d+-([^.]+)\.(7z|zip)$`, +) + +func (s *RubyInstallerSource) FetchVersions() ([]MirrorJob, error) { + // Fetch releases from GitHub API with retries + url := "https://api.github.com/repos/oneclick/rubyinstaller2/releases?per_page=100" + resp, err := httpGetWithRetry(url, 3) + if err != nil { + return nil, fmt.Errorf("fetching releases: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != 200 { + return nil, fmt.Errorf("fetching releases: HTTP %d", resp.StatusCode) + } + + var releases []githubRelease + if err := json.NewDecoder(resp.Body).Decode(&releases); err != nil { + return nil, fmt.Errorf("parsing releases: %w", err) + } + + var jobs []MirrorJob + seen := make(map[string]bool) + + for _, release := range releases { + for _, asset := range release.Assets { + matches := rubyInstallerPattern.FindStringSubmatch(asset.Name) + if matches == nil { + continue + } + + version := matches[1] + arch := matches[2] + ext := "." + matches[3] + + platform := s.mapArchToPlatform(arch) + if platform == "" { + continue + } + + // Skip duplicates + key := version + "/" + platform + if seen[key] { + continue + } + seen[key] = true + + r2Key := fmt.Sprintf("ruby/%s/%s%s", version, platform, ext) + metaKey := fmt.Sprintf("ruby/%s/%s.meta.json", version, platform) + + jobs = append(jobs, MirrorJob{ + Runtime: "ruby", + Version: version, + Platform: platform, + URL: asset.BrowserDownloadURL, + UpstreamSHA256: "", // RubyInstaller doesn't provide checksums in releases + R2Key: r2Key, + MetaKey: metaKey, + }) + } + } + + return jobs, nil +} + +func (s *RubyInstallerSource) mapArchToPlatform(arch string) string { + switch arch { + case "x64": + return "windows-amd64" + case "x86": + return "windows-386" + default: + return "" + } +} + +// RubyBuilderSource fetches Ruby versions from ruby/ruby-builder (Linux/macOS) +type RubyBuilderSource struct{} + +func (s *RubyBuilderSource) Name() string { + return "ruby-builder" +} + +// rubyBuilderPattern matches filenames like: +// ruby-3.2.2-ubuntu-22.04.tar.gz +// ruby-3.2.2-macos-latest.tar.gz +// ruby-3.2.2-macos-13-arm64.tar.gz +var rubyBuilderPattern = regexp.MustCompile( + `^ruby-(\d+\.\d+\.\d+)-([^.]+(?:\.[^.]+)?(?:-arm64)?)\.(tar\.gz)$`, +) + +func (s *RubyBuilderSource) FetchVersions() ([]MirrorJob, error) { + // Fetch the toolcache release from GitHub API with retries + url := "https://api.github.com/repos/ruby/ruby-builder/releases/tags/toolcache" + resp, err := httpGetWithRetry(url, 3) + if err != nil { + return nil, fmt.Errorf("fetching release: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != 200 { + return nil, fmt.Errorf("fetching release: HTTP %d", resp.StatusCode) + } + + var release githubRelease + if err := json.NewDecoder(resp.Body).Decode(&release); err != nil { + return nil, fmt.Errorf("parsing release: %w", err) + } + + var jobs []MirrorJob + seen := make(map[string]bool) + + for _, asset := range release.Assets { + matches := rubyBuilderPattern.FindStringSubmatch(asset.Name) + if matches == nil { + continue + } + + version := matches[1] + osArch := matches[2] + ext := "." + matches[3] + + platform := s.mapOsArchToPlatform(osArch) + if platform == "" { + continue + } + + // Skip duplicates (prefer specific versions like ubuntu-22.04 over ubuntu-latest) + key := version + "/" + platform + if seen[key] { + continue + } + seen[key] = true + + r2Key := fmt.Sprintf("ruby/%s/%s%s", version, platform, ext) + metaKey := fmt.Sprintf("ruby/%s/%s.meta.json", version, platform) + + jobs = append(jobs, MirrorJob{ + Runtime: "ruby", + Version: version, + Platform: platform, + URL: asset.BrowserDownloadURL, + UpstreamSHA256: "", // ruby-builder doesn't provide checksums + R2Key: r2Key, + MetaKey: metaKey, + }) + } + + return jobs, nil +} + +func (s *RubyBuilderSource) mapOsArchToPlatform(osArch string) string { + switch { + // Linux (prefer ubuntu-22.04 as it's most compatible) + case strings.HasPrefix(osArch, "ubuntu"): + if strings.Contains(osArch, "arm64") { + return "linux-arm64" + } + return "linux-amd64" + + // macOS + case strings.HasPrefix(osArch, "macos"): + if strings.Contains(osArch, "arm64") { + return "darwin-arm64" + } + return "darwin-amd64" + + default: + return "" + } +} diff --git a/scripts/mirror-binaries/sources.go b/scripts/mirror-binaries/sources.go new file mode 100644 index 0000000..8ab5afb --- /dev/null +++ b/scripts/mirror-binaries/sources.go @@ -0,0 +1,104 @@ +package main + +import ( + "fmt" + "net/http" + "time" +) + +// UpstreamSource represents a source of runtime binaries +type UpstreamSource interface { + // Name returns a human-readable name for the source + Name() string + // FetchVersions fetches all available versions and their download info + FetchVersions() ([]MirrorJob, error) +} + +// httpClient is a shared HTTP client with reasonable timeouts +var httpClient = &http.Client{ + Timeout: 60 * time.Second, +} + +// httpGetWithRetry performs an HTTP GET with retries for transient failures +func httpGetWithRetry(url string, maxRetries int) (*http.Response, error) { + var lastErr error + for attempt := 1; attempt <= maxRetries; attempt++ { + resp, err := httpClient.Get(url) + if err != nil { + lastErr = err + if attempt < maxRetries { + time.Sleep(time.Duration(attempt) * 2 * time.Second) + } + continue + } + + // Retry on server errors (5xx) + if resp.StatusCode >= 500 { + resp.Body.Close() + lastErr = fmt.Errorf("HTTP %d", resp.StatusCode) + if attempt < maxRetries { + time.Sleep(time.Duration(attempt) * 2 * time.Second) + } + continue + } + + return resp, nil + } + return nil, lastErr +} + +// getUpstreamSources returns all upstream sources for a given runtime +func getUpstreamSources(runtime string) ([]UpstreamSource, error) { + switch runtime { + case "node": + return []UpstreamSource{ + &NodeOfficialSource{}, + }, nil + case "python": + return []UpstreamSource{ + &PythonStandaloneSource{}, + &PythonOfficialSource{}, + }, nil + case "ruby": + return []UpstreamSource{ + &RubyInstallerSource{}, + &RubyBuilderSource{}, + }, nil + default: + return nil, fmt.Errorf("unknown runtime: %s", runtime) + } +} + +// fetchJobsFromUpstream fetches all mirror jobs for a runtime from upstream sources +func fetchJobsFromUpstream(runtime string) ([]MirrorJob, error) { + sources, err := getUpstreamSources(runtime) + if err != nil { + return nil, err + } + + var allJobs []MirrorJob + seen := make(map[string]bool) // Track version+platform to avoid duplicates + + for _, source := range sources { + fmt.Printf(" Fetching from %s...\n", source.Name()) + jobs, err := source.FetchVersions() + if err != nil { + fmt.Printf(" Warning: failed to fetch from %s: %v\n", source.Name(), err) + continue + } + + // Add jobs, avoiding duplicates (first source wins) + added := 0 + for _, job := range jobs { + key := fmt.Sprintf("%s/%s", job.Version, job.Platform) + if !seen[key] { + seen[key] = true + allJobs = append(allJobs, job) + added++ + } + } + fmt.Printf(" Found %d versions from %s (%d new)\n", len(jobs), source.Name(), added) + } + + return allJobs, nil +}