From b4ee412218db0297fbc6ab75f3aa148aaa32657f Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 1 Feb 2026 10:14:30 -0600 Subject: [PATCH 001/109] feat(remotes) Add remote repository import package why: Enable importing repositories from GitHub, GitLab, Codeberg/Gitea/Forgejo, and AWS CodeCommit into vcspull configuration. what: - Add base.py with RemoteRepo dataclass, ImportOptions, ImportMode enum - Add HTTPClient for stdlib-only HTTP requests (urllib) - Add error hierarchy: AuthenticationError, RateLimitError, NotFoundError, etc. - Add GitHubImporter with user/org/search modes - Add GitLabImporter with group/search support (auth required for search) - Add GiteaImporter supporting Codeberg, Gitea, Forgejo instances - Add CodeCommitImporter using AWS CLI subprocess calls - Add filter_repo() for client-side filtering by language, topics, stars --- src/vcspull/_internal/remotes/__init__.py | 39 ++ src/vcspull/_internal/remotes/base.py | 494 ++++++++++++++++++++ src/vcspull/_internal/remotes/codecommit.py | 276 +++++++++++ src/vcspull/_internal/remotes/gitea.py | 286 ++++++++++++ src/vcspull/_internal/remotes/github.py | 305 ++++++++++++ src/vcspull/_internal/remotes/gitlab.py | 303 ++++++++++++ 6 files changed, 1703 insertions(+) create mode 100644 src/vcspull/_internal/remotes/__init__.py create mode 100644 src/vcspull/_internal/remotes/base.py create mode 100644 src/vcspull/_internal/remotes/codecommit.py create mode 100644 src/vcspull/_internal/remotes/gitea.py create mode 100644 src/vcspull/_internal/remotes/github.py create mode 100644 src/vcspull/_internal/remotes/gitlab.py diff --git a/src/vcspull/_internal/remotes/__init__.py b/src/vcspull/_internal/remotes/__init__.py new file mode 100644 index 000000000..293c4ec2e --- /dev/null +++ b/src/vcspull/_internal/remotes/__init__.py @@ -0,0 +1,39 @@ +"""Remote repository importing for vcspull.""" + +from __future__ import annotations + +from .base import ( + AuthenticationError, + ConfigurationError, + 
DependencyError, + ImportMode, + ImportOptions, + NotFoundError, + RateLimitError, + RemoteImportError, + RemoteRepo, + ServiceUnavailableError, + filter_repo, +) +from .codecommit import CodeCommitImporter +from .gitea import GiteaImporter +from .github import GitHubImporter +from .gitlab import GitLabImporter + +__all__ = [ + "AuthenticationError", + "CodeCommitImporter", + "ConfigurationError", + "DependencyError", + "GitHubImporter", + "GitLabImporter", + "GiteaImporter", + "ImportMode", + "ImportOptions", + "NotFoundError", + "RateLimitError", + "RemoteImportError", + "RemoteRepo", + "ServiceUnavailableError", + "filter_repo", +] diff --git a/src/vcspull/_internal/remotes/base.py b/src/vcspull/_internal/remotes/base.py new file mode 100644 index 000000000..782adac79 --- /dev/null +++ b/src/vcspull/_internal/remotes/base.py @@ -0,0 +1,494 @@ +"""Base classes and utilities for remote repository importers.""" + +from __future__ import annotations + +import enum +import json +import logging +import os +import typing as t +import urllib.error +import urllib.parse +import urllib.request +from dataclasses import dataclass, field + +log = logging.getLogger(__name__) + + +class ImportMode(enum.Enum): + """Scraping mode for remote services.""" + + USER = "user" + ORG = "org" + SEARCH = "search" + + +class RemoteImportError(Exception): + """Base exception for remote import errors.""" + + def __init__(self, message: str, service: str | None = None) -> None: + """Initialize the error. 
+ + Parameters + ---------- + message : str + Error message + service : str | None + Name of the service that raised the error + """ + super().__init__(message) + self.service = service + + +class AuthenticationError(RemoteImportError): + """Raised when authentication fails or is required.""" + + +class RateLimitError(RemoteImportError): + """Raised when API rate limit is exceeded.""" + + +class NotFoundError(RemoteImportError): + """Raised when a requested resource is not found.""" + + +class ServiceUnavailableError(RemoteImportError): + """Raised when the service is unavailable.""" + + +class ConfigurationError(RemoteImportError): + """Raised when there's a configuration error.""" + + +class DependencyError(RemoteImportError): + """Raised when a required dependency is missing.""" + + +@dataclass(frozen=True) +class RemoteRepo: + """Represents a repository from a remote service. + + Parameters + ---------- + name : str + Repository name (filesystem-safe) + clone_url : str + URL for cloning the repository + html_url : str + URL for viewing the repository in a browser + description : str | None + Repository description + language : str | None + Primary programming language + topics : list[str] + Repository topics/tags + stars : int + Star/favorite count + is_fork : bool + Whether this is a fork of another repository + is_archived : bool + Whether the repository is archived + default_branch : str + Default branch name + owner : str + Owner username or organization name + """ + + name: str + clone_url: str + html_url: str + description: str | None + language: str | None + topics: tuple[str, ...] + stars: int + is_fork: bool + is_archived: bool + default_branch: str + owner: str + + def to_vcspull_url(self) -> str: + """Return the URL formatted for vcspull config. + + Returns + ------- + str + Git URL with git+ prefix for vcspull config + + Examples + -------- + >>> repo = RemoteRepo( + ... name="test", + ... clone_url="https://github.com/user/test.git", + ... 
html_url="https://github.com/user/test", + ... description=None, + ... language=None, + ... topics=(), + ... stars=0, + ... is_fork=False, + ... is_archived=False, + ... default_branch="main", + ... owner="user", + ... ) + >>> repo.to_vcspull_url() + 'git+https://github.com/user/test.git' + """ + if self.clone_url.startswith("git+"): + return self.clone_url + return f"git+{self.clone_url}" + + def to_dict(self) -> dict[str, t.Any]: + """Convert to dictionary for JSON serialization. + + Returns + ------- + dict[str, t.Any] + Dictionary representation + + Examples + -------- + >>> repo = RemoteRepo( + ... name="test", + ... clone_url="https://github.com/user/test.git", + ... html_url="https://github.com/user/test", + ... description="A test repo", + ... language="Python", + ... topics=("cli", "tool"), + ... stars=100, + ... is_fork=False, + ... is_archived=False, + ... default_branch="main", + ... owner="user", + ... ) + >>> d = repo.to_dict() + >>> d["name"] + 'test' + >>> d["topics"] + ['cli', 'tool'] + """ + return { + "name": self.name, + "clone_url": self.clone_url, + "html_url": self.html_url, + "description": self.description, + "language": self.language, + "topics": list(self.topics), + "stars": self.stars, + "is_fork": self.is_fork, + "is_archived": self.is_archived, + "default_branch": self.default_branch, + "owner": self.owner, + } + + +@dataclass +class ImportOptions: + """Options for importing repositories from a remote service. 
+ + Parameters + ---------- + mode : ImportMode + The importing mode (user, org, or search) + target : str + Target user, org, or search query + base_url : str | None + Base URL for self-hosted instances + token : str | None + API token for authentication + include_forks : bool + Whether to include forked repositories + include_archived : bool + Whether to include archived repositories + language : str | None + Filter by programming language + topics : list[str] + Filter by topics + min_stars : int + Minimum star count (for search mode) + limit : int + Maximum number of repositories to return + """ + + mode: ImportMode = ImportMode.USER + target: str = "" + base_url: str | None = None + token: str | None = None + include_forks: bool = False + include_archived: bool = False + language: str | None = None + topics: list[str] = field(default_factory=list) + min_stars: int = 0 + limit: int = 100 + + +class HTTPClient: + """Simple HTTP client using urllib for making API requests.""" + + def __init__( + self, + base_url: str, + *, + token: str | None = None, + auth_header: str = "Authorization", + auth_prefix: str = "Bearer", + user_agent: str = "vcspull", + timeout: int = 30, + ) -> None: + """Initialize the HTTP client. + + Parameters + ---------- + base_url : str + Base URL for API requests + token : str | None + Authentication token + auth_header : str + Header name for authentication + auth_prefix : str + Prefix for the token in the auth header + user_agent : str + User-Agent header value + timeout : int + Request timeout in seconds + """ + self.base_url = base_url.rstrip("/") + self.token = token + self.auth_header = auth_header + self.auth_prefix = auth_prefix + self.user_agent = user_agent + self.timeout = timeout + + def _build_headers(self) -> dict[str, str]: + """Build request headers. 
+ + Returns + ------- + dict[str, str] + Request headers + """ + headers = { + "User-Agent": self.user_agent, + "Accept": "application/json", + } + if self.token: + if self.auth_prefix: + headers[self.auth_header] = f"{self.auth_prefix} {self.token}" + else: + headers[self.auth_header] = self.token + return headers + + def get( + self, + endpoint: str, + *, + params: dict[str, str | int] | None = None, + service_name: str = "remote", + ) -> tuple[t.Any, dict[str, str]]: + """Make a GET request to the API. + + Parameters + ---------- + endpoint : str + API endpoint (will be appended to base_url) + params : dict | None + Query parameters + service_name : str + Service name for error messages + + Returns + ------- + tuple[Any, dict[str, str]] + Parsed JSON response and response headers + + Raises + ------ + AuthenticationError + When authentication fails (401) + RateLimitError + When rate limit is exceeded (403/429) + NotFoundError + When resource is not found (404) + ServiceUnavailableError + When service is unavailable (5xx) + """ + url = f"{self.base_url}{endpoint}" + + if params: + query_string = "&".join( + f"{k}={urllib.parse.quote(str(v))}" for k, v in params.items() + ) + url = f"{url}?{query_string}" + + headers = self._build_headers() + request = urllib.request.Request(url, headers=headers) + + log.debug("GET %s", url) + + try: + with urllib.request.urlopen(request, timeout=self.timeout) as response: + body = response.read().decode("utf-8") + response_headers = {k.lower(): v for k, v in response.getheaders()} + return json.loads(body), response_headers + except urllib.error.HTTPError as exc: + self._handle_http_error(exc, service_name) + except urllib.error.URLError as exc: + msg = f"Connection error: {exc.reason}" + raise ServiceUnavailableError(msg, service=service_name) from exc + except json.JSONDecodeError as exc: + msg = f"Invalid JSON response from {service_name}" + raise ServiceUnavailableError(msg, service=service_name) from exc + + # Should never 
reach here, but for type checker + msg = "Unexpected error" + raise ServiceUnavailableError(msg, service=service_name) + + def _handle_http_error( + self, + exc: urllib.error.HTTPError, + service_name: str, + ) -> t.NoReturn: + """Handle HTTP error responses. + + Parameters + ---------- + exc : urllib.error.HTTPError + The HTTP error + service_name : str + Service name for error messages + + Raises + ------ + AuthenticationError + When authentication fails (401) + RateLimitError + When rate limit is exceeded (403/429) + NotFoundError + When resource is not found (404) + ServiceUnavailableError + When service is unavailable (5xx) + """ + try: + body = exc.read().decode("utf-8") + error_data = json.loads(body) + message = error_data.get("message", str(exc)) + except (json.JSONDecodeError, UnicodeDecodeError): + message = str(exc) + + if exc.code == 401: + msg = f"Authentication failed for {service_name}: {message}" + raise AuthenticationError(msg, service=service_name) from exc + + if exc.code == 403: + if "rate limit" in message.lower(): + msg = f"Rate limit exceeded for {service_name}: {message}" + raise RateLimitError(msg, service=service_name) from exc + msg = f"Access denied for {service_name}: {message}" + raise AuthenticationError(msg, service=service_name) from exc + + if exc.code == 404: + msg = f"Resource not found on {service_name}: {message}" + raise NotFoundError(msg, service=service_name) from exc + + if exc.code == 429: + msg = f"Rate limit exceeded for {service_name}: {message}" + raise RateLimitError(msg, service=service_name) from exc + + if exc.code >= 500: + msg = f"{service_name} service unavailable: {message}" + raise ServiceUnavailableError(msg, service=service_name) from exc + + msg = f"HTTP {exc.code} from {service_name}: {message}" + raise ServiceUnavailableError(msg, service=service_name) from exc + + +def get_token_from_env(*env_vars: str) -> str | None: + """Get an API token from environment variables. 
+ + Parameters + ---------- + *env_vars : str + Environment variable names to check in order + + Returns + ------- + str | None + The token if found, None otherwise + + Examples + -------- + >>> import os + >>> os.environ["TEST_TOKEN"] = "secret" + >>> get_token_from_env("TEST_TOKEN", "OTHER_TOKEN") + 'secret' + >>> get_token_from_env("NONEXISTENT_TOKEN") + >>> del os.environ["TEST_TOKEN"] + """ + for var in env_vars: + token = os.environ.get(var) + if token: + return token + return None + + +def filter_repo( + repo: RemoteRepo, + options: ImportOptions, +) -> bool: + """Check if a repository passes the filter criteria. + + Parameters + ---------- + repo : RemoteRepo + The repository to check + options : ImportOptions + Filter options + + Returns + ------- + bool + True if the repository passes all filters + + Examples + -------- + >>> repo = RemoteRepo( + ... name="test", + ... clone_url="https://github.com/user/test.git", + ... html_url="https://github.com/user/test", + ... description=None, + ... language="Python", + ... topics=("cli",), + ... stars=50, + ... is_fork=False, + ... is_archived=False, + ... default_branch="main", + ... owner="user", + ... 
) + >>> options = ImportOptions(include_forks=False, include_archived=False) + >>> filter_repo(repo, options) + True + >>> options = ImportOptions(language="JavaScript") + >>> filter_repo(repo, options) + False + """ + # Check fork filter + if repo.is_fork and not options.include_forks: + return False + + # Check archived filter + if repo.is_archived and not options.include_archived: + return False + + # Check language filter + if options.language and ( + not repo.language or repo.language.lower() != options.language.lower() + ): + return False + + # Check topics filter + if options.topics: + repo_topics_lower = {t.lower() for t in repo.topics} + required_topics_lower = {t.lower() for t in options.topics} + if not required_topics_lower.issubset(repo_topics_lower): + return False + + # Check minimum stars + return not (options.min_stars > 0 and repo.stars < options.min_stars) diff --git a/src/vcspull/_internal/remotes/codecommit.py b/src/vcspull/_internal/remotes/codecommit.py new file mode 100644 index 000000000..a7804160e --- /dev/null +++ b/src/vcspull/_internal/remotes/codecommit.py @@ -0,0 +1,276 @@ +"""AWS CodeCommit repository importer for vcspull.""" + +from __future__ import annotations + +import json +import logging +import subprocess +import typing as t + +from .base import ( + AuthenticationError, + ConfigurationError, + DependencyError, + ImportOptions, + RemoteRepo, +) + +log = logging.getLogger(__name__) + + +class CodeCommitImporter: + """Importer for AWS CodeCommit repositories. + + Uses AWS CLI to list and fetch repository information. + Requires AWS CLI to be installed and configured. + + Examples + -------- + >>> importer = CodeCommitImporter(region="us-east-1") + >>> importer.service_name + 'CodeCommit' + """ + + service_name: str = "CodeCommit" + + def __init__( + self, + region: str | None = None, + profile: str | None = None, + ) -> None: + """Initialize the CodeCommit importer. + + Parameters + ---------- + region : str | None + AWS region. 
If not provided, uses AWS CLI default. + profile : str | None + AWS profile name. If not provided, uses default profile. + """ + self._region = region + self._profile = profile + self._check_aws_cli() + + def _check_aws_cli(self) -> None: + """Check if AWS CLI is installed and accessible. + + Raises + ------ + DependencyError + When AWS CLI is not installed + """ + try: + result = subprocess.run( + ["aws", "--version"], + capture_output=True, + text=True, + check=False, + ) + if result.returncode != 0: + msg = ( + "AWS CLI not installed or not accessible. " + "Please install it with: pip install awscli" + ) + raise DependencyError(msg, service=self.service_name) + except FileNotFoundError as exc: + msg = "AWS CLI not installed. Please install it with: pip install awscli" + raise DependencyError(msg, service=self.service_name) from exc + + def _build_aws_command(self, *args: str) -> list[str]: + """Build AWS CLI command with region and profile options. + + Parameters + ---------- + *args : str + AWS CLI arguments + + Returns + ------- + list[str] + Complete command list + """ + cmd = ["aws"] + if self._region: + cmd.extend(["--region", self._region]) + if self._profile: + cmd.extend(["--profile", self._profile]) + cmd.extend(args) + return cmd + + def _run_aws_command(self, *args: str) -> dict[str, t.Any]: + """Run an AWS CLI command and return parsed JSON output. 
+ + Parameters + ---------- + *args : str + AWS CLI arguments + + Returns + ------- + dict + Parsed JSON output + + Raises + ------ + AuthenticationError + When AWS credentials are missing or invalid + ConfigurationError + When region is invalid or endpoint unreachable + """ + cmd = self._build_aws_command(*args) + log.debug("Running: %s", " ".join(cmd)) + + try: + result = subprocess.run( + cmd, + capture_output=True, + text=True, + check=False, + ) + except FileNotFoundError as exc: + msg = "AWS CLI not found" + raise DependencyError(msg, service=self.service_name) from exc + + if result.returncode != 0: + stderr = result.stderr.lower() + if "unable to locate credentials" in stderr: + msg = ( + "AWS credentials not configured. Run 'aws configure' or " + "set AWS_ACCESS_KEY_ID/AWS_SECRET_ACCESS_KEY." + ) + raise AuthenticationError(msg, service=self.service_name) + if "could not connect to the endpoint" in stderr: + msg = ( + f"Could not connect to CodeCommit. Check your region setting. " + f"Error: {result.stderr}" + ) + raise ConfigurationError(msg, service=self.service_name) + if "invalid" in stderr and "region" in stderr: + msg = f"Invalid AWS region. Error: {result.stderr}" + raise ConfigurationError(msg, service=self.service_name) + msg = f"AWS CLI error: {result.stderr}" + raise ConfigurationError(msg, service=self.service_name) + + try: + return json.loads(result.stdout) if result.stdout.strip() else {} + except json.JSONDecodeError as exc: + msg = f"Invalid JSON from AWS CLI: {result.stdout}" + raise ConfigurationError(msg, service=self.service_name) from exc + + @property + def is_authenticated(self) -> bool: + """Check if AWS credentials are configured. 
+ + Returns + ------- + bool + True if credentials appear to be configured + """ + try: + self._run_aws_command("sts", "get-caller-identity") + except (AuthenticationError, ConfigurationError): + return False + else: + return True + + def fetch_repos(self, options: ImportOptions) -> t.Iterator[RemoteRepo]: + """Fetch repositories from AWS CodeCommit. + + Parameters + ---------- + options : ImportOptions + Import options (target is used as optional name filter) + + Yields + ------ + RemoteRepo + Repository information + + Raises + ------ + AuthenticationError + When AWS credentials are missing + ConfigurationError + When region is invalid + DependencyError + When AWS CLI is not installed + """ + # List all repositories + data = self._run_aws_command("codecommit", "list-repositories") + repositories = data.get("repositories", []) + + if not repositories: + return + + # Filter by name if target is provided + if options.target: + target_lower = options.target.lower() + repositories = [ + r + for r in repositories + if target_lower in r.get("repositoryName", "").lower() + ] + + # Batch get repository details (up to 25 at a time) + count = 0 + batch_size = 25 + + for i in range(0, len(repositories), batch_size): + if count >= options.limit: + break + + batch = repositories[i : i + batch_size] + repo_names = [r["repositoryName"] for r in batch] + + # Get detailed info for batch + details = self._run_aws_command( + "codecommit", + "batch-get-repositories", + "--repository-names", + *repo_names, + ) + + for repo_metadata in details.get("repositories", []): + if count >= options.limit: + break + + repo = self._parse_repo(repo_metadata) + yield repo + count += 1 + + def _parse_repo(self, data: dict[str, t.Any]) -> RemoteRepo: + """Parse CodeCommit repository metadata into RemoteRepo. 
+ + Parameters + ---------- + data : dict + CodeCommit repository metadata + + Returns + ------- + RemoteRepo + Parsed repository information + """ + repo_name = data.get("repositoryName", "") + account_id = data.get("accountId", "") + + # Build console URL + region = self._region or "us-east-1" + html_url = ( + f"https://{region}.console.aws.amazon.com/codesuite/codecommit/" + f"repositories/{repo_name}/browse" + ) + + return RemoteRepo( + name=repo_name, + clone_url=data.get("cloneUrlHttp", ""), + html_url=html_url, + description=data.get("repositoryDescription"), + language=None, # CodeCommit doesn't track language + topics=(), # CodeCommit doesn't have topics + stars=0, # CodeCommit doesn't have stars + is_fork=False, # CodeCommit doesn't have forks + is_archived=False, # CodeCommit doesn't have archived state + default_branch=data.get("defaultBranch", "main"), + owner=account_id, + ) diff --git a/src/vcspull/_internal/remotes/gitea.py b/src/vcspull/_internal/remotes/gitea.py new file mode 100644 index 000000000..a76beb2d0 --- /dev/null +++ b/src/vcspull/_internal/remotes/gitea.py @@ -0,0 +1,286 @@ +"""Gitea/Forgejo/Codeberg repository importer for vcspull.""" + +from __future__ import annotations + +import logging +import typing as t + +from .base import ( + HTTPClient, + ImportMode, + ImportOptions, + RemoteRepo, + filter_repo, + get_token_from_env, +) + +log = logging.getLogger(__name__) + +CODEBERG_API_URL = "https://codeberg.org" +DEFAULT_PER_PAGE = 50 # Gitea's default is 50 + + +class GiteaImporter: + """Importer for Gitea, Forgejo, and Codeberg repositories. 
+ + Supports three modes: + - USER: Fetch repositories for a user + - ORG: Fetch repositories for an organization + - SEARCH: Search for repositories by query + + Examples + -------- + >>> importer = GiteaImporter(base_url="https://codeberg.org") + >>> importer.service_name + 'Gitea' + """ + + service_name: str = "Gitea" + + def __init__( + self, + token: str | None = None, + base_url: str | None = None, + ) -> None: + """Initialize the Gitea/Forgejo/Codeberg importer. + + Parameters + ---------- + token : str | None + API token. If not provided, will try service-specific env vars. + base_url : str | None + Base URL for the Gitea instance. Required for generic Gitea. + Defaults to Codeberg if not specified. + """ + self._base_url = (base_url or CODEBERG_API_URL).rstrip("/") + + # Determine token from environment based on service + self._token: str | None + if token: + self._token = token + elif "codeberg.org" in self._base_url.lower(): + self._token = get_token_from_env("CODEBERG_TOKEN", "GITEA_TOKEN") + elif "forgejo" in self._base_url.lower(): + self._token = get_token_from_env("FORGEJO_TOKEN", "GITEA_TOKEN") + else: + self._token = get_token_from_env("GITEA_TOKEN") + + self._client = HTTPClient( + f"{self._base_url}/api/v1", + token=self._token, + auth_header="Authorization", + auth_prefix="token", # Gitea uses "token " + user_agent="vcspull", + ) + + @property + def is_authenticated(self) -> bool: + """Check if the importer has authentication configured. + + Returns + ------- + bool + True if a token is configured + """ + return self._token is not None + + def fetch_repos(self, options: ImportOptions) -> t.Iterator[RemoteRepo]: + """Fetch repositories from Gitea/Forgejo/Codeberg. 
+ + Parameters + ---------- + options : ImportOptions + Scraping options + + Yields + ------ + RemoteRepo + Repository information + + Raises + ------ + AuthenticationError + When authentication fails + RateLimitError + When rate limit is exceeded + NotFoundError + When user/org is not found + """ + if options.mode == ImportMode.USER: + yield from self._fetch_user(options) + elif options.mode == ImportMode.ORG: + yield from self._fetch_org(options) + elif options.mode == ImportMode.SEARCH: + yield from self._fetch_search(options) + + def _fetch_user(self, options: ImportOptions) -> t.Iterator[RemoteRepo]: + """Fetch repositories for a user. + + Parameters + ---------- + options : ImportOptions + Scraping options + + Yields + ------ + RemoteRepo + Repository information + """ + endpoint = f"/users/{options.target}/repos" + yield from self._paginate_repos(endpoint, options) + + def _fetch_org(self, options: ImportOptions) -> t.Iterator[RemoteRepo]: + """Fetch repositories for an organization. + + Parameters + ---------- + options : ImportOptions + Scraping options + + Yields + ------ + RemoteRepo + Repository information + """ + endpoint = f"/orgs/{options.target}/repos" + yield from self._paginate_repos(endpoint, options) + + def _fetch_search(self, options: ImportOptions) -> t.Iterator[RemoteRepo]: + """Search for repositories. 
+ + Parameters + ---------- + options : ImportOptions + Scraping options + + Yields + ------ + RemoteRepo + Repository information + """ + endpoint = "/repos/search" + page = 1 + count = 0 + + while count < options.limit: + params: dict[str, str | int] = { + "q": options.target, + "limit": min(DEFAULT_PER_PAGE, options.limit - count), + "page": page, + "sort": "stars", + "order": "desc", + } + + if not options.include_archived: + params["archived"] = "false" + + if not options.include_forks: + params["fork"] = "false" + + data, _headers = self._client.get( + endpoint, + params=params, + service_name=self.service_name, + ) + + # Gitea search returns {"ok": true, "data": [...]} or just [...] + items = data.get("data", []) if isinstance(data, dict) else data + + if not items: + break + + for item in items: + if count >= options.limit: + break + + repo = self._parse_repo(item) + if filter_repo(repo, options): + yield repo + count += 1 + + # Check if there are more pages + if len(items) < DEFAULT_PER_PAGE: + break + + page += 1 + + def _paginate_repos( + self, + endpoint: str, + options: ImportOptions, + ) -> t.Iterator[RemoteRepo]: + """Paginate through repository listing endpoints. 
+ + Parameters + ---------- + endpoint : str + API endpoint + options : ImportOptions + Scraping options + + Yields + ------ + RemoteRepo + Repository information + """ + page = 1 + count = 0 + + while count < options.limit: + params: dict[str, str | int] = { + "limit": min(DEFAULT_PER_PAGE, options.limit - count), + "page": page, + } + + data, _headers = self._client.get( + endpoint, + params=params, + service_name=self.service_name, + ) + + if not data: + break + + for item in data: + if count >= options.limit: + break + + repo = self._parse_repo(item) + if filter_repo(repo, options): + yield repo + count += 1 + + # Check if there are more pages + if len(data) < DEFAULT_PER_PAGE: + break + + page += 1 + + def _parse_repo(self, data: dict[str, t.Any]) -> RemoteRepo: + """Parse Gitea API response into RemoteRepo. + + Parameters + ---------- + data : dict + Gitea API repository data + + Returns + ------- + RemoteRepo + Parsed repository information + """ + owner_data = data.get("owner", {}) + + return RemoteRepo( + name=data.get("name", ""), + clone_url=data.get("clone_url", ""), + html_url=data.get("html_url", ""), + description=data.get("description"), + language=data.get("language"), + topics=tuple(data.get("topics", [])), + stars=data.get("stars_count", 0), # Note: Gitea uses stars_count + is_fork=data.get("fork", False), + is_archived=data.get("archived", False), + default_branch=data.get("default_branch", "main"), + owner=owner_data.get("login", owner_data.get("username", "")), + ) diff --git a/src/vcspull/_internal/remotes/github.py b/src/vcspull/_internal/remotes/github.py new file mode 100644 index 000000000..dbd6d2b06 --- /dev/null +++ b/src/vcspull/_internal/remotes/github.py @@ -0,0 +1,305 @@ +"""GitHub repository importer for vcspull.""" + +from __future__ import annotations + +import logging +import typing as t + +from .base import ( + HTTPClient, + ImportMode, + ImportOptions, + RemoteRepo, + filter_repo, + get_token_from_env, +) + +log = 
logging.getLogger(__name__) + +GITHUB_API_URL = "https://api.github.com" +DEFAULT_PER_PAGE = 100 + + +class GitHubImporter: + """Importer for GitHub repositories. + + Supports three modes: + - USER: Fetch repositories for a user + - ORG: Fetch repositories for an organization + - SEARCH: Search for repositories by query + + Examples + -------- + >>> importer = GitHubImporter() + >>> importer.service_name + 'GitHub' + """ + + service_name: str = "GitHub" + + def __init__( + self, + token: str | None = None, + base_url: str | None = None, + ) -> None: + """Initialize the GitHub importer. + + Parameters + ---------- + token : str | None + GitHub API token. If not provided, will try GITHUB_TOKEN env var. + base_url : str | None + Base URL for GitHub Enterprise. Defaults to api.github.com. + """ + self._token = token or get_token_from_env("GITHUB_TOKEN", "GH_TOKEN") + self._base_url = (base_url or GITHUB_API_URL).rstrip("/") + self._client = HTTPClient( + self._base_url, + token=self._token, + auth_header="Authorization", + auth_prefix="Bearer", + user_agent="vcspull", + ) + + @property + def is_authenticated(self) -> bool: + """Check if the importer has authentication configured. + + Returns + ------- + bool + True if a token is configured + """ + return self._token is not None + + def fetch_repos(self, options: ImportOptions) -> t.Iterator[RemoteRepo]: + """Fetch repositories from GitHub. 
+ + Parameters + ---------- + options : ImportOptions + Scraping options + + Yields + ------ + RemoteRepo + Repository information + + Raises + ------ + AuthenticationError + When authentication fails + RateLimitError + When rate limit is exceeded + NotFoundError + When user/org is not found + """ + if options.mode == ImportMode.USER: + yield from self._fetch_user(options) + elif options.mode == ImportMode.ORG: + yield from self._fetch_org(options) + elif options.mode == ImportMode.SEARCH: + yield from self._fetch_search(options) + + def _fetch_user(self, options: ImportOptions) -> t.Iterator[RemoteRepo]: + """Fetch repositories for a user. + + Parameters + ---------- + options : ImportOptions + Scraping options + + Yields + ------ + RemoteRepo + Repository information + """ + endpoint = f"/users/{options.target}/repos" + yield from self._paginate_repos(endpoint, options) + + def _fetch_org(self, options: ImportOptions) -> t.Iterator[RemoteRepo]: + """Fetch repositories for an organization. + + Parameters + ---------- + options : ImportOptions + Scraping options + + Yields + ------ + RemoteRepo + Repository information + """ + endpoint = f"/orgs/{options.target}/repos" + yield from self._paginate_repos(endpoint, options) + + def _fetch_search(self, options: ImportOptions) -> t.Iterator[RemoteRepo]: + """Search for repositories. 
+ + Parameters + ---------- + options : ImportOptions + Scraping options + + Yields + ------ + RemoteRepo + Repository information + """ + query_parts = [options.target] + + if options.language: + query_parts.append(f"language:{options.language}") + + if options.min_stars > 0: + query_parts.append(f"stars:>={options.min_stars}") + + query = " ".join(query_parts) + endpoint = "/search/repositories" + page = 1 + count = 0 + + while count < options.limit: + params: dict[str, str | int] = { + "q": query, + "per_page": min(DEFAULT_PER_PAGE, options.limit - count), + "page": page, + "sort": "stars", + "order": "desc", + } + + data, headers = self._client.get( + endpoint, + params=params, + service_name=self.service_name, + ) + + self._log_rate_limit(headers) + + items = data.get("items", []) + if not items: + break + + for item in items: + if count >= options.limit: + break + + repo = self._parse_repo(item) + if filter_repo(repo, options): + yield repo + count += 1 + + # Check if there are more pages + if len(items) < DEFAULT_PER_PAGE: + break + + page += 1 + + def _paginate_repos( + self, + endpoint: str, + options: ImportOptions, + ) -> t.Iterator[RemoteRepo]: + """Paginate through repository listing endpoints. 
+ + Parameters + ---------- + endpoint : str + API endpoint + options : ImportOptions + Scraping options + + Yields + ------ + RemoteRepo + Repository information + """ + page = 1 + count = 0 + + while count < options.limit: + params: dict[str, str | int] = { + "per_page": min(DEFAULT_PER_PAGE, options.limit - count), + "page": page, + "sort": "updated", + "direction": "desc", + } + + data, headers = self._client.get( + endpoint, + params=params, + service_name=self.service_name, + ) + + self._log_rate_limit(headers) + + if not data: + break + + for item in data: + if count >= options.limit: + break + + repo = self._parse_repo(item) + if filter_repo(repo, options): + yield repo + count += 1 + + # Check if there are more pages + if len(data) < DEFAULT_PER_PAGE: + break + + page += 1 + + def _parse_repo(self, data: dict[str, t.Any]) -> RemoteRepo: + """Parse GitHub API response into RemoteRepo. + + Parameters + ---------- + data : dict + GitHub API repository data + + Returns + ------- + RemoteRepo + Parsed repository information + """ + return RemoteRepo( + name=data["name"], + clone_url=data["clone_url"], + html_url=data["html_url"], + description=data.get("description"), + language=data.get("language"), + topics=tuple(data.get("topics", [])), + stars=data.get("stargazers_count", 0), + is_fork=data.get("fork", False), + is_archived=data.get("archived", False), + default_branch=data.get("default_branch", "main"), + owner=data.get("owner", {}).get("login", ""), + ) + + def _log_rate_limit(self, headers: dict[str, str]) -> None: + """Log rate limit information from response headers. 
+ + Parameters + ---------- + headers : dict[str, str] + Response headers + """ + remaining = headers.get("x-ratelimit-remaining") + limit = headers.get("x-ratelimit-limit") + + if remaining is not None and limit is not None: + remaining_int = int(remaining) + if remaining_int < 10: + log.warning( + "GitHub API rate limit low: %s/%s remaining", + remaining, + limit, + ) + else: + log.debug( + "GitHub API rate limit: %s/%s remaining", + remaining, + limit, + ) diff --git a/src/vcspull/_internal/remotes/gitlab.py b/src/vcspull/_internal/remotes/gitlab.py new file mode 100644 index 000000000..c39477c68 --- /dev/null +++ b/src/vcspull/_internal/remotes/gitlab.py @@ -0,0 +1,303 @@ +"""GitLab repository importer for vcspull.""" + +from __future__ import annotations + +import logging +import typing as t + +from .base import ( + AuthenticationError, + HTTPClient, + ImportMode, + ImportOptions, + RemoteRepo, + filter_repo, + get_token_from_env, +) + +log = logging.getLogger(__name__) + +GITLAB_API_URL = "https://gitlab.com" +DEFAULT_PER_PAGE = 100 + + +class GitLabImporter: + """Importer for GitLab repositories. + + Supports three modes: + - USER: Fetch repositories for a user + - ORG: Fetch repositories for a group (organization) + - SEARCH: Search for repositories (requires authentication) + + Examples + -------- + >>> importer = GitLabImporter() + >>> importer.service_name + 'GitLab' + """ + + service_name: str = "GitLab" + + def __init__( + self, + token: str | None = None, + base_url: str | None = None, + ) -> None: + """Initialize the GitLab importer. + + Parameters + ---------- + token : str | None + GitLab API token. If not provided, will try GITLAB_TOKEN env var. + base_url : str | None + Base URL for self-hosted GitLab instances. Defaults to gitlab.com. 
+ """ + self._token = token or get_token_from_env("GITLAB_TOKEN", "GL_TOKEN") + self._base_url = (base_url or GITLAB_API_URL).rstrip("/") + self._client = HTTPClient( + f"{self._base_url}/api/v4", + token=self._token, + auth_header="PRIVATE-TOKEN", + auth_prefix="", # GitLab uses token directly without prefix + user_agent="vcspull", + ) + + @property + def is_authenticated(self) -> bool: + """Check if the importer has authentication configured. + + Returns + ------- + bool + True if a token is configured + """ + return self._token is not None + + def fetch_repos(self, options: ImportOptions) -> t.Iterator[RemoteRepo]: + """Fetch repositories from GitLab. + + Parameters + ---------- + options : ImportOptions + Scraping options + + Yields + ------ + RemoteRepo + Repository information + + Raises + ------ + AuthenticationError + When authentication fails or is required for search + RateLimitError + When rate limit is exceeded + NotFoundError + When user/group is not found + """ + if options.mode == ImportMode.USER: + yield from self._fetch_user(options) + elif options.mode == ImportMode.ORG: + yield from self._fetch_group(options) + elif options.mode == ImportMode.SEARCH: + yield from self._fetch_search(options) + + def _fetch_user(self, options: ImportOptions) -> t.Iterator[RemoteRepo]: + """Fetch repositories for a user. + + Parameters + ---------- + options : ImportOptions + Scraping options + + Yields + ------ + RemoteRepo + Repository information + """ + endpoint = f"/users/{options.target}/projects" + yield from self._paginate_repos(endpoint, options) + + def _fetch_group(self, options: ImportOptions) -> t.Iterator[RemoteRepo]: + """Fetch repositories for a group (organization). 
+ + Parameters + ---------- + options : ImportOptions + Scraping options + + Yields + ------ + RemoteRepo + Repository information + """ + # URL-encode the group name in case it contains slashes (subgroups) + target = options.target.replace("/", "%2F") + endpoint = f"/groups/{target}/projects" + yield from self._paginate_repos(endpoint, options, include_subgroups=True) + + def _fetch_search(self, options: ImportOptions) -> t.Iterator[RemoteRepo]: + """Search for repositories. + + Note: GitLab search API requires authentication. + + Parameters + ---------- + options : ImportOptions + Scraping options + + Yields + ------ + RemoteRepo + Repository information + + Raises + ------ + AuthenticationError + When not authenticated (GitLab search requires auth) + """ + if not self.is_authenticated: + msg = ( + "GitLab search API requires authentication. Please provide " + "a token via --token or GITLAB_TOKEN environment variable." + ) + raise AuthenticationError(msg, service=self.service_name) + + endpoint = "/search" + page = 1 + count = 0 + + while count < options.limit: + params: dict[str, str | int] = { + "scope": "projects", + "search": options.target, + "per_page": min(DEFAULT_PER_PAGE, options.limit - count), + "page": page, + } + + data, _headers = self._client.get( + endpoint, + params=params, + service_name=self.service_name, + ) + + if not data: + break + + for item in data: + if count >= options.limit: + break + + repo = self._parse_repo(item) + if filter_repo(repo, options): + yield repo + count += 1 + + # Check if there are more pages + if len(data) < DEFAULT_PER_PAGE: + break + + page += 1 + + def _paginate_repos( + self, + endpoint: str, + options: ImportOptions, + *, + include_subgroups: bool = False, + ) -> t.Iterator[RemoteRepo]: + """Paginate through project listing endpoints. 
+ + Parameters + ---------- + endpoint : str + API endpoint + options : ImportOptions + Scraping options + include_subgroups : bool + Whether to include projects from subgroups + + Yields + ------ + RemoteRepo + Repository information + """ + page = 1 + count = 0 + + while count < options.limit: + params: dict[str, str | int] = { + "per_page": min(DEFAULT_PER_PAGE, options.limit - count), + "page": page, + "order_by": "last_activity_at", + "sort": "desc", + } + + if include_subgroups: + params["include_subgroups"] = "true" + + if options.include_archived: + params["archived"] = "true" + else: + params["archived"] = "false" + + data, _headers = self._client.get( + endpoint, + params=params, + service_name=self.service_name, + ) + + if not data: + break + + for item in data: + if count >= options.limit: + break + + repo = self._parse_repo(item) + if filter_repo(repo, options): + yield repo + count += 1 + + # Check if there are more pages + if len(data) < DEFAULT_PER_PAGE: + break + + page += 1 + + def _parse_repo(self, data: dict[str, t.Any]) -> RemoteRepo: + """Parse GitLab API response into RemoteRepo. 
+ + Parameters + ---------- + data : dict + GitLab API project data + + Returns + ------- + RemoteRepo + Parsed repository information + """ + # Use 'path' instead of 'name' for filesystem-safe name + name = data.get("path", data.get("name", "")) + + # Determine owner from namespace + namespace = data.get("namespace", {}) + owner = namespace.get("path", namespace.get("name", "")) + + # Check if it's a fork + is_fork = data.get("forked_from_project") is not None + + return RemoteRepo( + name=name, + clone_url=data.get("http_url_to_repo", ""), + html_url=data.get("web_url", ""), + description=data.get("description"), + language=None, # GitLab doesn't return language in list endpoints + topics=tuple(data.get("topics", data.get("tag_list", []))), + stars=data.get("star_count", 0), + is_fork=is_fork, + is_archived=data.get("archived", False), + default_branch=data.get("default_branch", "main"), + owner=owner, + ) From 8262e9ec2f9060c1699f98436721096b757f778c Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 1 Feb 2026 10:14:39 -0600 Subject: [PATCH 002/109] feat(cli/import) Add vcspull import command why: Allow users to import repositories from remote services directly into their vcspull configuration without manual entry. 
what: - Add create_import_subparser() for CLI argument handling - Add import_repos() main function with full import workflow - Support services: github, gitlab, codeberg, gitea, forgejo, codecommit - Add service aliases (gh, gl, cb, cc, aws) - Add filtering: --language, --topics, --min-stars, --archived, --forks - Add output modes: human-readable, --json, --ndjson - Add --dry-run and --yes options for confirmation control - Require --workspace flag (no default guessing) --- src/vcspull/cli/import_repos.py | 600 ++++++++++++++++++++++++++++++++ 1 file changed, 600 insertions(+) create mode 100644 src/vcspull/cli/import_repos.py diff --git a/src/vcspull/cli/import_repos.py b/src/vcspull/cli/import_repos.py new file mode 100644 index 000000000..d5983a033 --- /dev/null +++ b/src/vcspull/cli/import_repos.py @@ -0,0 +1,600 @@ +"""Import repositories from remote services for vcspull.""" + +from __future__ import annotations + +import argparse +import logging +import pathlib +import typing as t + +from colorama import Fore, Style + +from vcspull._internal.private_path import PrivatePath +from vcspull._internal.remotes import ( + AuthenticationError, + CodeCommitImporter, + ConfigurationError, + DependencyError, + GiteaImporter, + GitHubImporter, + GitLabImporter, + ImportMode, + ImportOptions, + NotFoundError, + RateLimitError, + RemoteImportError, + RemoteRepo, + ServiceUnavailableError, +) +from vcspull.config import ( + find_home_config_files, + save_config_yaml, + workspace_root_label, +) + +from ._colors import Colors, get_color_mode +from ._output import OutputFormatter, get_output_mode + +log = logging.getLogger(__name__) + +SERVICE_ALIASES: dict[str, str] = { + "github": "github", + "gh": "github", + "gitlab": "gitlab", + "gl": "gitlab", + "codeberg": "codeberg", + "cb": "codeberg", + "gitea": "gitea", + "forgejo": "forgejo", + "codecommit": "codecommit", + "cc": "codecommit", + "aws": "codecommit", +} + +SERVICES_REQUIRING_URL = {"gitea", "forgejo"} + + +def 
create_import_subparser(parser: argparse.ArgumentParser) -> None: + """Create ``vcspull import`` argument subparser. + + Parameters + ---------- + parser : argparse.ArgumentParser + The parser to configure + """ + parser.add_argument( + "service", + metavar="SERVICE", + help="Remote service: github, gitlab, codeberg, gitea, forgejo, codecommit", + ) + parser.add_argument( + "target", + metavar="TARGET", + nargs="?", + default="", + help="User, org name, or search query (optional for codecommit)", + ) + parser.add_argument( + "-w", + "--workspace", + dest="workspace", + metavar="DIR", + required=True, + help="Workspace root directory (REQUIRED)", + ) + parser.add_argument( + "-m", + "--mode", + dest="mode", + choices=["user", "org", "search"], + default="user", + help="Import mode: user (default), org, or search", + ) + parser.add_argument( + "--url", + dest="base_url", + metavar="URL", + help="Base URL for self-hosted instances (required for gitea/forgejo)", + ) + parser.add_argument( + "--token", + dest="token", + metavar="TOKEN", + help="API token (overrides environment variable)", + ) + parser.add_argument( + "--region", + dest="region", + metavar="REGION", + help="AWS region for CodeCommit", + ) + parser.add_argument( + "--profile", + dest="profile", + metavar="PROFILE", + help="AWS profile for CodeCommit", + ) + + # Filtering options + filter_group = parser.add_argument_group("filtering") + filter_group.add_argument( + "-l", + "--language", + dest="language", + metavar="LANG", + help="Filter by programming language", + ) + filter_group.add_argument( + "--topics", + dest="topics", + metavar="TOPICS", + help="Filter by topics (comma-separated)", + ) + filter_group.add_argument( + "--min-stars", + dest="min_stars", + type=int, + default=0, + metavar="N", + help="Minimum stars (for search mode)", + ) + filter_group.add_argument( + "--archived", + dest="include_archived", + action="store_true", + help="Include archived repositories", + ) + 
filter_group.add_argument( + "--forks", + dest="include_forks", + action="store_true", + help="Include forked repositories", + ) + filter_group.add_argument( + "--limit", + dest="limit", + type=int, + default=100, + metavar="N", + help="Maximum repositories to fetch (default: 100)", + ) + + # Output options + output_group = parser.add_argument_group("output") + output_group.add_argument( + "-f", + "--file", + dest="config", + metavar="FILE", + help="Config file to write to (default: ~/.vcspull.yaml)", + ) + output_group.add_argument( + "--dry-run", + "-n", + action="store_true", + help="Preview without writing to config file", + ) + output_group.add_argument( + "--yes", + "-y", + action="store_true", + help="Skip confirmation prompt", + ) + output_group.add_argument( + "--json", + action="store_true", + dest="output_json", + help="Output as JSON", + ) + output_group.add_argument( + "--ndjson", + action="store_true", + dest="output_ndjson", + help="Output as NDJSON (one JSON per line)", + ) + output_group.add_argument( + "--color", + choices=["auto", "always", "never"], + default="auto", + help="When to use colors (default: auto)", + ) + + +def _get_importer( + service: str, + *, + token: str | None, + base_url: str | None, + region: str | None, + profile: str | None, +) -> GitHubImporter | GitLabImporter | GiteaImporter | CodeCommitImporter: + """Create the appropriate importer for the service. 
+ + Parameters + ---------- + service : str + Service name + token : str | None + API token + base_url : str | None + Base URL for self-hosted instances + region : str | None + AWS region (for CodeCommit) + profile : str | None + AWS profile (for CodeCommit) + + Returns + ------- + Importer instance + + Raises + ------ + ValueError + When service is unknown or missing required arguments + """ + normalized = SERVICE_ALIASES.get(service.lower()) + if normalized is None: + msg = f"Unknown service: {service}" + raise ValueError(msg) + + if normalized == "github": + return GitHubImporter(token=token, base_url=base_url) + + if normalized == "gitlab": + return GitLabImporter(token=token, base_url=base_url) + + if normalized == "codeberg": + return GiteaImporter(token=token, base_url="https://codeberg.org") + + if normalized in ("gitea", "forgejo"): + if not base_url: + msg = f"--url is required for {normalized}" + raise ValueError(msg) + return GiteaImporter(token=token, base_url=base_url) + + if normalized == "codecommit": + return CodeCommitImporter(region=region, profile=profile) + + msg = f"Unknown service: {service}" + raise ValueError(msg) + + +def _resolve_config_file(config_path_str: str | None) -> pathlib.Path: + """Resolve the config file path. 
+ + Parameters + ---------- + config_path_str : str | None + Config file path from user, or None for default + + Returns + ------- + pathlib.Path + Resolved config file path + """ + if config_path_str: + return pathlib.Path(config_path_str).expanduser().resolve() + + home_configs = find_home_config_files(filetype=["yaml"]) + if home_configs: + return home_configs[0] + + return pathlib.Path.home() / ".vcspull.yaml" + + +def import_repos( + service: str, + target: str, + workspace: str, + mode: str, + base_url: str | None, + token: str | None, + region: str | None, + profile: str | None, + language: str | None, + topics: str | None, + min_stars: int, + include_archived: bool, + include_forks: bool, + limit: int, + config_path_str: str | None, + dry_run: bool, + yes: bool, + output_json: bool, + output_ndjson: bool, + color: str, +) -> None: + """Import repositories from a remote service. + + Parameters + ---------- + service : str + Remote service name + target : str + User, org, or search query + workspace : str + Workspace root directory + mode : str + Import mode (user, org, search) + base_url : str | None + Base URL for self-hosted instances + token : str | None + API token + region : str | None + AWS region (for CodeCommit) + profile : str | None + AWS profile (for CodeCommit) + language : str | None + Language filter + topics : str | None + Topics filter (comma-separated) + min_stars : int + Minimum stars filter + include_archived : bool + Include archived repositories + include_forks : bool + Include forked repositories + limit : int + Maximum repositories to fetch + config_path_str : str | None + Config file path + dry_run : bool + Preview without writing + yes : bool + Skip confirmation + output_json : bool + Output as JSON + output_ndjson : bool + Output as NDJSON + color : str + Color mode + """ + output_mode = get_output_mode(output_json, output_ndjson) + formatter = OutputFormatter(output_mode) + colors = Colors(get_color_mode(color)) + + # Validate 
service and create importer + try: + importer = _get_importer( + service, + token=token, + base_url=base_url, + region=region, + profile=profile, + ) + except ValueError as exc: + log.error("%s✗%s %s", Fore.RED, Style.RESET_ALL, exc) # noqa: TRY400 + return + except DependencyError as exc: + log.error("%s✗%s %s", Fore.RED, Style.RESET_ALL, exc) # noqa: TRY400 + return + + # Validate target for non-CodeCommit services + normalized_service = SERVICE_ALIASES.get(service.lower(), service.lower()) + if normalized_service != "codecommit" and not target: + log.error( + "%s✗%s TARGET is required for %s", + Fore.RED, + Style.RESET_ALL, + service, + ) + return + + # Build import options + import_mode = ImportMode(mode) + topic_list = [t.strip() for t in topics.split(",")] if topics else [] + + options = ImportOptions( + mode=import_mode, + target=target, + base_url=base_url, + token=token, + include_forks=include_forks, + include_archived=include_archived, + language=language, + topics=topic_list, + min_stars=min_stars, + limit=limit, + ) + + # Resolve workspace path + workspace_path = pathlib.Path(workspace).expanduser().resolve() + cwd = pathlib.Path.cwd() + home = pathlib.Path.home() + workspace_label = workspace_root_label(workspace_path, cwd=cwd, home=home) + + # Resolve config file + config_file_path = _resolve_config_file(config_path_str) + display_config_path = str(PrivatePath(config_file_path)) + + # Fetch repositories + log.info( + "%s→%s Fetching repositories from %s%s%s...", + Fore.CYAN, + Style.RESET_ALL, + Fore.MAGENTA, + importer.service_name, + Style.RESET_ALL, + ) + + repos: list[RemoteRepo] = [] + try: + for repo in importer.fetch_repos(options): + repos.append(repo) + + # Emit for JSON/NDJSON output + formatter.emit(repo.to_dict()) + + # Log progress for human output + if output_mode.value == "human" and len(repos) % 10 == 0: + log.info( + "%s•%s Fetched %s%d%s repositories...", + Fore.BLUE, + Style.RESET_ALL, + Fore.CYAN, + len(repos), + Style.RESET_ALL, 
+ ) + + except AuthenticationError as exc: + log.error( # noqa: TRY400 + "%s✗%s Authentication error: %s", Fore.RED, Style.RESET_ALL, exc + ) + formatter.finalize() + return + except RateLimitError as exc: + log.error( # noqa: TRY400 + "%s✗%s Rate limit exceeded: %s", Fore.RED, Style.RESET_ALL, exc + ) + formatter.finalize() + return + except NotFoundError as exc: + log.error("%s✗%s Not found: %s", Fore.RED, Style.RESET_ALL, exc) # noqa: TRY400 + formatter.finalize() + return + except ServiceUnavailableError as exc: + log.error( # noqa: TRY400 + "%s✗%s Service unavailable: %s", Fore.RED, Style.RESET_ALL, exc + ) + formatter.finalize() + return + except ConfigurationError as exc: + log.error( # noqa: TRY400 + "%s✗%s Configuration error: %s", Fore.RED, Style.RESET_ALL, exc + ) + formatter.finalize() + return + except RemoteImportError as exc: + log.error("%s✗%s Error: %s", Fore.RED, Style.RESET_ALL, exc) # noqa: TRY400 + formatter.finalize() + return + + if not repos: + log.info( + "%s!%s No repositories found matching criteria.", + Fore.YELLOW, + Style.RESET_ALL, + ) + formatter.finalize() + return + + log.info( + "\n%s✓%s Found %s%d%s repositories", + Fore.GREEN, + Style.RESET_ALL, + Fore.CYAN, + len(repos), + Style.RESET_ALL, + ) + + # Show preview in human mode + if output_mode.value == "human": + for repo in repos[:10]: # Show first 10 + stars_str = f" ★{repo.stars}" if repo.stars > 0 else "" + lang_str = f" [{repo.language}]" if repo.language else "" + log.info( + " %s+%s %s%s%s%s%s", + Fore.GREEN, + Style.RESET_ALL, + Fore.CYAN, + repo.name, + Style.RESET_ALL, + colors.muted(lang_str), + colors.muted(stars_str), + ) + if len(repos) > 10: + log.info( + " %s...%s and %s%d%s more", + Fore.BLUE, + Style.RESET_ALL, + Fore.CYAN, + len(repos) - 10, + Style.RESET_ALL, + ) + + formatter.finalize() + + # Handle dry-run + if dry_run: + log.info( + "\n%s→%s Dry run complete. 
Would write to %s%s%s", + Fore.YELLOW, + Style.RESET_ALL, + Fore.BLUE, + display_config_path, + Style.RESET_ALL, + ) + return + + # Confirm with user + if not yes and output_mode.value == "human": + confirm = input( + f"\n{Fore.CYAN}Import {len(repos)} repositories to " + f"{display_config_path}? [y/N]: {Style.RESET_ALL}", + ).lower() + if confirm not in {"y", "yes"}: + log.info("%s✗%s Aborted by user.", Fore.RED, Style.RESET_ALL) + return + + # Load existing config or create new + raw_config: dict[str, t.Any] + if config_file_path.exists(): + import yaml + + try: + with config_file_path.open() as f: + raw_config = yaml.safe_load(f) or {} + except Exception: + log.exception("Error loading config file") + return + else: + raw_config = {} + + # Add repositories to config + if workspace_label not in raw_config: + raw_config[workspace_label] = {} + + added_count = 0 + skipped_count = 0 + + for repo in repos: + if repo.name in raw_config[workspace_label]: + skipped_count += 1 + continue + + raw_config[workspace_label][repo.name] = {"repo": repo.to_vcspull_url()} + added_count += 1 + + if added_count == 0: + log.info( + "%s✓%s All repositories already exist in config. 
Nothing to add.", + Fore.GREEN, + Style.RESET_ALL, + ) + return + + # Save config + try: + save_config_yaml(config_file_path, raw_config) + log.info( + "%s✓%s Added %s%d%s repositories to %s%s%s", + Fore.GREEN, + Style.RESET_ALL, + Fore.CYAN, + added_count, + Style.RESET_ALL, + Fore.BLUE, + display_config_path, + Style.RESET_ALL, + ) + if skipped_count > 0: + log.info( + "%s!%s Skipped %s%d%s existing repositories", + Fore.YELLOW, + Style.RESET_ALL, + Fore.CYAN, + skipped_count, + Style.RESET_ALL, + ) + except Exception: + log.exception("Error saving config to %s", display_config_path) From 9c809ed3fae84dd2d78811c939895e0efe4acaed Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 1 Feb 2026 10:14:46 -0600 Subject: [PATCH 003/109] feat(cli[__init__]) Register import command why: Make the import command accessible via vcspull CLI. what: - Import create_import_subparser, import_repos from import_repos module - Add IMPORT_DESCRIPTION with usage examples - Add import subparser to CLI - Add handler for import subparser in cli() function --- src/vcspull/cli/__init__.py | 65 +++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/src/vcspull/cli/__init__.py b/src/vcspull/cli/__init__.py index 4a73e8f36..ebbb65226 100644 --- a/src/vcspull/cli/__init__.py +++ b/src/vcspull/cli/__init__.py @@ -18,6 +18,7 @@ from .add import add_repo, create_add_subparser, handle_add_command from .discover import create_discover_subparser, discover_repos from .fmt import create_fmt_subparser, format_config_file +from .import_repos import create_import_subparser, import_repos from .list import create_list_subparser, list_repos from .search import create_search_subparser, search_repos from .status import create_status_subparser, status_repos @@ -105,6 +106,15 @@ def build_description( "vcspull fmt --all", ], ), + ( + "import", + [ + "vcspull import github torvalds -w ~/repos/linux --mode user", + "vcspull import github django -w ~/study/python --mode org", + 
"vcspull import gitlab myuser -w ~/work --dry-run", + "vcspull import codeberg user -w ~/oss --json", + ], + ), ), ) @@ -234,6 +244,27 @@ def build_description( ), ) +IMPORT_DESCRIPTION = build_description( + """ + Import repositories from remote services. + + Fetches repository lists from GitHub, GitLab, Codeberg/Gitea/Forgejo, + or AWS CodeCommit and adds them to the vcspull configuration. + """, + ( + ( + None, + [ + "vcspull import github torvalds -w ~/repos/linux --mode user", + "vcspull import github django -w ~/study/python --mode org", + "vcspull import gitlab myuser -w ~/work --url https://gitlab.company.com", + "vcspull import codeberg user -w ~/oss --dry-run", + "vcspull import codecommit -w ~/work/aws --region us-east-1", + ], + ), + ), +) + @overload def create_parser( @@ -333,6 +364,15 @@ def create_parser( ) create_fmt_subparser(fmt_parser) + # Import command + import_parser = subparsers.add_parser( + "import", + help="import repositories from remote services", + formatter_class=VcspullHelpFormatter, + description=IMPORT_DESCRIPTION, + ) + create_import_subparser(import_parser) + if return_subparsers: # Return all parsers needed by cli() function return parser, ( @@ -343,6 +383,7 @@ def create_parser( add_parser, discover_parser, fmt_parser, + import_parser, ) return parser @@ -358,6 +399,7 @@ def cli(_args: list[str] | None = None) -> None: add_parser, discover_parser, _fmt_parser, + _import_parser, ) = subparsers args = parser.parse_args(_args) @@ -453,3 +495,26 @@ def cli(_args: list[str] | None = None) -> None: args.all, merge_roots=args.merge_roots, ) + elif args.subparser_name == "import": + import_repos( + service=args.service, + target=args.target, + workspace=args.workspace, + mode=args.mode, + base_url=getattr(args, "base_url", None), + token=getattr(args, "token", None), + region=getattr(args, "region", None), + profile=getattr(args, "profile", None), + language=getattr(args, "language", None), + topics=getattr(args, "topics", None), + 
min_stars=getattr(args, "min_stars", 0), + include_archived=getattr(args, "include_archived", False), + include_forks=getattr(args, "include_forks", False), + limit=getattr(args, "limit", 100), + config_path_str=getattr(args, "config", None), + dry_run=getattr(args, "dry_run", False), + yes=getattr(args, "yes", False), + output_json=getattr(args, "output_json", False), + output_ndjson=getattr(args, "output_ndjson", False), + color=getattr(args, "color", "auto"), + ) From d77ed6411ed588b544fe2db9b3b446eca24e41f2 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 1 Feb 2026 10:14:53 -0600 Subject: [PATCH 004/109] test(remotes) Add tests for remote importers why: Ensure remote repository import functionality works correctly. what: - Add conftest.py with mock_urlopen fixture and sample API responses - Add test_base.py with 12 filter_repo tests and RemoteRepo tests - Add test_github.py with 9 tests for user/org/search modes - Add test_gitlab.py with 8 tests including auth requirement for search - Add test_gitea.py with 8 tests covering wrapped and array responses - Total: 48 tests covering all importers and edge cases --- tests/_internal/remotes/__init__.py | 1 + tests/_internal/remotes/conftest.py | 237 ++++++++++++++++ tests/_internal/remotes/test_base.py | 342 +++++++++++++++++++++++ tests/_internal/remotes/test_gitea.py | 183 +++++++++++++ tests/_internal/remotes/test_github.py | 364 +++++++++++++++++++++++++ tests/_internal/remotes/test_gitlab.py | 158 +++++++++++ 6 files changed, 1285 insertions(+) create mode 100644 tests/_internal/remotes/__init__.py create mode 100644 tests/_internal/remotes/conftest.py create mode 100644 tests/_internal/remotes/test_base.py create mode 100644 tests/_internal/remotes/test_gitea.py create mode 100644 tests/_internal/remotes/test_github.py create mode 100644 tests/_internal/remotes/test_gitlab.py diff --git a/tests/_internal/remotes/__init__.py b/tests/_internal/remotes/__init__.py new file mode 100644 index 
000000000..933ac6f57 --- /dev/null +++ b/tests/_internal/remotes/__init__.py @@ -0,0 +1 @@ +"""Tests for vcspull._internal.remotes package.""" diff --git a/tests/_internal/remotes/conftest.py b/tests/_internal/remotes/conftest.py new file mode 100644 index 000000000..bb3fa5da9 --- /dev/null +++ b/tests/_internal/remotes/conftest.py @@ -0,0 +1,237 @@ +"""Shared fixtures for remotes tests.""" + +from __future__ import annotations + +import json +import typing as t +import urllib.error + +import pytest + + +class MockHTTPResponse: + """Mock HTTP response for testing.""" + + def __init__( + self, + body: bytes, + headers: dict[str, str] | None = None, + status: int = 200, + ) -> None: + """Initialize mock response.""" + self._body = body + self._headers = headers or {} + self.status = status + self.code = status + + def read(self) -> bytes: + """Return response body.""" + return self._body + + def getheaders(self) -> list[tuple[str, str]]: + """Return response headers as list of tuples.""" + return list(self._headers.items()) + + def __enter__(self) -> MockHTTPResponse: + """Context manager entry.""" + return self + + def __exit__(self, *args: t.Any) -> None: + """Context manager exit.""" + pass + + +@pytest.fixture +def mock_urlopen(monkeypatch: pytest.MonkeyPatch) -> t.Callable[..., None]: + """Create factory fixture to mock urllib.request.urlopen responses. + + Parameters + ---------- + monkeypatch : pytest.MonkeyPatch + Pytest monkeypatch fixture + + Returns + ------- + Callable + Function to set up mock responses + """ + + def _mock( + responses: list[tuple[bytes, dict[str, str], int]] | None = None, + error: urllib.error.HTTPError | None = None, + ) -> None: + """Set up mock responses. 
+ + Parameters + ---------- + responses : list[tuple[bytes, dict[str, str], int]] | None + List of (body, headers, status) tuples for sequential responses + error : urllib.error.HTTPError | None + Error to raise instead of returning response + """ + call_count = 0 + responses = responses or [] + + def urlopen_side_effect( + request: t.Any, + timeout: int | None = None, + ) -> MockHTTPResponse: + nonlocal call_count + if error: + raise error + if not responses: + return MockHTTPResponse(b"[]", {}, 200) + body, headers, status = responses[call_count % len(responses)] + call_count += 1 + return MockHTTPResponse(body, headers, status) + + monkeypatch.setattr("urllib.request.urlopen", urlopen_side_effect) + + return _mock + + +@pytest.fixture +def github_user_repos_response() -> bytes: + """Return standard GitHub user repos API response.""" + return json.dumps( + [ + { + "name": "repo1", + "clone_url": "https://github.com/testuser/repo1.git", + "html_url": "https://github.com/testuser/repo1", + "description": "Test repo 1", + "language": "Python", + "topics": ["cli", "tool"], + "stargazers_count": 100, + "fork": False, + "archived": False, + "default_branch": "main", + "owner": {"login": "testuser"}, + }, + { + "name": "repo2", + "clone_url": "https://github.com/testuser/repo2.git", + "html_url": "https://github.com/testuser/repo2", + "description": "Test repo 2", + "language": "JavaScript", + "topics": [], + "stargazers_count": 50, + "fork": False, + "archived": False, + "default_branch": "main", + "owner": {"login": "testuser"}, + }, + ] + ).encode() + + +@pytest.fixture +def github_forked_repo_response() -> bytes: + """GitHub repo that is a fork.""" + return json.dumps( + [ + { + "name": "forked-repo", + "clone_url": "https://github.com/testuser/forked-repo.git", + "html_url": "https://github.com/testuser/forked-repo", + "description": "A forked repo", + "language": "Python", + "topics": [], + "stargazers_count": 10, + "fork": True, + "archived": False, + 
"default_branch": "main", + "owner": {"login": "testuser"}, + } + ] + ).encode() + + +@pytest.fixture +def github_archived_repo_response() -> bytes: + """GitHub repo that is archived.""" + return json.dumps( + [ + { + "name": "archived-repo", + "clone_url": "https://github.com/testuser/archived-repo.git", + "html_url": "https://github.com/testuser/archived-repo", + "description": "An archived repo", + "language": "Python", + "topics": [], + "stargazers_count": 5, + "fork": False, + "archived": True, + "default_branch": "main", + "owner": {"login": "testuser"}, + } + ] + ).encode() + + +@pytest.fixture +def gitlab_user_projects_response() -> bytes: + """Return standard GitLab user projects API response.""" + return json.dumps( + [ + { + "path": "project1", + "name": "Project 1", + "http_url_to_repo": "https://gitlab.com/testuser/project1.git", + "web_url": "https://gitlab.com/testuser/project1", + "description": "Test project 1", + "topics": ["python"], + "star_count": 20, + "archived": False, + "default_branch": "main", + "namespace": {"path": "testuser"}, + }, + ] + ).encode() + + +@pytest.fixture +def gitea_user_repos_response() -> bytes: + """Return standard Gitea user repos API response.""" + return json.dumps( + [ + { + "name": "repo1", + "clone_url": "https://codeberg.org/testuser/repo1.git", + "html_url": "https://codeberg.org/testuser/repo1", + "description": "Test repo 1", + "language": "Python", + "topics": [], + "stars_count": 15, + "fork": False, + "archived": False, + "default_branch": "main", + "owner": {"login": "testuser"}, + }, + ] + ).encode() + + +@pytest.fixture +def gitea_search_response() -> bytes: + """Gitea search API response with wrapped data.""" + return json.dumps( + { + "ok": True, + "data": [ + { + "name": "search-result", + "clone_url": "https://codeberg.org/user/search-result.git", + "html_url": "https://codeberg.org/user/search-result", + "description": "Found by search", + "language": "Go", + "topics": ["search"], + "stars_count": 
30, + "fork": False, + "archived": False, + "default_branch": "main", + "owner": {"login": "user"}, + }, + ], + } + ).encode() diff --git a/tests/_internal/remotes/test_base.py b/tests/_internal/remotes/test_base.py new file mode 100644 index 000000000..b72da007e --- /dev/null +++ b/tests/_internal/remotes/test_base.py @@ -0,0 +1,342 @@ +"""Tests for vcspull._internal.remotes.base module.""" + +from __future__ import annotations + +import typing as t + +import pytest + +from vcspull._internal.remotes.base import ( + ImportMode, + ImportOptions, + RemoteRepo, + filter_repo, +) + + +class FilterRepoFixture(t.NamedTuple): + """Fixture for filter_repo test cases.""" + + test_id: str + repo_kwargs: dict[str, t.Any] + options_kwargs: dict[str, t.Any] + expected: bool + + +FILTER_REPO_FIXTURES: list[FilterRepoFixture] = [ + FilterRepoFixture( + test_id="passes-all-defaults", + repo_kwargs={ + "name": "test", + "clone_url": "https://github.com/user/test.git", + "html_url": "https://github.com/user/test", + "description": None, + "language": "Python", + "topics": (), + "stars": 50, + "is_fork": False, + "is_archived": False, + "default_branch": "main", + "owner": "user", + }, + options_kwargs={}, + expected=True, + ), + FilterRepoFixture( + test_id="excludes-fork-by-default", + repo_kwargs={ + "name": "fork", + "clone_url": "https://github.com/user/fork.git", + "html_url": "https://github.com/user/fork", + "description": None, + "language": "Python", + "topics": (), + "stars": 10, + "is_fork": True, + "is_archived": False, + "default_branch": "main", + "owner": "user", + }, + options_kwargs={"include_forks": False}, + expected=False, + ), + FilterRepoFixture( + test_id="includes-fork-when-enabled", + repo_kwargs={ + "name": "fork", + "clone_url": "https://github.com/user/fork.git", + "html_url": "https://github.com/user/fork", + "description": None, + "language": "Python", + "topics": (), + "stars": 10, + "is_fork": True, + "is_archived": False, + "default_branch": "main", 
+ "owner": "user", + }, + options_kwargs={"include_forks": True}, + expected=True, + ), + FilterRepoFixture( + test_id="excludes-archived-by-default", + repo_kwargs={ + "name": "archived", + "clone_url": "https://github.com/user/archived.git", + "html_url": "https://github.com/user/archived", + "description": None, + "language": "Python", + "topics": (), + "stars": 5, + "is_fork": False, + "is_archived": True, + "default_branch": "main", + "owner": "user", + }, + options_kwargs={"include_archived": False}, + expected=False, + ), + FilterRepoFixture( + test_id="includes-archived-when-enabled", + repo_kwargs={ + "name": "archived", + "clone_url": "https://github.com/user/archived.git", + "html_url": "https://github.com/user/archived", + "description": None, + "language": "Python", + "topics": (), + "stars": 5, + "is_fork": False, + "is_archived": True, + "default_branch": "main", + "owner": "user", + }, + options_kwargs={"include_archived": True}, + expected=True, + ), + FilterRepoFixture( + test_id="filters-by-language-match", + repo_kwargs={ + "name": "python-repo", + "clone_url": "https://github.com/user/python-repo.git", + "html_url": "https://github.com/user/python-repo", + "description": None, + "language": "Python", + "topics": (), + "stars": 50, + "is_fork": False, + "is_archived": False, + "default_branch": "main", + "owner": "user", + }, + options_kwargs={"language": "Python"}, + expected=True, + ), + FilterRepoFixture( + test_id="filters-by-language-mismatch", + repo_kwargs={ + "name": "python-repo", + "clone_url": "https://github.com/user/python-repo.git", + "html_url": "https://github.com/user/python-repo", + "description": None, + "language": "Python", + "topics": (), + "stars": 50, + "is_fork": False, + "is_archived": False, + "default_branch": "main", + "owner": "user", + }, + options_kwargs={"language": "JavaScript"}, + expected=False, + ), + FilterRepoFixture( + test_id="filters-by-language-case-insensitive", + repo_kwargs={ + "name": "python-repo", 
+ "clone_url": "https://github.com/user/python-repo.git", + "html_url": "https://github.com/user/python-repo", + "description": None, + "language": "Python", + "topics": (), + "stars": 50, + "is_fork": False, + "is_archived": False, + "default_branch": "main", + "owner": "user", + }, + options_kwargs={"language": "python"}, + expected=True, + ), + FilterRepoFixture( + test_id="filters-by-min-stars-pass", + repo_kwargs={ + "name": "popular", + "clone_url": "https://github.com/user/popular.git", + "html_url": "https://github.com/user/popular", + "description": None, + "language": "Python", + "topics": (), + "stars": 100, + "is_fork": False, + "is_archived": False, + "default_branch": "main", + "owner": "user", + }, + options_kwargs={"min_stars": 50}, + expected=True, + ), + FilterRepoFixture( + test_id="filters-by-min-stars-fail", + repo_kwargs={ + "name": "unpopular", + "clone_url": "https://github.com/user/unpopular.git", + "html_url": "https://github.com/user/unpopular", + "description": None, + "language": "Python", + "topics": (), + "stars": 10, + "is_fork": False, + "is_archived": False, + "default_branch": "main", + "owner": "user", + }, + options_kwargs={"min_stars": 50}, + expected=False, + ), + FilterRepoFixture( + test_id="filters-by-topics-match", + repo_kwargs={ + "name": "cli-tool", + "clone_url": "https://github.com/user/cli-tool.git", + "html_url": "https://github.com/user/cli-tool", + "description": None, + "language": "Python", + "topics": ("cli", "tool", "python"), + "stars": 50, + "is_fork": False, + "is_archived": False, + "default_branch": "main", + "owner": "user", + }, + options_kwargs={"topics": ["cli", "python"]}, + expected=True, + ), + FilterRepoFixture( + test_id="filters-by-topics-mismatch", + repo_kwargs={ + "name": "web-app", + "clone_url": "https://github.com/user/web-app.git", + "html_url": "https://github.com/user/web-app", + "description": None, + "language": "Python", + "topics": ("web", "django"), + "stars": 50, + "is_fork": 
False, + "is_archived": False, + "default_branch": "main", + "owner": "user", + }, + options_kwargs={"topics": ["cli"]}, + expected=False, + ), +] + + +@pytest.mark.parametrize( + list(FilterRepoFixture._fields), + FILTER_REPO_FIXTURES, + ids=[f.test_id for f in FILTER_REPO_FIXTURES], +) +def test_filter_repo( + test_id: str, + repo_kwargs: dict[str, t.Any], + options_kwargs: dict[str, t.Any], + expected: bool, +) -> None: + """Test filter_repo with various filter combinations.""" + repo = RemoteRepo(**repo_kwargs) + options = ImportOptions(**options_kwargs) + assert filter_repo(repo, options) == expected + + +def test_remote_repo_to_vcspull_url() -> None: + """Test RemoteRepo.to_vcspull_url adds git+ prefix.""" + repo = RemoteRepo( + name="test", + clone_url="https://github.com/user/test.git", + html_url="https://github.com/user/test", + description=None, + language=None, + topics=(), + stars=0, + is_fork=False, + is_archived=False, + default_branch="main", + owner="user", + ) + assert repo.to_vcspull_url() == "git+https://github.com/user/test.git" + + +def test_remote_repo_to_vcspull_url_already_prefixed() -> None: + """Test RemoteRepo.to_vcspull_url doesn't double-prefix.""" + repo = RemoteRepo( + name="test", + clone_url="git+https://github.com/user/test.git", + html_url="https://github.com/user/test", + description=None, + language=None, + topics=(), + stars=0, + is_fork=False, + is_archived=False, + default_branch="main", + owner="user", + ) + assert repo.to_vcspull_url() == "git+https://github.com/user/test.git" + + +def test_remote_repo_to_dict() -> None: + """Test RemoteRepo.to_dict serialization.""" + repo = RemoteRepo( + name="test", + clone_url="https://github.com/user/test.git", + html_url="https://github.com/user/test", + description="A test repo", + language="Python", + topics=("cli", "tool"), + stars=100, + is_fork=False, + is_archived=False, + default_branch="main", + owner="user", + ) + d = repo.to_dict() + assert d["name"] == "test" + assert 
d["clone_url"] == "https://github.com/user/test.git" + assert d["language"] == "Python" + assert d["topics"] == ["cli", "tool"] + assert d["stars"] == 100 + assert d["is_fork"] is False + + +def test_import_options_defaults() -> None: + """Test ImportOptions default values.""" + options = ImportOptions() + assert options.mode == ImportMode.USER + assert options.target == "" + assert options.base_url is None + assert options.token is None + assert options.include_forks is False + assert options.include_archived is False + assert options.language is None + assert options.topics == [] + assert options.min_stars == 0 + assert options.limit == 100 + + +def test_import_mode_values() -> None: + """Test ImportMode enum values.""" + assert ImportMode.USER.value == "user" + assert ImportMode.ORG.value == "org" + assert ImportMode.SEARCH.value == "search" diff --git a/tests/_internal/remotes/test_gitea.py b/tests/_internal/remotes/test_gitea.py new file mode 100644 index 000000000..c82e5c5a6 --- /dev/null +++ b/tests/_internal/remotes/test_gitea.py @@ -0,0 +1,183 @@ +"""Tests for vcspull._internal.remotes.gitea module.""" + +from __future__ import annotations + +import json +import typing as t + +from vcspull._internal.remotes.base import ImportMode, ImportOptions +from vcspull._internal.remotes.gitea import GiteaImporter + + +def test_gitea_fetch_user( + mock_urlopen: t.Callable[..., None], + gitea_user_repos_response: bytes, +) -> None: + """Test Gitea user repository fetching.""" + mock_urlopen([(gitea_user_repos_response, {}, 200)]) + importer = GiteaImporter(base_url="https://codeberg.org") + options = ImportOptions(mode=ImportMode.USER, target="testuser") + repos = list(importer.fetch_repos(options)) + assert len(repos) == 1 + assert repos[0].name == "repo1" + assert repos[0].owner == "testuser" + assert repos[0].stars == 15 + + +def test_gitea_fetch_org( + mock_urlopen: t.Callable[..., None], +) -> None: + """Test Gitea org repository fetching.""" + response_json = [ + 
{ + "name": "org-repo", + "clone_url": "https://codeberg.org/testorg/org-repo.git", + "html_url": "https://codeberg.org/testorg/org-repo", + "description": "Org repo", + "language": "Go", + "topics": [], + "stars_count": 100, + "fork": False, + "archived": False, + "default_branch": "main", + "owner": {"login": "testorg"}, + } + ] + mock_urlopen([(json.dumps(response_json).encode(), {}, 200)]) + importer = GiteaImporter(base_url="https://codeberg.org") + options = ImportOptions(mode=ImportMode.ORG, target="testorg") + repos = list(importer.fetch_repos(options)) + assert len(repos) == 1 + assert repos[0].name == "org-repo" + + +def test_gitea_search_with_wrapped_response( + mock_urlopen: t.Callable[..., None], + gitea_search_response: bytes, +) -> None: + """Test Gitea search handles wrapped response format.""" + mock_urlopen([(gitea_search_response, {}, 200)]) + importer = GiteaImporter(base_url="https://codeberg.org") + options = ImportOptions(mode=ImportMode.SEARCH, target="test") + repos = list(importer.fetch_repos(options)) + assert len(repos) == 1 + assert repos[0].name == "search-result" + + +def test_gitea_search_with_array_response( + mock_urlopen: t.Callable[..., None], +) -> None: + """Test Gitea search handles plain array response format.""" + # Some Gitea instances return plain array instead of {"ok": true, "data": [...]} + response_json = [ + { + "name": "plain-result", + "clone_url": "https://gitea.example.com/user/plain-result.git", + "html_url": "https://gitea.example.com/user/plain-result", + "description": "Plain array result", + "language": "Python", + "topics": [], + "stars_count": 20, + "fork": False, + "archived": False, + "default_branch": "main", + "owner": {"login": "user"}, + } + ] + mock_urlopen([(json.dumps(response_json).encode(), {}, 200)]) + importer = GiteaImporter(base_url="https://gitea.example.com") + options = ImportOptions(mode=ImportMode.SEARCH, target="test") + repos = list(importer.fetch_repos(options)) + assert len(repos) == 
def test_gitea_uses_stars_count_field(
    mock_urlopen: t.Callable[..., None],
) -> None:
    """Test Gitea correctly reads stars_count (not stargazers_count)."""
    repo_payload = {
        "name": "starred-repo",
        "clone_url": "https://codeberg.org/user/starred-repo.git",
        "html_url": "https://codeberg.org/user/starred-repo",
        "description": "Popular repo",
        "language": "Rust",
        "topics": [],
        "stars_count": 500,  # Gitea uses stars_count
        "fork": False,
        "archived": False,
        "default_branch": "main",
        "owner": {"login": "user"},
    }
    mock_urlopen([(json.dumps([repo_payload]).encode(), {}, 200)])
    importer = GiteaImporter(base_url="https://codeberg.org")
    fetch_options = ImportOptions(mode=ImportMode.USER, target="user")
    fetched = list(importer.fetch_repos(fetch_options))
    assert len(fetched) == 1
    assert fetched[0].stars == 500
"topics": [], + "stars_count": 50, + "fork": False, + "archived": False, + "default_branch": "main", + "owner": {"login": "user"}, + }, + { + "name": "rust-repo", + "clone_url": "https://codeberg.org/user/rust-repo.git", + "html_url": "https://codeberg.org/user/rust-repo", + "description": "Rust repo", + "language": "Rust", + "topics": [], + "stars_count": 30, + "fork": False, + "archived": False, + "default_branch": "main", + "owner": {"login": "user"}, + }, + ] + mock_urlopen([(json.dumps(response_json).encode(), {}, 200)]) + importer = GiteaImporter(base_url="https://codeberg.org") + options = ImportOptions(mode=ImportMode.USER, target="user", language="Rust") + repos = list(importer.fetch_repos(options)) + assert len(repos) == 1 + assert repos[0].name == "rust-repo" diff --git a/tests/_internal/remotes/test_github.py b/tests/_internal/remotes/test_github.py new file mode 100644 index 000000000..06ea94de9 --- /dev/null +++ b/tests/_internal/remotes/test_github.py @@ -0,0 +1,364 @@ +"""Tests for vcspull._internal.remotes.github module.""" + +from __future__ import annotations + +import json +import typing as t + +import pytest + +from vcspull._internal.remotes.base import ImportMode, ImportOptions +from vcspull._internal.remotes.github import GitHubImporter + + +class GitHubUserFixture(t.NamedTuple): + """Fixture for GitHub user import test cases.""" + + test_id: str + response_json: list[dict[str, t.Any]] + options_kwargs: dict[str, t.Any] + expected_count: int + expected_names: list[str] + + +GITHUB_USER_FIXTURES: list[GitHubUserFixture] = [ + GitHubUserFixture( + test_id="single-repo-user", + response_json=[ + { + "name": "repo1", + "clone_url": "https://github.com/testuser/repo1.git", + "html_url": "https://github.com/testuser/repo1", + "description": "Test repo", + "language": "Python", + "topics": [], + "stargazers_count": 10, + "fork": False, + "archived": False, + "default_branch": "main", + "owner": {"login": "testuser"}, + } + ], + 
options_kwargs={"mode": ImportMode.USER, "target": "testuser"}, + expected_count=1, + expected_names=["repo1"], + ), + GitHubUserFixture( + test_id="multiple-repos-forks-excluded", + response_json=[ + { + "name": "original", + "clone_url": "https://github.com/testuser/original.git", + "html_url": "https://github.com/testuser/original", + "description": "Original repo", + "language": "Python", + "topics": [], + "stargazers_count": 100, + "fork": False, + "archived": False, + "default_branch": "main", + "owner": {"login": "testuser"}, + }, + { + "name": "forked", + "clone_url": "https://github.com/testuser/forked.git", + "html_url": "https://github.com/testuser/forked", + "description": "Forked repo", + "language": "Python", + "topics": [], + "stargazers_count": 5, + "fork": True, + "archived": False, + "default_branch": "main", + "owner": {"login": "testuser"}, + }, + ], + options_kwargs={ + "mode": ImportMode.USER, + "target": "testuser", + "include_forks": False, + }, + expected_count=1, + expected_names=["original"], + ), + GitHubUserFixture( + test_id="multiple-repos-forks-included", + response_json=[ + { + "name": "original", + "clone_url": "https://github.com/testuser/original.git", + "html_url": "https://github.com/testuser/original", + "description": "Original repo", + "language": "Python", + "topics": [], + "stargazers_count": 100, + "fork": False, + "archived": False, + "default_branch": "main", + "owner": {"login": "testuser"}, + }, + { + "name": "forked", + "clone_url": "https://github.com/testuser/forked.git", + "html_url": "https://github.com/testuser/forked", + "description": "Forked repo", + "language": "Python", + "topics": [], + "stargazers_count": 5, + "fork": True, + "archived": False, + "default_branch": "main", + "owner": {"login": "testuser"}, + }, + ], + options_kwargs={ + "mode": ImportMode.USER, + "target": "testuser", + "include_forks": True, + }, + expected_count=2, + expected_names=["original", "forked"], + ), + GitHubUserFixture( + 
test_id="archived-excluded-by-default", + response_json=[ + { + "name": "active", + "clone_url": "https://github.com/testuser/active.git", + "html_url": "https://github.com/testuser/active", + "description": "Active repo", + "language": "Python", + "topics": [], + "stargazers_count": 50, + "fork": False, + "archived": False, + "default_branch": "main", + "owner": {"login": "testuser"}, + }, + { + "name": "archived", + "clone_url": "https://github.com/testuser/archived.git", + "html_url": "https://github.com/testuser/archived", + "description": "Archived repo", + "language": "Python", + "topics": [], + "stargazers_count": 10, + "fork": False, + "archived": True, + "default_branch": "main", + "owner": {"login": "testuser"}, + }, + ], + options_kwargs={ + "mode": ImportMode.USER, + "target": "testuser", + "include_archived": False, + }, + expected_count=1, + expected_names=["active"], + ), + GitHubUserFixture( + test_id="language-filter-applied", + response_json=[ + { + "name": "python-repo", + "clone_url": "https://github.com/testuser/python-repo.git", + "html_url": "https://github.com/testuser/python-repo", + "description": "Python repo", + "language": "Python", + "topics": [], + "stargazers_count": 50, + "fork": False, + "archived": False, + "default_branch": "main", + "owner": {"login": "testuser"}, + }, + { + "name": "js-repo", + "clone_url": "https://github.com/testuser/js-repo.git", + "html_url": "https://github.com/testuser/js-repo", + "description": "JavaScript repo", + "language": "JavaScript", + "topics": [], + "stargazers_count": 30, + "fork": False, + "archived": False, + "default_branch": "main", + "owner": {"login": "testuser"}, + }, + ], + options_kwargs={ + "mode": ImportMode.USER, + "target": "testuser", + "language": "Python", + }, + expected_count=1, + expected_names=["python-repo"], + ), + GitHubUserFixture( + test_id="empty-response-returns-empty-list", + response_json=[], + options_kwargs={"mode": ImportMode.USER, "target": "emptyuser"}, + 
def test_github_fetch_org(
    mock_urlopen: t.Callable[..., None],
) -> None:
    """Test GitHub org repository fetching."""
    org_repo = {
        "name": "org-repo",
        "clone_url": "https://github.com/testorg/org-repo.git",
        "html_url": "https://github.com/testorg/org-repo",
        "description": "Org repo",
        "language": "Python",
        "topics": [],
        "stargazers_count": 200,
        "fork": False,
        "archived": False,
        "default_branch": "main",
        "owner": {"login": "testorg"},
    }
    # GitHub responses carry rate-limit headers the importer may inspect.
    rate_headers = {"x-ratelimit-remaining": "100", "x-ratelimit-limit": "60"}
    mock_urlopen([(json.dumps([org_repo]).encode(), rate_headers, 200)])
    importer = GitHubImporter()
    org_options = ImportOptions(mode=ImportMode.ORG, target="testorg")
    fetched = list(importer.fetch_repos(org_options))
    assert len(fetched) == 1
    first = fetched[0]
    assert first.name == "org-repo"
    assert first.owner == "testorg"
"html_url": "https://github.com/user/search-result", + "description": "Found by search", + "language": "Python", + "topics": ["machine-learning"], + "stargazers_count": 1000, + "fork": False, + "archived": False, + "default_branch": "main", + "owner": {"login": "user"}, + } + ], + } + mock_urlopen( + [ + ( + json.dumps(search_response).encode(), + {"x-ratelimit-remaining": "100", "x-ratelimit-limit": "60"}, + 200, + ) + ] + ) + importer = GitHubImporter() + options = ImportOptions(mode=ImportMode.SEARCH, target="machine learning") + repos = list(importer.fetch_repos(options)) + assert len(repos) == 1 + assert repos[0].name == "search-result" + assert repos[0].stars == 1000 + + +def test_github_importer_is_authenticated_without_token() -> None: + """Test is_authenticated returns False without token.""" + importer = GitHubImporter(token=None) + assert importer.is_authenticated is False + + +def test_github_importer_is_authenticated_with_token() -> None: + """Test is_authenticated returns True with token.""" + importer = GitHubImporter(token="test-token") + assert importer.is_authenticated is True + + +def test_github_importer_service_name() -> None: + """Test service_name property.""" + importer = GitHubImporter() + assert importer.service_name == "GitHub" + + +def test_github_limit_respected( + mock_urlopen: t.Callable[..., None], +) -> None: + """Test that limit option is respected.""" + # Create response with 5 repos + response_json = [ + { + "name": f"repo{i}", + "clone_url": f"https://github.com/user/repo{i}.git", + "html_url": f"https://github.com/user/repo{i}", + "description": f"Repo {i}", + "language": "Python", + "topics": [], + "stargazers_count": 10, + "fork": False, + "archived": False, + "default_branch": "main", + "owner": {"login": "user"}, + } + for i in range(5) + ] + mock_urlopen( + [ + ( + json.dumps(response_json).encode(), + {"x-ratelimit-remaining": "100", "x-ratelimit-limit": "60"}, + 200, + ) + ] + ) + importer = GitHubImporter() + options = 
def test_gitlab_fetch_group(
    mock_urlopen: t.Callable[..., None],
) -> None:
    """Test GitLab group (org) project fetching."""
    group_project = {
        "path": "group-project",
        "name": "Group Project",
        "http_url_to_repo": "https://gitlab.com/testgroup/group-project.git",
        "web_url": "https://gitlab.com/testgroup/group-project",
        "description": "Group project",
        "topics": [],
        "star_count": 50,
        "archived": False,
        "default_branch": "main",
        "namespace": {"path": "testgroup"},
    }
    mock_urlopen([(json.dumps([group_project]).encode(), {}, 200)])
    importer = GitLabImporter()
    group_options = ImportOptions(mode=ImportMode.ORG, target="testgroup")
    fetched = list(importer.fetch_repos(group_options))
    assert len(fetched) == 1
    assert fetched[0].name == "group-project"
def test_gitlab_search_with_auth(
    mock_urlopen: t.Callable[..., None],
) -> None:
    """Test GitLab search works with authentication."""
    search_hit = {
        "path": "search-result",
        "name": "Search Result",
        "http_url_to_repo": "https://gitlab.com/user/search-result.git",
        "web_url": "https://gitlab.com/user/search-result",
        "description": "Found",
        "topics": [],
        "star_count": 100,
        "archived": False,
        "default_branch": "main",
        "namespace": {"path": "user"},
    }
    mock_urlopen([(json.dumps([search_hit]).encode(), {}, 200)])
    # A token is required: unauthenticated search raises (covered elsewhere).
    importer = GitLabImporter(token="test-token")
    search_options = ImportOptions(mode=ImportMode.SEARCH, target="test")
    results = list(importer.fetch_repos(search_options))
    assert len(results) == 1
    assert results[0].name == "search-result"
"https://gitlab.com/user/forked-project", + "description": "A fork", + "topics": [], + "star_count": 5, + "archived": False, + "default_branch": "main", + "namespace": {"path": "user"}, + "forked_from_project": {"id": 123}, + } + ] + mock_urlopen([(json.dumps(response_json).encode(), {}, 200)]) + importer = GitLabImporter() + options = ImportOptions(mode=ImportMode.USER, target="user", include_forks=False) + repos = list(importer.fetch_repos(options)) + # Fork should be filtered out + assert len(repos) == 0 + + +def test_gitlab_uses_path_not_name( + mock_urlopen: t.Callable[..., None], +) -> None: + """Test GitLab uses 'path' for filesystem-safe names, not 'name'.""" + response_json = [ + { + "path": "my-project", + "name": "My Project With Spaces", # This should NOT be used + "http_url_to_repo": "https://gitlab.com/user/my-project.git", + "web_url": "https://gitlab.com/user/my-project", + "description": "Project with spaces in name", + "topics": [], + "star_count": 10, + "archived": False, + "default_branch": "main", + "namespace": {"path": "user"}, + } + ] + mock_urlopen([(json.dumps(response_json).encode(), {}, 200)]) + importer = GitLabImporter() + options = ImportOptions(mode=ImportMode.USER, target="user") + repos = list(importer.fetch_repos(options)) + assert len(repos) == 1 + assert repos[0].name == "my-project" # Uses 'path', not 'name' From cf2a460f3599806522d5ea743e29dd24be1bc86d Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 1 Feb 2026 10:14:58 -0600 Subject: [PATCH 005/109] test(cli[import]) Add tests for import command why: Ensure CLI import functionality handles all scenarios correctly. what: - Add 14 tests for _get_importer with all services and aliases - Add 4 tests for _resolve_config_file path resolution - Add 10 parametrized tests for import_repos main function - Add 12 edge case tests: abort, skip existing, JSON/NDJSON output, etc. 
def _make_repo(
    name: str,
    owner: str = "testuser",
    stars: int = 10,
    language: str = "Python",
) -> RemoteRepo:
    """Create a RemoteRepo for testing."""
    # Derive both URLs from one base so they always agree.
    base_url = f"https://github.com/{owner}/{name}"
    return RemoteRepo(
        name=name,
        clone_url=f"{base_url}.git",
        html_url=base_url,
        description=f"Test repo {name}",
        language=language,
        topics=(),
        stars=stars,
        is_fork=False,
        is_archived=False,
        default_branch="main",
        owner=owner,
    )
region=None, + profile=None, + expected_type_name="GitHubImporter", + expected_error=None, + ), + GetImporterFixture( + test_id="github-alias-gh", + service="gh", + token=None, + base_url=None, + region=None, + profile=None, + expected_type_name="GitHubImporter", + expected_error=None, + ), + GetImporterFixture( + test_id="gitlab-direct", + service="gitlab", + token="test-token", + base_url=None, + region=None, + profile=None, + expected_type_name="GitLabImporter", + expected_error=None, + ), + GetImporterFixture( + test_id="gitlab-alias-gl", + service="gl", + token=None, + base_url=None, + region=None, + profile=None, + expected_type_name="GitLabImporter", + expected_error=None, + ), + GetImporterFixture( + test_id="codeberg-direct", + service="codeberg", + token=None, + base_url=None, + region=None, + profile=None, + expected_type_name="GiteaImporter", + expected_error=None, + ), + GetImporterFixture( + test_id="codeberg-alias-cb", + service="cb", + token=None, + base_url=None, + region=None, + profile=None, + expected_type_name="GiteaImporter", + expected_error=None, + ), + GetImporterFixture( + test_id="gitea-with-url", + service="gitea", + token=None, + base_url="https://gitea.example.com", + region=None, + profile=None, + expected_type_name="GiteaImporter", + expected_error=None, + ), + GetImporterFixture( + test_id="gitea-without-url-fails", + service="gitea", + token=None, + base_url=None, + region=None, + profile=None, + expected_type_name="", + expected_error="--url is required for gitea", + ), + GetImporterFixture( + test_id="forgejo-with-url", + service="forgejo", + token=None, + base_url="https://forgejo.example.com", + region=None, + profile=None, + expected_type_name="GiteaImporter", + expected_error=None, + ), + GetImporterFixture( + test_id="forgejo-without-url-fails", + service="forgejo", + token=None, + base_url=None, + region=None, + profile=None, + expected_type_name="", + expected_error="--url is required for forgejo", + ), + 
GetImporterFixture( + test_id="codecommit-direct", + service="codecommit", + token=None, + base_url=None, + region="us-east-1", + profile=None, + expected_type_name="CodeCommitImporter", + expected_error=None, + ), + GetImporterFixture( + test_id="codecommit-alias-cc", + service="cc", + token=None, + base_url=None, + region=None, + profile="myprofile", + expected_type_name="CodeCommitImporter", + expected_error=None, + ), + GetImporterFixture( + test_id="codecommit-alias-aws", + service="aws", + token=None, + base_url=None, + region=None, + profile=None, + expected_type_name="CodeCommitImporter", + expected_error=None, + ), + GetImporterFixture( + test_id="unknown-service-fails", + service="unknown", + token=None, + base_url=None, + region=None, + profile=None, + expected_type_name="", + expected_error="Unknown service: unknown", + ), +] + + +@pytest.mark.parametrize( + list(GetImporterFixture._fields), + GET_IMPORTER_FIXTURES, + ids=[f.test_id for f in GET_IMPORTER_FIXTURES], +) +def test_get_importer( + test_id: str, + service: str, + token: str | None, + base_url: str | None, + region: str | None, + profile: str | None, + expected_type_name: str, + expected_error: str | None, +) -> None: + """Test _get_importer creates the correct importer type.""" + if expected_error: + with pytest.raises(ValueError, match=expected_error): + _get_importer( + service, + token=token, + base_url=base_url, + region=region, + profile=profile, + ) + else: + importer = _get_importer( + service, + token=token, + base_url=base_url, + region=region, + profile=profile, + ) + assert type(importer).__name__ == expected_type_name + + +def test_service_aliases_coverage() -> None: + """Test that SERVICE_ALIASES covers expected services.""" + expected_aliases = { + "github", + "gh", + "gitlab", + "gl", + "codeberg", + "cb", + "gitea", + "forgejo", + "codecommit", + "cc", + "aws", + } + assert set(SERVICE_ALIASES.keys()) == expected_aliases + + +class ResolveConfigFixture(t.NamedTuple): + 
"""Fixture for _resolve_config_file test cases.""" + + test_id: str + config_path_str: str | None + home_configs: list[str] + expected_suffix: str + + +RESOLVE_CONFIG_FIXTURES: list[ResolveConfigFixture] = [ + ResolveConfigFixture( + test_id="explicit-path-used", + config_path_str="/custom/config.yaml", + home_configs=[], + expected_suffix="config.yaml", + ), + ResolveConfigFixture( + test_id="tilde-expanded", + config_path_str="~/myconfig.yaml", + home_configs=[], + expected_suffix="myconfig.yaml", + ), + ResolveConfigFixture( + test_id="home-config-found", + config_path_str=None, + home_configs=["existing.yaml"], + expected_suffix="existing.yaml", + ), + ResolveConfigFixture( + test_id="default-when-no-home-config", + config_path_str=None, + home_configs=[], + expected_suffix=".vcspull.yaml", + ), +] + + +@pytest.mark.parametrize( + list(ResolveConfigFixture._fields), + RESOLVE_CONFIG_FIXTURES, + ids=[f.test_id for f in RESOLVE_CONFIG_FIXTURES], +) +def test_resolve_config_file( + test_id: str, + config_path_str: str | None, + home_configs: list[str], + expected_suffix: str, + tmp_path: pathlib.Path, + monkeypatch: MonkeyPatch, +) -> None: + """Test _resolve_config_file handles various config scenarios.""" + monkeypatch.setenv("HOME", str(tmp_path)) + + # Create home config files if needed + full_paths = [] + for cfg in home_configs: + cfg_path = tmp_path / cfg + cfg_path.touch() + full_paths.append(cfg_path) + + monkeypatch.setattr( + import_repos_mod, + "find_home_config_files", + lambda filetype=None: full_paths, + ) + + result = _resolve_config_file(config_path_str) + assert result.name == expected_suffix + + +class ImportReposFixture(t.NamedTuple): + """Fixture for import_repos test cases.""" + + test_id: str + service: str + target: str + mode: str + base_url: str | None + dry_run: bool + yes: bool + output_json: bool + mock_repos: list[RemoteRepo] + mock_error: Exception | None + expected_log_contains: list[str] + expected_config_repos: int + + 
+IMPORT_REPOS_FIXTURES: list[ImportReposFixture] = [ + ImportReposFixture( + test_id="basic-github-user-dry-run", + service="github", + target="testuser", + mode="user", + base_url=None, + dry_run=True, + yes=True, + output_json=False, + mock_repos=[_make_repo("repo1"), _make_repo("repo2")], + mock_error=None, + expected_log_contains=["Found 2 repositories", "Dry run complete"], + expected_config_repos=0, + ), + ImportReposFixture( + test_id="github-user-writes-config", + service="github", + target="testuser", + mode="user", + base_url=None, + dry_run=False, + yes=True, + output_json=False, + mock_repos=[_make_repo("repo1")], + mock_error=None, + expected_log_contains=["Added 1 repositories"], + expected_config_repos=1, + ), + ImportReposFixture( + test_id="no-repos-found", + service="github", + target="emptyuser", + mode="user", + base_url=None, + dry_run=False, + yes=True, + output_json=False, + mock_repos=[], + mock_error=None, + expected_log_contains=["No repositories found"], + expected_config_repos=0, + ), + ImportReposFixture( + test_id="authentication-error", + service="github", + target="testuser", + mode="user", + base_url=None, + dry_run=False, + yes=True, + output_json=False, + mock_repos=[], + mock_error=AuthenticationError("Bad credentials"), + expected_log_contains=["Authentication error"], + expected_config_repos=0, + ), + ImportReposFixture( + test_id="rate-limit-error", + service="github", + target="testuser", + mode="user", + base_url=None, + dry_run=False, + yes=True, + output_json=False, + mock_repos=[], + mock_error=RateLimitError("Rate limit exceeded"), + expected_log_contains=["Rate limit exceeded"], + expected_config_repos=0, + ), + ImportReposFixture( + test_id="not-found-error", + service="github", + target="nosuchuser", + mode="user", + base_url=None, + dry_run=False, + yes=True, + output_json=False, + mock_repos=[], + mock_error=NotFoundError("User not found"), + expected_log_contains=["Not found"], + expected_config_repos=0, + ), + 
ImportReposFixture( + test_id="service-unavailable-error", + service="github", + target="testuser", + mode="user", + base_url=None, + dry_run=False, + yes=True, + output_json=False, + mock_repos=[], + mock_error=ServiceUnavailableError("Server error"), + expected_log_contains=["Service unavailable"], + expected_config_repos=0, + ), + ImportReposFixture( + test_id="configuration-error", + service="codecommit", + target="", + mode="user", + base_url=None, + dry_run=False, + yes=True, + output_json=False, + mock_repos=[], + mock_error=ConfigurationError("Invalid region"), + expected_log_contains=["Configuration error"], + expected_config_repos=0, + ), + ImportReposFixture( + test_id="gitlab-org-mode", + service="gitlab", + target="testgroup", + mode="org", + base_url=None, + dry_run=True, + yes=True, + output_json=False, + mock_repos=[_make_repo("group-project")], + mock_error=None, + expected_log_contains=["Found 1 repositories"], + expected_config_repos=0, + ), + ImportReposFixture( + test_id="codeberg-search-mode", + service="codeberg", + target="python cli", + mode="search", + base_url=None, + dry_run=True, + yes=True, + output_json=False, + mock_repos=[_make_repo("cli-tool", stars=100)], + mock_error=None, + expected_log_contains=["Found 1 repositories"], + expected_config_repos=0, + ), +] + + +@pytest.mark.parametrize( + list(ImportReposFixture._fields), + IMPORT_REPOS_FIXTURES, + ids=[f.test_id for f in IMPORT_REPOS_FIXTURES], +) +def test_import_repos( + test_id: str, + service: str, + target: str, + mode: str, + base_url: str | None, + dry_run: bool, + yes: bool, + output_json: bool, + mock_repos: list[RemoteRepo], + mock_error: Exception | None, + expected_log_contains: list[str], + expected_config_repos: int, + tmp_path: pathlib.Path, + monkeypatch: MonkeyPatch, + caplog: pytest.LogCaptureFixture, +) -> None: + """Test import_repos with various scenarios.""" + caplog.set_level(logging.INFO) + + monkeypatch.setenv("HOME", str(tmp_path)) + 
monkeypatch.chdir(tmp_path) + + workspace = tmp_path / "repos" + workspace.mkdir() + config_file = tmp_path / ".vcspull.yaml" + + # Mock the importer + class MockImporter: + service_name = "MockService" + + def fetch_repos( + self, + options: ImportOptions, + ) -> t.Iterator[RemoteRepo]: + if mock_error: + raise mock_error + yield from mock_repos + + monkeypatch.setattr( + import_repos_mod, + "_get_importer", + lambda *args, **kwargs: MockImporter(), + ) + + import_repos( + service=service, + target=target, + workspace=str(workspace), + mode=mode, + base_url=base_url, + token=None, + region=None, + profile=None, + language=None, + topics=None, + min_stars=0, + include_archived=False, + include_forks=False, + limit=100, + config_path_str=str(config_file), + dry_run=dry_run, + yes=yes, + output_json=output_json, + output_ndjson=False, + color="never", + ) + + for expected_text in expected_log_contains: + assert expected_text in caplog.text, ( + f"Expected '{expected_text}' in log, got: {caplog.text}" + ) + + if expected_config_repos > 0: + assert config_file.exists() + import yaml + + with config_file.open() as f: + config = yaml.safe_load(f) + assert config is not None + total_repos = sum(len(repos) for repos in config.values()) + assert total_repos == expected_config_repos + + +def test_import_repos_missing_target( + tmp_path: pathlib.Path, + monkeypatch: MonkeyPatch, + caplog: pytest.LogCaptureFixture, +) -> None: + """Test import_repos fails when target is missing for non-codecommit.""" + caplog.set_level(logging.ERROR) + + monkeypatch.setenv("HOME", str(tmp_path)) + workspace = tmp_path / "repos" + workspace.mkdir() + + import_repos( + service="github", + target="", # Empty target + workspace=str(workspace), + mode="user", + base_url=None, + token=None, + region=None, + profile=None, + language=None, + topics=None, + min_stars=0, + include_archived=False, + include_forks=False, + limit=100, + config_path_str=str(tmp_path / "config.yaml"), + dry_run=False, + 
yes=True, + output_json=False, + output_ndjson=False, + color="never", + ) + + assert "TARGET is required" in caplog.text + + +def test_import_repos_unknown_service( + tmp_path: pathlib.Path, + monkeypatch: MonkeyPatch, + caplog: pytest.LogCaptureFixture, +) -> None: + """Test import_repos fails for unknown service.""" + caplog.set_level(logging.ERROR) + + monkeypatch.setenv("HOME", str(tmp_path)) + workspace = tmp_path / "repos" + workspace.mkdir() + + import_repos( + service="unknownservice", + target="testuser", + workspace=str(workspace), + mode="user", + base_url=None, + token=None, + region=None, + profile=None, + language=None, + topics=None, + min_stars=0, + include_archived=False, + include_forks=False, + limit=100, + config_path_str=str(tmp_path / "config.yaml"), + dry_run=False, + yes=True, + output_json=False, + output_ndjson=False, + color="never", + ) + + assert "Unknown service" in caplog.text + + +def test_import_repos_user_abort( + tmp_path: pathlib.Path, + monkeypatch: MonkeyPatch, + caplog: pytest.LogCaptureFixture, +) -> None: + """Test import_repos aborts when user declines confirmation.""" + caplog.set_level(logging.INFO) + + monkeypatch.setenv("HOME", str(tmp_path)) + workspace = tmp_path / "repos" + workspace.mkdir() + config_file = tmp_path / ".vcspull.yaml" + + # Mock user input + monkeypatch.setattr("builtins.input", lambda _: "n") + + # Mock the importer + class MockImporter: + service_name = "MockService" + + def fetch_repos( + self, + options: ImportOptions, + ) -> t.Iterator[RemoteRepo]: + yield _make_repo("repo1") + + monkeypatch.setattr( + import_repos_mod, + "_get_importer", + lambda *args, **kwargs: MockImporter(), + ) + + import_repos( + service="github", + target="testuser", + workspace=str(workspace), + mode="user", + base_url=None, + token=None, + region=None, + profile=None, + language=None, + topics=None, + min_stars=0, + include_archived=False, + include_forks=False, + limit=100, + config_path_str=str(config_file), + 
dry_run=False, + yes=False, # Require confirmation + output_json=False, + output_ndjson=False, + color="never", + ) + + assert "Aborted by user" in caplog.text + assert not config_file.exists() + + +def test_import_repos_skips_existing( + tmp_path: pathlib.Path, + monkeypatch: MonkeyPatch, + caplog: pytest.LogCaptureFixture, +) -> None: + """Test import_repos skips repositories already in config.""" + import yaml + + caplog.set_level(logging.INFO) + + monkeypatch.setenv("HOME", str(tmp_path)) + workspace = tmp_path / "repos" + workspace.mkdir() + config_file = tmp_path / ".vcspull.yaml" + + # Create existing config with repo1 + existing_config = { + "~/repos/": { + "repo1": {"repo": "git+https://github.com/testuser/repo1.git"}, + } + } + config_file.write_text(yaml.dump(existing_config), encoding="utf-8") + + # Mock the importer to return repo1 (existing) and repo2 (new) + class MockImporter: + service_name = "MockService" + + def fetch_repos( + self, + options: ImportOptions, + ) -> t.Iterator[RemoteRepo]: + yield _make_repo("repo1") + yield _make_repo("repo2") + + monkeypatch.setattr( + import_repos_mod, + "_get_importer", + lambda *args, **kwargs: MockImporter(), + ) + + import_repos( + service="github", + target="testuser", + workspace=str(workspace), + mode="user", + base_url=None, + token=None, + region=None, + profile=None, + language=None, + topics=None, + min_stars=0, + include_archived=False, + include_forks=False, + limit=100, + config_path_str=str(config_file), + dry_run=False, + yes=True, + output_json=False, + output_ndjson=False, + color="never", + ) + + assert "Added 1 repositories" in caplog.text + assert "Skipped 1 existing" in caplog.text + + with config_file.open() as f: + final_config = yaml.safe_load(f) + + assert "repo1" in final_config["~/repos/"] + assert "repo2" in final_config["~/repos/"] + + +def test_import_repos_all_existing( + tmp_path: pathlib.Path, + monkeypatch: MonkeyPatch, + caplog: pytest.LogCaptureFixture, +) -> None: + """Test 
import_repos handles all repos already existing.""" + import yaml + + caplog.set_level(logging.INFO) + + monkeypatch.setenv("HOME", str(tmp_path)) + workspace = tmp_path / "repos" + workspace.mkdir() + config_file = tmp_path / ".vcspull.yaml" + + # Create existing config with repo1 + existing_config = { + "~/repos/": { + "repo1": {"repo": "git+https://github.com/testuser/repo1.git"}, + } + } + config_file.write_text(yaml.dump(existing_config), encoding="utf-8") + + # Mock the importer to return only repo1 (existing) + class MockImporter: + service_name = "MockService" + + def fetch_repos( + self, + options: ImportOptions, + ) -> t.Iterator[RemoteRepo]: + yield _make_repo("repo1") + + monkeypatch.setattr( + import_repos_mod, + "_get_importer", + lambda *args, **kwargs: MockImporter(), + ) + + import_repos( + service="github", + target="testuser", + workspace=str(workspace), + mode="user", + base_url=None, + token=None, + region=None, + profile=None, + language=None, + topics=None, + min_stars=0, + include_archived=False, + include_forks=False, + limit=100, + config_path_str=str(config_file), + dry_run=False, + yes=True, + output_json=False, + output_ndjson=False, + color="never", + ) + + assert "All repositories already exist" in caplog.text + + +def test_import_repos_json_output( + tmp_path: pathlib.Path, + monkeypatch: MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + """Test import_repos JSON output format.""" + monkeypatch.setenv("HOME", str(tmp_path)) + workspace = tmp_path / "repos" + workspace.mkdir() + + # Mock the importer + class MockImporter: + service_name = "MockService" + + def fetch_repos( + self, + options: ImportOptions, + ) -> t.Iterator[RemoteRepo]: + yield _make_repo("repo1", stars=50) + + monkeypatch.setattr( + import_repos_mod, + "_get_importer", + lambda *args, **kwargs: MockImporter(), + ) + + import_repos( + service="github", + target="testuser", + workspace=str(workspace), + mode="user", + base_url=None, + token=None, + 
region=None, + profile=None, + language=None, + topics=None, + min_stars=0, + include_archived=False, + include_forks=False, + limit=100, + config_path_str=str(tmp_path / "config.yaml"), + dry_run=True, + yes=True, + output_json=True, + output_ndjson=False, + color="never", + ) + + captured = capsys.readouterr() + output = json.loads(captured.out) + + assert isinstance(output, list) + assert len(output) == 1 + assert output[0]["name"] == "repo1" + assert output[0]["stars"] == 50 + + +def test_import_repos_ndjson_output( + tmp_path: pathlib.Path, + monkeypatch: MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + """Test import_repos NDJSON output format.""" + monkeypatch.setenv("HOME", str(tmp_path)) + workspace = tmp_path / "repos" + workspace.mkdir() + + # Mock the importer + class MockImporter: + service_name = "MockService" + + def fetch_repos( + self, + options: ImportOptions, + ) -> t.Iterator[RemoteRepo]: + yield _make_repo("repo1") + yield _make_repo("repo2") + + monkeypatch.setattr( + import_repos_mod, + "_get_importer", + lambda *args, **kwargs: MockImporter(), + ) + + import_repos( + service="github", + target="testuser", + workspace=str(workspace), + mode="user", + base_url=None, + token=None, + region=None, + profile=None, + language=None, + topics=None, + min_stars=0, + include_archived=False, + include_forks=False, + limit=100, + config_path_str=str(tmp_path / "config.yaml"), + dry_run=True, + yes=True, + output_json=False, + output_ndjson=True, + color="never", + ) + + captured = capsys.readouterr() + lines = captured.out.strip().split("\n") + + assert len(lines) == 2 + assert json.loads(lines[0])["name"] == "repo1" + assert json.loads(lines[1])["name"] == "repo2" + + +def test_import_repos_topics_filter( + tmp_path: pathlib.Path, + monkeypatch: MonkeyPatch, + caplog: pytest.LogCaptureFixture, +) -> None: + """Test import_repos passes topics filter correctly.""" + caplog.set_level(logging.INFO) + + monkeypatch.setenv("HOME", 
str(tmp_path)) + workspace = tmp_path / "repos" + workspace.mkdir() + + received_options: list[ImportOptions] = [] + + class MockImporter: + service_name = "MockService" + + def fetch_repos( + self, + options: ImportOptions, + ) -> t.Iterator[RemoteRepo]: + received_options.append(options) + return iter([]) + + monkeypatch.setattr( + import_repos_mod, + "_get_importer", + lambda *args, **kwargs: MockImporter(), + ) + + import_repos( + service="github", + target="testuser", + workspace=str(workspace), + mode="user", + base_url=None, + token=None, + region=None, + profile=None, + language="Python", + topics="cli,tool,python", + min_stars=50, + include_archived=True, + include_forks=True, + limit=200, + config_path_str=str(tmp_path / "config.yaml"), + dry_run=True, + yes=True, + output_json=False, + output_ndjson=False, + color="never", + ) + + assert len(received_options) == 1 + opts = received_options[0] + assert opts.language == "Python" + assert opts.topics == ["cli", "tool", "python"] + assert opts.min_stars == 50 + assert opts.include_archived is True + assert opts.include_forks is True + assert opts.limit == 200 + + +def test_import_repos_codecommit_no_target_required( + tmp_path: pathlib.Path, + monkeypatch: MonkeyPatch, + caplog: pytest.LogCaptureFixture, +) -> None: + """Test import_repos allows empty target for codecommit.""" + caplog.set_level(logging.INFO) + + monkeypatch.setenv("HOME", str(tmp_path)) + workspace = tmp_path / "repos" + workspace.mkdir() + + class MockImporter: + service_name = "CodeCommit" + + def fetch_repos( + self, + options: ImportOptions, + ) -> t.Iterator[RemoteRepo]: + yield _make_repo("aws-repo") + + monkeypatch.setattr( + import_repos_mod, + "_get_importer", + lambda *args, **kwargs: MockImporter(), + ) + + import_repos( + service="codecommit", + target="", # Empty target is OK for CodeCommit + workspace=str(workspace), + mode="user", + base_url=None, + token=None, + region="us-east-1", + profile=None, + language=None, + 
topics=None, + min_stars=0, + include_archived=False, + include_forks=False, + limit=100, + config_path_str=str(tmp_path / "config.yaml"), + dry_run=True, + yes=True, + output_json=False, + output_ndjson=False, + color="never", + ) + + # Should succeed and find repos + assert "Found 1 repositories" in caplog.text + # Should NOT have target required error + assert "TARGET is required" not in caplog.text + + +def test_import_repos_many_repos_truncates_preview( + tmp_path: pathlib.Path, + monkeypatch: MonkeyPatch, + caplog: pytest.LogCaptureFixture, +) -> None: + """Test import_repos shows '...and X more' when many repos.""" + caplog.set_level(logging.INFO) + + monkeypatch.setenv("HOME", str(tmp_path)) + workspace = tmp_path / "repos" + workspace.mkdir() + + # Create 15 repos + many_repos = [_make_repo(f"repo{i}") for i in range(15)] + + class MockImporter: + service_name = "MockService" + + def fetch_repos( + self, + options: ImportOptions, + ) -> t.Iterator[RemoteRepo]: + yield from many_repos + + monkeypatch.setattr( + import_repos_mod, + "_get_importer", + lambda *args, **kwargs: MockImporter(), + ) + + import_repos( + service="github", + target="testuser", + workspace=str(workspace), + mode="user", + base_url=None, + token=None, + region=None, + profile=None, + language=None, + topics=None, + min_stars=0, + include_archived=False, + include_forks=False, + limit=100, + config_path_str=str(tmp_path / "config.yaml"), + dry_run=True, + yes=True, + output_json=False, + output_ndjson=False, + color="never", + ) + + assert "Found 15 repositories" in caplog.text + assert "and 5 more" in caplog.text + + +def test_import_repos_config_load_error( + tmp_path: pathlib.Path, + monkeypatch: MonkeyPatch, + caplog: pytest.LogCaptureFixture, +) -> None: + """Test import_repos handles config load errors.""" + caplog.set_level(logging.ERROR) + + monkeypatch.setenv("HOME", str(tmp_path)) + workspace = tmp_path / "repos" + workspace.mkdir() + + # Create an invalid YAML file + 
config_file = tmp_path / ".vcspull.yaml" + config_file.write_text("invalid: yaml: content: [", encoding="utf-8") + + class MockImporter: + service_name = "MockService" + + def fetch_repos( + self, + options: ImportOptions, + ) -> t.Iterator[RemoteRepo]: + yield _make_repo("repo1") + + monkeypatch.setattr( + import_repos_mod, + "_get_importer", + lambda *args, **kwargs: MockImporter(), + ) + + import_repos( + service="github", + target="testuser", + workspace=str(workspace), + mode="user", + base_url=None, + token=None, + region=None, + profile=None, + language=None, + topics=None, + min_stars=0, + include_archived=False, + include_forks=False, + limit=100, + config_path_str=str(config_file), + dry_run=False, + yes=True, + output_json=False, + output_ndjson=False, + color="never", + ) + + assert "Error loading config" in caplog.text From 407e8b6c96d2a5dd544f98004341282455c3d95c Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 1 Feb 2026 10:15:05 -0600 Subject: [PATCH 006/109] fix(test[test_log]) Add import_repos logger to expected list why: The new import_repos module creates a logger that must be included in the expected logger names test. what: - Add "vcspull.cli.import_repos" to EXPECTED_LOGGER_NAMES --- tests/test_log.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_log.py b/tests/test_log.py index 1ba779bc0..672a843ca 100644 --- a/tests/test_log.py +++ b/tests/test_log.py @@ -432,6 +432,7 @@ def test_get_cli_logger_names_includes_base() -> None: "vcspull.cli.add", "vcspull.cli.discover", "vcspull.cli.fmt", + "vcspull.cli.import_repos", "vcspull.cli.list", "vcspull.cli.search", "vcspull.cli.status", From dcd9b764b62673f6ff46474a36a95f63fb040fdb Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 1 Feb 2026 10:18:14 -0600 Subject: [PATCH 007/109] docs(CHANGES) Note vcspull import command (#510) why: Document the new import feature for the changelog. 
what: - Add New features section for v1.51.x unreleased - Document vcspull import command with usage examples - List supported services, aliases, and filtering options --- CHANGES | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/CHANGES b/CHANGES index b203566d4..96ca69a59 100644 --- a/CHANGES +++ b/CHANGES @@ -33,6 +33,56 @@ $ uvx --from 'vcspull' --prerelease allow vcspull _Notes on upcoming releases will be added here_ +### New features + +#### `vcspull import` command for remote repository discovery (#510) + +Import repositories from GitHub, GitLab, Codeberg/Gitea/Forgejo, and AWS +CodeCommit directly into your vcspull configuration. + +Import a user's repositories: + +```console +$ vcspull import github torvalds -w ~/repos/linux --mode user +``` + +Import an organization's repositories: + +```console +$ vcspull import github django -w ~/study/python --mode org +``` + +Search and import repositories: + +```console +$ vcspull import github "machine learning" -w ~/ml-repos --mode search --min-stars 1000 +``` + +Use with self-hosted GitLab: + +```console +$ vcspull import gitlab myuser -w ~/work --url https://gitlab.company.com +``` + +Preview without writing (dry run): + +```console +$ vcspull import codeberg user -w ~/oss --dry-run +``` + +Import from AWS CodeCommit: + +```console +$ vcspull import codecommit -w ~/work/aws --region us-east-1 +``` + +Features: + +- Service aliases: `gh`, `gl`, `cb`, `cc`, `aws` +- Filtering: `--language`, `--topics`, `--min-stars`, `--archived`, `--forks` +- Output modes: human-readable (default), `--json`, `--ndjson` +- No new dependencies (uses stdlib `urllib` for HTTP) + ### Tests - Fix `pytest-asyncio` deprecation warning in isolated `pytester` runs by From 2a1dcadc098c86915453279ff376b4f9909cafca Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 1 Feb 2026 10:33:38 -0600 Subject: [PATCH 008/109] fix(remotes[gitea]) Use proper URL parsing for host detection why: GitHub 
Advanced Security flagged substring matching as vulnerable. A malicious URL like "https://evil.com/codeberg.org/path" would pass the previous "codeberg.org" in url check. what: - Add urllib.parse import - Use urlparse().netloc to extract hostname for exact matching - Replace substring check with exact hostname comparison for codeberg.org --- src/vcspull/_internal/remotes/gitea.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/vcspull/_internal/remotes/gitea.py b/src/vcspull/_internal/remotes/gitea.py index a76beb2d0..603fafc71 100644 --- a/src/vcspull/_internal/remotes/gitea.py +++ b/src/vcspull/_internal/remotes/gitea.py @@ -4,6 +4,7 @@ import logging import typing as t +import urllib.parse from .base import ( HTTPClient, @@ -54,13 +55,17 @@ def __init__( """ self._base_url = (base_url or CODEBERG_API_URL).rstrip("/") - # Determine token from environment based on service + # Determine token from environment based on service. + # Use proper URL parsing to extract hostname to avoid substring attacks. + parsed_url = urllib.parse.urlparse(self._base_url.lower()) + hostname = parsed_url.netloc + self._token: str | None if token: self._token = token - elif "codeberg.org" in self._base_url.lower(): + elif hostname == "codeberg.org": self._token = get_token_from_env("CODEBERG_TOKEN", "GITEA_TOKEN") - elif "forgejo" in self._base_url.lower(): + elif "forgejo" in hostname: self._token = get_token_from_env("FORGEJO_TOKEN", "GITEA_TOKEN") else: self._token = get_token_from_env("GITEA_TOKEN") From 4689286232e7aaab05ef2ef1d6662003bc5a41d5 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 1 Feb 2026 10:33:39 -0600 Subject: [PATCH 009/109] fix(test[test_gitea]) Use exact URL assertion why: Test used same vulnerable substring matching pattern as the code. 
what: - Replace "codeberg.org" in importer._base_url with exact URL comparison --- tests/_internal/remotes/test_gitea.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/_internal/remotes/test_gitea.py b/tests/_internal/remotes/test_gitea.py index c82e5c5a6..e7d9c519b 100644 --- a/tests/_internal/remotes/test_gitea.py +++ b/tests/_internal/remotes/test_gitea.py @@ -95,7 +95,7 @@ def test_gitea_search_with_array_response( def test_gitea_importer_defaults_to_codeberg() -> None: """Test GiteaImporter defaults to Codeberg URL.""" importer = GiteaImporter() - assert "codeberg.org" in importer._base_url + assert importer._base_url == "https://codeberg.org" def test_gitea_importer_service_name() -> None: From dd00cb55dbc5e7b1296e7cbe688d77b7c3123dad Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 1 Feb 2026 10:54:20 -0600 Subject: [PATCH 010/109] refactor(cli/import) Remove unused SERVICES_REQUIRING_URL constant why: Dead code - defined but never used what: - Remove SERVICES_REQUIRING_URL constant at line 54 --- src/vcspull/cli/import_repos.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/vcspull/cli/import_repos.py b/src/vcspull/cli/import_repos.py index d5983a033..1fdbf2c39 100644 --- a/src/vcspull/cli/import_repos.py +++ b/src/vcspull/cli/import_repos.py @@ -51,8 +51,6 @@ "aws": "codecommit", } -SERVICES_REQUIRING_URL = {"gitea", "forgejo"} - def create_import_subparser(parser: argparse.ArgumentParser) -> None: """Create ``vcspull import`` argument subparser. 
From 376db3c5397451325a41a72e01c1ddb67c7cefdc Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 1 Feb 2026 10:54:43 -0600 Subject: [PATCH 011/109] fix(cli/import[topic_list]) Filter empty strings from topic list why: Comma-separated topics like "python,,rust" would produce empty strings what: - Add if t.strip() filter to exclude empty strings after stripping --- src/vcspull/cli/import_repos.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/vcspull/cli/import_repos.py b/src/vcspull/cli/import_repos.py index 1fdbf2c39..223ef91f5 100644 --- a/src/vcspull/cli/import_repos.py +++ b/src/vcspull/cli/import_repos.py @@ -380,7 +380,7 @@ def import_repos( # Build import options import_mode = ImportMode(mode) - topic_list = [t.strip() for t in topics.split(",")] if topics else [] + topic_list = [t.strip() for t in topics.split(",") if t.strip()] if topics else [] options = ImportOptions( mode=import_mode, From fa06089b024dd3811302aa22ddb713f9fb2b36c9 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 1 Feb 2026 10:55:13 -0600 Subject: [PATCH 012/109] fix(cli/import[output]) Gate log messages behind human output mode why: Log messages corrupt JSON/NDJSON output when piping to other tools what: - Wrap "Fetching repositories..." 
message in human output check - Wrap "No repositories found" message in human output check - Wrap "Found X repositories" message in human output check --- src/vcspull/cli/import_repos.py | 45 ++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/src/vcspull/cli/import_repos.py b/src/vcspull/cli/import_repos.py index 223ef91f5..343a28c71 100644 --- a/src/vcspull/cli/import_repos.py +++ b/src/vcspull/cli/import_repos.py @@ -406,14 +406,15 @@ def import_repos( display_config_path = str(PrivatePath(config_file_path)) # Fetch repositories - log.info( - "%s→%s Fetching repositories from %s%s%s...", - Fore.CYAN, - Style.RESET_ALL, - Fore.MAGENTA, - importer.service_name, - Style.RESET_ALL, - ) + if output_mode.value == "human": + log.info( + "%s→%s Fetching repositories from %s%s%s...", + Fore.CYAN, + Style.RESET_ALL, + Fore.MAGENTA, + importer.service_name, + Style.RESET_ALL, + ) repos: list[RemoteRepo] = [] try: @@ -468,22 +469,24 @@ def import_repos( return if not repos: - log.info( - "%s!%s No repositories found matching criteria.", - Fore.YELLOW, - Style.RESET_ALL, - ) + if output_mode.value == "human": + log.info( + "%s!%s No repositories found matching criteria.", + Fore.YELLOW, + Style.RESET_ALL, + ) formatter.finalize() return - log.info( - "\n%s✓%s Found %s%d%s repositories", - Fore.GREEN, - Style.RESET_ALL, - Fore.CYAN, - len(repos), - Style.RESET_ALL, - ) + if output_mode.value == "human": + log.info( + "\n%s✓%s Found %s%d%s repositories", + Fore.GREEN, + Style.RESET_ALL, + Fore.CYAN, + len(repos), + Style.RESET_ALL, + ) # Show preview in human mode if output_mode.value == "human": From 1b8893596d70e4c33fbddcfb680bc8b16e64c40d Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 1 Feb 2026 10:55:51 -0600 Subject: [PATCH 013/109] fix(remotes/github[pagination]) Fix early termination in pagination why: Comparing len(items) < DEFAULT_PER_PAGE causes premature exit when actual per_page is smaller near the limit what: - 
Store per_page value before use in _fetch_search - Compare against actual per_page instead of DEFAULT_PER_PAGE - Apply same fix to _paginate_repos --- src/vcspull/_internal/remotes/github.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/vcspull/_internal/remotes/github.py b/src/vcspull/_internal/remotes/github.py index dbd6d2b06..3deb4f911 100644 --- a/src/vcspull/_internal/remotes/github.py +++ b/src/vcspull/_internal/remotes/github.py @@ -160,9 +160,10 @@ def _fetch_search(self, options: ImportOptions) -> t.Iterator[RemoteRepo]: count = 0 while count < options.limit: + per_page = min(DEFAULT_PER_PAGE, options.limit - count) params: dict[str, str | int] = { "q": query, - "per_page": min(DEFAULT_PER_PAGE, options.limit - count), + "per_page": per_page, "page": page, "sort": "stars", "order": "desc", @@ -190,7 +191,7 @@ def _fetch_search(self, options: ImportOptions) -> t.Iterator[RemoteRepo]: count += 1 # Check if there are more pages - if len(items) < DEFAULT_PER_PAGE: + if len(items) < per_page: break page += 1 @@ -218,8 +219,9 @@ def _paginate_repos( count = 0 while count < options.limit: + per_page = min(DEFAULT_PER_PAGE, options.limit - count) params: dict[str, str | int] = { - "per_page": min(DEFAULT_PER_PAGE, options.limit - count), + "per_page": per_page, "page": page, "sort": "updated", "direction": "desc", @@ -246,7 +248,7 @@ def _paginate_repos( count += 1 # Check if there are more pages - if len(data) < DEFAULT_PER_PAGE: + if len(data) < per_page: break page += 1 From 9e2f82d6118ce5a7cba5b4027b64faf24d4e2a3f Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 1 Feb 2026 10:56:21 -0600 Subject: [PATCH 014/109] fix(remotes/gitlab[pagination]) Fix early termination in pagination why: Comparing len(data) < DEFAULT_PER_PAGE causes premature exit when actual per_page is smaller near the limit what: - Store per_page value before use in _fetch_search - Compare against actual per_page instead of DEFAULT_PER_PAGE - Apply same 
fix to _paginate_repos --- src/vcspull/_internal/remotes/gitlab.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/vcspull/_internal/remotes/gitlab.py b/src/vcspull/_internal/remotes/gitlab.py index c39477c68..ec32acbac 100644 --- a/src/vcspull/_internal/remotes/gitlab.py +++ b/src/vcspull/_internal/remotes/gitlab.py @@ -168,10 +168,11 @@ def _fetch_search(self, options: ImportOptions) -> t.Iterator[RemoteRepo]: count = 0 while count < options.limit: + per_page = min(DEFAULT_PER_PAGE, options.limit - count) params: dict[str, str | int] = { "scope": "projects", "search": options.target, - "per_page": min(DEFAULT_PER_PAGE, options.limit - count), + "per_page": per_page, "page": page, } @@ -194,7 +195,7 @@ def _fetch_search(self, options: ImportOptions) -> t.Iterator[RemoteRepo]: count += 1 # Check if there are more pages - if len(data) < DEFAULT_PER_PAGE: + if len(data) < per_page: break page += 1 @@ -226,8 +227,9 @@ def _paginate_repos( count = 0 while count < options.limit: + per_page = min(DEFAULT_PER_PAGE, options.limit - count) params: dict[str, str | int] = { - "per_page": min(DEFAULT_PER_PAGE, options.limit - count), + "per_page": per_page, "page": page, "order_by": "last_activity_at", "sort": "desc", @@ -260,7 +262,7 @@ def _paginate_repos( count += 1 # Check if there are more pages - if len(data) < DEFAULT_PER_PAGE: + if len(data) < per_page: break page += 1 From 4549d81d9406a79e9b1a2b0ff4fa149bac30f591 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 1 Feb 2026 10:56:54 -0600 Subject: [PATCH 015/109] fix(remotes/gitea[pagination]) Fix early termination in pagination why: Comparing len(items) < DEFAULT_PER_PAGE causes premature exit when actual page_limit is smaller near the limit what: - Store page_limit value before use in _fetch_search - Compare against actual page_limit instead of DEFAULT_PER_PAGE - Apply same fix to _paginate_repos --- src/vcspull/_internal/remotes/gitea.py | 10 ++++++---- 1 file changed, 6 
insertions(+), 4 deletions(-) diff --git a/src/vcspull/_internal/remotes/gitea.py b/src/vcspull/_internal/remotes/gitea.py index 603fafc71..dea80db13 100644 --- a/src/vcspull/_internal/remotes/gitea.py +++ b/src/vcspull/_internal/remotes/gitea.py @@ -168,9 +168,10 @@ def _fetch_search(self, options: ImportOptions) -> t.Iterator[RemoteRepo]: count = 0 while count < options.limit: + page_limit = min(DEFAULT_PER_PAGE, options.limit - count) params: dict[str, str | int] = { "q": options.target, - "limit": min(DEFAULT_PER_PAGE, options.limit - count), + "limit": page_limit, "page": page, "sort": "stars", "order": "desc", @@ -204,7 +205,7 @@ def _fetch_search(self, options: ImportOptions) -> t.Iterator[RemoteRepo]: count += 1 # Check if there are more pages - if len(items) < DEFAULT_PER_PAGE: + if len(items) < page_limit: break page += 1 @@ -232,8 +233,9 @@ def _paginate_repos( count = 0 while count < options.limit: + page_limit = min(DEFAULT_PER_PAGE, options.limit - count) params: dict[str, str | int] = { - "limit": min(DEFAULT_PER_PAGE, options.limit - count), + "limit": page_limit, "page": page, } @@ -256,7 +258,7 @@ def _paginate_repos( count += 1 # Check if there are more pages - if len(data) < DEFAULT_PER_PAGE: + if len(data) < page_limit: break page += 1 From 9af17ec76f71da494e863e52ab0fbaf4559ad835 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 1 Feb 2026 12:24:45 -0600 Subject: [PATCH 016/109] test(remotes[pagination]) Add xfail regression test for pagination duplicates why: The pagination duplicate bug occurs when client-side filtering (excluding forks/archived repos) causes the per_page/limit parameter to vary between API pages, leading to offset misalignment and duplicate repositories. 
what: - Add test_pagination_duplicates.py with xfail-marked tests - Tests verify that per_page/limit values are consistent across all pagination requests - Cover both GitHub and Gitea importers --- .../remotes/test_pagination_duplicates.py | 259 ++++++++++++++++++ 1 file changed, 259 insertions(+) create mode 100644 tests/_internal/remotes/test_pagination_duplicates.py diff --git a/tests/_internal/remotes/test_pagination_duplicates.py b/tests/_internal/remotes/test_pagination_duplicates.py new file mode 100644 index 000000000..4c8a46384 --- /dev/null +++ b/tests/_internal/remotes/test_pagination_duplicates.py @@ -0,0 +1,259 @@ +"""Regression tests for pagination duplicate bug. + +The pagination duplicate bug occurs when client-side filtering (excluding forks/archived +repos) causes the per_page/limit parameter to vary between API pages. This causes offset +misalignment because: + +1. Page 1: per_page=10, returns items 0-9 +2. Client-side filtering removes some items, count becomes less than per_page +3. Page 2: per_page=5 (recalculated), API interprets as items 5-9 instead of 10-14 +4. Result: Items 5-9 appear twice (duplicates) + +The fix is to always use a consistent per_page value across all pagination requests. 
+""" + +from __future__ import annotations + +import json +import typing as t +import urllib.parse +import urllib.request + +import pytest + +from vcspull._internal.remotes.base import ImportMode, ImportOptions +from vcspull._internal.remotes.gitea import ( + DEFAULT_PER_PAGE as GITEA_DEFAULT_PER_PAGE, + GiteaImporter, +) +from vcspull._internal.remotes.github import ( + DEFAULT_PER_PAGE as GITHUB_DEFAULT_PER_PAGE, + GitHubImporter, +) + + +def _make_github_repo( + name: str, + *, + fork: bool = False, + archived: bool = False, +) -> dict[str, t.Any]: + """Create a GitHub API repo response object.""" + return { + "name": name, + "clone_url": f"https://github.com/testuser/{name}.git", + "html_url": f"https://github.com/testuser/{name}", + "description": f"Repo {name}", + "language": "Python", + "topics": [], + "stargazers_count": 10, + "fork": fork, + "archived": archived, + "default_branch": "main", + "owner": {"login": "testuser"}, + } + + +def _make_gitea_repo( + name: str, + *, + fork: bool = False, + archived: bool = False, +) -> dict[str, t.Any]: + """Create a Gitea API repo response object.""" + return { + "name": name, + "clone_url": f"https://codeberg.org/testuser/{name}.git", + "html_url": f"https://codeberg.org/testuser/{name}", + "description": f"Repo {name}", + "language": "Python", + "topics": [], + "stars_count": 10, + "fork": fork, + "archived": archived, + "default_branch": "main", + "owner": {"login": "testuser"}, + } + + +class MockHTTPResponse: + """Mock HTTP response for testing.""" + + def __init__( + self, + body: bytes, + headers: dict[str, str] | None = None, + status: int = 200, + ) -> None: + """Initialize mock response.""" + self._body = body + self._headers = headers or {} + self.status = status + self.code = status + + def read(self) -> bytes: + """Return response body.""" + return self._body + + def getheaders(self) -> list[tuple[str, str]]: + """Return response headers as list of tuples.""" + return list(self._headers.items()) + + def 
__enter__(self) -> MockHTTPResponse: + """Context manager entry.""" + return self + + def __exit__(self, *args: t.Any) -> None: + """Context manager exit.""" + pass + + +@pytest.mark.xfail( + reason="Pagination uses inconsistent per_page when client-side filtering is active" +) +def test_github_pagination_consistent_per_page( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Test that GitHub pagination uses consistent per_page across all requests. + + When client-side filtering removes items, the per_page parameter should NOT + be recalculated based on remaining count - it should stay constant to maintain + proper pagination offsets. + """ + captured_requests: list[urllib.request.Request] = [] + + # Create repos: mix of regular and forked + page1_repos = [ + _make_github_repo("repo1"), + _make_github_repo("repo2"), + _make_github_repo("fork1", fork=True), + _make_github_repo("fork2", fork=True), + _make_github_repo("fork3", fork=True), + ] + + page2_repos = [ + _make_github_repo("repo3"), + _make_github_repo("repo4"), + _make_github_repo("repo5"), + ] + + responses = [ + ( + json.dumps(page1_repos).encode(), + {"x-ratelimit-remaining": "100", "x-ratelimit-limit": "60"}, + 200, + ), + ( + json.dumps(page2_repos).encode(), + {"x-ratelimit-remaining": "99", "x-ratelimit-limit": "60"}, + 200, + ), + ] + call_count = 0 + + def urlopen_capture( + request: urllib.request.Request, + timeout: int | None = None, + ) -> MockHTTPResponse: + nonlocal call_count + captured_requests.append(request) + body, headers, status = responses[call_count % len(responses)] + call_count += 1 + return MockHTTPResponse(body, headers, status) + + monkeypatch.setattr("urllib.request.urlopen", urlopen_capture) + + importer = GitHubImporter() + options = ImportOptions( + mode=ImportMode.USER, + target="testuser", + limit=5, + include_forks=False, # Filter out forks client-side + ) + list(importer.fetch_repos(options)) + + # Extract per_page values from all requests + per_page_values = [] + for 
req in captured_requests: + parsed = urllib.parse.urlparse(req.full_url) + params = urllib.parse.parse_qs(parsed.query) + if "per_page" in params: + per_page_values.append(int(params["per_page"][0])) + + # All per_page values should be identical (consistent pagination) + assert len(per_page_values) >= 2, "Expected at least 2 API requests" + assert all(v == GITHUB_DEFAULT_PER_PAGE for v in per_page_values), ( + f"Expected all per_page values to be {GITHUB_DEFAULT_PER_PAGE}, " + f"got: {per_page_values}" + ) + + +@pytest.mark.xfail( + reason="Pagination uses inconsistent limit when client-side filtering is active" +) +def test_gitea_pagination_consistent_limit( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Test that Gitea pagination uses consistent limit across all requests. + + When client-side filtering removes items, the limit parameter should NOT + be recalculated based on remaining count - it should stay constant to maintain + proper pagination offsets. + """ + captured_requests: list[urllib.request.Request] = [] + + # Create repos: mix of regular and forked + page1_repos = [ + _make_gitea_repo("repo1"), + _make_gitea_repo("repo2"), + _make_gitea_repo("fork1", fork=True), + _make_gitea_repo("fork2", fork=True), + _make_gitea_repo("fork3", fork=True), + ] + + page2_repos = [ + _make_gitea_repo("repo3"), + _make_gitea_repo("repo4"), + _make_gitea_repo("repo5"), + ] + + responses: list[tuple[bytes, dict[str, str], int]] = [ + (json.dumps(page1_repos).encode(), {}, 200), + (json.dumps(page2_repos).encode(), {}, 200), + ] + call_count = 0 + + def urlopen_capture( + request: urllib.request.Request, + timeout: int | None = None, + ) -> MockHTTPResponse: + nonlocal call_count + captured_requests.append(request) + body, headers, status = responses[call_count % len(responses)] + call_count += 1 + return MockHTTPResponse(body, headers, status) + + monkeypatch.setattr("urllib.request.urlopen", urlopen_capture) + + importer = 
GiteaImporter(base_url="https://codeberg.org") + options = ImportOptions( + mode=ImportMode.USER, + target="testuser", + limit=5, + include_forks=False, # Filter out forks client-side + ) + list(importer.fetch_repos(options)) + + # Extract limit values from all requests + limit_values = [] + for req in captured_requests: + parsed = urllib.parse.urlparse(req.full_url) + params = urllib.parse.parse_qs(parsed.query) + if "limit" in params: + limit_values.append(int(params["limit"][0])) + + # All limit values should be identical (consistent pagination) + assert len(limit_values) >= 2, "Expected at least 2 API requests" + assert all(v == GITEA_DEFAULT_PER_PAGE for v in limit_values), ( + f"Expected all limit values to be {GITEA_DEFAULT_PER_PAGE}, got: {limit_values}" + ) From aa28913cd35bdfb49a735aaeb0d94205497f5e16 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 1 Feb 2026 12:27:20 -0600 Subject: [PATCH 017/109] fix(remotes/github[pagination]) Use consistent per_page for pagination why: Changing per_page between API pages causes offset misalignment, resulting in duplicate repositories when client-side filtering removes some items. what: - Always use DEFAULT_PER_PAGE instead of recalculating based on remaining count - Fix both _paginate_repos and _fetch_search methods - Update early termination checks to use DEFAULT_PER_PAGE --- src/vcspull/_internal/remotes/github.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/vcspull/_internal/remotes/github.py b/src/vcspull/_internal/remotes/github.py index 3deb4f911..d2303e5bf 100644 --- a/src/vcspull/_internal/remotes/github.py +++ b/src/vcspull/_internal/remotes/github.py @@ -160,10 +160,11 @@ def _fetch_search(self, options: ImportOptions) -> t.Iterator[RemoteRepo]: count = 0 while count < options.limit: - per_page = min(DEFAULT_PER_PAGE, options.limit - count) + # Always use DEFAULT_PER_PAGE to maintain consistent pagination offset. 
+ # Changing per_page between pages causes offset misalignment and duplicates. params: dict[str, str | int] = { "q": query, - "per_page": per_page, + "per_page": DEFAULT_PER_PAGE, "page": page, "sort": "stars", "order": "desc", @@ -191,7 +192,7 @@ def _fetch_search(self, options: ImportOptions) -> t.Iterator[RemoteRepo]: count += 1 # Check if there are more pages - if len(items) < per_page: + if len(items) < DEFAULT_PER_PAGE: break page += 1 @@ -219,9 +220,10 @@ def _paginate_repos( count = 0 while count < options.limit: - per_page = min(DEFAULT_PER_PAGE, options.limit - count) + # Always use DEFAULT_PER_PAGE to maintain consistent pagination offset. + # Changing per_page between pages causes offset misalignment and duplicates. params: dict[str, str | int] = { - "per_page": per_page, + "per_page": DEFAULT_PER_PAGE, "page": page, "sort": "updated", "direction": "desc", @@ -248,7 +250,7 @@ def _paginate_repos( count += 1 # Check if there are more pages - if len(data) < per_page: + if len(data) < DEFAULT_PER_PAGE: break page += 1 From d2d22092d295a658c428d316173fd1c1af754161 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 1 Feb 2026 12:28:09 -0600 Subject: [PATCH 018/109] fix(remotes/gitea[pagination]) Use consistent limit for pagination why: Changing limit between API pages causes offset misalignment, resulting in duplicate repositories when client-side filtering removes some items. 
what: - Always use DEFAULT_PER_PAGE instead of recalculating based on remaining count - Fix both _paginate_repos and _fetch_search methods - Update early termination checks to use DEFAULT_PER_PAGE --- src/vcspull/_internal/remotes/gitea.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/vcspull/_internal/remotes/gitea.py b/src/vcspull/_internal/remotes/gitea.py index dea80db13..7837858ac 100644 --- a/src/vcspull/_internal/remotes/gitea.py +++ b/src/vcspull/_internal/remotes/gitea.py @@ -168,10 +168,11 @@ def _fetch_search(self, options: ImportOptions) -> t.Iterator[RemoteRepo]: count = 0 while count < options.limit: - page_limit = min(DEFAULT_PER_PAGE, options.limit - count) + # Always use DEFAULT_PER_PAGE to maintain consistent pagination offset. + # Changing limit between pages causes offset misalignment and duplicates. params: dict[str, str | int] = { "q": options.target, - "limit": page_limit, + "limit": DEFAULT_PER_PAGE, "page": page, "sort": "stars", "order": "desc", @@ -205,7 +206,7 @@ def _fetch_search(self, options: ImportOptions) -> t.Iterator[RemoteRepo]: count += 1 # Check if there are more pages - if len(items) < page_limit: + if len(items) < DEFAULT_PER_PAGE: break page += 1 @@ -233,9 +234,10 @@ def _paginate_repos( count = 0 while count < options.limit: - page_limit = min(DEFAULT_PER_PAGE, options.limit - count) + # Always use DEFAULT_PER_PAGE to maintain consistent pagination offset. + # Changing limit between pages causes offset misalignment and duplicates. 
params: dict[str, str | int] = { - "limit": page_limit, + "limit": DEFAULT_PER_PAGE, "page": page, } @@ -258,7 +260,7 @@ def _paginate_repos( count += 1 # Check if there are more pages - if len(data) < page_limit: + if len(data) < DEFAULT_PER_PAGE: break page += 1 From 78d7443dd51135ad86309608930e0760f96a5a54 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 1 Feb 2026 12:28:37 -0600 Subject: [PATCH 019/109] test(remotes[pagination]) Remove xfail markers from pagination tests why: The pagination duplicate bug has been fixed in both GitHub and Gitea importers. what: - Remove @pytest.mark.xfail decorators from both tests - Tests now pass and verify consistent per_page/limit across all API requests --- .../remotes/test_pagination_duplicates.py | 50 +++++++++---------- 1 file changed, 24 insertions(+), 26 deletions(-) diff --git a/tests/_internal/remotes/test_pagination_duplicates.py b/tests/_internal/remotes/test_pagination_duplicates.py index 4c8a46384..09044853c 100644 --- a/tests/_internal/remotes/test_pagination_duplicates.py +++ b/tests/_internal/remotes/test_pagination_duplicates.py @@ -108,9 +108,6 @@ def __exit__(self, *args: t.Any) -> None: pass -@pytest.mark.xfail( - reason="Pagination uses inconsistent per_page when client-side filtering is active" -) def test_github_pagination_consistent_per_page( monkeypatch: pytest.MonkeyPatch, ) -> None: @@ -122,19 +119,19 @@ def test_github_pagination_consistent_per_page( """ captured_requests: list[urllib.request.Request] = [] - # Create repos: mix of regular and forked + # Create page 1 with exactly DEFAULT_PER_PAGE items to force pagination. + # Half regular repos, half forks - forks will be filtered out client-side. 
page1_repos = [ - _make_github_repo("repo1"), - _make_github_repo("repo2"), - _make_github_repo("fork1", fork=True), - _make_github_repo("fork2", fork=True), - _make_github_repo("fork3", fork=True), + _make_github_repo(f"repo{i}") for i in range(GITHUB_DEFAULT_PER_PAGE // 2) ] + page1_repos.extend( + _make_github_repo(f"fork{i}", fork=True) + for i in range(GITHUB_DEFAULT_PER_PAGE // 2) + ) + # Page 2 has more repos page2_repos = [ - _make_github_repo("repo3"), - _make_github_repo("repo4"), - _make_github_repo("repo5"), + _make_github_repo(f"repo{GITHUB_DEFAULT_PER_PAGE // 2 + i}") for i in range(10) ] responses = [ @@ -164,10 +161,12 @@ def urlopen_capture( monkeypatch.setattr("urllib.request.urlopen", urlopen_capture) importer = GitHubImporter() + # Request more repos than page 1 provides after filtering (50 regular repos) + # This forces pagination to continue to page 2 options = ImportOptions( mode=ImportMode.USER, target="testuser", - limit=5, + limit=60, # More than 50 regular repos in page 1 include_forks=False, # Filter out forks client-side ) list(importer.fetch_repos(options)) @@ -188,9 +187,6 @@ def urlopen_capture( ) -@pytest.mark.xfail( - reason="Pagination uses inconsistent limit when client-side filtering is active" -) def test_gitea_pagination_consistent_limit( monkeypatch: pytest.MonkeyPatch, ) -> None: @@ -202,19 +198,19 @@ def test_gitea_pagination_consistent_limit( """ captured_requests: list[urllib.request.Request] = [] - # Create repos: mix of regular and forked + # Create page 1 with exactly DEFAULT_PER_PAGE items to force pagination. + # Half regular repos, half forks - forks will be filtered out client-side. 
page1_repos = [ - _make_gitea_repo("repo1"), - _make_gitea_repo("repo2"), - _make_gitea_repo("fork1", fork=True), - _make_gitea_repo("fork2", fork=True), - _make_gitea_repo("fork3", fork=True), + _make_gitea_repo(f"repo{i}") for i in range(GITEA_DEFAULT_PER_PAGE // 2) ] + page1_repos.extend( + _make_gitea_repo(f"fork{i}", fork=True) + for i in range(GITEA_DEFAULT_PER_PAGE // 2) + ) + # Page 2 has more repos page2_repos = [ - _make_gitea_repo("repo3"), - _make_gitea_repo("repo4"), - _make_gitea_repo("repo5"), + _make_gitea_repo(f"repo{GITEA_DEFAULT_PER_PAGE // 2 + i}") for i in range(10) ] responses: list[tuple[bytes, dict[str, str], int]] = [ @@ -236,10 +232,12 @@ def urlopen_capture( monkeypatch.setattr("urllib.request.urlopen", urlopen_capture) importer = GiteaImporter(base_url="https://codeberg.org") + # Request more repos than page 1 provides after filtering (25 regular repos) + # This forces pagination to continue to page 2 options = ImportOptions( mode=ImportMode.USER, target="testuser", - limit=5, + limit=35, # More than 25 regular repos in page 1 include_forks=False, # Filter out forks client-side ) list(importer.fetch_repos(options)) From 613da159277a86e39b4b4fdf51bc8fb88ddebadf Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 1 Feb 2026 12:39:37 -0600 Subject: [PATCH 020/109] fix(remotes/gitlab[pagination]) Use consistent per_page for pagination why: Changing per_page between API pages causes offset misalignment, resulting in duplicate repositories when client-side filtering removes some items. 
what: - Always use DEFAULT_PER_PAGE instead of recalculating based on remaining count - Fix both _paginate_repos and _fetch_search methods - Update early termination checks to use DEFAULT_PER_PAGE - Add GitLab pagination test to test_pagination_duplicates.py --- src/vcspull/_internal/remotes/gitlab.py | 14 +-- .../remotes/test_pagination_duplicates.py | 97 +++++++++++++++++++ 2 files changed, 105 insertions(+), 6 deletions(-) diff --git a/src/vcspull/_internal/remotes/gitlab.py b/src/vcspull/_internal/remotes/gitlab.py index ec32acbac..9d1f29b7d 100644 --- a/src/vcspull/_internal/remotes/gitlab.py +++ b/src/vcspull/_internal/remotes/gitlab.py @@ -168,11 +168,12 @@ def _fetch_search(self, options: ImportOptions) -> t.Iterator[RemoteRepo]: count = 0 while count < options.limit: - per_page = min(DEFAULT_PER_PAGE, options.limit - count) + # Always use DEFAULT_PER_PAGE to maintain consistent pagination offset. + # Changing per_page between pages causes offset misalignment and duplicates. params: dict[str, str | int] = { "scope": "projects", "search": options.target, - "per_page": per_page, + "per_page": DEFAULT_PER_PAGE, "page": page, } @@ -195,7 +196,7 @@ def _fetch_search(self, options: ImportOptions) -> t.Iterator[RemoteRepo]: count += 1 # Check if there are more pages - if len(data) < per_page: + if len(data) < DEFAULT_PER_PAGE: break page += 1 @@ -227,9 +228,10 @@ def _paginate_repos( count = 0 while count < options.limit: - per_page = min(DEFAULT_PER_PAGE, options.limit - count) + # Always use DEFAULT_PER_PAGE to maintain consistent pagination offset. + # Changing per_page between pages causes offset misalignment and duplicates. 
params: dict[str, str | int] = { - "per_page": per_page, + "per_page": DEFAULT_PER_PAGE, "page": page, "order_by": "last_activity_at", "sort": "desc", @@ -262,7 +264,7 @@ def _paginate_repos( count += 1 # Check if there are more pages - if len(data) < per_page: + if len(data) < DEFAULT_PER_PAGE: break page += 1 diff --git a/tests/_internal/remotes/test_pagination_duplicates.py b/tests/_internal/remotes/test_pagination_duplicates.py index 09044853c..fd0711eed 100644 --- a/tests/_internal/remotes/test_pagination_duplicates.py +++ b/tests/_internal/remotes/test_pagination_duplicates.py @@ -30,6 +30,10 @@ DEFAULT_PER_PAGE as GITHUB_DEFAULT_PER_PAGE, GitHubImporter, ) +from vcspull._internal.remotes.gitlab import ( + DEFAULT_PER_PAGE as GITLAB_DEFAULT_PER_PAGE, + GitLabImporter, +) def _make_github_repo( @@ -76,6 +80,28 @@ def _make_gitea_repo( } +def _make_gitlab_repo( + name: str, + *, + fork: bool = False, + archived: bool = False, +) -> dict[str, t.Any]: + """Create a GitLab API project response object.""" + return { + "path": name, + "name": name, + "http_url_to_repo": f"https://gitlab.com/testuser/{name}.git", + "web_url": f"https://gitlab.com/testuser/{name}", + "description": f"Project {name}", + "topics": [], + "star_count": 10, + "forked_from_project": {"id": 123} if fork else None, + "archived": archived, + "default_branch": "main", + "namespace": {"path": "testuser"}, + } + + class MockHTTPResponse: """Mock HTTP response for testing.""" @@ -255,3 +281,74 @@ def urlopen_capture( assert all(v == GITEA_DEFAULT_PER_PAGE for v in limit_values), ( f"Expected all limit values to be {GITEA_DEFAULT_PER_PAGE}, got: {limit_values}" ) + + +def test_gitlab_pagination_consistent_per_page( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Test that GitLab pagination uses consistent per_page across all requests. 
+ + When client-side filtering removes items, the per_page parameter should NOT + be recalculated based on remaining count - it should stay constant to maintain + proper pagination offsets. + """ + captured_requests: list[urllib.request.Request] = [] + + # Create page 1 with exactly DEFAULT_PER_PAGE items to force pagination. + # Half regular repos, half forks - forks will be filtered out client-side. + page1_repos = [ + _make_gitlab_repo(f"repo{i}") for i in range(GITLAB_DEFAULT_PER_PAGE // 2) + ] + page1_repos.extend( + _make_gitlab_repo(f"fork{i}", fork=True) + for i in range(GITLAB_DEFAULT_PER_PAGE // 2) + ) + + # Page 2 has more repos + page2_repos = [ + _make_gitlab_repo(f"repo{GITLAB_DEFAULT_PER_PAGE // 2 + i}") for i in range(10) + ] + + responses: list[tuple[bytes, dict[str, str], int]] = [ + (json.dumps(page1_repos).encode(), {}, 200), + (json.dumps(page2_repos).encode(), {}, 200), + ] + call_count = 0 + + def urlopen_capture( + request: urllib.request.Request, + timeout: int | None = None, + ) -> MockHTTPResponse: + nonlocal call_count + captured_requests.append(request) + body, headers, status = responses[call_count % len(responses)] + call_count += 1 + return MockHTTPResponse(body, headers, status) + + monkeypatch.setattr("urllib.request.urlopen", urlopen_capture) + + importer = GitLabImporter() + # Request more repos than page 1 provides after filtering (50 regular repos) + # This forces pagination to continue to page 2 + options = ImportOptions( + mode=ImportMode.ORG, + target="testgroup", + limit=60, # More than 50 regular repos in page 1 + include_forks=False, # Filter out forks client-side + ) + list(importer.fetch_repos(options)) + + # Extract per_page values from all requests + per_page_values = [] + for req in captured_requests: + parsed = urllib.parse.urlparse(req.full_url) + params = urllib.parse.parse_qs(parsed.query) + if "per_page" in params: + per_page_values.append(int(params["per_page"][0])) + + # All per_page values should be identical 
(consistent pagination) + assert len(per_page_values) >= 2, "Expected at least 2 API requests" + assert all(v == GITLAB_DEFAULT_PER_PAGE for v in per_page_values), ( + f"Expected all per_page values to be {GITLAB_DEFAULT_PER_PAGE}, " + f"got: {per_page_values}" + ) From 0633eda54714964a10618f3c44d6801905844c36 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 1 Feb 2026 13:01:47 -0600 Subject: [PATCH 021/109] docs(cli/import) Document GitLab subgroup support in help text why: Users should know they can import from GitLab subgroups using slash notation (e.g., gitlab-org/ci-cd). what: - Add note about subgroup support to import command description - Add example showing subgroup import: gitlab-org/ci-cd - Update TARGET argument help to mention subgroup slash notation --- src/vcspull/cli/__init__.py | 5 ++++- src/vcspull/cli/import_repos.py | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/vcspull/cli/__init__.py b/src/vcspull/cli/__init__.py index ebbb65226..4019c1321 100644 --- a/src/vcspull/cli/__init__.py +++ b/src/vcspull/cli/__init__.py @@ -111,7 +111,7 @@ def build_description( [ "vcspull import github torvalds -w ~/repos/linux --mode user", "vcspull import github django -w ~/study/python --mode org", - "vcspull import gitlab myuser -w ~/work --dry-run", + "vcspull import gitlab gitlab-org/ci-cd -w ~/work --mode org", "vcspull import codeberg user -w ~/oss --json", ], ), @@ -250,6 +250,8 @@ def build_description( Fetches repository lists from GitHub, GitLab, Codeberg/Gitea/Forgejo, or AWS CodeCommit and adds them to the vcspull configuration. + + For GitLab, you can specify subgroups using slash notation (e.g., parent/child). 
""", ( ( @@ -257,6 +259,7 @@ def build_description( [ "vcspull import github torvalds -w ~/repos/linux --mode user", "vcspull import github django -w ~/study/python --mode org", + "vcspull import gitlab gitlab-org/ci-cd -w ~/work --mode org", "vcspull import gitlab myuser -w ~/work --url https://gitlab.company.com", "vcspull import codeberg user -w ~/oss --dry-run", "vcspull import codecommit -w ~/work/aws --region us-east-1", diff --git a/src/vcspull/cli/import_repos.py b/src/vcspull/cli/import_repos.py index 343a28c71..26d7a07ca 100644 --- a/src/vcspull/cli/import_repos.py +++ b/src/vcspull/cli/import_repos.py @@ -70,7 +70,10 @@ def create_import_subparser(parser: argparse.ArgumentParser) -> None: metavar="TARGET", nargs="?", default="", - help="User, org name, or search query (optional for codecommit)", + help=( + "User, org name, or search query (optional for codecommit). " + "For GitLab, supports subgroups with slash notation (e.g., parent/child)." + ), ) parser.add_argument( "-w", From c78bcd974b3c11d5504280423aaf10aedbd3e67d Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 1 Feb 2026 13:04:37 -0600 Subject: [PATCH 022/109] test(remotes/gitlab) Add tests for GitLab subgroup support why: Verify that GitLab subgroups with slash notation are correctly URL-encoded as %2F in API requests. 
what: - Add test_gitlab_subgroup_url_encoding for parent/child paths - Add test_gitlab_deeply_nested_subgroup for a/b/c/d paths - Both tests verify the URL contains properly encoded %2F sequences --- tests/_internal/remotes/test_gitlab.py | 118 +++++++++++++++++++++++++ 1 file changed, 118 insertions(+) diff --git a/tests/_internal/remotes/test_gitlab.py b/tests/_internal/remotes/test_gitlab.py index 6c89de4d3..690ed011f 100644 --- a/tests/_internal/remotes/test_gitlab.py +++ b/tests/_internal/remotes/test_gitlab.py @@ -4,6 +4,7 @@ import json import typing as t +import urllib.request import pytest @@ -156,3 +157,120 @@ def test_gitlab_uses_path_not_name( repos = list(importer.fetch_repos(options)) assert len(repos) == 1 assert repos[0].name == "my-project" # Uses 'path', not 'name' + + +class MockHTTPResponse: + """Mock HTTP response for subgroup test.""" + + def __init__(self, body: bytes, headers: dict[str, str] | None = None) -> None: + self._body = body + self._headers = headers or {} + self.status = 200 + self.code = 200 + + def read(self) -> bytes: + return self._body + + def getheaders(self) -> list[tuple[str, str]]: + return list(self._headers.items()) + + def __enter__(self) -> MockHTTPResponse: + return self + + def __exit__(self, *args: t.Any) -> None: + pass + + +def test_gitlab_subgroup_url_encoding( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Test that GitLab subgroups are URL-encoded correctly. + + Subgroups use slash notation (e.g., parent/child) which must be + URL-encoded as %2F in API requests. 
+ """ + captured_urls: list[str] = [] + + response_json = [ + { + "path": "subgroup-project", + "name": "Subgroup Project", + "http_url_to_repo": "https://gitlab.com/parent/child/subgroup-project.git", + "web_url": "https://gitlab.com/parent/child/subgroup-project", + "description": "Project in subgroup", + "topics": [], + "star_count": 10, + "archived": False, + "default_branch": "main", + "namespace": {"path": "parent/child"}, + } + ] + + def urlopen_capture( + request: urllib.request.Request, + timeout: int | None = None, + ) -> MockHTTPResponse: + captured_urls.append(request.full_url) + return MockHTTPResponse(json.dumps(response_json).encode()) + + monkeypatch.setattr("urllib.request.urlopen", urlopen_capture) + + importer = GitLabImporter() + options = ImportOptions(mode=ImportMode.ORG, target="parent/child") + repos = list(importer.fetch_repos(options)) + + # Verify the URL was encoded correctly + assert len(captured_urls) == 1 + assert "parent%2Fchild" in captured_urls[0], ( + f"Expected URL-encoded subgroup path 'parent%2Fchild', got: {captured_urls[0]}" + ) + assert "/groups/parent%2Fchild/projects" in captured_urls[0] + + # Verify repos were returned + assert len(repos) == 1 + assert repos[0].name == "subgroup-project" + + +def test_gitlab_deeply_nested_subgroup( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Test that deeply nested subgroups (multiple slashes) work correctly.""" + captured_urls: list[str] = [] + + response_json = [ + { + "path": "deep-project", + "name": "Deep Project", + "http_url_to_repo": "https://gitlab.com/a/b/c/d/deep-project.git", + "web_url": "https://gitlab.com/a/b/c/d/deep-project", + "description": "Deeply nested project", + "topics": [], + "star_count": 5, + "archived": False, + "default_branch": "main", + "namespace": {"path": "a/b/c/d"}, + } + ] + + def urlopen_capture( + request: urllib.request.Request, + timeout: int | None = None, + ) -> MockHTTPResponse: + captured_urls.append(request.full_url) + return 
MockHTTPResponse(json.dumps(response_json).encode()) + + monkeypatch.setattr("urllib.request.urlopen", urlopen_capture) + + importer = GitLabImporter() + # Test with 4 levels of nesting: a/b/c/d + options = ImportOptions(mode=ImportMode.ORG, target="a/b/c/d") + repos = list(importer.fetch_repos(options)) + + # Verify URL encoding - each slash should become %2F + assert len(captured_urls) == 1 + assert "a%2Fb%2Fc%2Fd" in captured_urls[0], ( + f"Expected URL-encoded path 'a%2Fb%2Fc%2Fd', got: {captured_urls[0]}" + ) + + assert len(repos) == 1 + assert repos[0].name == "deep-project" From 898e98718ca59f6e71ee432df69c2c54558e0538 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 1 Feb 2026 13:09:06 -0600 Subject: [PATCH 023/109] fix(test/remotes) Clear env tokens in authentication tests why: Tests for is_authenticated=False were failing because tokens from environment variables (GITHUB_TOKEN, GITLAB_TOKEN, etc.) were being detected. what: - Add monkeypatch to clear token env vars in test_*_is_authenticated_without_token - Fix test_gitlab_search_requires_auth to clear GITLAB_TOKEN/GL_TOKEN - Add pytest import to test_gitea.py --- tests/_internal/remotes/test_gitea.py | 10 +++++++++- tests/_internal/remotes/test_github.py | 7 ++++++- tests/_internal/remotes/test_gitlab.py | 14 ++++++++++++-- 3 files changed, 27 insertions(+), 4 deletions(-) diff --git a/tests/_internal/remotes/test_gitea.py b/tests/_internal/remotes/test_gitea.py index e7d9c519b..c20699e33 100644 --- a/tests/_internal/remotes/test_gitea.py +++ b/tests/_internal/remotes/test_gitea.py @@ -5,6 +5,8 @@ import json import typing as t +import pytest + from vcspull._internal.remotes.base import ImportMode, ImportOptions from vcspull._internal.remotes.gitea import GiteaImporter @@ -104,8 +106,14 @@ def test_gitea_importer_service_name() -> None: assert importer.service_name == "Gitea" -def test_gitea_importer_is_authenticated_without_token() -> None: +def test_gitea_importer_is_authenticated_without_token( 
+ monkeypatch: pytest.MonkeyPatch, +) -> None: """Test is_authenticated returns False without token.""" + # Clear environment variables that could provide a token + monkeypatch.delenv("CODEBERG_TOKEN", raising=False) + monkeypatch.delenv("GITEA_TOKEN", raising=False) + monkeypatch.delenv("FORGEJO_TOKEN", raising=False) importer = GiteaImporter(token=None) assert importer.is_authenticated is False diff --git a/tests/_internal/remotes/test_github.py b/tests/_internal/remotes/test_github.py index 06ea94de9..bcc39ac17 100644 --- a/tests/_internal/remotes/test_github.py +++ b/tests/_internal/remotes/test_github.py @@ -310,8 +310,13 @@ def test_github_fetch_search( assert repos[0].stars == 1000 -def test_github_importer_is_authenticated_without_token() -> None: +def test_github_importer_is_authenticated_without_token( + monkeypatch: pytest.MonkeyPatch, +) -> None: """Test is_authenticated returns False without token.""" + # Clear environment variables that could provide a token + monkeypatch.delenv("GITHUB_TOKEN", raising=False) + monkeypatch.delenv("GH_TOKEN", raising=False) importer = GitHubImporter(token=None) assert importer.is_authenticated is False diff --git a/tests/_internal/remotes/test_gitlab.py b/tests/_internal/remotes/test_gitlab.py index 690ed011f..d51708b50 100644 --- a/tests/_internal/remotes/test_gitlab.py +++ b/tests/_internal/remotes/test_gitlab.py @@ -52,10 +52,15 @@ def test_gitlab_fetch_group( assert repos[0].name == "group-project" -def test_gitlab_search_requires_auth() -> None: +def test_gitlab_search_requires_auth( + monkeypatch: pytest.MonkeyPatch, +) -> None: """Test GitLab search raises error without authentication.""" from vcspull._internal.remotes.base import AuthenticationError + # Clear environment variables that could provide a token + monkeypatch.delenv("GITLAB_TOKEN", raising=False) + monkeypatch.delenv("GL_TOKEN", raising=False) importer = GitLabImporter(token=None) options = ImportOptions(mode=ImportMode.SEARCH, target="test") with 
pytest.raises(AuthenticationError, match="requires authentication"): @@ -88,8 +93,13 @@ def test_gitlab_search_with_auth( assert repos[0].name == "search-result" -def test_gitlab_importer_is_authenticated_without_token() -> None: +def test_gitlab_importer_is_authenticated_without_token( + monkeypatch: pytest.MonkeyPatch, +) -> None: """Test is_authenticated returns False without token.""" + # Clear environment variables that could provide a token + monkeypatch.delenv("GITLAB_TOKEN", raising=False) + monkeypatch.delenv("GL_TOKEN", raising=False) importer = GitLabImporter(token=None) assert importer.is_authenticated is False From e592d30eb7a51835dc188172c6e14cd1b78a89b9 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 1 Feb 2026 13:34:07 -0600 Subject: [PATCH 024/109] feat(cli/import) Show help when import called without required args why: Running `vcspull import` without arguments showed an error instead of help, unlike `vcspull import --help`. what: - Make service positional arg optional with nargs="?" 
and default=None - Make workspace arg optional with default=None instead of required=True - Check for missing args in handler and print help instead of proceeding - Add tests for help display behavior --- src/vcspull/cli/__init__.py | 4 ++++ src/vcspull/cli/import_repos.py | 4 +++- tests/cli/test_import_repos.py | 28 ++++++++++++++++++++++++++++ 3 files changed, 35 insertions(+), 1 deletion(-) diff --git a/src/vcspull/cli/__init__.py b/src/vcspull/cli/__init__.py index 4019c1321..d497c7145 100644 --- a/src/vcspull/cli/__init__.py +++ b/src/vcspull/cli/__init__.py @@ -499,6 +499,10 @@ def cli(_args: list[str] | None = None) -> None: merge_roots=args.merge_roots, ) elif args.subparser_name == "import": + # Show help if required arguments are missing + if args.service is None or args.workspace is None: + _import_parser.print_help() + return import_repos( service=args.service, target=args.target, diff --git a/src/vcspull/cli/import_repos.py b/src/vcspull/cli/import_repos.py index 26d7a07ca..59822664d 100644 --- a/src/vcspull/cli/import_repos.py +++ b/src/vcspull/cli/import_repos.py @@ -63,6 +63,8 @@ def create_import_subparser(parser: argparse.ArgumentParser) -> None: parser.add_argument( "service", metavar="SERVICE", + nargs="?", + default=None, help="Remote service: github, gitlab, codeberg, gitea, forgejo, codecommit", ) parser.add_argument( @@ -80,7 +82,7 @@ def create_import_subparser(parser: argparse.ArgumentParser) -> None: "--workspace", dest="workspace", metavar="DIR", - required=True, + default=None, help="Workspace root directory (REQUIRED)", ) parser.add_argument( diff --git a/tests/cli/test_import_repos.py b/tests/cli/test_import_repos.py index e4558b032..24bae2e1f 100644 --- a/tests/cli/test_import_repos.py +++ b/tests/cli/test_import_repos.py @@ -1206,3 +1206,31 @@ def fetch_repos( ) assert "Error loading config" in caplog.text + + +def test_import_no_args_shows_help(capsys: pytest.CaptureFixture[str]) -> None: + """Test that 'vcspull import' without 
args shows help (like --help).""" + from vcspull.cli import cli + + # Call cli with just "import" - should show help and not error + cli(["import"]) + + captured = capsys.readouterr() + # Verify help is shown (usage line and description) + assert "usage: vcspull import" in captured.out + assert "Import repositories from remote services" in captured.out + assert "positional arguments:" in captured.out + assert "SERVICE" in captured.out + + +def test_import_only_service_shows_help(capsys: pytest.CaptureFixture[str]) -> None: + """Test that 'vcspull import github' without workspace shows help.""" + from vcspull.cli import cli + + # Call cli with just "import github" - missing workspace + cli(["import", "github"]) + + captured = capsys.readouterr() + # Verify help is shown + assert "usage: vcspull import" in captured.out + assert "-w, --workspace DIR" in captured.out From 1325962c5375c38f904a11e8bc6fefeff8180fbc Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 1 Feb 2026 13:35:07 -0600 Subject: [PATCH 025/109] fix(remotes/codecommit) Apply filter_repo before yielding repos why: CodeCommit importer was yielding repos without applying filter_repo(), causing --language, --topics, --min-stars, --archived, and --forks options to be silently ignored for CodeCommit imports. 
what: - Import filter_repo from base module - Apply filter_repo(repo, options) check before yielding, matching other importers --- src/vcspull/_internal/remotes/codecommit.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/vcspull/_internal/remotes/codecommit.py b/src/vcspull/_internal/remotes/codecommit.py index a7804160e..999666c94 100644 --- a/src/vcspull/_internal/remotes/codecommit.py +++ b/src/vcspull/_internal/remotes/codecommit.py @@ -13,6 +13,7 @@ DependencyError, ImportOptions, RemoteRepo, + filter_repo, ) log = logging.getLogger(__name__) @@ -235,8 +236,9 @@ def fetch_repos(self, options: ImportOptions) -> t.Iterator[RemoteRepo]: break repo = self._parse_repo(repo_metadata) - yield repo - count += 1 + if filter_repo(repo, options): + yield repo + count += 1 def _parse_repo(self, data: dict[str, t.Any]) -> RemoteRepo: """Parse CodeCommit repository metadata into RemoteRepo. From 034dcafdf0e16625d7975d875b79fb0e82495190 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 8 Feb 2026 11:40:08 -0600 Subject: [PATCH 026/109] feat(remotes[ssh_url]) Add ssh_url field to RemoteRepo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit why: SSH URLs are the practical default for cloning — they work for both public and private repos, and users who run vcspull import almost certainly have SSH keys configured. 
what: - Add ssh_url field to RemoteRepo dataclass - Update to_vcspull_url() to accept use_ssh param, defaulting to True - Include ssh_url in to_dict() output - Populate ssh_url from each forge's API field in all importers (GitHub ssh_url, GitLab ssh_url_to_repo, Gitea ssh_url, CodeCommit cloneUrlSsh) - Update all test fixtures and mock API responses with ssh_url --- src/vcspull/_internal/remotes/base.py | 26 ++++++-- src/vcspull/_internal/remotes/codecommit.py | 1 + src/vcspull/_internal/remotes/gitea.py | 1 + src/vcspull/_internal/remotes/github.py | 1 + src/vcspull/_internal/remotes/gitlab.py | 1 + tests/_internal/remotes/conftest.py | 7 ++ tests/_internal/remotes/test_base.py | 64 ++++++++++++++++++- tests/_internal/remotes/test_gitea.py | 5 ++ tests/_internal/remotes/test_github.py | 12 ++++ tests/_internal/remotes/test_gitlab.py | 6 ++ .../remotes/test_pagination_duplicates.py | 3 + 11 files changed, 119 insertions(+), 8 deletions(-) diff --git a/src/vcspull/_internal/remotes/base.py b/src/vcspull/_internal/remotes/base.py index 782adac79..a7080c2f5 100644 --- a/src/vcspull/_internal/remotes/base.py +++ b/src/vcspull/_internal/remotes/base.py @@ -73,7 +73,9 @@ class RemoteRepo: name : str Repository name (filesystem-safe) clone_url : str - URL for cloning the repository + HTTPS URL for cloning the repository + ssh_url : str + SSH URL for cloning the repository html_url : str URL for viewing the repository in a browser description : str | None @@ -96,6 +98,7 @@ class RemoteRepo: name: str clone_url: str + ssh_url: str html_url: str description: str | None language: str | None @@ -106,9 +109,15 @@ class RemoteRepo: default_branch: str owner: str - def to_vcspull_url(self) -> str: + def to_vcspull_url(self, *, use_ssh: bool = True) -> str: """Return the URL formatted for vcspull config. + Parameters + ---------- + use_ssh : bool + When True and ``ssh_url`` is non-empty, use the SSH URL. + Falls back to ``clone_url`` when ``ssh_url`` is empty. 
+ Returns ------- str @@ -119,6 +128,7 @@ def to_vcspull_url(self) -> str: >>> repo = RemoteRepo( ... name="test", ... clone_url="https://github.com/user/test.git", + ... ssh_url="git@github.com:user/test.git", ... html_url="https://github.com/user/test", ... description=None, ... language=None, @@ -130,11 +140,14 @@ def to_vcspull_url(self) -> str: ... owner="user", ... ) >>> repo.to_vcspull_url() + 'git+git@github.com:user/test.git' + >>> repo.to_vcspull_url(use_ssh=False) 'git+https://github.com/user/test.git' """ - if self.clone_url.startswith("git+"): - return self.clone_url - return f"git+{self.clone_url}" + url = self.ssh_url if use_ssh and self.ssh_url else self.clone_url + if url.startswith("git+"): + return url + return f"git+{url}" def to_dict(self) -> dict[str, t.Any]: """Convert to dictionary for JSON serialization. @@ -149,6 +162,7 @@ def to_dict(self) -> dict[str, t.Any]: >>> repo = RemoteRepo( ... name="test", ... clone_url="https://github.com/user/test.git", + ... ssh_url="git@github.com:user/test.git", ... html_url="https://github.com/user/test", ... description="A test repo", ... language="Python", @@ -168,6 +182,7 @@ def to_dict(self) -> dict[str, t.Any]: return { "name": self.name, "clone_url": self.clone_url, + "ssh_url": self.ssh_url, "html_url": self.html_url, "description": self.description, "language": self.language, @@ -452,6 +467,7 @@ def filter_repo( >>> repo = RemoteRepo( ... name="test", ... clone_url="https://github.com/user/test.git", + ... ssh_url="git@github.com:user/test.git", ... html_url="https://github.com/user/test", ... description=None, ... 
language="Python", diff --git a/src/vcspull/_internal/remotes/codecommit.py b/src/vcspull/_internal/remotes/codecommit.py index 999666c94..db407fe8e 100644 --- a/src/vcspull/_internal/remotes/codecommit.py +++ b/src/vcspull/_internal/remotes/codecommit.py @@ -266,6 +266,7 @@ def _parse_repo(self, data: dict[str, t.Any]) -> RemoteRepo: return RemoteRepo( name=repo_name, clone_url=data.get("cloneUrlHttp", ""), + ssh_url=data.get("cloneUrlSsh", ""), html_url=html_url, description=data.get("repositoryDescription"), language=None, # CodeCommit doesn't track language diff --git a/src/vcspull/_internal/remotes/gitea.py b/src/vcspull/_internal/remotes/gitea.py index 7837858ac..bac27099f 100644 --- a/src/vcspull/_internal/remotes/gitea.py +++ b/src/vcspull/_internal/remotes/gitea.py @@ -283,6 +283,7 @@ def _parse_repo(self, data: dict[str, t.Any]) -> RemoteRepo: return RemoteRepo( name=data.get("name", ""), clone_url=data.get("clone_url", ""), + ssh_url=data.get("ssh_url", ""), html_url=data.get("html_url", ""), description=data.get("description"), language=data.get("language"), diff --git a/src/vcspull/_internal/remotes/github.py b/src/vcspull/_internal/remotes/github.py index d2303e5bf..b1a9a64f7 100644 --- a/src/vcspull/_internal/remotes/github.py +++ b/src/vcspull/_internal/remotes/github.py @@ -271,6 +271,7 @@ def _parse_repo(self, data: dict[str, t.Any]) -> RemoteRepo: return RemoteRepo( name=data["name"], clone_url=data["clone_url"], + ssh_url=data.get("ssh_url", ""), html_url=data["html_url"], description=data.get("description"), language=data.get("language"), diff --git a/src/vcspull/_internal/remotes/gitlab.py b/src/vcspull/_internal/remotes/gitlab.py index 9d1f29b7d..77938cdaa 100644 --- a/src/vcspull/_internal/remotes/gitlab.py +++ b/src/vcspull/_internal/remotes/gitlab.py @@ -295,6 +295,7 @@ def _parse_repo(self, data: dict[str, t.Any]) -> RemoteRepo: return RemoteRepo( name=name, clone_url=data.get("http_url_to_repo", ""), + ssh_url=data.get("ssh_url_to_repo", 
""), html_url=data.get("web_url", ""), description=data.get("description"), language=None, # GitLab doesn't return language in list endpoints diff --git a/tests/_internal/remotes/conftest.py b/tests/_internal/remotes/conftest.py index bb3fa5da9..0fd0cd9bf 100644 --- a/tests/_internal/remotes/conftest.py +++ b/tests/_internal/remotes/conftest.py @@ -98,6 +98,7 @@ def github_user_repos_response() -> bytes: { "name": "repo1", "clone_url": "https://github.com/testuser/repo1.git", + "ssh_url": "git@github.com:testuser/repo1.git", "html_url": "https://github.com/testuser/repo1", "description": "Test repo 1", "language": "Python", @@ -111,6 +112,7 @@ def github_user_repos_response() -> bytes: { "name": "repo2", "clone_url": "https://github.com/testuser/repo2.git", + "ssh_url": "git@github.com:testuser/repo2.git", "html_url": "https://github.com/testuser/repo2", "description": "Test repo 2", "language": "JavaScript", @@ -133,6 +135,7 @@ def github_forked_repo_response() -> bytes: { "name": "forked-repo", "clone_url": "https://github.com/testuser/forked-repo.git", + "ssh_url": "git@github.com:testuser/forked-repo.git", "html_url": "https://github.com/testuser/forked-repo", "description": "A forked repo", "language": "Python", @@ -155,6 +158,7 @@ def github_archived_repo_response() -> bytes: { "name": "archived-repo", "clone_url": "https://github.com/testuser/archived-repo.git", + "ssh_url": "git@github.com:testuser/archived-repo.git", "html_url": "https://github.com/testuser/archived-repo", "description": "An archived repo", "language": "Python", @@ -178,6 +182,7 @@ def gitlab_user_projects_response() -> bytes: "path": "project1", "name": "Project 1", "http_url_to_repo": "https://gitlab.com/testuser/project1.git", + "ssh_url_to_repo": "git@gitlab.com:testuser/project1.git", "web_url": "https://gitlab.com/testuser/project1", "description": "Test project 1", "topics": ["python"], @@ -198,6 +203,7 @@ def gitea_user_repos_response() -> bytes: { "name": "repo1", "clone_url": 
"https://codeberg.org/testuser/repo1.git", + "ssh_url": "git@codeberg.org:testuser/repo1.git", "html_url": "https://codeberg.org/testuser/repo1", "description": "Test repo 1", "language": "Python", @@ -222,6 +228,7 @@ def gitea_search_response() -> bytes: { "name": "search-result", "clone_url": "https://codeberg.org/user/search-result.git", + "ssh_url": "git@codeberg.org:user/search-result.git", "html_url": "https://codeberg.org/user/search-result", "description": "Found by search", "language": "Go", diff --git a/tests/_internal/remotes/test_base.py b/tests/_internal/remotes/test_base.py index b72da007e..d9456e4e9 100644 --- a/tests/_internal/remotes/test_base.py +++ b/tests/_internal/remotes/test_base.py @@ -29,6 +29,7 @@ class FilterRepoFixture(t.NamedTuple): repo_kwargs={ "name": "test", "clone_url": "https://github.com/user/test.git", + "ssh_url": "git@github.com:user/test.git", "html_url": "https://github.com/user/test", "description": None, "language": "Python", @@ -47,6 +48,7 @@ class FilterRepoFixture(t.NamedTuple): repo_kwargs={ "name": "fork", "clone_url": "https://github.com/user/fork.git", + "ssh_url": "git@github.com:user/fork.git", "html_url": "https://github.com/user/fork", "description": None, "language": "Python", @@ -65,6 +67,7 @@ class FilterRepoFixture(t.NamedTuple): repo_kwargs={ "name": "fork", "clone_url": "https://github.com/user/fork.git", + "ssh_url": "git@github.com:user/fork.git", "html_url": "https://github.com/user/fork", "description": None, "language": "Python", @@ -83,6 +86,7 @@ class FilterRepoFixture(t.NamedTuple): repo_kwargs={ "name": "archived", "clone_url": "https://github.com/user/archived.git", + "ssh_url": "git@github.com:user/archived.git", "html_url": "https://github.com/user/archived", "description": None, "language": "Python", @@ -101,6 +105,7 @@ class FilterRepoFixture(t.NamedTuple): repo_kwargs={ "name": "archived", "clone_url": "https://github.com/user/archived.git", + "ssh_url": "git@github.com:user/archived.git", 
"html_url": "https://github.com/user/archived", "description": None, "language": "Python", @@ -119,6 +124,7 @@ class FilterRepoFixture(t.NamedTuple): repo_kwargs={ "name": "python-repo", "clone_url": "https://github.com/user/python-repo.git", + "ssh_url": "git@github.com:user/python-repo.git", "html_url": "https://github.com/user/python-repo", "description": None, "language": "Python", @@ -137,6 +143,7 @@ class FilterRepoFixture(t.NamedTuple): repo_kwargs={ "name": "python-repo", "clone_url": "https://github.com/user/python-repo.git", + "ssh_url": "git@github.com:user/python-repo.git", "html_url": "https://github.com/user/python-repo", "description": None, "language": "Python", @@ -155,6 +162,7 @@ class FilterRepoFixture(t.NamedTuple): repo_kwargs={ "name": "python-repo", "clone_url": "https://github.com/user/python-repo.git", + "ssh_url": "git@github.com:user/python-repo.git", "html_url": "https://github.com/user/python-repo", "description": None, "language": "Python", @@ -173,6 +181,7 @@ class FilterRepoFixture(t.NamedTuple): repo_kwargs={ "name": "popular", "clone_url": "https://github.com/user/popular.git", + "ssh_url": "git@github.com:user/popular.git", "html_url": "https://github.com/user/popular", "description": None, "language": "Python", @@ -191,6 +200,7 @@ class FilterRepoFixture(t.NamedTuple): repo_kwargs={ "name": "unpopular", "clone_url": "https://github.com/user/unpopular.git", + "ssh_url": "git@github.com:user/unpopular.git", "html_url": "https://github.com/user/unpopular", "description": None, "language": "Python", @@ -209,6 +219,7 @@ class FilterRepoFixture(t.NamedTuple): repo_kwargs={ "name": "cli-tool", "clone_url": "https://github.com/user/cli-tool.git", + "ssh_url": "git@github.com:user/cli-tool.git", "html_url": "https://github.com/user/cli-tool", "description": None, "language": "Python", @@ -227,6 +238,7 @@ class FilterRepoFixture(t.NamedTuple): repo_kwargs={ "name": "web-app", "clone_url": "https://github.com/user/web-app.git", + "ssh_url": 
"git@github.com:user/web-app.git", "html_url": "https://github.com/user/web-app", "description": None, "language": "Python", @@ -260,11 +272,52 @@ def test_filter_repo( assert filter_repo(repo, options) == expected -def test_remote_repo_to_vcspull_url() -> None: - """Test RemoteRepo.to_vcspull_url adds git+ prefix.""" +def test_remote_repo_to_vcspull_url_defaults_to_ssh() -> None: + """Test RemoteRepo.to_vcspull_url defaults to SSH URL.""" repo = RemoteRepo( name="test", clone_url="https://github.com/user/test.git", + ssh_url="git@github.com:user/test.git", + html_url="https://github.com/user/test", + description=None, + language=None, + topics=(), + stars=0, + is_fork=False, + is_archived=False, + default_branch="main", + owner="user", + ) + assert repo.to_vcspull_url() == "git+git@github.com:user/test.git" + + +def test_remote_repo_to_vcspull_url_https() -> None: + """Test RemoteRepo.to_vcspull_url with use_ssh=False returns HTTPS.""" + repo = RemoteRepo( + name="test", + clone_url="https://github.com/user/test.git", + ssh_url="git@github.com:user/test.git", + html_url="https://github.com/user/test", + description=None, + language=None, + topics=(), + stars=0, + is_fork=False, + is_archived=False, + default_branch="main", + owner="user", + ) + assert repo.to_vcspull_url(use_ssh=False) == ( + "git+https://github.com/user/test.git" + ) + + +def test_remote_repo_to_vcspull_url_fallback_no_ssh() -> None: + """Test RemoteRepo.to_vcspull_url falls back to clone_url when ssh_url empty.""" + repo = RemoteRepo( + name="test", + clone_url="https://github.com/user/test.git", + ssh_url="", html_url="https://github.com/user/test", description=None, language=None, @@ -283,6 +336,7 @@ def test_remote_repo_to_vcspull_url_already_prefixed() -> None: repo = RemoteRepo( name="test", clone_url="git+https://github.com/user/test.git", + ssh_url="", html_url="https://github.com/user/test", description=None, language=None, @@ -293,7 +347,9 @@ def 
test_remote_repo_to_vcspull_url_already_prefixed() -> None: default_branch="main", owner="user", ) - assert repo.to_vcspull_url() == "git+https://github.com/user/test.git" + assert repo.to_vcspull_url(use_ssh=False) == ( + "git+https://github.com/user/test.git" + ) def test_remote_repo_to_dict() -> None: @@ -301,6 +357,7 @@ def test_remote_repo_to_dict() -> None: repo = RemoteRepo( name="test", clone_url="https://github.com/user/test.git", + ssh_url="git@github.com:user/test.git", html_url="https://github.com/user/test", description="A test repo", language="Python", @@ -314,6 +371,7 @@ def test_remote_repo_to_dict() -> None: d = repo.to_dict() assert d["name"] == "test" assert d["clone_url"] == "https://github.com/user/test.git" + assert d["ssh_url"] == "git@github.com:user/test.git" assert d["language"] == "Python" assert d["topics"] == ["cli", "tool"] assert d["stars"] == 100 diff --git a/tests/_internal/remotes/test_gitea.py b/tests/_internal/remotes/test_gitea.py index c20699e33..b08f5af21 100644 --- a/tests/_internal/remotes/test_gitea.py +++ b/tests/_internal/remotes/test_gitea.py @@ -34,6 +34,7 @@ def test_gitea_fetch_org( { "name": "org-repo", "clone_url": "https://codeberg.org/testorg/org-repo.git", + "ssh_url": "git@codeberg.org:testorg/org-repo.git", "html_url": "https://codeberg.org/testorg/org-repo", "description": "Org repo", "language": "Go", @@ -75,6 +76,7 @@ def test_gitea_search_with_array_response( { "name": "plain-result", "clone_url": "https://gitea.example.com/user/plain-result.git", + "ssh_url": "git@gitea.example.com:user/plain-result.git", "html_url": "https://gitea.example.com/user/plain-result", "description": "Plain array result", "language": "Python", @@ -132,6 +134,7 @@ def test_gitea_uses_stars_count_field( { "name": "starred-repo", "clone_url": "https://codeberg.org/user/starred-repo.git", + "ssh_url": "git@codeberg.org:user/starred-repo.git", "html_url": "https://codeberg.org/user/starred-repo", "description": "Popular repo", 
"language": "Rust", @@ -159,6 +162,7 @@ def test_gitea_filters_by_language( { "name": "go-repo", "clone_url": "https://codeberg.org/user/go-repo.git", + "ssh_url": "git@codeberg.org:user/go-repo.git", "html_url": "https://codeberg.org/user/go-repo", "description": "Go repo", "language": "Go", @@ -172,6 +176,7 @@ def test_gitea_filters_by_language( { "name": "rust-repo", "clone_url": "https://codeberg.org/user/rust-repo.git", + "ssh_url": "git@codeberg.org:user/rust-repo.git", "html_url": "https://codeberg.org/user/rust-repo", "description": "Rust repo", "language": "Rust", diff --git a/tests/_internal/remotes/test_github.py b/tests/_internal/remotes/test_github.py index bcc39ac17..d286756aa 100644 --- a/tests/_internal/remotes/test_github.py +++ b/tests/_internal/remotes/test_github.py @@ -28,6 +28,7 @@ class GitHubUserFixture(t.NamedTuple): { "name": "repo1", "clone_url": "https://github.com/testuser/repo1.git", + "ssh_url": "git@github.com:testuser/repo1.git", "html_url": "https://github.com/testuser/repo1", "description": "Test repo", "language": "Python", @@ -49,6 +50,7 @@ class GitHubUserFixture(t.NamedTuple): { "name": "original", "clone_url": "https://github.com/testuser/original.git", + "ssh_url": "git@github.com:testuser/original.git", "html_url": "https://github.com/testuser/original", "description": "Original repo", "language": "Python", @@ -62,6 +64,7 @@ class GitHubUserFixture(t.NamedTuple): { "name": "forked", "clone_url": "https://github.com/testuser/forked.git", + "ssh_url": "git@github.com:testuser/forked.git", "html_url": "https://github.com/testuser/forked", "description": "Forked repo", "language": "Python", @@ -87,6 +90,7 @@ class GitHubUserFixture(t.NamedTuple): { "name": "original", "clone_url": "https://github.com/testuser/original.git", + "ssh_url": "git@github.com:testuser/original.git", "html_url": "https://github.com/testuser/original", "description": "Original repo", "language": "Python", @@ -100,6 +104,7 @@ class 
GitHubUserFixture(t.NamedTuple): { "name": "forked", "clone_url": "https://github.com/testuser/forked.git", + "ssh_url": "git@github.com:testuser/forked.git", "html_url": "https://github.com/testuser/forked", "description": "Forked repo", "language": "Python", @@ -125,6 +130,7 @@ class GitHubUserFixture(t.NamedTuple): { "name": "active", "clone_url": "https://github.com/testuser/active.git", + "ssh_url": "git@github.com:testuser/active.git", "html_url": "https://github.com/testuser/active", "description": "Active repo", "language": "Python", @@ -138,6 +144,7 @@ class GitHubUserFixture(t.NamedTuple): { "name": "archived", "clone_url": "https://github.com/testuser/archived.git", + "ssh_url": "git@github.com:testuser/archived.git", "html_url": "https://github.com/testuser/archived", "description": "Archived repo", "language": "Python", @@ -163,6 +170,7 @@ class GitHubUserFixture(t.NamedTuple): { "name": "python-repo", "clone_url": "https://github.com/testuser/python-repo.git", + "ssh_url": "git@github.com:testuser/python-repo.git", "html_url": "https://github.com/testuser/python-repo", "description": "Python repo", "language": "Python", @@ -176,6 +184,7 @@ class GitHubUserFixture(t.NamedTuple): { "name": "js-repo", "clone_url": "https://github.com/testuser/js-repo.git", + "ssh_url": "git@github.com:testuser/js-repo.git", "html_url": "https://github.com/testuser/js-repo", "description": "JavaScript repo", "language": "JavaScript", @@ -243,6 +252,7 @@ def test_github_fetch_org( { "name": "org-repo", "clone_url": "https://github.com/testorg/org-repo.git", + "ssh_url": "git@github.com:testorg/org-repo.git", "html_url": "https://github.com/testorg/org-repo", "description": "Org repo", "language": "Python", @@ -281,6 +291,7 @@ def test_github_fetch_search( { "name": "search-result", "clone_url": "https://github.com/user/search-result.git", + "ssh_url": "git@github.com:user/search-result.git", "html_url": "https://github.com/user/search-result", "description": "Found by 
search", "language": "Python", @@ -342,6 +353,7 @@ def test_github_limit_respected( { "name": f"repo{i}", "clone_url": f"https://github.com/user/repo{i}.git", + "ssh_url": f"git@github.com:user/repo{i}.git", "html_url": f"https://github.com/user/repo{i}", "description": f"Repo {i}", "language": "Python", diff --git a/tests/_internal/remotes/test_gitlab.py b/tests/_internal/remotes/test_gitlab.py index d51708b50..e64308f31 100644 --- a/tests/_internal/remotes/test_gitlab.py +++ b/tests/_internal/remotes/test_gitlab.py @@ -35,6 +35,7 @@ def test_gitlab_fetch_group( "path": "group-project", "name": "Group Project", "http_url_to_repo": "https://gitlab.com/testgroup/group-project.git", + "ssh_url_to_repo": "git@gitlab.com:testgroup/group-project.git", "web_url": "https://gitlab.com/testgroup/group-project", "description": "Group project", "topics": [], @@ -76,6 +77,7 @@ def test_gitlab_search_with_auth( "path": "search-result", "name": "Search Result", "http_url_to_repo": "https://gitlab.com/user/search-result.git", + "ssh_url_to_repo": "git@gitlab.com:user/search-result.git", "web_url": "https://gitlab.com/user/search-result", "description": "Found", "topics": [], @@ -125,6 +127,7 @@ def test_gitlab_handles_forked_project( "path": "forked-project", "name": "Forked Project", "http_url_to_repo": "https://gitlab.com/user/forked-project.git", + "ssh_url_to_repo": "git@gitlab.com:user/forked-project.git", "web_url": "https://gitlab.com/user/forked-project", "description": "A fork", "topics": [], @@ -152,6 +155,7 @@ def test_gitlab_uses_path_not_name( "path": "my-project", "name": "My Project With Spaces", # This should NOT be used "http_url_to_repo": "https://gitlab.com/user/my-project.git", + "ssh_url_to_repo": "git@gitlab.com:user/my-project.git", "web_url": "https://gitlab.com/user/my-project", "description": "Project with spaces in name", "topics": [], @@ -206,6 +210,7 @@ def test_gitlab_subgroup_url_encoding( "path": "subgroup-project", "name": "Subgroup Project", 
"http_url_to_repo": "https://gitlab.com/parent/child/subgroup-project.git", + "ssh_url_to_repo": "git@gitlab.com:parent/child/subgroup-project.git", "web_url": "https://gitlab.com/parent/child/subgroup-project", "description": "Project in subgroup", "topics": [], @@ -252,6 +257,7 @@ def test_gitlab_deeply_nested_subgroup( "path": "deep-project", "name": "Deep Project", "http_url_to_repo": "https://gitlab.com/a/b/c/d/deep-project.git", + "ssh_url_to_repo": "git@gitlab.com:a/b/c/d/deep-project.git", "web_url": "https://gitlab.com/a/b/c/d/deep-project", "description": "Deeply nested project", "topics": [], diff --git a/tests/_internal/remotes/test_pagination_duplicates.py b/tests/_internal/remotes/test_pagination_duplicates.py index fd0711eed..76d0658bf 100644 --- a/tests/_internal/remotes/test_pagination_duplicates.py +++ b/tests/_internal/remotes/test_pagination_duplicates.py @@ -46,6 +46,7 @@ def _make_github_repo( return { "name": name, "clone_url": f"https://github.com/testuser/{name}.git", + "ssh_url": f"git@github.com:testuser/{name}.git", "html_url": f"https://github.com/testuser/{name}", "description": f"Repo {name}", "language": "Python", @@ -68,6 +69,7 @@ def _make_gitea_repo( return { "name": name, "clone_url": f"https://codeberg.org/testuser/{name}.git", + "ssh_url": f"git@codeberg.org:testuser/{name}.git", "html_url": f"https://codeberg.org/testuser/{name}", "description": f"Repo {name}", "language": "Python", @@ -91,6 +93,7 @@ def _make_gitlab_repo( "path": name, "name": name, "http_url_to_repo": f"https://gitlab.com/testuser/{name}.git", + "ssh_url_to_repo": f"git@gitlab.com:testuser/{name}.git", "web_url": f"https://gitlab.com/testuser/{name}", "description": f"Project {name}", "topics": [], From 12e5597b0f952fc8f9b637c05bd7e36b24ece1f4 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 8 Feb 2026 11:40:19 -0600 Subject: [PATCH 027/109] feat(cli/import[--https]) Default to SSH URLs, add --https flag why: SSH is the safest default for importing 
repos. Users who need HTTPS can opt in with --https. what: - Add --https flag to import subcommand argument parser - Add use_https param to import_repos() function - Pass use_ssh=not use_https to RemoteRepo.to_vcspull_url() - Wire use_https from CLI args through cli/__init__.py - Add tests for SSH default, HTTPS opt-in, and CLI flag parsing --- src/vcspull/cli/__init__.py | 1 + src/vcspull/cli/import_repos.py | 13 ++- tests/cli/test_import_repos.py | 144 ++++++++++++++++++++++++++++++++ 3 files changed, 157 insertions(+), 1 deletion(-) diff --git a/src/vcspull/cli/__init__.py b/src/vcspull/cli/__init__.py index d497c7145..14ac60a0e 100644 --- a/src/vcspull/cli/__init__.py +++ b/src/vcspull/cli/__init__.py @@ -524,4 +524,5 @@ def cli(_args: list[str] | None = None) -> None: output_json=getattr(args, "output_json", False), output_ndjson=getattr(args, "output_ndjson", False), color=getattr(args, "color", "auto"), + use_https=getattr(args, "use_https", False), ) diff --git a/src/vcspull/cli/import_repos.py b/src/vcspull/cli/import_repos.py index 59822664d..231fbeeb5 100644 --- a/src/vcspull/cli/import_repos.py +++ b/src/vcspull/cli/import_repos.py @@ -195,6 +195,12 @@ def create_import_subparser(parser: argparse.ArgumentParser) -> None: dest="output_ndjson", help="Output as NDJSON (one JSON per line)", ) + output_group.add_argument( + "--https", + action="store_true", + dest="use_https", + help="Use HTTPS clone URLs instead of SSH (default: SSH)", + ) output_group.add_argument( "--color", choices=["auto", "always", "never"], @@ -306,6 +312,7 @@ def import_repos( output_json: bool, output_ndjson: bool, color: str, + use_https: bool = False, ) -> None: """Import repositories from a remote service. 
@@ -351,6 +358,8 @@ def import_repos( Output as NDJSON color : str Color mode + use_https : bool + Use HTTPS clone URLs instead of SSH (default: False, i.e., SSH) """ output_mode = get_output_mode(output_json, output_ndjson) formatter = OutputFormatter(output_mode) @@ -568,7 +577,9 @@ def import_repos( skipped_count += 1 continue - raw_config[workspace_label][repo.name] = {"repo": repo.to_vcspull_url()} + raw_config[workspace_label][repo.name] = { + "repo": repo.to_vcspull_url(use_ssh=not use_https), + } added_count += 1 if added_count == 0: diff --git a/tests/cli/test_import_repos.py b/tests/cli/test_import_repos.py index 24bae2e1f..43d9f08eb 100644 --- a/tests/cli/test_import_repos.py +++ b/tests/cli/test_import_repos.py @@ -43,6 +43,7 @@ def _make_repo( return RemoteRepo( name=name, clone_url=f"https://github.com/{owner}/{name}.git", + ssh_url=f"git@github.com:{owner}/{name}.git", html_url=f"https://github.com/{owner}/{name}", description=f"Test repo {name}", language=language, @@ -1234,3 +1235,146 @@ def test_import_only_service_shows_help(capsys: pytest.CaptureFixture[str]) -> N # Verify help is shown assert "usage: vcspull import" in captured.out assert "-w, --workspace DIR" in captured.out + + +def test_import_repos_defaults_to_ssh_urls( + tmp_path: pathlib.Path, + monkeypatch: MonkeyPatch, + caplog: pytest.LogCaptureFixture, +) -> None: + """Test import_repos writes SSH URLs to config by default.""" + import yaml + + caplog.set_level(logging.INFO) + + monkeypatch.setenv("HOME", str(tmp_path)) + workspace = tmp_path / "repos" + workspace.mkdir() + config_file = tmp_path / ".vcspull.yaml" + + class MockImporter: + service_name = "MockService" + + def fetch_repos( + self, + options: ImportOptions, + ) -> t.Iterator[RemoteRepo]: + yield _make_repo("myrepo") + + monkeypatch.setattr( + import_repos_mod, + "_get_importer", + lambda *args, **kwargs: MockImporter(), + ) + + import_repos( + service="github", + target="testuser", + workspace=str(workspace), + 
mode="user", + base_url=None, + token=None, + region=None, + profile=None, + language=None, + topics=None, + min_stars=0, + include_archived=False, + include_forks=False, + limit=100, + config_path_str=str(config_file), + dry_run=False, + yes=True, + output_json=False, + output_ndjson=False, + color="never", + ) + + assert config_file.exists() + with config_file.open() as f: + config = yaml.safe_load(f) + + repo_url = config["~/repos/"]["myrepo"]["repo"] + assert repo_url == "git+git@github.com:testuser/myrepo.git" + + +def test_import_repos_https_flag( + tmp_path: pathlib.Path, + monkeypatch: MonkeyPatch, + caplog: pytest.LogCaptureFixture, +) -> None: + """Test import_repos writes HTTPS URLs when use_https=True.""" + import yaml + + caplog.set_level(logging.INFO) + + monkeypatch.setenv("HOME", str(tmp_path)) + workspace = tmp_path / "repos" + workspace.mkdir() + config_file = tmp_path / ".vcspull.yaml" + + class MockImporter: + service_name = "MockService" + + def fetch_repos( + self, + options: ImportOptions, + ) -> t.Iterator[RemoteRepo]: + yield _make_repo("myrepo") + + monkeypatch.setattr( + import_repos_mod, + "_get_importer", + lambda *args, **kwargs: MockImporter(), + ) + + import_repos( + service="github", + target="testuser", + workspace=str(workspace), + mode="user", + base_url=None, + token=None, + region=None, + profile=None, + language=None, + topics=None, + min_stars=0, + include_archived=False, + include_forks=False, + limit=100, + config_path_str=str(config_file), + dry_run=False, + yes=True, + output_json=False, + output_ndjson=False, + color="never", + use_https=True, + ) + + assert config_file.exists() + with config_file.open() as f: + config = yaml.safe_load(f) + + repo_url = config["~/repos/"]["myrepo"]["repo"] + assert repo_url == "git+https://github.com/testuser/myrepo.git" + + +def test_import_https_flag_via_cli(capsys: pytest.CaptureFixture[str]) -> None: + """Test that --https flag is recognized by the CLI parser.""" + from vcspull.cli 
import create_parser + + parser = create_parser(return_subparsers=False) + args = parser.parse_args( + ["import", "github", "testuser", "-w", "/tmp/repos", "--https"] + ) + assert args.use_https is True + + +def test_import_ssh_default_via_cli(capsys: pytest.CaptureFixture[str]) -> None: + """Test that SSH is the default (no --https flag).""" + from vcspull.cli import create_parser + + parser = create_parser(return_subparsers=False) + args = parser.parse_args(["import", "github", "testuser", "-w", "/tmp/repos"]) + assert args.use_https is False From 5f0a0615b66e5786c434922b76e8debdf3877db8 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 8 Feb 2026 11:40:25 -0600 Subject: [PATCH 028/109] docs(CHANGES) Document SSH-default clone URLs for vcspull import why: Record the new SSH-by-default behavior and --https flag. what: - Add changelog entry under v1.55.x for SSH clone URL default --- CHANGES | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/CHANGES b/CHANGES index 96ca69a59..01080c5d3 100644 --- a/CHANGES +++ b/CHANGES @@ -35,6 +35,19 @@ _Notes on upcoming releases will be added here_ ### New features +#### `vcspull import`: Default to SSH clone URLs (#510) + +`vcspull import` now uses SSH clone URLs by default. Use `--https` +to get HTTPS URLs instead: + +```console +$ vcspull import github torvalds -w ~/repos/linux --mode user +``` + +```console +$ vcspull import github torvalds -w ~/repos/linux --mode user --https +``` + #### `vcspull import` command for remote repository discovery (#510) Import repositories from GitHub, GitLab, Codeberg/Gitea/Forgejo, and AWS From 7af8a489e22bc57440059c1c7c62f16778d31f85 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 8 Feb 2026 13:07:24 -0600 Subject: [PATCH 029/109] fix(remotes/base[query_string]) Use urllib.parse.urlencode for query strings why: Manual query string construction only encodes values, not keys, and uses quote() instead of quote_plus(), which is the standard for query parameters. 
what: - Replace manual f-string query building with urllib.parse.urlencode(params) --- src/vcspull/_internal/remotes/base.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/vcspull/_internal/remotes/base.py b/src/vcspull/_internal/remotes/base.py index a7080c2f5..ec80530bd 100644 --- a/src/vcspull/_internal/remotes/base.py +++ b/src/vcspull/_internal/remotes/base.py @@ -328,10 +328,7 @@ def get( url = f"{self.base_url}{endpoint}" if params: - query_string = "&".join( - f"{k}={urllib.parse.quote(str(v))}" for k, v in params.items() - ) - url = f"{url}?{query_string}" + url = f"{url}?{urllib.parse.urlencode(params)}" headers = self._build_headers() request = urllib.request.Request(url, headers=headers) From 43ab6da580bbb1ddd1abf10b422e74564f114b55 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 8 Feb 2026 13:08:35 -0600 Subject: [PATCH 030/109] fix(remotes[url_encoding]) URL-encode options.target in endpoint paths why: User/org names with special characters (spaces, unicode) would produce malformed API URLs. GitLab subgroups used manual .replace("/", "%2F") which is fragile compared to proper URL encoding. 
what: - Use urllib.parse.quote(options.target, safe="") in github.py, gitlab.py, gitea.py - Add import urllib.parse to github.py and gitlab.py - Replace manual .replace("/", "%2F") with quote() in gitlab.py _fetch_group --- src/vcspull/_internal/remotes/gitea.py | 6 ++++-- src/vcspull/_internal/remotes/github.py | 7 +++++-- src/vcspull/_internal/remotes/gitlab.py | 8 +++++--- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/src/vcspull/_internal/remotes/gitea.py b/src/vcspull/_internal/remotes/gitea.py index bac27099f..a3cbf49fa 100644 --- a/src/vcspull/_internal/remotes/gitea.py +++ b/src/vcspull/_internal/remotes/gitea.py @@ -131,7 +131,8 @@ def _fetch_user(self, options: ImportOptions) -> t.Iterator[RemoteRepo]: RemoteRepo Repository information """ - endpoint = f"/users/{options.target}/repos" + target = urllib.parse.quote(options.target, safe="") + endpoint = f"/users/{target}/repos" yield from self._paginate_repos(endpoint, options) def _fetch_org(self, options: ImportOptions) -> t.Iterator[RemoteRepo]: @@ -147,7 +148,8 @@ def _fetch_org(self, options: ImportOptions) -> t.Iterator[RemoteRepo]: RemoteRepo Repository information """ - endpoint = f"/orgs/{options.target}/repos" + target = urllib.parse.quote(options.target, safe="") + endpoint = f"/orgs/{target}/repos" yield from self._paginate_repos(endpoint, options) def _fetch_search(self, options: ImportOptions) -> t.Iterator[RemoteRepo]: diff --git a/src/vcspull/_internal/remotes/github.py b/src/vcspull/_internal/remotes/github.py index b1a9a64f7..855e7b70c 100644 --- a/src/vcspull/_internal/remotes/github.py +++ b/src/vcspull/_internal/remotes/github.py @@ -4,6 +4,7 @@ import logging import typing as t +import urllib.parse from .base import ( HTTPClient, @@ -114,7 +115,8 @@ def _fetch_user(self, options: ImportOptions) -> t.Iterator[RemoteRepo]: RemoteRepo Repository information """ - endpoint = f"/users/{options.target}/repos" + target = urllib.parse.quote(options.target, safe="") + endpoint 
= f"/users/{target}/repos" yield from self._paginate_repos(endpoint, options) def _fetch_org(self, options: ImportOptions) -> t.Iterator[RemoteRepo]: @@ -130,7 +132,8 @@ def _fetch_org(self, options: ImportOptions) -> t.Iterator[RemoteRepo]: RemoteRepo Repository information """ - endpoint = f"/orgs/{options.target}/repos" + target = urllib.parse.quote(options.target, safe="") + endpoint = f"/orgs/{target}/repos" yield from self._paginate_repos(endpoint, options) def _fetch_search(self, options: ImportOptions) -> t.Iterator[RemoteRepo]: diff --git a/src/vcspull/_internal/remotes/gitlab.py b/src/vcspull/_internal/remotes/gitlab.py index 77938cdaa..316af0a06 100644 --- a/src/vcspull/_internal/remotes/gitlab.py +++ b/src/vcspull/_internal/remotes/gitlab.py @@ -4,6 +4,7 @@ import logging import typing as t +import urllib.parse from .base import ( AuthenticationError, @@ -115,7 +116,8 @@ def _fetch_user(self, options: ImportOptions) -> t.Iterator[RemoteRepo]: RemoteRepo Repository information """ - endpoint = f"/users/{options.target}/projects" + target = urllib.parse.quote(options.target, safe="") + endpoint = f"/users/{target}/projects" yield from self._paginate_repos(endpoint, options) def _fetch_group(self, options: ImportOptions) -> t.Iterator[RemoteRepo]: @@ -131,8 +133,8 @@ def _fetch_group(self, options: ImportOptions) -> t.Iterator[RemoteRepo]: RemoteRepo Repository information """ - # URL-encode the group name in case it contains slashes (subgroups) - target = options.target.replace("/", "%2F") + # URL-encode the group name (handles slashes in subgroups, etc.) 
+ target = urllib.parse.quote(options.target, safe="") endpoint = f"/groups/{target}/projects" yield from self._paginate_repos(endpoint, options, include_subgroups=True) From 634350640103a6e8d5be23fc422c8808e7e3d907 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 8 Feb 2026 13:09:16 -0600 Subject: [PATCH 031/109] style(remotes/base[imports]) Use namespace import for dataclasses why: Project convention requires namespace imports for stdlib modules (e.g., import enum, not from enum import Enum). what: - Replace from dataclasses import dataclass, field with import dataclasses - Update usages to dataclasses.dataclass and dataclasses.field --- src/vcspull/_internal/remotes/base.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/vcspull/_internal/remotes/base.py b/src/vcspull/_internal/remotes/base.py index ec80530bd..22d65cfc5 100644 --- a/src/vcspull/_internal/remotes/base.py +++ b/src/vcspull/_internal/remotes/base.py @@ -2,6 +2,7 @@ from __future__ import annotations +import dataclasses import enum import json import logging @@ -10,7 +11,6 @@ import urllib.error import urllib.parse import urllib.request -from dataclasses import dataclass, field log = logging.getLogger(__name__) @@ -64,7 +64,7 @@ class DependencyError(RemoteImportError): """Raised when a required dependency is missing.""" -@dataclass(frozen=True) +@dataclasses.dataclass(frozen=True) class RemoteRepo: """Represents a repository from a remote service. @@ -195,7 +195,7 @@ def to_dict(self) -> dict[str, t.Any]: } -@dataclass +@dataclasses.dataclass class ImportOptions: """Options for importing repositories from a remote service. 
@@ -230,7 +230,7 @@ class ImportOptions: include_forks: bool = False include_archived: bool = False language: str | None = None - topics: list[str] = field(default_factory=list) + topics: list[str] = dataclasses.field(default_factory=list) min_stars: int = 0 limit: int = 100 From 8cbfb63771de0eb6435926c35d52b73da2a389ef Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 8 Feb 2026 13:11:03 -0600 Subject: [PATCH 032/109] fix(cli/import[config_validation]) Validate config structure and narrow exceptions why: Bare Exception catches hide bugs; non-dict YAML configs cause cryptic AttributeErrors; non-YAML file paths silently produce invalid configs. what: - Reject non-YAML --file paths with clear error message - Validate loaded config is a dict (not a YAML list or scalar) - Validate workspace section is a dict before writing to it - Narrow config load exception to yaml.YAMLError | OSError - Narrow config save exception to OSError - Add tests for non-YAML config rejection and non-dict config rejection --- src/vcspull/cli/import_repos.py | 36 +++++++++-- tests/cli/test_import_repos.py | 109 ++++++++++++++++++++++++++++++++ 2 files changed, 141 insertions(+), 4 deletions(-) diff --git a/src/vcspull/cli/import_repos.py b/src/vcspull/cli/import_repos.py index 231fbeeb5..f6ede5bfa 100644 --- a/src/vcspull/cli/import_repos.py +++ b/src/vcspull/cli/import_repos.py @@ -282,7 +282,11 @@ def _resolve_config_file(config_path_str: str | None) -> pathlib.Path: Resolved config file path """ if config_path_str: - return pathlib.Path(config_path_str).expanduser().resolve() + path = pathlib.Path(config_path_str).expanduser().resolve() + if path.suffix.lower() not in {".yaml", ".yml"}: + msg = f"Only YAML config files are supported, got: {path.suffix}" + raise ValueError(msg) + return path home_configs = find_home_config_files(filetype=["yaml"]) if home_configs: @@ -416,7 +420,11 @@ def import_repos( workspace_label = workspace_root_label(workspace_path, cwd=cwd, home=home) # Resolve 
config file - config_file_path = _resolve_config_file(config_path_str) + try: + config_file_path = _resolve_config_file(config_path_str) + except ValueError as exc: + log.error("%s✗%s %s", Fore.RED, Style.RESET_ALL, exc) # noqa: TRY400 + return display_config_path = str(PrivatePath(config_file_path)) # Fetch repositories @@ -559,13 +567,33 @@ def import_repos( try: with config_file_path.open() as f: raw_config = yaml.safe_load(f) or {} - except Exception: + except (yaml.YAMLError, OSError): log.exception("Error loading config file") return + + if not isinstance(raw_config, dict): + log.error( + "%s✗%s Config file is not a valid YAML mapping: %s", + Fore.RED, + Style.RESET_ALL, + display_config_path, + ) + return else: raw_config = {} # Add repositories to config + if workspace_label in raw_config and not isinstance( + raw_config[workspace_label], dict + ): + log.error( + "%s✗%s Workspace section '%s' is not a mapping in config", + Fore.RED, + Style.RESET_ALL, + workspace_label, + ) + return + if workspace_label not in raw_config: raw_config[workspace_label] = {} @@ -613,5 +641,5 @@ def import_repos( skipped_count, Style.RESET_ALL, ) - except Exception: + except OSError: log.exception("Error saving config to %s", display_config_path) diff --git a/tests/cli/test_import_repos.py b/tests/cli/test_import_repos.py index 43d9f08eb..b58049edd 100644 --- a/tests/cli/test_import_repos.py +++ b/tests/cli/test_import_repos.py @@ -1378,3 +1378,112 @@ def test_import_ssh_default_via_cli(capsys: pytest.CaptureFixture[str]) -> None: parser = create_parser(return_subparsers=False) args = parser.parse_args(["import", "github", "testuser", "-w", "/tmp/repos"]) assert args.use_https is False + + +def test_import_repos_rejects_non_yaml_config( + tmp_path: pathlib.Path, + monkeypatch: MonkeyPatch, + caplog: pytest.LogCaptureFixture, +) -> None: + """Test import_repos rejects non-YAML config file paths.""" + caplog.set_level(logging.ERROR) + + monkeypatch.setenv("HOME", str(tmp_path)) + 
workspace = tmp_path / "repos" + workspace.mkdir() + + class MockImporter: + service_name = "MockService" + + def fetch_repos( + self, + options: ImportOptions, + ) -> t.Iterator[RemoteRepo]: + yield _make_repo("repo1") + + monkeypatch.setattr( + import_repos_mod, + "_get_importer", + lambda *args, **kwargs: MockImporter(), + ) + + import_repos( + service="github", + target="testuser", + workspace=str(workspace), + mode="user", + base_url=None, + token=None, + region=None, + profile=None, + language=None, + topics=None, + min_stars=0, + include_archived=False, + include_forks=False, + limit=100, + config_path_str=str(tmp_path / "config.json"), + dry_run=False, + yes=True, + output_json=False, + output_ndjson=False, + color="never", + ) + + assert "Only YAML config files are supported" in caplog.text + + +def test_import_repos_rejects_non_dict_config( + tmp_path: pathlib.Path, + monkeypatch: MonkeyPatch, + caplog: pytest.LogCaptureFixture, +) -> None: + """Test import_repos rejects config that is a YAML list instead of dict.""" + caplog.set_level(logging.ERROR) + + monkeypatch.setenv("HOME", str(tmp_path)) + workspace = tmp_path / "repos" + workspace.mkdir() + config_file = tmp_path / ".vcspull.yaml" + # Write a YAML list instead of a mapping + config_file.write_text("- item1\n- item2\n", encoding="utf-8") + + class MockImporter: + service_name = "MockService" + + def fetch_repos( + self, + options: ImportOptions, + ) -> t.Iterator[RemoteRepo]: + yield _make_repo("repo1") + + monkeypatch.setattr( + import_repos_mod, + "_get_importer", + lambda *args, **kwargs: MockImporter(), + ) + + import_repos( + service="github", + target="testuser", + workspace=str(workspace), + mode="user", + base_url=None, + token=None, + region=None, + profile=None, + language=None, + topics=None, + min_stars=0, + include_archived=False, + include_forks=False, + limit=100, + config_path_str=str(config_file), + dry_run=False, + yes=True, + output_json=False, + output_ndjson=False, + 
color="never", + ) + + assert "not a valid YAML mapping" in caplog.text From e3f50dcb9bc610393d3d9ce52df2b38cf19a2304 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 8 Feb 2026 13:11:52 -0600 Subject: [PATCH 033/109] refactor(shadowing) Rename t loop variable to avoid shadowing typing why: Using t as a loop variable shadows import typing as t, causing confusion for readers even though Python 3 scopes comprehension variables. what: - Rename t to topic in base.py filter_repo topic comprehensions - Rename t to topic in import_repos.py topic parsing comprehension --- src/vcspull/_internal/remotes/base.py | 4 ++-- src/vcspull/cli/import_repos.py | 6 +++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/vcspull/_internal/remotes/base.py b/src/vcspull/_internal/remotes/base.py index 22d65cfc5..76a68ba42 100644 --- a/src/vcspull/_internal/remotes/base.py +++ b/src/vcspull/_internal/remotes/base.py @@ -498,8 +498,8 @@ def filter_repo( # Check topics filter if options.topics: - repo_topics_lower = {t.lower() for t in repo.topics} - required_topics_lower = {t.lower() for t in options.topics} + repo_topics_lower = {topic.lower() for topic in repo.topics} + required_topics_lower = {topic.lower() for topic in options.topics} if not required_topics_lower.issubset(repo_topics_lower): return False diff --git a/src/vcspull/cli/import_repos.py b/src/vcspull/cli/import_repos.py index f6ede5bfa..c0e17f6ea 100644 --- a/src/vcspull/cli/import_repos.py +++ b/src/vcspull/cli/import_repos.py @@ -398,7 +398,11 @@ def import_repos( # Build import options import_mode = ImportMode(mode) - topic_list = [t.strip() for t in topics.split(",") if t.strip()] if topics else [] + topic_list = ( + [topic.strip() for topic in topics.split(",") if topic.strip()] + if topics + else [] + ) options = ImportOptions( mode=import_mode, From 6171aec2045af19623f3d30dea34ecd7ac164ba9 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 8 Feb 2026 13:12:13 -0600 Subject: [PATCH 034/109] 
docs(remotes/base[RemoteRepo]) Fix docstring type for topics field why: Docstring says list[str] but the actual type annotation is tuple[str, ...]. what: - Change topics docstring from list[str] to tuple[str, ...] --- src/vcspull/_internal/remotes/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/vcspull/_internal/remotes/base.py b/src/vcspull/_internal/remotes/base.py index 76a68ba42..fd5b13534 100644 --- a/src/vcspull/_internal/remotes/base.py +++ b/src/vcspull/_internal/remotes/base.py @@ -82,7 +82,7 @@ class RemoteRepo: Repository description language : str | None Primary programming language - topics : list[str] + topics : tuple[str, ...] Repository topics/tags stars : int Star/favorite count From 83dafe9e633f55a9c460908d26437903731d58ee Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 8 Feb 2026 13:12:46 -0600 Subject: [PATCH 035/109] =?UTF-8?q?docs(remotes[terminology])=20Fix=20Scra?= =?UTF-8?q?ping=20=E2=86=92=20Import=20terminology=20in=20docstrings?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit why: The feature is called "import" not "scraping" — docstrings should match. 
what: - Change ImportMode docstring from "Scraping mode" to "Import mode" - Change all "Scraping options" references to "Import options" in github.py, gitlab.py, and gitea.py --- src/vcspull/_internal/remotes/base.py | 2 +- src/vcspull/_internal/remotes/gitea.py | 10 +++++----- src/vcspull/_internal/remotes/github.py | 10 +++++----- src/vcspull/_internal/remotes/gitlab.py | 10 +++++----- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/vcspull/_internal/remotes/base.py b/src/vcspull/_internal/remotes/base.py index fd5b13534..e7d865769 100644 --- a/src/vcspull/_internal/remotes/base.py +++ b/src/vcspull/_internal/remotes/base.py @@ -16,7 +16,7 @@ class ImportMode(enum.Enum): - """Scraping mode for remote services.""" + """Import mode for remote services.""" USER = "user" ORG = "org" diff --git a/src/vcspull/_internal/remotes/gitea.py b/src/vcspull/_internal/remotes/gitea.py index a3cbf49fa..a33eac898 100644 --- a/src/vcspull/_internal/remotes/gitea.py +++ b/src/vcspull/_internal/remotes/gitea.py @@ -95,7 +95,7 @@ def fetch_repos(self, options: ImportOptions) -> t.Iterator[RemoteRepo]: Parameters ---------- options : ImportOptions - Scraping options + Import options Yields ------ @@ -124,7 +124,7 @@ def _fetch_user(self, options: ImportOptions) -> t.Iterator[RemoteRepo]: Parameters ---------- options : ImportOptions - Scraping options + Import options Yields ------ @@ -141,7 +141,7 @@ def _fetch_org(self, options: ImportOptions) -> t.Iterator[RemoteRepo]: Parameters ---------- options : ImportOptions - Scraping options + Import options Yields ------ @@ -158,7 +158,7 @@ def _fetch_search(self, options: ImportOptions) -> t.Iterator[RemoteRepo]: Parameters ---------- options : ImportOptions - Scraping options + Import options Yields ------ @@ -225,7 +225,7 @@ def _paginate_repos( endpoint : str API endpoint options : ImportOptions - Scraping options + Import options Yields ------ diff --git a/src/vcspull/_internal/remotes/github.py 
b/src/vcspull/_internal/remotes/github.py index 855e7b70c..e0104618e 100644 --- a/src/vcspull/_internal/remotes/github.py +++ b/src/vcspull/_internal/remotes/github.py @@ -79,7 +79,7 @@ def fetch_repos(self, options: ImportOptions) -> t.Iterator[RemoteRepo]: Parameters ---------- options : ImportOptions - Scraping options + Import options Yields ------ @@ -108,7 +108,7 @@ def _fetch_user(self, options: ImportOptions) -> t.Iterator[RemoteRepo]: Parameters ---------- options : ImportOptions - Scraping options + Import options Yields ------ @@ -125,7 +125,7 @@ def _fetch_org(self, options: ImportOptions) -> t.Iterator[RemoteRepo]: Parameters ---------- options : ImportOptions - Scraping options + Import options Yields ------ @@ -142,7 +142,7 @@ def _fetch_search(self, options: ImportOptions) -> t.Iterator[RemoteRepo]: Parameters ---------- options : ImportOptions - Scraping options + Import options Yields ------ @@ -212,7 +212,7 @@ def _paginate_repos( endpoint : str API endpoint options : ImportOptions - Scraping options + Import options Yields ------ diff --git a/src/vcspull/_internal/remotes/gitlab.py b/src/vcspull/_internal/remotes/gitlab.py index 316af0a06..c1b5a1cc4 100644 --- a/src/vcspull/_internal/remotes/gitlab.py +++ b/src/vcspull/_internal/remotes/gitlab.py @@ -80,7 +80,7 @@ def fetch_repos(self, options: ImportOptions) -> t.Iterator[RemoteRepo]: Parameters ---------- options : ImportOptions - Scraping options + Import options Yields ------ @@ -109,7 +109,7 @@ def _fetch_user(self, options: ImportOptions) -> t.Iterator[RemoteRepo]: Parameters ---------- options : ImportOptions - Scraping options + Import options Yields ------ @@ -126,7 +126,7 @@ def _fetch_group(self, options: ImportOptions) -> t.Iterator[RemoteRepo]: Parameters ---------- options : ImportOptions - Scraping options + Import options Yields ------ @@ -146,7 +146,7 @@ def _fetch_search(self, options: ImportOptions) -> t.Iterator[RemoteRepo]: Parameters ---------- options : ImportOptions - 
Scraping options + Import options Yields ------ @@ -217,7 +217,7 @@ def _paginate_repos( endpoint : str API endpoint options : ImportOptions - Scraping options + Import options include_subgroups : bool Whether to include projects from subgroups From fa78f5ccf8f8953f179a68ce138d038873c0ab5a Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 8 Feb 2026 13:13:54 -0600 Subject: [PATCH 036/109] refactor(test[MockHTTPResponse]) Deduplicate MockHTTPResponse across test files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit why: MockHTTPResponse was defined identically in conftest.py, test_pagination_duplicates.py, and test_gitlab.py — DRY violation. what: - Remove duplicate MockHTTPResponse from test_pagination_duplicates.py - Remove duplicate MockHTTPResponse from test_gitlab.py - Import from tests._internal.remotes.conftest in both files --- tests/_internal/remotes/test_gitlab.py | 23 +------------ .../remotes/test_pagination_duplicates.py | 33 +------------------ 2 files changed, 2 insertions(+), 54 deletions(-) diff --git a/tests/_internal/remotes/test_gitlab.py b/tests/_internal/remotes/test_gitlab.py index e64308f31..aed234021 100644 --- a/tests/_internal/remotes/test_gitlab.py +++ b/tests/_internal/remotes/test_gitlab.py @@ -8,6 +8,7 @@ import pytest +from tests._internal.remotes.conftest import MockHTTPResponse from vcspull._internal.remotes.base import ImportMode, ImportOptions from vcspull._internal.remotes.gitlab import GitLabImporter @@ -173,28 +174,6 @@ def test_gitlab_uses_path_not_name( assert repos[0].name == "my-project" # Uses 'path', not 'name' -class MockHTTPResponse: - """Mock HTTP response for subgroup test.""" - - def __init__(self, body: bytes, headers: dict[str, str] | None = None) -> None: - self._body = body - self._headers = headers or {} - self.status = 200 - self.code = 200 - - def read(self) -> bytes: - return self._body - - def getheaders(self) -> list[tuple[str, str]]: - return 
list(self._headers.items()) - - def __enter__(self) -> MockHTTPResponse: - return self - - def __exit__(self, *args: t.Any) -> None: - pass - - def test_gitlab_subgroup_url_encoding( monkeypatch: pytest.MonkeyPatch, ) -> None: diff --git a/tests/_internal/remotes/test_pagination_duplicates.py b/tests/_internal/remotes/test_pagination_duplicates.py index 76d0658bf..dd7f4637f 100644 --- a/tests/_internal/remotes/test_pagination_duplicates.py +++ b/tests/_internal/remotes/test_pagination_duplicates.py @@ -21,6 +21,7 @@ import pytest +from tests._internal.remotes.conftest import MockHTTPResponse from vcspull._internal.remotes.base import ImportMode, ImportOptions from vcspull._internal.remotes.gitea import ( DEFAULT_PER_PAGE as GITEA_DEFAULT_PER_PAGE, @@ -105,38 +106,6 @@ def _make_gitlab_repo( } -class MockHTTPResponse: - """Mock HTTP response for testing.""" - - def __init__( - self, - body: bytes, - headers: dict[str, str] | None = None, - status: int = 200, - ) -> None: - """Initialize mock response.""" - self._body = body - self._headers = headers or {} - self.status = status - self.code = status - - def read(self) -> bytes: - """Return response body.""" - return self._body - - def getheaders(self) -> list[tuple[str, str]]: - """Return response headers as list of tuples.""" - return list(self._headers.items()) - - def __enter__(self) -> MockHTTPResponse: - """Context manager entry.""" - return self - - def __exit__(self, *args: t.Any) -> None: - """Context manager exit.""" - pass - - def test_github_pagination_consistent_per_page( monkeypatch: pytest.MonkeyPatch, ) -> None: From e8d68f3a8a899ce17e21a9c30d373eef0dd92ccf Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 8 Feb 2026 13:14:27 -0600 Subject: [PATCH 037/109] fix(remotes/codecommit[region]) Extract region from clone URL when not set why: When --region is not passed, the console URL defaults to us-east-1, producing broken links for repos in other regions. 
AWS clone URLs always embed the correct region in the hostname. what: - Parse region from cloneUrlHttp (git-codecommit.{region}.amazonaws.com) - Fall back to us-east-1 only when region cannot be determined from URL - Add debug log when falling back --- src/vcspull/_internal/remotes/codecommit.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/vcspull/_internal/remotes/codecommit.py b/src/vcspull/_internal/remotes/codecommit.py index db407fe8e..fd4f9c1da 100644 --- a/src/vcspull/_internal/remotes/codecommit.py +++ b/src/vcspull/_internal/remotes/codecommit.py @@ -257,7 +257,19 @@ def _parse_repo(self, data: dict[str, t.Any]) -> RemoteRepo: account_id = data.get("accountId", "") # Build console URL - region = self._region or "us-east-1" + region = self._region + if not region: + # Extract region from clone URL + # (format: https://git-codecommit.{region}.amazonaws.com/...) + clone_http = data.get("cloneUrlHttp", "") + if "git-codecommit." in clone_http: + region = clone_http.split("git-codecommit.")[1].split(".")[0] + else: + region = "us-east-1" + log.debug( + "Could not determine region, defaulting to %s for console URL", + region, + ) html_url = ( f"https://{region}.console.aws.amazon.com/codesuite/codecommit/" f"repositories/{repo_name}/browse" From 0ef722c3d431613717da376f6cf41405e3c2f843 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 8 Feb 2026 13:14:45 -0600 Subject: [PATCH 038/109] docs(cli/import[--token]) Add security note to --token help text why: Tokens passed via CLI args are visible in process lists and shell history; environment variables are the safer alternative. 
what: - Update --token help text to recommend env var for security --- src/vcspull/cli/import_repos.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/vcspull/cli/import_repos.py b/src/vcspull/cli/import_repos.py index c0e17f6ea..09fe48058 100644 --- a/src/vcspull/cli/import_repos.py +++ b/src/vcspull/cli/import_repos.py @@ -103,7 +103,7 @@ def create_import_subparser(parser: argparse.ArgumentParser) -> None: "--token", dest="token", metavar="TOKEN", - help="API token (overrides environment variable)", + help="API token (overrides env var; prefer env var for security)", ) parser.add_argument( "--region", From 1598bddb1db7f69b3b67291326810c0dfd853d90 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 8 Feb 2026 13:16:41 -0600 Subject: [PATCH 039/109] test(remotes/codecommit) Add CodeCommitImporter unit tests why: CodeCommitImporter (279 lines) had no dedicated tests. All other importers have test files; CodeCommit was the gap. what: - Test _check_aws_cli: FileNotFoundError and non-zero returncode - Test _build_aws_command: region/profile flag construction - Test _run_aws_command: credential, endpoint, region, JSON parse errors - Test fetch_repos: basic pipeline, empty repos, name filter, limit, batch processing (25-at-a-time grouping) - Test _parse_repo: field mapping, region extraction from clone URL, explicit region precedence, fallback to us-east-1 - Test is_authenticated: success and failure cases --- tests/_internal/remotes/test_codecommit.py | 559 +++++++++++++++++++++ 1 file changed, 559 insertions(+) create mode 100644 tests/_internal/remotes/test_codecommit.py diff --git a/tests/_internal/remotes/test_codecommit.py b/tests/_internal/remotes/test_codecommit.py new file mode 100644 index 000000000..c757afb39 --- /dev/null +++ b/tests/_internal/remotes/test_codecommit.py @@ -0,0 +1,559 @@ +"""Tests for vcspull._internal.remotes.codecommit module.""" + +from __future__ import annotations + +import json +import subprocess +import typing 
as t + +import pytest + +from vcspull._internal.remotes.base import ImportOptions +from vcspull._internal.remotes.codecommit import CodeCommitImporter + + +def _aws_ok( + stdout: str = "", + stderr: str = "", +) -> subprocess.CompletedProcess[str]: + """Create a successful subprocess result.""" + return subprocess.CompletedProcess( + args=["aws"], + returncode=0, + stdout=stdout, + stderr=stderr, + ) + + +def _aws_err( + stderr: str = "", + returncode: int = 1, +) -> subprocess.CompletedProcess[str]: + """Create a failed subprocess result.""" + return subprocess.CompletedProcess( + args=["aws"], + returncode=returncode, + stdout="", + stderr=stderr, + ) + + +def _make_cc_repo( + name: str, + *, + region: str = "us-east-1", + account_id: str = "123456789012", + default_branch: str = "main", + description: str | None = None, +) -> dict[str, t.Any]: + """Create a CodeCommit repository metadata dict.""" + return { + "repositoryName": name, + "cloneUrlHttp": ( + f"https://git-codecommit.{region}.amazonaws.com/v1/repos/{name}" + ), + "cloneUrlSsh": (f"ssh://git-codecommit.{region}.amazonaws.com/v1/repos/{name}"), + "accountId": account_id, + "defaultBranch": default_branch, + "repositoryDescription": description, + } + + +# --------------------------------------------------------------------------- +# _check_aws_cli +# --------------------------------------------------------------------------- + + +def test_check_aws_cli_file_not_found(monkeypatch: pytest.MonkeyPatch) -> None: + """Test _check_aws_cli raises DependencyError when aws binary missing.""" + from vcspull._internal.remotes.base import DependencyError + + def mock_run(cmd: list[str], **kwargs: t.Any) -> subprocess.CompletedProcess[str]: + msg = "aws" + raise FileNotFoundError(msg) + + monkeypatch.setattr("subprocess.run", mock_run) + + with pytest.raises(DependencyError, match="not installed"): + CodeCommitImporter() + + +def test_check_aws_cli_nonzero_returncode(monkeypatch: pytest.MonkeyPatch) -> None: + 
"""Test _check_aws_cli raises DependencyError for non-zero returncode.""" + from vcspull._internal.remotes.base import DependencyError + + monkeypatch.setattr("subprocess.run", lambda cmd, **kw: _aws_err()) + + with pytest.raises(DependencyError, match="not installed"): + CodeCommitImporter() + + +# --------------------------------------------------------------------------- +# _build_aws_command +# --------------------------------------------------------------------------- + + +def test_build_aws_command_no_flags(monkeypatch: pytest.MonkeyPatch) -> None: + """Test _build_aws_command with no region/profile.""" + monkeypatch.setattr("subprocess.run", lambda cmd, **kw: _aws_ok("aws-cli/2.x")) + + importer = CodeCommitImporter() + result = importer._build_aws_command("codecommit", "list-repositories") + assert result == ["aws", "codecommit", "list-repositories"] + + +def test_build_aws_command_with_region(monkeypatch: pytest.MonkeyPatch) -> None: + """Test _build_aws_command appends --region.""" + monkeypatch.setattr("subprocess.run", lambda cmd, **kw: _aws_ok("aws-cli/2.x")) + + importer = CodeCommitImporter(region="eu-west-1") + result = importer._build_aws_command("codecommit", "list-repositories") + assert result == [ + "aws", + "--region", + "eu-west-1", + "codecommit", + "list-repositories", + ] + + +def test_build_aws_command_with_profile(monkeypatch: pytest.MonkeyPatch) -> None: + """Test _build_aws_command appends --profile.""" + monkeypatch.setattr("subprocess.run", lambda cmd, **kw: _aws_ok("aws-cli/2.x")) + + importer = CodeCommitImporter(profile="myprofile") + result = importer._build_aws_command("codecommit", "list-repositories") + assert result == [ + "aws", + "--profile", + "myprofile", + "codecommit", + "list-repositories", + ] + + +def test_build_aws_command_with_region_and_profile( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Test _build_aws_command with both region and profile.""" + monkeypatch.setattr("subprocess.run", lambda cmd, **kw: 
_aws_ok("aws-cli/2.x")) + + importer = CodeCommitImporter(region="ap-south-1", profile="prod") + result = importer._build_aws_command("sts", "get-caller-identity") + assert result == [ + "aws", + "--region", + "ap-south-1", + "--profile", + "prod", + "sts", + "get-caller-identity", + ] + + +# --------------------------------------------------------------------------- +# _run_aws_command — error handling +# --------------------------------------------------------------------------- + + +class RunAwsErrorFixture(t.NamedTuple): + """Fixture for _run_aws_command error test cases.""" + + test_id: str + stderr: str + expected_error_type: str + expected_match: str + + +RUN_AWS_ERROR_FIXTURES: list[RunAwsErrorFixture] = [ + RunAwsErrorFixture( + test_id="credential-error", + stderr="Unable to locate credentials", + expected_error_type="AuthenticationError", + expected_match="credentials not configured", + ), + RunAwsErrorFixture( + test_id="endpoint-connection-error", + stderr="Could not connect to the endpoint URL", + expected_error_type="ConfigurationError", + expected_match="Could not connect", + ), + RunAwsErrorFixture( + test_id="invalid-region-error", + stderr="Invalid region: foobar-1", + expected_error_type="ConfigurationError", + expected_match="Invalid AWS region", + ), + RunAwsErrorFixture( + test_id="generic-aws-error", + stderr="Something unexpected happened", + expected_error_type="ConfigurationError", + expected_match="AWS CLI error", + ), +] + + +@pytest.mark.parametrize( + list(RunAwsErrorFixture._fields), + RUN_AWS_ERROR_FIXTURES, + ids=[f.test_id for f in RUN_AWS_ERROR_FIXTURES], +) +def test_run_aws_command_errors( + test_id: str, + stderr: str, + expected_error_type: str, + expected_match: str, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Test _run_aws_command handles various AWS CLI errors.""" + from vcspull._internal.remotes import base + + call_count = 0 + + def mock_run(cmd: list[str], **kwargs: t.Any) -> subprocess.CompletedProcess[str]: + 
nonlocal call_count + call_count += 1 + # First call is _check_aws_cli — succeed + if call_count == 1: + return _aws_ok("aws-cli/2.x") + # Subsequent calls fail with the test error + return _aws_err(stderr=stderr) + + monkeypatch.setattr("subprocess.run", mock_run) + importer = CodeCommitImporter() + + error_class = getattr(base, expected_error_type) + with pytest.raises(error_class, match=expected_match): + importer._run_aws_command("codecommit", "list-repositories") + + +def test_run_aws_command_json_parse_error(monkeypatch: pytest.MonkeyPatch) -> None: + """Test _run_aws_command raises ConfigurationError for invalid JSON.""" + from vcspull._internal.remotes.base import ConfigurationError + + call_count = 0 + + def mock_run(cmd: list[str], **kwargs: t.Any) -> subprocess.CompletedProcess[str]: + nonlocal call_count + call_count += 1 + if call_count == 1: + return _aws_ok("aws-cli/2.x") + return _aws_ok(stdout="not valid json {{{") + + monkeypatch.setattr("subprocess.run", mock_run) + importer = CodeCommitImporter() + + with pytest.raises(ConfigurationError, match="Invalid JSON"): + importer._run_aws_command("codecommit", "list-repositories") + + +def test_run_aws_command_file_not_found(monkeypatch: pytest.MonkeyPatch) -> None: + """Test _run_aws_command raises DependencyError when aws disappears mid-session.""" + from vcspull._internal.remotes.base import DependencyError + + call_count = 0 + + def mock_run(cmd: list[str], **kwargs: t.Any) -> subprocess.CompletedProcess[str]: + nonlocal call_count + call_count += 1 + if call_count == 1: + return _aws_ok("aws-cli/2.x") + msg = "aws" + raise FileNotFoundError(msg) + + monkeypatch.setattr("subprocess.run", mock_run) + importer = CodeCommitImporter() + + with pytest.raises(DependencyError, match="not found"): + importer._run_aws_command("codecommit", "list-repositories") + + +# --------------------------------------------------------------------------- +# fetch_repos +# 
--------------------------------------------------------------------------- + + +def test_fetch_repos_basic(monkeypatch: pytest.MonkeyPatch) -> None: + """Test fetch_repos returns repos from list + batch-get pipeline.""" + repos_data = [_make_cc_repo("my-repo"), _make_cc_repo("other-repo")] + + call_count = 0 + + def mock_run(cmd: list[str], **kwargs: t.Any) -> subprocess.CompletedProcess[str]: + nonlocal call_count + call_count += 1 + if call_count == 1: + # _check_aws_cli + return _aws_ok("aws-cli/2.x") + if "list-repositories" in cmd: + return _aws_ok( + json.dumps( + { + "repositories": [ + {"repositoryName": "my-repo"}, + {"repositoryName": "other-repo"}, + ] + } + ) + ) + if "batch-get-repositories" in cmd: + return _aws_ok(json.dumps({"repositories": repos_data})) + return _aws_err(stderr="unknown command") + + monkeypatch.setattr("subprocess.run", mock_run) + importer = CodeCommitImporter() + options = ImportOptions() + repos = list(importer.fetch_repos(options)) + + assert len(repos) == 2 + assert repos[0].name == "my-repo" + assert repos[1].name == "other-repo" + + +def test_fetch_repos_empty(monkeypatch: pytest.MonkeyPatch) -> None: + """Test fetch_repos returns nothing when no repositories exist.""" + call_count = 0 + + def mock_run(cmd: list[str], **kwargs: t.Any) -> subprocess.CompletedProcess[str]: + nonlocal call_count + call_count += 1 + if call_count == 1: + return _aws_ok("aws-cli/2.x") + if "list-repositories" in cmd: + return _aws_ok(json.dumps({"repositories": []})) + return _aws_err(stderr="unknown command") + + monkeypatch.setattr("subprocess.run", mock_run) + importer = CodeCommitImporter() + options = ImportOptions() + repos = list(importer.fetch_repos(options)) + + assert len(repos) == 0 + + +def test_fetch_repos_name_filter(monkeypatch: pytest.MonkeyPatch) -> None: + """Test fetch_repos filters by target name.""" + repos_data = [_make_cc_repo("django-app")] + + call_count = 0 + + def mock_run(cmd: list[str], **kwargs: t.Any) -> 
subprocess.CompletedProcess[str]: + nonlocal call_count + call_count += 1 + if call_count == 1: + return _aws_ok("aws-cli/2.x") + if "list-repositories" in cmd: + return _aws_ok( + json.dumps( + { + "repositories": [ + {"repositoryName": "django-app"}, + {"repositoryName": "flask-app"}, + {"repositoryName": "react-app"}, + ] + } + ) + ) + if "batch-get-repositories" in cmd: + # Only django-app should be requested + assert "django-app" in cmd + return _aws_ok(json.dumps({"repositories": repos_data})) + return _aws_err(stderr="unknown command") + + monkeypatch.setattr("subprocess.run", mock_run) + importer = CodeCommitImporter() + options = ImportOptions(target="django") + repos = list(importer.fetch_repos(options)) + + assert len(repos) == 1 + assert repos[0].name == "django-app" + + +def test_fetch_repos_limit(monkeypatch: pytest.MonkeyPatch) -> None: + """Test fetch_repos respects limit option.""" + repos_data = [_make_cc_repo(f"repo{i}") for i in range(5)] + + call_count = 0 + + def mock_run(cmd: list[str], **kwargs: t.Any) -> subprocess.CompletedProcess[str]: + nonlocal call_count + call_count += 1 + if call_count == 1: + return _aws_ok("aws-cli/2.x") + if "list-repositories" in cmd: + return _aws_ok( + json.dumps( + {"repositories": [{"repositoryName": f"repo{i}"} for i in range(5)]} + ) + ) + if "batch-get-repositories" in cmd: + return _aws_ok(json.dumps({"repositories": repos_data})) + return _aws_err(stderr="unknown command") + + monkeypatch.setattr("subprocess.run", mock_run) + importer = CodeCommitImporter() + options = ImportOptions(limit=2) + repos = list(importer.fetch_repos(options)) + + assert len(repos) == 2 + + +def test_fetch_repos_batch_processing(monkeypatch: pytest.MonkeyPatch) -> None: + """Test fetch_repos batches in groups of 25.""" + # Create 30 repos — should result in 2 batch-get calls (25 + 5) + batch_get_calls: list[list[str]] = [] + + call_count = 0 + + def mock_run(cmd: list[str], **kwargs: t.Any) -> subprocess.CompletedProcess[str]: 
+ nonlocal call_count + call_count += 1 + if call_count == 1: + return _aws_ok("aws-cli/2.x") + if "list-repositories" in cmd: + return _aws_ok( + json.dumps( + { + "repositories": [ + {"repositoryName": f"repo{i}"} for i in range(30) + ] + } + ) + ) + if "batch-get-repositories" in cmd: + # Extract repo names from command (after --repository-names) + names_idx = cmd.index("--repository-names") + 1 + repo_names = cmd[names_idx:] + batch_get_calls.append(repo_names) + repos = [_make_cc_repo(name) for name in repo_names] + return _aws_ok(json.dumps({"repositories": repos})) + return _aws_err(stderr="unknown command") + + monkeypatch.setattr("subprocess.run", mock_run) + importer = CodeCommitImporter() + options = ImportOptions(limit=100) + repos = list(importer.fetch_repos(options)) + + assert len(repos) == 30 + assert len(batch_get_calls) == 2 + assert len(batch_get_calls[0]) == 25 + assert len(batch_get_calls[1]) == 5 + + +# --------------------------------------------------------------------------- +# _parse_repo — region extraction +# --------------------------------------------------------------------------- + + +def test_parse_repo_region_from_clone_url(monkeypatch: pytest.MonkeyPatch) -> None: + """Test _parse_repo extracts region from clone URL when not set.""" + monkeypatch.setattr("subprocess.run", lambda cmd, **kw: _aws_ok("aws-cli/2.x")) + + # No region set — should extract from clone URL + importer = CodeCommitImporter(region=None) + data = _make_cc_repo("my-repo", region="us-west-2") + repo = importer._parse_repo(data) + + assert "us-west-2" in repo.html_url + assert "us-east-1" not in repo.html_url + + +def test_parse_repo_region_explicit(monkeypatch: pytest.MonkeyPatch) -> None: + """Test _parse_repo uses explicit region when set.""" + monkeypatch.setattr("subprocess.run", lambda cmd, **kw: _aws_ok("aws-cli/2.x")) + + importer = CodeCommitImporter(region="eu-central-1") + data = _make_cc_repo("my-repo", region="us-west-2") + repo = 
importer._parse_repo(data) + + # Explicit region takes precedence over clone URL + assert "eu-central-1" in repo.html_url + + +def test_parse_repo_fallback_region(monkeypatch: pytest.MonkeyPatch) -> None: + """Test _parse_repo falls back to us-east-1 when no region info available.""" + monkeypatch.setattr("subprocess.run", lambda cmd, **kw: _aws_ok("aws-cli/2.x")) + + importer = CodeCommitImporter(region=None) + # Data without a recognizable clone URL + data = { + "repositoryName": "my-repo", + "cloneUrlHttp": "", + "cloneUrlSsh": "", + "accountId": "123456789012", + } + repo = importer._parse_repo(data) + + assert "us-east-1" in repo.html_url + + +def test_parse_repo_fields(monkeypatch: pytest.MonkeyPatch) -> None: + """Test _parse_repo maps all fields correctly.""" + monkeypatch.setattr("subprocess.run", lambda cmd, **kw: _aws_ok("aws-cli/2.x")) + + importer = CodeCommitImporter(region="us-east-1") + data = _make_cc_repo( + "test-repo", + region="us-east-1", + account_id="999888777666", + default_branch="develop", + description="A test repository", + ) + repo = importer._parse_repo(data) + + assert repo.name == "test-repo" + assert "git-codecommit.us-east-1" in repo.clone_url + assert "git-codecommit.us-east-1" in repo.ssh_url + assert repo.description == "A test repository" + assert repo.language is None + assert repo.topics == () + assert repo.stars == 0 + assert repo.is_fork is False + assert repo.is_archived is False + assert repo.default_branch == "develop" + assert repo.owner == "999888777666" + + +# --------------------------------------------------------------------------- +# is_authenticated +# --------------------------------------------------------------------------- + + +def test_is_authenticated_success(monkeypatch: pytest.MonkeyPatch) -> None: + """Test is_authenticated returns True when sts get-caller-identity succeeds.""" + call_count = 0 + + def mock_run(cmd: list[str], **kwargs: t.Any) -> subprocess.CompletedProcess[str]: + nonlocal call_count + 
call_count += 1 + if call_count == 1: + return _aws_ok("aws-cli/2.x") + # sts get-caller-identity succeeds + return _aws_ok( + json.dumps( + {"UserId": "AIDA...", "Account": "123456789012", "Arn": "arn:..."} + ) + ) + + monkeypatch.setattr("subprocess.run", mock_run) + importer = CodeCommitImporter() + + assert importer.is_authenticated is True + + +def test_is_authenticated_failure(monkeypatch: pytest.MonkeyPatch) -> None: + """Test is_authenticated returns False when credentials are missing.""" + call_count = 0 + + def mock_run(cmd: list[str], **kwargs: t.Any) -> subprocess.CompletedProcess[str]: + nonlocal call_count + call_count += 1 + if call_count == 1: + return _aws_ok("aws-cli/2.x") + # sts get-caller-identity fails with credential error + return _aws_err(stderr="Unable to locate credentials") + + monkeypatch.setattr("subprocess.run", mock_run) + importer = CodeCommitImporter() + + assert importer.is_authenticated is False From 569fe79d4877ebaedf8ad981b0e7f3601cf67b32 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 8 Feb 2026 13:48:18 -0600 Subject: [PATCH 040/109] fix(remotes/codecommit[output]) Force --output json in AWS CLI commands why: _run_aws_command parses stdout as JSON but doesn't force JSON output format. If the user's ~/.aws/config sets output=table or output=text, json.loads() fails with an unhelpful ConfigurationError("Invalid JSON from AWS CLI: ..."). 
what: - Add --output json to _build_aws_command base command - Update test assertions for exact command lists --- src/vcspull/_internal/remotes/codecommit.py | 2 +- tests/_internal/remotes/test_codecommit.py | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/vcspull/_internal/remotes/codecommit.py b/src/vcspull/_internal/remotes/codecommit.py index fd4f9c1da..380c62208 100644 --- a/src/vcspull/_internal/remotes/codecommit.py +++ b/src/vcspull/_internal/remotes/codecommit.py @@ -90,7 +90,7 @@ def _build_aws_command(self, *args: str) -> list[str]: list[str] Complete command list """ - cmd = ["aws"] + cmd = ["aws", "--output", "json"] if self._region: cmd.extend(["--region", self._region]) if self._profile: diff --git a/tests/_internal/remotes/test_codecommit.py b/tests/_internal/remotes/test_codecommit.py index c757afb39..ffcf1c758 100644 --- a/tests/_internal/remotes/test_codecommit.py +++ b/tests/_internal/remotes/test_codecommit.py @@ -99,7 +99,7 @@ def test_build_aws_command_no_flags(monkeypatch: pytest.MonkeyPatch) -> None: importer = CodeCommitImporter() result = importer._build_aws_command("codecommit", "list-repositories") - assert result == ["aws", "codecommit", "list-repositories"] + assert result == ["aws", "--output", "json", "codecommit", "list-repositories"] def test_build_aws_command_with_region(monkeypatch: pytest.MonkeyPatch) -> None: @@ -110,6 +110,8 @@ def test_build_aws_command_with_region(monkeypatch: pytest.MonkeyPatch) -> None: result = importer._build_aws_command("codecommit", "list-repositories") assert result == [ "aws", + "--output", + "json", "--region", "eu-west-1", "codecommit", @@ -125,6 +127,8 @@ def test_build_aws_command_with_profile(monkeypatch: pytest.MonkeyPatch) -> None result = importer._build_aws_command("codecommit", "list-repositories") assert result == [ "aws", + "--output", + "json", "--profile", "myprofile", "codecommit", @@ -142,6 +146,8 @@ def test_build_aws_command_with_region_and_profile( 
result = importer._build_aws_command("sts", "get-caller-identity") assert result == [ "aws", + "--output", + "json", "--region", "ap-south-1", "--profile", From 309aa1cb7bdb8b653d7af021e6914325558e1c4b Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 8 Feb 2026 13:49:15 -0600 Subject: [PATCH 041/109] fix(remotes/gitlab[archived]) Fix archived parameter semantics why: GitLab API archived=true returns ONLY archived projects, not "include archived". Setting archived=true when include_archived=True paradoxically excluded all active projects. The correct behavior: omit the param to get all projects. what: - Only set archived=false when excluding archived projects - Omit archived param entirely when including archived (returns all) - Add tests verifying param presence/absence in API URLs --- src/vcspull/_internal/remotes/gitlab.py | 5 +- tests/_internal/remotes/test_gitlab.py | 87 +++++++++++++++++++++++++ 2 files changed, 89 insertions(+), 3 deletions(-) diff --git a/src/vcspull/_internal/remotes/gitlab.py b/src/vcspull/_internal/remotes/gitlab.py index c1b5a1cc4..530e7cbf6 100644 --- a/src/vcspull/_internal/remotes/gitlab.py +++ b/src/vcspull/_internal/remotes/gitlab.py @@ -242,10 +242,9 @@ def _paginate_repos( if include_subgroups: params["include_subgroups"] = "true" - if options.include_archived: - params["archived"] = "true" - else: + if not options.include_archived: params["archived"] = "false" + # When include_archived=True, omit the param to get all projects data, _headers = self._client.get( endpoint, diff --git a/tests/_internal/remotes/test_gitlab.py b/tests/_internal/remotes/test_gitlab.py index aed234021..e6ccca7fe 100644 --- a/tests/_internal/remotes/test_gitlab.py +++ b/tests/_internal/remotes/test_gitlab.py @@ -269,3 +269,90 @@ def urlopen_capture( assert len(repos) == 1 assert repos[0].name == "deep-project" + + +def test_gitlab_archived_param_omitted_when_including( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Test that archived param is 
omitted when include_archived=True. + + GitLab API: archived=true returns ONLY archived projects. + Omitting the param returns all projects (archived + non-archived). + """ + captured_urls: list[str] = [] + + response_json = [ + { + "path": "project1", + "name": "Project 1", + "http_url_to_repo": "https://gitlab.com/user/project1.git", + "ssh_url_to_repo": "git@gitlab.com:user/project1.git", + "web_url": "https://gitlab.com/user/project1", + "description": "Active project", + "topics": [], + "star_count": 10, + "archived": False, + "default_branch": "main", + "namespace": {"path": "user"}, + } + ] + + def urlopen_capture( + request: urllib.request.Request, + timeout: int | None = None, + ) -> MockHTTPResponse: + captured_urls.append(request.full_url) + return MockHTTPResponse(json.dumps(response_json).encode()) + + monkeypatch.setattr("urllib.request.urlopen", urlopen_capture) + + importer = GitLabImporter() + options = ImportOptions(mode=ImportMode.USER, target="user", include_archived=True) + list(importer.fetch_repos(options)) + + assert len(captured_urls) == 1 + # archived param should NOT be in the URL when include_archived=True + assert "archived=" not in captured_urls[0], ( + f"Expected no 'archived' param in URL, got: {captured_urls[0]}" + ) + + +def test_gitlab_archived_param_false_when_excluding( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Test that archived=false is set when include_archived=False.""" + captured_urls: list[str] = [] + + response_json = [ + { + "path": "project1", + "name": "Project 1", + "http_url_to_repo": "https://gitlab.com/user/project1.git", + "ssh_url_to_repo": "git@gitlab.com:user/project1.git", + "web_url": "https://gitlab.com/user/project1", + "description": "Active project", + "topics": [], + "star_count": 10, + "archived": False, + "default_branch": "main", + "namespace": {"path": "user"}, + } + ] + + def urlopen_capture( + request: urllib.request.Request, + timeout: int | None = None, + ) -> MockHTTPResponse: + 
captured_urls.append(request.full_url) + return MockHTTPResponse(json.dumps(response_json).encode()) + + monkeypatch.setattr("urllib.request.urlopen", urlopen_capture) + + importer = GitLabImporter() + options = ImportOptions(mode=ImportMode.USER, target="user", include_archived=False) + list(importer.fetch_repos(options)) + + assert len(captured_urls) == 1 + assert "archived=false" in captured_urls[0], ( + f"Expected 'archived=false' in URL, got: {captured_urls[0]}" + ) From d77ffb3d05245fbf9155e2fb540e825c7e5cd6f3 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 8 Feb 2026 13:50:27 -0600 Subject: [PATCH 042/109] fix(remotes[topics]) Guard against null topics in API responses why: APIs can return "topics": null instead of an empty array. dict.get("topics", []) returns None when the key exists with null value, causing tuple(None) to crash with TypeError: 'NoneType' object is not iterable. what: - Change data.get("topics", []) to data.get("topics") or [] in GitHub, Gitea, GitLab - Fix GitLab fallback chain: data.get("topics") or data.get("tag_list") or [] - Add null topics tests for all three importers --- src/vcspull/_internal/remotes/gitea.py | 2 +- src/vcspull/_internal/remotes/github.py | 2 +- src/vcspull/_internal/remotes/gitlab.py | 2 +- tests/_internal/remotes/test_gitea.py | 33 ++++++++++++++++++++ tests/_internal/remotes/test_github.py | 41 +++++++++++++++++++++++++ tests/_internal/remotes/test_gitlab.py | 32 +++++++++++++++++++ 6 files changed, 109 insertions(+), 3 deletions(-) diff --git a/src/vcspull/_internal/remotes/gitea.py b/src/vcspull/_internal/remotes/gitea.py index a33eac898..690980e92 100644 --- a/src/vcspull/_internal/remotes/gitea.py +++ b/src/vcspull/_internal/remotes/gitea.py @@ -289,7 +289,7 @@ def _parse_repo(self, data: dict[str, t.Any]) -> RemoteRepo: html_url=data.get("html_url", ""), description=data.get("description"), language=data.get("language"), - topics=tuple(data.get("topics", [])), + topics=tuple(data.get("topics") or 
[]), stars=data.get("stars_count", 0), # Note: Gitea uses stars_count is_fork=data.get("fork", False), is_archived=data.get("archived", False), diff --git a/src/vcspull/_internal/remotes/github.py b/src/vcspull/_internal/remotes/github.py index e0104618e..a191e34cf 100644 --- a/src/vcspull/_internal/remotes/github.py +++ b/src/vcspull/_internal/remotes/github.py @@ -278,7 +278,7 @@ def _parse_repo(self, data: dict[str, t.Any]) -> RemoteRepo: html_url=data["html_url"], description=data.get("description"), language=data.get("language"), - topics=tuple(data.get("topics", [])), + topics=tuple(data.get("topics") or []), stars=data.get("stargazers_count", 0), is_fork=data.get("fork", False), is_archived=data.get("archived", False), diff --git a/src/vcspull/_internal/remotes/gitlab.py b/src/vcspull/_internal/remotes/gitlab.py index 530e7cbf6..cb9e00557 100644 --- a/src/vcspull/_internal/remotes/gitlab.py +++ b/src/vcspull/_internal/remotes/gitlab.py @@ -300,7 +300,7 @@ def _parse_repo(self, data: dict[str, t.Any]) -> RemoteRepo: html_url=data.get("web_url", ""), description=data.get("description"), language=None, # GitLab doesn't return language in list endpoints - topics=tuple(data.get("topics", data.get("tag_list", []))), + topics=tuple(data.get("topics") or data.get("tag_list") or []), stars=data.get("star_count", 0), is_fork=is_fork, is_archived=data.get("archived", False), diff --git a/tests/_internal/remotes/test_gitea.py b/tests/_internal/remotes/test_gitea.py index b08f5af21..d3ba212de 100644 --- a/tests/_internal/remotes/test_gitea.py +++ b/tests/_internal/remotes/test_gitea.py @@ -154,6 +154,39 @@ def test_gitea_uses_stars_count_field( assert repos[0].stars == 500 +def test_gitea_handles_null_topics( + mock_urlopen: t.Callable[..., None], +) -> None: + """Test Gitea handles null topics in API response. + + Gitea API can return "topics": null instead of an empty array. 
+ dict.get("topics", []) returns None when the key exists with null value, + causing tuple(None) to crash with TypeError. + """ + response_json = [ + { + "name": "null-topics-repo", + "clone_url": "https://codeberg.org/user/null-topics-repo.git", + "ssh_url": "git@codeberg.org:user/null-topics-repo.git", + "html_url": "https://codeberg.org/user/null-topics-repo", + "description": "Repo with null topics", + "language": "Python", + "topics": None, + "stars_count": 10, + "fork": False, + "archived": False, + "default_branch": "main", + "owner": {"login": "user"}, + } + ] + mock_urlopen([(json.dumps(response_json).encode(), {}, 200)]) + importer = GiteaImporter(base_url="https://codeberg.org") + options = ImportOptions(mode=ImportMode.USER, target="user") + repos = list(importer.fetch_repos(options)) + assert len(repos) == 1 + assert repos[0].topics == () + + def test_gitea_filters_by_language( mock_urlopen: t.Callable[..., None], ) -> None: diff --git a/tests/_internal/remotes/test_github.py b/tests/_internal/remotes/test_github.py index d286756aa..b731f49d2 100644 --- a/tests/_internal/remotes/test_github.py +++ b/tests/_internal/remotes/test_github.py @@ -344,6 +344,47 @@ def test_github_importer_service_name() -> None: assert importer.service_name == "GitHub" +def test_github_handles_null_topics( + mock_urlopen: t.Callable[..., None], +) -> None: + """Test GitHub handles null topics in API response. + + GitHub API can return "topics": null instead of an empty array. + dict.get("topics", []) returns None when the key exists with null value, + causing tuple(None) to crash with TypeError. 
+ """ + response_json = [ + { + "name": "null-topics-repo", + "clone_url": "https://github.com/user/null-topics-repo.git", + "ssh_url": "git@github.com:user/null-topics-repo.git", + "html_url": "https://github.com/user/null-topics-repo", + "description": "Repo with null topics", + "language": "Python", + "topics": None, + "stargazers_count": 10, + "fork": False, + "archived": False, + "default_branch": "main", + "owner": {"login": "user"}, + } + ] + mock_urlopen( + [ + ( + json.dumps(response_json).encode(), + {"x-ratelimit-remaining": "100", "x-ratelimit-limit": "60"}, + 200, + ) + ] + ) + importer = GitHubImporter() + options = ImportOptions(mode=ImportMode.USER, target="user") + repos = list(importer.fetch_repos(options)) + assert len(repos) == 1 + assert repos[0].topics == () + + def test_github_limit_respected( mock_urlopen: t.Callable[..., None], ) -> None: diff --git a/tests/_internal/remotes/test_gitlab.py b/tests/_internal/remotes/test_gitlab.py index e6ccca7fe..d6ab0080c 100644 --- a/tests/_internal/remotes/test_gitlab.py +++ b/tests/_internal/remotes/test_gitlab.py @@ -271,6 +271,38 @@ def urlopen_capture( assert repos[0].name == "deep-project" +def test_gitlab_handles_null_topics( + mock_urlopen: t.Callable[..., None], +) -> None: + """Test GitLab handles null topics in API response. + + GitLab API can return "topics": null instead of an empty array. + dict.get("topics", []) returns None when the key exists with null value, + causing tuple(None) to crash with TypeError. 
+ """ + response_json = [ + { + "path": "null-topics-project", + "name": "Null Topics Project", + "http_url_to_repo": "https://gitlab.com/user/null-topics-project.git", + "ssh_url_to_repo": "git@gitlab.com:user/null-topics-project.git", + "web_url": "https://gitlab.com/user/null-topics-project", + "description": "Project with null topics", + "topics": None, + "star_count": 10, + "archived": False, + "default_branch": "main", + "namespace": {"path": "user"}, + } + ] + mock_urlopen([(json.dumps(response_json).encode(), {}, 200)]) + importer = GitLabImporter() + options = ImportOptions(mode=ImportMode.USER, target="user") + repos = list(importer.fetch_repos(options)) + assert len(repos) == 1 + assert repos[0].topics == () + + def test_gitlab_archived_param_omitted_when_including( monkeypatch: pytest.MonkeyPatch, ) -> None: From 92ab260f51bdb5474b54e8fd88cba5e953a46329 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 8 Feb 2026 13:51:19 -0600 Subject: [PATCH 043/109] fix(test[codecommit]) Mock subprocess in _get_importer CodeCommit tests why: test_get_importer creates real CodeCommitImporter instances which call _check_aws_cli() running subprocess.run(["aws", "--version"]). Tests fail in CI environments without the AWS CLI installed. 
what: - Add monkeypatch for subprocess.run when service is codecommit/cc/aws - Add subprocess import to test file --- tests/cli/test_import_repos.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/cli/test_import_repos.py b/tests/cli/test_import_repos.py index b58049edd..cdae4a8ec 100644 --- a/tests/cli/test_import_repos.py +++ b/tests/cli/test_import_repos.py @@ -5,6 +5,7 @@ import json import logging import pathlib +import subprocess import sys import typing as t @@ -227,8 +228,18 @@ def test_get_importer( profile: str | None, expected_type_name: str, expected_error: str | None, + monkeypatch: MonkeyPatch, ) -> None: """Test _get_importer creates the correct importer type.""" + # Mock subprocess.run for CodeCommit tests (aws --version check) + if service in ("codecommit", "cc", "aws"): + monkeypatch.setattr( + "subprocess.run", + lambda cmd, **kwargs: subprocess.CompletedProcess( + cmd, 0, stdout="aws-cli/2.x", stderr="" + ), + ) + if expected_error: with pytest.raises(ValueError, match=expected_error): _get_importer( From a3cf645a242199c087d9f22f0b72250849d978e5 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 14 Feb 2026 10:28:03 -0600 Subject: [PATCH 044/109] fix(cli/import[gitlab_groups]) Preserve namespace by default why: GitLab org imports were flattened because owner parsing used leaf namespace path, so subgroup ancestry was lost before workspace mapping. 
what: - Prefer namespace.full_path for GitLab owner derivation with path_with_namespace fallback - Add --flatten-groups flag for GitLab org imports to opt into flattened behavior - Keep nested subgroup workspace mapping as default and wire flatten_groups through CLI dispatch --- src/vcspull/_internal/remotes/gitlab.py | 10 +++- src/vcspull/cli/__init__.py | 1 + src/vcspull/cli/import_repos.py | 76 +++++++++++++++++++------ 3 files changed, 68 insertions(+), 19 deletions(-) diff --git a/src/vcspull/_internal/remotes/gitlab.py b/src/vcspull/_internal/remotes/gitlab.py index cb9e00557..e735e4d3c 100644 --- a/src/vcspull/_internal/remotes/gitlab.py +++ b/src/vcspull/_internal/remotes/gitlab.py @@ -286,9 +286,15 @@ def _parse_repo(self, data: dict[str, t.Any]) -> RemoteRepo: # Use 'path' instead of 'name' for filesystem-safe name name = data.get("path", data.get("name", "")) - # Determine owner from namespace + # Prefer the full namespace path for subgroup-aware import behavior. namespace = data.get("namespace", {}) - owner = namespace.get("path", namespace.get("name", "")) + owner = namespace.get("full_path") + if not owner: + path_with_namespace = data.get("path_with_namespace") + if isinstance(path_with_namespace, str) and "/" in path_with_namespace: + owner = path_with_namespace.rsplit("/", 1)[0] + else: + owner = namespace.get("path", namespace.get("name", "")) # Check if it's a fork is_fork = data.get("forked_from_project") is not None diff --git a/src/vcspull/cli/__init__.py b/src/vcspull/cli/__init__.py index 14ac60a0e..454263f14 100644 --- a/src/vcspull/cli/__init__.py +++ b/src/vcspull/cli/__init__.py @@ -525,4 +525,5 @@ def cli(_args: list[str] | None = None) -> None: output_ndjson=getattr(args, "output_ndjson", False), color=getattr(args, "color", "auto"), use_https=getattr(args, "use_https", False), + flatten_groups=getattr(args, "flatten_groups", False), ) diff --git a/src/vcspull/cli/import_repos.py b/src/vcspull/cli/import_repos.py index 
09fe48058..e2814a95d 100644 --- a/src/vcspull/cli/import_repos.py +++ b/src/vcspull/cli/import_repos.py @@ -201,6 +201,15 @@ def create_import_subparser(parser: argparse.ArgumentParser) -> None: dest="use_https", help="Use HTTPS clone URLs instead of SSH (default: SSH)", ) + output_group.add_argument( + "--flatten-groups", + action="store_true", + dest="flatten_groups", + help=( + "For GitLab --mode org, flatten subgroup repositories into the base " + "workspace instead of preserving subgroup paths" + ), + ) output_group.add_argument( "--color", choices=["auto", "always", "never"], @@ -317,6 +326,7 @@ def import_repos( output_ndjson: bool, color: str, use_https: bool = False, + flatten_groups: bool = False, ) -> None: """Import repositories from a remote service. @@ -364,6 +374,8 @@ def import_repos( Color mode use_https : bool Use HTTPS clone URLs instead of SSH (default: False, i.e., SSH) + flatten_groups : bool + For GitLab org imports, flatten subgroup paths into base workspace """ output_mode = get_output_mode(output_json, output_ndjson) formatter = OutputFormatter(output_mode) @@ -421,7 +433,6 @@ def import_repos( workspace_path = pathlib.Path(workspace).expanduser().resolve() cwd = pathlib.Path.cwd() home = pathlib.Path.home() - workspace_label = workspace_root_label(workspace_path, cwd=cwd, home=home) # Resolve config file try: @@ -587,29 +598,60 @@ def import_repos( raw_config = {} # Add repositories to config - if workspace_label in raw_config and not isinstance( - raw_config[workspace_label], dict - ): - log.error( - "%s✗%s Workspace section '%s' is not a mapping in config", - Fore.RED, - Style.RESET_ALL, - workspace_label, - ) - return - - if workspace_label not in raw_config: - raw_config[workspace_label] = {} - + checked_labels: set[str] = set() added_count = 0 skipped_count = 0 for repo in repos: - if repo.name in raw_config[workspace_label]: + # Determine workspace for this repo + repo_workspace_path = workspace_path + + preserve_group_structure = ( 
+ normalized_service == "gitlab" + and options.mode == ImportMode.ORG + and not flatten_groups + ) + if preserve_group_structure and repo.owner.startswith(options.target): + # Check if it is a subdirectory + if repo.owner == options.target: + subpath = "" + elif repo.owner.startswith(options.target + "/"): + subpath = repo.owner[len(options.target) + 1 :] + else: + subpath = "" + + if subpath: + repo_workspace_path = workspace_path / subpath + + repo_workspace_label = workspace_root_label( + repo_workspace_path, cwd=cwd, home=home + ) + + if repo_workspace_label not in checked_labels: + if repo_workspace_label in raw_config and not isinstance( + raw_config[repo_workspace_label], dict + ): + log.error( + "%s✗%s Workspace section '%s' is not a mapping in config", + Fore.RED, + Style.RESET_ALL, + repo_workspace_label, + ) + checked_labels.add(repo_workspace_label) + + if repo_workspace_label in raw_config and not isinstance( + raw_config[repo_workspace_label], dict + ): + continue + + if repo_workspace_label not in raw_config: + raw_config[repo_workspace_label] = {} + + if repo.name in raw_config[repo_workspace_label]: skipped_count += 1 continue - raw_config[workspace_label][repo.name] = { + raw_config[repo_workspace_label][repo.name] = { "repo": repo.to_vcspull_url(use_ssh=not use_https), } added_count += 1 From fc6b152a8c7be9006fe0d45974548f84c01b2b8d Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 14 Feb 2026 10:28:08 -0600 Subject: [PATCH 045/109] test(cli/import[gitlab]) Cover nested and flatten group variants why: We need regression coverage for full namespace ownership and optional flattening semantics in GitLab org imports. 
what: - Add GitLab owner parsing tests for namespace.full_path and path_with_namespace fallback - Extend import CLI tests for --flatten-groups argument handling - Expand nested-group matrix to cover flatten flag and subdirectory workspace roots --- tests/_internal/remotes/test_gitlab.py | 83 ++++++++++ tests/cli/test_import_repos.py | 208 +++++++++++++++++++++++++ 2 files changed, 291 insertions(+) diff --git a/tests/_internal/remotes/test_gitlab.py b/tests/_internal/remotes/test_gitlab.py index d6ab0080c..c9aacd954 100644 --- a/tests/_internal/remotes/test_gitlab.py +++ b/tests/_internal/remotes/test_gitlab.py @@ -54,6 +54,89 @@ def test_gitlab_fetch_group( assert repos[0].name == "group-project" +def test_gitlab_owner_uses_namespace_full_path( + mock_urlopen: t.Callable[..., None], +) -> None: + """Test GitLab owner preserves full namespace path when available.""" + response_json = [ + { + "path": "group-project", + "name": "Group Project", + "path_with_namespace": ( + "vcs-python-group-test/vcs-python-subgroup-test/group-project" + ), + "http_url_to_repo": ( + "https://gitlab.com/vcs-python-group-test/" + "vcs-python-subgroup-test/group-project.git" + ), + "ssh_url_to_repo": ( + "git@gitlab.com:vcs-python-group-test/" + "vcs-python-subgroup-test/group-project.git" + ), + "web_url": ( + "https://gitlab.com/vcs-python-group-test/" + "vcs-python-subgroup-test/group-project" + ), + "description": "Group project", + "topics": [], + "star_count": 50, + "archived": False, + "default_branch": "main", + "namespace": { + "path": "vcs-python-subgroup-test", + "full_path": "vcs-python-group-test/vcs-python-subgroup-test", + }, + } + ] + mock_urlopen([(json.dumps(response_json).encode(), {}, 200)]) + importer = GitLabImporter() + options = ImportOptions(mode=ImportMode.ORG, target="vcs-python-group-test") + repos = list(importer.fetch_repos(options)) + assert len(repos) == 1 + assert repos[0].owner == "vcs-python-group-test/vcs-python-subgroup-test" + + +def 
test_gitlab_owner_falls_back_to_path_with_namespace( + mock_urlopen: t.Callable[..., None], +) -> None: + """Test owner derivation uses path_with_namespace when full_path is missing.""" + response_json = [ + { + "path": "group-project", + "name": "Group Project", + "path_with_namespace": ( + "vcs-python-group-test/vcs-python-subgroup-test/group-project" + ), + "http_url_to_repo": ( + "https://gitlab.com/vcs-python-group-test/" + "vcs-python-subgroup-test/group-project.git" + ), + "ssh_url_to_repo": ( + "git@gitlab.com:vcs-python-group-test/" + "vcs-python-subgroup-test/group-project.git" + ), + "web_url": ( + "https://gitlab.com/vcs-python-group-test/" + "vcs-python-subgroup-test/group-project" + ), + "description": "Group project", + "topics": [], + "star_count": 50, + "archived": False, + "default_branch": "main", + "namespace": { + "path": "vcs-python-subgroup-test", + }, + } + ] + mock_urlopen([(json.dumps(response_json).encode(), {}, 200)]) + importer = GitLabImporter() + options = ImportOptions(mode=ImportMode.ORG, target="vcs-python-group-test") + repos = list(importer.fetch_repos(options)) + assert len(repos) == 1 + assert repos[0].owner == "vcs-python-group-test/vcs-python-subgroup-test" + + def test_gitlab_search_requires_auth( monkeypatch: pytest.MonkeyPatch, ) -> None: diff --git a/tests/cli/test_import_repos.py b/tests/cli/test_import_repos.py index cdae4a8ec..0647856a7 100644 --- a/tests/cli/test_import_repos.py +++ b/tests/cli/test_import_repos.py @@ -26,6 +26,7 @@ _resolve_config_file, import_repos, ) +from vcspull.config import workspace_root_label # Get the actual module (not the function from __init__.py) import_repos_mod = sys.modules["vcspull.cli.import_repos"] @@ -1380,6 +1381,7 @@ def test_import_https_flag_via_cli(capsys: pytest.CaptureFixture[str]) -> None: ["import", "github", "testuser", "-w", "/tmp/repos", "--https"] ) assert args.use_https is True + assert args.flatten_groups is False def test_import_ssh_default_via_cli(capsys: 
pytest.CaptureFixture[str]) -> None: @@ -1389,6 +1391,20 @@ def test_import_ssh_default_via_cli(capsys: pytest.CaptureFixture[str]) -> None: parser = create_parser(return_subparsers=False) args = parser.parse_args(["import", "github", "testuser", "-w", "/tmp/repos"]) assert args.use_https is False + assert args.flatten_groups is False + + +def test_import_flatten_groups_flag_via_cli( + capsys: pytest.CaptureFixture[str], +) -> None: + """Test that --flatten-groups flag is recognized by the CLI parser.""" + from vcspull.cli import create_parser + + parser = create_parser(return_subparsers=False) + args = parser.parse_args( + ["import", "gitlab", "group/subgroup", "-w", "/tmp/repos", "--flatten-groups"] + ) + assert args.flatten_groups is True def test_import_repos_rejects_non_yaml_config( @@ -1498,3 +1514,195 @@ def fetch_repos( ) assert "not a valid YAML mapping" in caplog.text + + +class NestedGroupImportFixture(t.NamedTuple): + """Fixture for nested-group workspace persistence cases.""" + + test_id: str + target: str + mode: str + flatten_groups: bool + workspace_relpath: str + mock_repos: list[RemoteRepo] + expected_sections: dict[str, tuple[str, ...]] + + +NESTED_GROUP_IMPORT_FIXTURES: list[NestedGroupImportFixture] = [ + NestedGroupImportFixture( + test_id="comment-example-relative-subpaths", + target="a/b", + mode="org", + flatten_groups=False, + workspace_relpath="repos", + mock_repos=[ + _make_repo("h", owner="a/b"), + _make_repo("d", owner="a/b/c"), + _make_repo("e", owner="a/b/c"), + _make_repo("g", owner="a/b/f"), + ], + expected_sections={ + "": ("h",), + "c": ("d", "e"), + "f": ("g",), + }, + ), + NestedGroupImportFixture( + test_id="deep-nesting-under-target", + target="a/b", + mode="org", + flatten_groups=False, + workspace_relpath="repos", + mock_repos=[ + _make_repo("r1", owner="a/b/c/d"), + _make_repo("r2", owner="a/b/c/d/e"), + ], + expected_sections={ + "c/d": ("r1",), + "c/d/e": ("r2",), + }, + ), + NestedGroupImportFixture( + 
test_id="non-org-mode-no-subpathing", + target="a/b", + mode="user", + flatten_groups=False, + workspace_relpath="repos", + mock_repos=[ + _make_repo("h", owner="a/b"), + _make_repo("d", owner="a/b/c"), + _make_repo("g", owner="a/b/f"), + ], + expected_sections={ + "": ("h", "d", "g"), + }, + ), + NestedGroupImportFixture( + test_id="owner-outside-target-fallback-base", + target="a/b", + mode="org", + flatten_groups=False, + workspace_relpath="repos", + mock_repos=[ + _make_repo("inside", owner="a/b/c"), + _make_repo("outside", owner="z/y"), + ], + expected_sections={ + "c": ("inside",), + "": ("outside",), + }, + ), + NestedGroupImportFixture( + test_id="flatten-groups-flag-uses-single-workspace", + target="a/b", + mode="org", + flatten_groups=True, + workspace_relpath="repos", + mock_repos=[ + _make_repo("h", owner="a/b"), + _make_repo("d", owner="a/b/c"), + _make_repo("g", owner="a/b/f"), + ], + expected_sections={ + "": ("h", "d", "g"), + }, + ), + NestedGroupImportFixture( + test_id="workspace-subdirectory-root-is-supported", + target="a/b", + mode="org", + flatten_groups=False, + workspace_relpath="projects/python", + mock_repos=[ + _make_repo("h", owner="a/b"), + _make_repo("d", owner="a/b/c"), + ], + expected_sections={ + "": ("h",), + "c": ("d",), + }, + ), +] + + +@pytest.mark.parametrize( + list(NestedGroupImportFixture._fields), + NESTED_GROUP_IMPORT_FIXTURES, + ids=[fixture.test_id for fixture in NESTED_GROUP_IMPORT_FIXTURES], +) +def test_import_nested_groups( + test_id: str, + target: str, + mode: str, + flatten_groups: bool, + workspace_relpath: str, + mock_repos: list[RemoteRepo], + expected_sections: dict[str, tuple[str, ...]], + tmp_path: pathlib.Path, + monkeypatch: MonkeyPatch, + caplog: pytest.LogCaptureFixture, +) -> None: + """Test that nested groups are preserved in config.""" + import yaml + + del test_id + caplog.set_level(logging.INFO) + monkeypatch.setenv("HOME", str(tmp_path)) + + workspace = tmp_path / workspace_relpath + 
workspace.mkdir(parents=True) + config_file = tmp_path / ".vcspull.yaml" + + class MockImporter: + service_name = "GitLab" + + def fetch_repos(self, options: ImportOptions) -> t.Iterator[RemoteRepo]: + yield from mock_repos + + # Mock the importer factory so import_repos() exercises only workspace mapping. + monkeypatch.setattr( + import_repos_mod, + "_get_importer", + lambda *args, **kwargs: MockImporter(), + ) + + import_repos( + service="gitlab", + target=target, + workspace=str(workspace), + mode=mode, + base_url=None, + token=None, + region=None, + profile=None, + language=None, + topics=None, + min_stars=0, + include_archived=False, + include_forks=False, + limit=100, + config_path_str=str(config_file), + dry_run=False, + yes=True, + output_json=False, + output_ndjson=False, + color="never", + flatten_groups=flatten_groups, + ) + + assert config_file.exists() + with config_file.open() as f: + config = yaml.safe_load(f) + + cwd = pathlib.Path.cwd() + home = pathlib.Path.home() + expected_labels: dict[str, tuple[str, ...]] = {} + for subpath, repo_names in expected_sections.items(): + expected_path = workspace if not subpath else workspace / subpath + label = workspace_root_label(expected_path, cwd=cwd, home=home) + expected_labels[label] = repo_names + + assert set(config.keys()) == set(expected_labels.keys()) + for label, expected_repo_names in expected_labels.items(): + assert isinstance(config[label], dict) + assert set(config[label].keys()) == set(expected_repo_names) From 3ef1fe25b5133c469dfafc8434292844a7d65e96 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 14 Feb 2026 10:43:26 -0600 Subject: [PATCH 046/109] docs(CHANGES): Clarify GitLab subgroup workspace mapping why: Import behavior now preserves GitLab subgroup namespaces under workspace roots by default, with an explicit flatten override. 
what: - Add CHANGES entry describing default subgroup-to-workspace expansion - Document --flatten-groups override with usage example - Tighten import command description wording in cli/__init__.py --- CHANGES | 24 ++++++++++++++++++++++++ src/vcspull/cli/__init__.py | 2 ++ 2 files changed, 26 insertions(+) diff --git a/CHANGES b/CHANGES index 01080c5d3..f58d20a32 100644 --- a/CHANGES +++ b/CHANGES @@ -48,6 +48,30 @@ $ vcspull import github torvalds -w ~/repos/linux --mode user $ vcspull import github torvalds -w ~/repos/linux --mode user --https ``` +#### `vcspull import`: GitLab subgroups map to workspace roots (#510) + +For GitLab organization/group imports, subgroup namespaces are now preserved +under the selected workspace root by default. + +Example: + +```console +$ vcspull import gitlab vcs-python-group-test -w ~/projects/python --mode org +``` + +This writes repositories into workspace sections like: + +- `~/projects/python/` +- `~/projects/python/<subgroup>/` +- `~/projects/python/<subgroup>/<nested-subgroup>/` + +Use `--flatten-groups` to import all subgroup repositories into a single +workspace root: + +```console +$ vcspull import gitlab vcs-python-group-test -w ~/projects/python --mode org --flatten-groups +``` + #### `vcspull import` command for remote repository discovery (#510) Import repositories from GitHub, GitLab, Codeberg/Gitea/Forgejo, and AWS diff --git a/src/vcspull/cli/__init__.py b/src/vcspull/cli/__init__.py index 454263f14..45d231471 100644 --- a/src/vcspull/cli/__init__.py +++ b/src/vcspull/cli/__init__.py @@ -252,6 +252,8 @@ def build_description( or AWS CodeCommit and adds them to the vcspull configuration. For GitLab, you can specify subgroups using slash notation (e.g., parent/child). + In org mode, subgroup paths are preserved under the workspace root by + default; use --flatten-groups to collapse them into a single workspace.
""", ( ( From 1a40dc6bf795fee5a456743dd24fbfb1fb7a5de2 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 14 Feb 2026 12:12:07 -0600 Subject: [PATCH 047/109] test(remotes/gitlab[mock_data]) Use realistic namespace fields in subgroup tests why: The two subgroup tests had unrealistic mock data that put the full hierarchical path in namespace.path, causing them to exercise the last-resort fallback instead of the primary full_path code path. what: - Fix namespace mock in test_gitlab_subgroup_url_encoding: path="child", full_path="parent/child" - Fix namespace mock in test_gitlab_deeply_nested_subgroup: path="d", full_path="a/b/c/d" - Add owner assertions to verify end-to-end namespace derivation --- tests/_internal/remotes/test_gitlab.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/_internal/remotes/test_gitlab.py b/tests/_internal/remotes/test_gitlab.py index c9aacd954..087d55d2a 100644 --- a/tests/_internal/remotes/test_gitlab.py +++ b/tests/_internal/remotes/test_gitlab.py @@ -279,7 +279,7 @@ def test_gitlab_subgroup_url_encoding( "star_count": 10, "archived": False, "default_branch": "main", - "namespace": {"path": "parent/child"}, + "namespace": {"path": "child", "full_path": "parent/child"}, } ] @@ -306,6 +306,7 @@ def urlopen_capture( # Verify repos were returned assert len(repos) == 1 assert repos[0].name == "subgroup-project" + assert repos[0].owner == "parent/child" def test_gitlab_deeply_nested_subgroup( @@ -326,7 +327,7 @@ def test_gitlab_deeply_nested_subgroup( "star_count": 5, "archived": False, "default_branch": "main", - "namespace": {"path": "a/b/c/d"}, + "namespace": {"path": "d", "full_path": "a/b/c/d"}, } ] @@ -352,6 +353,7 @@ def urlopen_capture( assert len(repos) == 1 assert repos[0].name == "deep-project" + assert repos[0].owner == "a/b/c/d" def test_gitlab_handles_null_topics( From dbb603f8a4e3a3226a291bf5e975d6efa35a4222 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 14 Feb 2026 12:12:13 -0600 
Subject: [PATCH 048/109] test(remotes/gitlab[conftest]) Add full_path to fixture namespace mock why: The gitlab_user_projects_response fixture was missing full_path, inconsistent with real GitLab API responses and the newer test mocks. what: - Add full_path="testuser" to namespace dict in gitlab fixture --- tests/_internal/remotes/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/_internal/remotes/conftest.py b/tests/_internal/remotes/conftest.py index 0fd0cd9bf..5d94f55ef 100644 --- a/tests/_internal/remotes/conftest.py +++ b/tests/_internal/remotes/conftest.py @@ -189,7 +189,7 @@ def gitlab_user_projects_response() -> bytes: "star_count": 20, "archived": False, "default_branch": "main", - "namespace": {"path": "testuser"}, + "namespace": {"path": "testuser", "full_path": "testuser"}, }, ] ).encode() From 22e83052f57d8f50905db122eb87c6eb2ab959bc Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 14 Feb 2026 12:57:35 -0600 Subject: [PATCH 049/109] fix(remotes/github[_log_rate_limit]) Guard int() cast on rate-limit header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit why: HTTP headers are always strings, but proxies or malformed responses can return non-numeric values like "unlimited" — int() raises ValueError. 
what: - Wrap int(remaining) in try/except (ValueError, TypeError) and return early - Add parametrized tests for valid, non-numeric, and missing headers --- src/vcspull/_internal/remotes/github.py | 5 +- tests/_internal/remotes/test_github.py | 70 +++++++++++++++++++++++++ 2 files changed, 74 insertions(+), 1 deletion(-) diff --git a/src/vcspull/_internal/remotes/github.py b/src/vcspull/_internal/remotes/github.py index a191e34cf..6b08f5053 100644 --- a/src/vcspull/_internal/remotes/github.py +++ b/src/vcspull/_internal/remotes/github.py @@ -298,7 +298,10 @@ def _log_rate_limit(self, headers: dict[str, str]) -> None: limit = headers.get("x-ratelimit-limit") if remaining is not None and limit is not None: - remaining_int = int(remaining) + try: + remaining_int = int(remaining) + except (ValueError, TypeError): + return if remaining_int < 10: log.warning( "GitHub API rate limit low: %s/%s remaining", diff --git a/tests/_internal/remotes/test_github.py b/tests/_internal/remotes/test_github.py index b731f49d2..40771ce60 100644 --- a/tests/_internal/remotes/test_github.py +++ b/tests/_internal/remotes/test_github.py @@ -420,3 +420,73 @@ def test_github_limit_respected( options = ImportOptions(mode=ImportMode.USER, target="user", limit=3) repos = list(importer.fetch_repos(options)) assert len(repos) == 3 + + +class LogRateLimitFixture(t.NamedTuple): + """Fixture for _log_rate_limit test cases.""" + + test_id: str + headers: dict[str, str] + expected_log_level: str | None + expected_message_fragment: str | None + + +LOG_RATE_LIMIT_FIXTURES: list[LogRateLimitFixture] = [ + LogRateLimitFixture( + test_id="valid-headers-low-remaining", + headers={"x-ratelimit-remaining": "5", "x-ratelimit-limit": "60"}, + expected_log_level="warning", + expected_message_fragment="rate limit low", + ), + LogRateLimitFixture( + test_id="valid-headers-sufficient-remaining", + headers={"x-ratelimit-remaining": "50", "x-ratelimit-limit": "60"}, + expected_log_level="debug", + 
expected_message_fragment="rate limit", + ), + LogRateLimitFixture( + test_id="non-numeric-remaining-header", + headers={"x-ratelimit-remaining": "unlimited", "x-ratelimit-limit": "60"}, + expected_log_level=None, + expected_message_fragment=None, + ), + LogRateLimitFixture( + test_id="missing-remaining-header", + headers={"x-ratelimit-limit": "60"}, + expected_log_level=None, + expected_message_fragment=None, + ), + LogRateLimitFixture( + test_id="missing-both-headers", + headers={}, + expected_log_level=None, + expected_message_fragment=None, + ), +] + + +@pytest.mark.parametrize( + list(LogRateLimitFixture._fields), + LOG_RATE_LIMIT_FIXTURES, + ids=[f.test_id for f in LOG_RATE_LIMIT_FIXTURES], +) +def test_log_rate_limit( + test_id: str, + headers: dict[str, str], + expected_log_level: str | None, + expected_message_fragment: str | None, + caplog: pytest.LogCaptureFixture, +) -> None: + """Test _log_rate_limit handles various header scenarios.""" + import logging + + caplog.set_level(logging.DEBUG) + importer = GitHubImporter() + # Should not raise on any input + importer._log_rate_limit(headers) + + if expected_message_fragment is not None: + assert expected_message_fragment in caplog.text.lower() + else: + # No rate limit message should appear + assert "rate limit" not in caplog.text.lower() From 875c67f8dd7fc199fabd604e620f2f6f6ab2e76f Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 14 Feb 2026 12:58:10 -0600 Subject: [PATCH 050/109] fix(remotes/base[_handle_http_error]) Coerce message to str before .lower() why: JSON error responses can have non-string "message" values (dict, int); calling .lower() on those raises AttributeError. 
what: - Wrap error_data.get("message", exc) with str() on line 384 - Add parametrized tests for string, dict, int, and invalid-JSON bodies --- src/vcspull/_internal/remotes/base.py | 2 +- tests/_internal/remotes/test_base.py | 95 +++++++++++++++++++++++++++ 2 files changed, 96 insertions(+), 1 deletion(-) diff --git a/src/vcspull/_internal/remotes/base.py b/src/vcspull/_internal/remotes/base.py index e7d865769..f38b9b6c8 100644 --- a/src/vcspull/_internal/remotes/base.py +++ b/src/vcspull/_internal/remotes/base.py @@ -381,7 +381,7 @@ def _handle_http_error( try: body = exc.read().decode("utf-8") error_data = json.loads(body) - message = error_data.get("message", str(exc)) + message = str(error_data.get("message", exc)) except (json.JSONDecodeError, UnicodeDecodeError): message = str(exc) diff --git a/tests/_internal/remotes/test_base.py b/tests/_internal/remotes/test_base.py index d9456e4e9..91237a659 100644 --- a/tests/_internal/remotes/test_base.py +++ b/tests/_internal/remotes/test_base.py @@ -398,3 +398,98 @@ def test_import_mode_values() -> None: assert ImportMode.USER.value == "user" assert ImportMode.ORG.value == "org" assert ImportMode.SEARCH.value == "search" + + +class HandleHttpErrorFixture(t.NamedTuple): + """Fixture for HTTPClient._handle_http_error test cases.""" + + test_id: str + status_code: int + response_body: str + expected_error_type: str + expected_message_contains: str + + +HANDLE_HTTP_ERROR_FIXTURES: list[HandleHttpErrorFixture] = [ + HandleHttpErrorFixture( + test_id="string-message-401", + status_code=401, + response_body='{"message": "Bad credentials"}', + expected_error_type="AuthenticationError", + expected_message_contains="Bad credentials", + ), + HandleHttpErrorFixture( + test_id="dict-message-403", + status_code=403, + response_body='{"message": {"error": "forbidden"}}', + expected_error_type="AuthenticationError", + expected_message_contains="forbidden", + ), + HandleHttpErrorFixture( + test_id="int-message-404", + status_code=404, 
+ response_body='{"message": 42}', + expected_error_type="NotFoundError", + expected_message_contains="42", + ), + HandleHttpErrorFixture( + test_id="rate-limit-string-403", + status_code=403, + response_body='{"message": "API rate limit exceeded"}', + expected_error_type="RateLimitError", + expected_message_contains="rate limit", + ), + HandleHttpErrorFixture( + test_id="invalid-json-body-500", + status_code=500, + response_body="Server Error", + expected_error_type="ServiceUnavailableError", + expected_message_contains="service unavailable", + ), +] + + +@pytest.mark.parametrize( + list(HandleHttpErrorFixture._fields), + HANDLE_HTTP_ERROR_FIXTURES, + ids=[f.test_id for f in HANDLE_HTTP_ERROR_FIXTURES], +) +def test_handle_http_error( + test_id: str, + status_code: int, + response_body: str, + expected_error_type: str, + expected_message_contains: str, +) -> None: + """Test HTTPClient._handle_http_error with various response bodies.""" + import io + import urllib.error + + from vcspull._internal.remotes.base import ( + AuthenticationError, + HTTPClient, + NotFoundError, + RateLimitError, + ServiceUnavailableError, + ) + + error_classes = { + "AuthenticationError": AuthenticationError, + "RateLimitError": RateLimitError, + "NotFoundError": NotFoundError, + "ServiceUnavailableError": ServiceUnavailableError, + } + + client = HTTPClient("https://api.example.com") + exc = urllib.error.HTTPError( + url="https://api.example.com/test", + code=status_code, + msg="Error", + hdrs=None, # type: ignore[arg-type] + fp=io.BytesIO(response_body.encode()), + ) + + with pytest.raises(error_classes[expected_error_type], match="(?i).*") as exc_info: + client._handle_http_error(exc, "TestService") + + assert expected_message_contains.lower() in str(exc_info.value).lower() From de9236ea79392156753d9619b77d2c3500179b37 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 14 Feb 2026 12:58:56 -0600 Subject: [PATCH 051/109] fix(cli/import[language-warning]) Warn when --language used 
with GitLab/CodeCommit why: GitLab and CodeCommit APIs do not return language metadata, so --language silently filters out all repositories with no explanation. what: - Add warning log when language filter is set for gitlab/codecommit - Add parametrized tests for warning presence/absence per service --- src/vcspull/cli/import_repos.py | 10 +++ tests/cli/test_import_repos.py | 104 ++++++++++++++++++++++++++++++++ 2 files changed, 114 insertions(+) diff --git a/src/vcspull/cli/import_repos.py b/src/vcspull/cli/import_repos.py index e2814a95d..5371b3ec2 100644 --- a/src/vcspull/cli/import_repos.py +++ b/src/vcspull/cli/import_repos.py @@ -429,6 +429,16 @@ def import_repos( limit=limit, ) + # Warn if --language is used with services that don't return language info + if options.language and normalized_service in ("gitlab", "codecommit"): + log.warning( + "%s!%s %s does not return language metadata; " + "--language filter may exclude all results", + Fore.YELLOW, + Style.RESET_ALL, + importer.service_name, + ) + # Resolve workspace path workspace_path = pathlib.Path(workspace).expanduser().resolve() cwd = pathlib.Path.cwd() diff --git a/tests/cli/test_import_repos.py b/tests/cli/test_import_repos.py index 0647856a7..5aa4ce51a 100644 --- a/tests/cli/test_import_repos.py +++ b/tests/cli/test_import_repos.py @@ -1706,3 +1706,107 @@ def fetch_repos(self, options: ImportOptions) -> t.Iterator[RemoteRepo]: for label, expected_repo_names in expected_labels.items(): assert isinstance(config[label], dict) assert set(config[label].keys()) == set(expected_repo_names) + + +class LanguageWarningFixture(t.NamedTuple): + """Fixture for --language warning test cases.""" + + test_id: str + service: str + language: str | None + expect_warning: bool + + +LANGUAGE_WARNING_FIXTURES: list[LanguageWarningFixture] = [ + LanguageWarningFixture( + test_id="gitlab-with-language-warns", + service="gitlab", + language="Python", + expect_warning=True, + ), + LanguageWarningFixture( + 
test_id="codecommit-with-language-warns", + service="codecommit", + language="Python", + expect_warning=True, + ), + LanguageWarningFixture( + test_id="github-with-language-no-warning", + service="github", + language="Python", + expect_warning=False, + ), + LanguageWarningFixture( + test_id="gitlab-without-language-no-warning", + service="gitlab", + language=None, + expect_warning=False, + ), +] + + +@pytest.mark.parametrize( + list(LanguageWarningFixture._fields), + LANGUAGE_WARNING_FIXTURES, + ids=[f.test_id for f in LANGUAGE_WARNING_FIXTURES], +) +def test_import_repos_language_warning( + test_id: str, + service: str, + language: str | None, + expect_warning: bool, + tmp_path: pathlib.Path, + monkeypatch: MonkeyPatch, + caplog: pytest.LogCaptureFixture, +) -> None: + """Test that --language warns for services without language metadata.""" + caplog.set_level(logging.WARNING) + + monkeypatch.setenv("HOME", str(tmp_path)) + workspace = tmp_path / "repos" + workspace.mkdir() + + class MockImporter: + service_name = {"gitlab": "GitLab", "codecommit": "CodeCommit"}.get( + service, "GitHub" + ) + + def fetch_repos( + self, + options: ImportOptions, + ) -> t.Iterator[RemoteRepo]: + return iter([]) + + monkeypatch.setattr( + import_repos_mod, + "_get_importer", + lambda *args, **kwargs: MockImporter(), + ) + + import_repos( + service=service, + target="testuser" if service != "codecommit" else "", + workspace=str(workspace), + mode="user", + base_url=None, + token=None, + region="us-east-1" if service == "codecommit" else None, + profile=None, + language=language, + topics=None, + min_stars=0, + include_archived=False, + include_forks=False, + limit=100, + config_path_str=str(tmp_path / "config.yaml"), + dry_run=True, + yes=True, + output_json=False, + output_ndjson=False, + color="never", + ) + + if expect_warning: + assert "does not return language metadata" in caplog.text + else: + assert "does not return language metadata" not in caplog.text From 
5bf0e7ed5a0770801c3b6d10bddf76e8499c519d Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 14 Feb 2026 13:00:02 -0600 Subject: [PATCH 052/109] fix(cli/import[input-eof]) Handle EOFError and non-TTY stdin in confirmation why: input() raises EOFError when stdin is piped/closed and hangs in non-interactive contexts (CI, cron) where no TTY is attached. what: - Add sys.stdin.isatty() check before prompting, abort with message - Wrap input() in try/except EOFError, treat as abort - Add tests for EOFError and non-TTY scenarios --- src/vcspull/cli/import_repos.py | 19 ++++- tests/cli/test_import_repos.py | 126 +++++++++++++++++++++++++++++++- 2 files changed, 140 insertions(+), 5 deletions(-) diff --git a/src/vcspull/cli/import_repos.py b/src/vcspull/cli/import_repos.py index 5371b3ec2..b091aa25d 100644 --- a/src/vcspull/cli/import_repos.py +++ b/src/vcspull/cli/import_repos.py @@ -5,6 +5,7 @@ import argparse import logging import pathlib +import sys import typing as t from colorama import Fore, Style @@ -576,10 +577,20 @@ def import_repos( # Confirm with user if not yes and output_mode.value == "human": - confirm = input( - f"\n{Fore.CYAN}Import {len(repos)} repositories to " - f"{display_config_path}? [y/N]: {Style.RESET_ALL}", - ).lower() + if not sys.stdin.isatty(): + log.info( + "%s✗%s Non-interactive mode: use --yes to skip confirmation.", + Fore.RED, + Style.RESET_ALL, + ) + return + try: + confirm = input( + f"\n{Fore.CYAN}Import {len(repos)} repositories to " + f"{display_config_path}? 
[y/N]: {Style.RESET_ALL}", + ).lower() + except EOFError: + confirm = "" if confirm not in {"y", "yes"}: log.info("%s✗%s Aborted by user.", Fore.RED, Style.RESET_ALL) return diff --git a/tests/cli/test_import_repos.py b/tests/cli/test_import_repos.py index 5aa4ce51a..9f36db89c 100644 --- a/tests/cli/test_import_repos.py +++ b/tests/cli/test_import_repos.py @@ -688,8 +688,9 @@ def test_import_repos_user_abort( workspace.mkdir() config_file = tmp_path / ".vcspull.yaml" - # Mock user input + # Mock user input and ensure isatty returns True so we reach input() monkeypatch.setattr("builtins.input", lambda _: "n") + monkeypatch.setattr("sys.stdin", type("FakeTTY", (), {"isatty": lambda self: True})()) # Mock the importer class MockImporter: @@ -734,6 +735,129 @@ def fetch_repos( assert not config_file.exists() +def test_import_repos_eoferror_aborts( + tmp_path: pathlib.Path, + monkeypatch: MonkeyPatch, + caplog: pytest.LogCaptureFixture, +) -> None: + """Test import_repos aborts gracefully on EOFError from input().""" + caplog.set_level(logging.INFO) + + monkeypatch.setenv("HOME", str(tmp_path)) + workspace = tmp_path / "repos" + workspace.mkdir() + config_file = tmp_path / ".vcspull.yaml" + + # Mock input() to raise EOFError (e.g., piped stdin) + def raise_eof(_: str) -> str: + raise EOFError + + monkeypatch.setattr("builtins.input", raise_eof) + # Ensure isatty returns True so we reach input() + monkeypatch.setattr("sys.stdin", type("FakeTTY", (), {"isatty": lambda self: True})()) + + class MockImporter: + service_name = "MockService" + + def fetch_repos( + self, + options: ImportOptions, + ) -> t.Iterator[RemoteRepo]: + yield _make_repo("repo1") + + monkeypatch.setattr( + import_repos_mod, + "_get_importer", + lambda *args, **kwargs: MockImporter(), + ) + + import_repos( + service="github", + target="testuser", + workspace=str(workspace), + mode="user", + base_url=None, + token=None, + region=None, + profile=None, + language=None, + topics=None, + min_stars=0, + 
include_archived=False, + include_forks=False, + limit=100, + config_path_str=str(config_file), + dry_run=False, + yes=False, + output_json=False, + output_ndjson=False, + color="never", + ) + + assert "Aborted by user" in caplog.text + assert not config_file.exists() + + +def test_import_repos_non_tty_aborts( + tmp_path: pathlib.Path, + monkeypatch: MonkeyPatch, + caplog: pytest.LogCaptureFixture, +) -> None: + """Test import_repos aborts when stdin is not a TTY.""" + caplog.set_level(logging.INFO) + + monkeypatch.setenv("HOME", str(tmp_path)) + workspace = tmp_path / "repos" + workspace.mkdir() + config_file = tmp_path / ".vcspull.yaml" + + # Mock stdin.isatty() to return False + monkeypatch.setattr( + "sys.stdin", type("FakeNonTTY", (), {"isatty": lambda self: False})() + ) + + class MockImporter: + service_name = "MockService" + + def fetch_repos( + self, + options: ImportOptions, + ) -> t.Iterator[RemoteRepo]: + yield _make_repo("repo1") + + monkeypatch.setattr( + import_repos_mod, + "_get_importer", + lambda *args, **kwargs: MockImporter(), + ) + + import_repos( + service="github", + target="testuser", + workspace=str(workspace), + mode="user", + base_url=None, + token=None, + region=None, + profile=None, + language=None, + topics=None, + min_stars=0, + include_archived=False, + include_forks=False, + limit=100, + config_path_str=str(config_file), + dry_run=False, + yes=False, + output_json=False, + output_ndjson=False, + color="never", + ) + + assert "Non-interactive mode" in caplog.text + assert not config_file.exists() + + def test_import_repos_skips_existing( tmp_path: pathlib.Path, monkeypatch: MonkeyPatch, From 9f82b26d75e4db0cce71f7b98e4f563eb0ecbc76 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 14 Feb 2026 13:00:38 -0600 Subject: [PATCH 053/109] fix(remotes/github[_parse_repo]) Use .get() for required API fields why: data["name"], data["clone_url"], data["html_url"] raise KeyError on incomplete API responses; all other importers already use 
.get() defensively. what: - Change direct dict access to .get() with empty string defaults - Add test with incomplete API response (missing name, clone_url, html_url) --- src/vcspull/_internal/remotes/github.py | 6 ++-- tests/_internal/remotes/test_github.py | 41 +++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 3 deletions(-) diff --git a/src/vcspull/_internal/remotes/github.py b/src/vcspull/_internal/remotes/github.py index 6b08f5053..0e5298f35 100644 --- a/src/vcspull/_internal/remotes/github.py +++ b/src/vcspull/_internal/remotes/github.py @@ -272,10 +272,10 @@ def _parse_repo(self, data: dict[str, t.Any]) -> RemoteRepo: Parsed repository information """ return RemoteRepo( - name=data["name"], - clone_url=data["clone_url"], + name=data.get("name", ""), + clone_url=data.get("clone_url", ""), ssh_url=data.get("ssh_url", ""), - html_url=data["html_url"], + html_url=data.get("html_url", ""), description=data.get("description"), language=data.get("language"), topics=tuple(data.get("topics") or []), diff --git a/tests/_internal/remotes/test_github.py b/tests/_internal/remotes/test_github.py index 40771ce60..126d3aeb7 100644 --- a/tests/_internal/remotes/test_github.py +++ b/tests/_internal/remotes/test_github.py @@ -490,3 +490,44 @@ def test_log_rate_limit( else: # No rate limit message should appear assert "rate limit" not in caplog.text.lower() + + +def test_github_parse_repo_missing_keys( + mock_urlopen: t.Callable[..., None], +) -> None: + """Test GitHub _parse_repo handles incomplete API responses gracefully. + + GitHub API responses may lack keys like 'name', 'clone_url', or 'html_url' + in edge cases (partial responses, API changes). Using .get() with defaults + prevents KeyError crashes. 
+ """ + response_json = [ + { + # Missing: name, clone_url, html_url, ssh_url + "description": "Incomplete repo data", + "language": "Python", + "topics": ["test"], + "stargazers_count": 5, + "fork": False, + "archived": False, + "default_branch": "main", + "owner": {"login": "user"}, + } + ] + mock_urlopen( + [ + ( + json.dumps(response_json).encode(), + {"x-ratelimit-remaining": "100", "x-ratelimit-limit": "60"}, + 200, + ) + ] + ) + importer = GitHubImporter() + options = ImportOptions(mode=ImportMode.USER, target="user") + repos = list(importer.fetch_repos(options)) + assert len(repos) == 1 + assert repos[0].name == "" + assert repos[0].clone_url == "" + assert repos[0].html_url == "" + assert repos[0].ssh_url == "" From cb2870ff64bed89376374128359affe2fdae04e8 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 14 Feb 2026 13:01:21 -0600 Subject: [PATCH 054/109] fix(remotes/gitlab[_fetch_search]) Add archived=false param to search why: _fetch_search lacked the archived=false parameter that _paginate_repos already includes, causing search to return archived projects unexpectedly. 
what: - Add archived=false to search params when include_archived is False - Add tests verifying archived param presence/absence in search requests --- src/vcspull/_internal/remotes/gitlab.py | 3 + tests/_internal/remotes/test_gitlab.py | 86 +++++++++++++++++++++++++ 2 files changed, 89 insertions(+) diff --git a/src/vcspull/_internal/remotes/gitlab.py b/src/vcspull/_internal/remotes/gitlab.py index e735e4d3c..c523718ac 100644 --- a/src/vcspull/_internal/remotes/gitlab.py +++ b/src/vcspull/_internal/remotes/gitlab.py @@ -179,6 +179,9 @@ def _fetch_search(self, options: ImportOptions) -> t.Iterator[RemoteRepo]: "page": page, } + if not options.include_archived: + params["archived"] = "false" + data, _headers = self._client.get( endpoint, params=params, diff --git a/tests/_internal/remotes/test_gitlab.py b/tests/_internal/remotes/test_gitlab.py index 087d55d2a..90bf7a96a 100644 --- a/tests/_internal/remotes/test_gitlab.py +++ b/tests/_internal/remotes/test_gitlab.py @@ -473,3 +473,89 @@ def urlopen_capture( assert "archived=false" in captured_urls[0], ( f"Expected 'archived=false' in URL, got: {captured_urls[0]}" ) + + +def test_gitlab_search_archived_param_false_when_excluding( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Test that _fetch_search includes archived=false when excluding archived.""" + captured_urls: list[str] = [] + + search_response = [ + { + "path": "search-result", + "name": "Search Result", + "http_url_to_repo": "https://gitlab.com/user/search-result.git", + "ssh_url_to_repo": "git@gitlab.com:user/search-result.git", + "web_url": "https://gitlab.com/user/search-result", + "description": "Found", + "topics": [], + "star_count": 100, + "archived": False, + "default_branch": "main", + "namespace": {"path": "user"}, + } + ] + + def urlopen_capture( + request: urllib.request.Request, + timeout: int | None = None, + ) -> MockHTTPResponse: + captured_urls.append(request.full_url) + return MockHTTPResponse(json.dumps(search_response).encode()) + + 
monkeypatch.setattr("urllib.request.urlopen", urlopen_capture) + + importer = GitLabImporter(token="test-token") + options = ImportOptions( + mode=ImportMode.SEARCH, target="test", include_archived=False + ) + list(importer.fetch_repos(options)) + + assert len(captured_urls) == 1 + assert "archived=false" in captured_urls[0], ( + f"Expected 'archived=false' in search URL, got: {captured_urls[0]}" + ) + + +def test_gitlab_search_archived_param_omitted_when_including( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Test that _fetch_search omits archived param when including archived.""" + captured_urls: list[str] = [] + + search_response = [ + { + "path": "search-result", + "name": "Search Result", + "http_url_to_repo": "https://gitlab.com/user/search-result.git", + "ssh_url_to_repo": "git@gitlab.com:user/search-result.git", + "web_url": "https://gitlab.com/user/search-result", + "description": "Found", + "topics": [], + "star_count": 100, + "archived": False, + "default_branch": "main", + "namespace": {"path": "user"}, + } + ] + + def urlopen_capture( + request: urllib.request.Request, + timeout: int | None = None, + ) -> MockHTTPResponse: + captured_urls.append(request.full_url) + return MockHTTPResponse(json.dumps(search_response).encode()) + + monkeypatch.setattr("urllib.request.urlopen", urlopen_capture) + + importer = GitLabImporter(token="test-token") + options = ImportOptions( + mode=ImportMode.SEARCH, target="test", include_archived=True + ) + list(importer.fetch_repos(options)) + + assert len(captured_urls) == 1 + assert "archived=" not in captured_urls[0], ( + f"Expected no 'archived' param in search URL, got: {captured_urls[0]}" + ) From c297d0532475e22f78d8e5dcf667b6d3a681b622 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 14 Feb 2026 13:01:45 -0600 Subject: [PATCH 055/109] style(cli[__init__]) Use t.overload instead of from typing import overload why: Project convention requires import typing as t and namespace access. 
what: - Remove 'from typing import overload' import - Change @overload decorators to @t.overload --- src/vcspull/cli/__init__.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/vcspull/cli/__init__.py b/src/vcspull/cli/__init__.py index 45d231471..1a3d7f466 100644 --- a/src/vcspull/cli/__init__.py +++ b/src/vcspull/cli/__init__.py @@ -7,8 +7,6 @@ import pathlib import textwrap import typing as t -from typing import overload - from libvcs.__about__ import __version__ as libvcs_version from vcspull.__about__ import __version__ @@ -271,13 +269,13 @@ def build_description( ) -@overload +@t.overload def create_parser( return_subparsers: t.Literal[True], ) -> tuple[argparse.ArgumentParser, t.Any]: ... -@overload +@t.overload def create_parser(return_subparsers: t.Literal[False]) -> argparse.ArgumentParser: ... From 21cccacc89c9a4ed59dc7bec391b22e6233f0fc0 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 14 Feb 2026 13:02:35 -0600 Subject: [PATCH 056/109] fix(cli/import[codeberg-url]) Pass --url to Codeberg importer why: Codeberg hardcoded base_url="https://codeberg.org", ignoring the --url flag; self-hosted Codeberg instances couldn't be used. 
what: - Use base_url parameter with fallback to default Codeberg URL - Add tests verifying custom and default URL usage --- src/vcspull/cli/import_repos.py | 2 +- tests/cli/test_import_repos.py | 35 +++++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/src/vcspull/cli/import_repos.py b/src/vcspull/cli/import_repos.py index b091aa25d..da47e62dd 100644 --- a/src/vcspull/cli/import_repos.py +++ b/src/vcspull/cli/import_repos.py @@ -263,7 +263,7 @@ def _get_importer( return GitLabImporter(token=token, base_url=base_url) if normalized == "codeberg": - return GiteaImporter(token=token, base_url="https://codeberg.org") + return GiteaImporter(token=token, base_url=base_url or "https://codeberg.org") if normalized in ("gitea", "forgejo"): if not base_url: diff --git a/tests/cli/test_import_repos.py b/tests/cli/test_import_repos.py index 9f36db89c..b20c40035 100644 --- a/tests/cli/test_import_repos.py +++ b/tests/cli/test_import_repos.py @@ -132,6 +132,16 @@ class GetImporterFixture(t.NamedTuple): expected_type_name="GiteaImporter", expected_error=None, ), + GetImporterFixture( + test_id="codeberg-custom-url", + service="codeberg", + token=None, + base_url="https://my-codeberg-mirror.example.com", + region=None, + profile=None, + expected_type_name="GiteaImporter", + expected_error=None, + ), GetImporterFixture( test_id="gitea-with-url", service="gitea", @@ -261,6 +271,31 @@ def test_get_importer( assert type(importer).__name__ == expected_type_name +def test_codeberg_custom_url_used() -> None: + """Test that Codeberg importer uses custom base_url when provided.""" + importer = _get_importer( + "codeberg", + token=None, + base_url="https://my-codeberg.example.com", + region=None, + profile=None, + ) + # GiteaImporter stores base_url on its _client + assert importer._client.base_url == "https://my-codeberg.example.com/api/v1" + + +def test_codeberg_default_url_used() -> None: + """Test that Codeberg importer uses default URL when no 
base_url.""" + importer = _get_importer( + "codeberg", + token=None, + base_url=None, + region=None, + profile=None, + ) + assert importer._client.base_url == "https://codeberg.org/api/v1" + + def test_service_aliases_coverage() -> None: """Test that SERVICE_ALIASES covers expected services.""" expected_aliases = { From 4ce711fa4ce650fb068eed72932920cd8614aee8 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 14 Feb 2026 13:03:33 -0600 Subject: [PATCH 057/109] fix(remotes/base[ImportOptions]) Validate limit >= 1 in __post_init__ why: limit=0 or negative silently returns no repos (pagination loop never executes); fail-fast with ValueError is clearer. what: - Add __post_init__ validation raising ValueError if limit < 1 - Add parametrized tests for zero, negative, and valid limit values --- src/vcspull/_internal/remotes/base.py | 6 +++++ tests/_internal/remotes/test_base.py | 36 +++++++++++++++++++++++++++ 2 files changed, 42 insertions(+) diff --git a/src/vcspull/_internal/remotes/base.py b/src/vcspull/_internal/remotes/base.py index f38b9b6c8..a826b4ebd 100644 --- a/src/vcspull/_internal/remotes/base.py +++ b/src/vcspull/_internal/remotes/base.py @@ -234,6 +234,12 @@ class ImportOptions: min_stars: int = 0 limit: int = 100 + def __post_init__(self) -> None: + """Validate options after initialization.""" + if self.limit < 1: + msg = f"limit must be >= 1, got {self.limit}" + raise ValueError(msg) + class HTTPClient: """Simple HTTP client using urllib for making API requests.""" diff --git a/tests/_internal/remotes/test_base.py b/tests/_internal/remotes/test_base.py index 91237a659..b9120b332 100644 --- a/tests/_internal/remotes/test_base.py +++ b/tests/_internal/remotes/test_base.py @@ -400,6 +400,42 @@ def test_import_mode_values() -> None: assert ImportMode.SEARCH.value == "search" +class InvalidLimitFixture(t.NamedTuple): + """Fixture for invalid ImportOptions.limit test cases.""" + + test_id: str + limit: int + + +INVALID_LIMIT_FIXTURES: 
list[InvalidLimitFixture] = [ + InvalidLimitFixture(test_id="zero-limit", limit=0), + InvalidLimitFixture(test_id="negative-limit", limit=-1), + InvalidLimitFixture(test_id="large-negative-limit", limit=-100), +] + + +@pytest.mark.parametrize( + list(InvalidLimitFixture._fields), + INVALID_LIMIT_FIXTURES, + ids=[f.test_id for f in INVALID_LIMIT_FIXTURES], +) +def test_import_options_rejects_invalid_limit( + test_id: str, + limit: int, +) -> None: + """Test ImportOptions raises ValueError for limit < 1.""" + with pytest.raises(ValueError, match="limit must be >= 1"): + ImportOptions(limit=limit) + + +def test_import_options_accepts_valid_limit() -> None: + """Test ImportOptions accepts limit >= 1.""" + options = ImportOptions(limit=1) + assert options.limit == 1 + options = ImportOptions(limit=500) + assert options.limit == 500 + + class HandleHttpErrorFixture(t.NamedTuple): """Fixture for HTTPClient._handle_http_error test cases.""" From 640757f243e3dff1f6257c44df1d738b3fa11ff7 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 14 Feb 2026 13:05:03 -0600 Subject: [PATCH 058/109] fix(config[atomic-write]) Use temp-file-then-rename for config saves MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit why: write_text() is not atomic — a crash mid-write corrupts the config file; atomic rename guarantees the file is either fully written or untouched. 
what: - Add _atomic_write() helper using mkstemp + os.replace - Update save_config_yaml and save_config_yaml_with_items to use it - Add tests for atomic write and error-recovery behavior --- src/vcspull/config.py | 32 +++++++++++++++++++-- tests/test_config_writer.py | 56 +++++++++++++++++++++++++++++++++++-- 2 files changed, 84 insertions(+), 4 deletions(-) diff --git a/src/vcspull/config.py b/src/vcspull/config.py index 803f4eb3b..f5ac0c446 100644 --- a/src/vcspull/config.py +++ b/src/vcspull/config.py @@ -2,11 +2,13 @@ from __future__ import annotations +import contextlib import copy import fnmatch import logging import os import pathlib +import tempfile import typing as t from collections.abc import Callable @@ -460,6 +462,32 @@ def is_config_file( return any(filename.endswith(e) for e in extensions) +def _atomic_write(target: pathlib.Path, content: str) -> None: + """Write content to a file atomically via temp-file-then-rename. + + Parameters + ---------- + target : pathlib.Path + Destination file path + content : str + Content to write + """ + fd, tmp_path = tempfile.mkstemp( + dir=target.parent, + prefix=f".{target.name}.", + suffix=".tmp", + ) + try: + with os.fdopen(fd, "w", encoding="utf-8") as f: + f.write(content) + os.replace(tmp_path, target) + except BaseException: + # Clean up the temp file on any failure + with contextlib.suppress(OSError): + os.unlink(tmp_path) + raise + + def save_config_yaml(config_file_path: pathlib.Path, data: dict[t.Any, t.Any]) -> None: """Save configuration data to a YAML file. 
@@ -475,7 +503,7 @@ def save_config_yaml(config_file_path: pathlib.Path, data: dict[t.Any, t.Any]) - content=data, indent=2, ) - config_file_path.write_text(yaml_content, encoding="utf-8") + _atomic_write(config_file_path, yaml_content) def save_config_yaml_with_items( @@ -498,7 +526,7 @@ def save_config_yaml_with_items( if yaml_content: yaml_content += "\n" - config_file_path.write_text(yaml_content, encoding="utf-8") + _atomic_write(config_file_path, yaml_content) def merge_duplicate_workspace_root_entries( diff --git a/tests/test_config_writer.py b/tests/test_config_writer.py index fe4f547b4..2bbb6c520 100644 --- a/tests/test_config_writer.py +++ b/tests/test_config_writer.py @@ -1,13 +1,14 @@ -"""Tests for duplicate-preserving config writer utilities.""" +"""Tests for config writer utilities.""" from __future__ import annotations +import os import textwrap import typing as t import pytest -from vcspull.config import save_config_yaml_with_items +from vcspull.config import save_config_yaml, save_config_yaml_with_items if t.TYPE_CHECKING: import pathlib @@ -54,3 +55,54 @@ def test_save_config_yaml_with_items_preserves_duplicate_sections( yaml_text = config_path.read_text(encoding="utf-8") assert yaml_text == expected_yaml + + +def test_save_config_yaml_atomic_write( + tmp_path: pathlib.Path, +) -> None: + """Test that save_config_yaml uses atomic write (no temp files left).""" + config_path = tmp_path / ".vcspull.yaml" + data = {"~/code/": {"myrepo": {"repo": "git+https://example.com/repo.git"}}} + + save_config_yaml(config_path, data) + + # File should exist with correct content + assert config_path.exists() + content = config_path.read_text(encoding="utf-8") + assert "myrepo" in content + + # No temp files should be left in the directory + tmp_files = [f for f in tmp_path.iterdir() if f.name.startswith(".")] + assert tmp_files == [config_path] + + +def test_save_config_yaml_atomic_preserves_existing_on_error( + tmp_path: pathlib.Path, + monkeypatch: 
pytest.MonkeyPatch, +) -> None: + """Test that existing config is preserved if atomic write fails.""" + config_path = tmp_path / ".vcspull.yaml" + original_content = "~/code/:\n existing: {repo: git+https://example.com/repo.git}\n" + config_path.write_text(original_content, encoding="utf-8") + + # Mock os.replace to simulate a failure after temp file is written + original_replace = os.replace + + def failing_replace(src: str, dst: str) -> None: + # Remove the temp file to simulate cleanup + raise OSError("Simulated disk error") + + monkeypatch.setattr("os.replace", failing_replace) + + data = {"~/new/": {"newrepo": {"repo": "git+https://example.com/new.git"}}} + with pytest.raises(OSError, match="Simulated disk error"): + save_config_yaml(config_path, data) + + # Original file should be untouched + assert config_path.read_text(encoding="utf-8") == original_content + + # No temp files should remain + tmp_files = [ + f for f in tmp_path.iterdir() if f.name.startswith(".") and f != config_path + ] + assert tmp_files == [] From 183ef6bbb0280c09366a8ec04f71e33a823bb4da Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 14 Feb 2026 13:06:53 -0600 Subject: [PATCH 059/109] chore(lint) Fix ruff and mypy issues from review fixes why: Ruff flagged os.replace/os.unlink (use pathlib), unused variables, and string literal exceptions; mypy flagged union-attr on _client access. 
what: - Use pathlib.Path.replace/unlink instead of os.replace/os.unlink - Add isinstance narrowing for GiteaImporter in Codeberg URL tests - Remove unused variables and fix exception string patterns - Fix import sorting in cli/__init__.py --- src/vcspull/cli/__init__.py | 1 + src/vcspull/config.py | 4 ++-- tests/_internal/remotes/test_base.py | 2 +- tests/cli/test_import_repos.py | 15 ++++++++++++--- tests/test_config_writer.py | 18 ++++++++++-------- 5 files changed, 26 insertions(+), 14 deletions(-) diff --git a/src/vcspull/cli/__init__.py b/src/vcspull/cli/__init__.py index 1a3d7f466..f6e39bad2 100644 --- a/src/vcspull/cli/__init__.py +++ b/src/vcspull/cli/__init__.py @@ -7,6 +7,7 @@ import pathlib import textwrap import typing as t + from libvcs.__about__ import __version__ as libvcs_version from vcspull.__about__ import __version__ diff --git a/src/vcspull/config.py b/src/vcspull/config.py index f5ac0c446..88662f845 100644 --- a/src/vcspull/config.py +++ b/src/vcspull/config.py @@ -480,11 +480,11 @@ def _atomic_write(target: pathlib.Path, content: str) -> None: try: with os.fdopen(fd, "w", encoding="utf-8") as f: f.write(content) - os.replace(tmp_path, target) + pathlib.Path(tmp_path).replace(target) except BaseException: # Clean up the temp file on any failure with contextlib.suppress(OSError): - os.unlink(tmp_path) + pathlib.Path(tmp_path).unlink() raise diff --git a/tests/_internal/remotes/test_base.py b/tests/_internal/remotes/test_base.py index b9120b332..4ff8e8919 100644 --- a/tests/_internal/remotes/test_base.py +++ b/tests/_internal/remotes/test_base.py @@ -525,7 +525,7 @@ def test_handle_http_error( fp=io.BytesIO(response_body.encode()), ) - with pytest.raises(error_classes[expected_error_type], match="(?i).*") as exc_info: + with pytest.raises(error_classes[expected_error_type]) as exc_info: client._handle_http_error(exc, "TestService") assert expected_message_contains.lower() in str(exc_info.value).lower() diff --git a/tests/cli/test_import_repos.py 
b/tests/cli/test_import_repos.py index b20c40035..20832aa04 100644 --- a/tests/cli/test_import_repos.py +++ b/tests/cli/test_import_repos.py @@ -273,6 +273,8 @@ def test_get_importer( def test_codeberg_custom_url_used() -> None: """Test that Codeberg importer uses custom base_url when provided.""" + from vcspull._internal.remotes.gitea import GiteaImporter + importer = _get_importer( "codeberg", token=None, @@ -280,12 +282,14 @@ def test_codeberg_custom_url_used() -> None: region=None, profile=None, ) - # GiteaImporter stores base_url on its _client + assert isinstance(importer, GiteaImporter) assert importer._client.base_url == "https://my-codeberg.example.com/api/v1" def test_codeberg_default_url_used() -> None: """Test that Codeberg importer uses default URL when no base_url.""" + from vcspull._internal.remotes.gitea import GiteaImporter + importer = _get_importer( "codeberg", token=None, @@ -293,6 +297,7 @@ def test_codeberg_default_url_used() -> None: region=None, profile=None, ) + assert isinstance(importer, GiteaImporter) assert importer._client.base_url == "https://codeberg.org/api/v1" @@ -725,7 +730,9 @@ def test_import_repos_user_abort( # Mock user input and ensure isatty returns True so we reach input() monkeypatch.setattr("builtins.input", lambda _: "n") - monkeypatch.setattr("sys.stdin", type("FakeTTY", (), {"isatty": lambda self: True})()) + monkeypatch.setattr( + "sys.stdin", type("FakeTTY", (), {"isatty": lambda self: True})() + ) # Mock the importer class MockImporter: @@ -789,7 +796,9 @@ def raise_eof(_: str) -> str: monkeypatch.setattr("builtins.input", raise_eof) # Ensure isatty returns True so we reach input() - monkeypatch.setattr("sys.stdin", type("FakeTTY", (), {"isatty": lambda self: True})()) + monkeypatch.setattr( + "sys.stdin", type("FakeTTY", (), {"isatty": lambda self: True})() + ) class MockImporter: service_name = "MockService" diff --git a/tests/test_config_writer.py b/tests/test_config_writer.py index 2bbb6c520..21a2f2bfd 100644 
--- a/tests/test_config_writer.py +++ b/tests/test_config_writer.py @@ -2,7 +2,6 @@ from __future__ import annotations -import os import textwrap import typing as t @@ -82,17 +81,20 @@ def test_save_config_yaml_atomic_preserves_existing_on_error( ) -> None: """Test that existing config is preserved if atomic write fails.""" config_path = tmp_path / ".vcspull.yaml" - original_content = "~/code/:\n existing: {repo: git+https://example.com/repo.git}\n" + original_content = ( + "~/code/:\n existing: {repo: git+https://example.com/repo.git}\n" + ) config_path.write_text(original_content, encoding="utf-8") - # Mock os.replace to simulate a failure after temp file is written - original_replace = os.replace + # Mock Path.replace to simulate a failure after temp file is written + disk_error_msg = "Simulated disk error" - def failing_replace(src: str, dst: str) -> None: - # Remove the temp file to simulate cleanup - raise OSError("Simulated disk error") + import pathlib as _pathlib - monkeypatch.setattr("os.replace", failing_replace) + def failing_replace(self: _pathlib.Path, target: t.Any) -> _pathlib.Path: + raise OSError(disk_error_msg) + + monkeypatch.setattr(_pathlib.Path, "replace", failing_replace) data = {"~/new/": {"newrepo": {"repo": "git+https://example.com/new.git"}}} with pytest.raises(OSError, match="Simulated disk error"): From 37af8d540b82416fcedba453f8f06962e0ffbcfc Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 14 Feb 2026 13:54:04 -0600 Subject: [PATCH 060/109] fix(cli/_output[finalize]) Emit empty JSON array when buffer is empty why: OutputFormatter.finalize() silently emitted nothing when the JSON buffer was empty, producing invalid output for consumers expecting a JSON array (e.g. piping to jq). 
what: - Remove `and self._json_buffer` guard so `[]` is always written in JSON mode - Add test for empty-buffer finalization --- src/vcspull/cli/_output.py | 2 +- tests/cli/test_plan_output_helpers.py | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/vcspull/cli/_output.py b/src/vcspull/cli/_output.py index 11f257e3f..4e92fa0a3 100644 --- a/src/vcspull/cli/_output.py +++ b/src/vcspull/cli/_output.py @@ -184,7 +184,7 @@ def emit_text(self, text: str) -> None: def finalize(self) -> None: """Finalize output (flush JSON buffer if needed).""" - if self.mode == OutputMode.JSON and self._json_buffer: + if self.mode == OutputMode.JSON: sys.stdout.write(json.dumps(self._json_buffer, indent=2) + "\n") sys.stdout.flush() self._json_buffer.clear() diff --git a/tests/cli/test_plan_output_helpers.py b/tests/cli/test_plan_output_helpers.py index facbc89fc..c4f691a1e 100644 --- a/tests/cli/test_plan_output_helpers.py +++ b/tests/cli/test_plan_output_helpers.py @@ -159,6 +159,17 @@ def test_plan_summary_to_payload( assert "duration_ms" not in payload +def test_output_formatter_json_mode_empty_buffer_emits_empty_array() -> None: + """OutputFormatter should emit an empty JSON array when buffer has no items.""" + formatter = OutputFormatter(mode=OutputMode.JSON) + captured = io.StringIO() + with redirect_stdout(captured): + formatter.finalize() + + output = json.loads(captured.getvalue()) + assert output == [] + + def test_output_formatter_json_mode_finalises_buffer() -> None: """OutputFormatter should flush buffered JSON payloads on finalize.""" entry = PlanEntry( From 5a001e57629433c946088afdce3c4b716abcd05c Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 14 Feb 2026 13:54:51 -0600 Subject: [PATCH 061/109] fix(config[find_home_config_files]) Respect filetype filter parameter why: The filetype parameter was accepted but never used in the existence checks, causing .json files to be returned (and MultipleConfigWarning raised) even when only yaml 
was requested. what: - Gate yaml/json existence checks on whether each type is in filetype - Add tests for yaml-only, json-only, and default-raises scenarios --- src/vcspull/config.py | 7 +++++-- tests/test_config_file.py | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 2 deletions(-) diff --git a/src/vcspull/config.py b/src/vcspull/config.py index 88662f845..69ec6db36 100644 --- a/src/vcspull/config.py +++ b/src/vcspull/config.py @@ -154,10 +154,13 @@ def find_home_config_files( filetype = ["json", "yaml"] configs: list[pathlib.Path] = [] + check_yaml = "yaml" in filetype + check_json = "json" in filetype + yaml_config = pathlib.Path("~/.vcspull.yaml").expanduser() - has_yaml_config = yaml_config.exists() + has_yaml_config = check_yaml and yaml_config.exists() json_config = pathlib.Path("~/.vcspull.json").expanduser() - has_json_config = json_config.exists() + has_json_config = check_json and json_config.exists() if not has_yaml_config and not has_json_config: log.debug( diff --git a/tests/test_config_file.py b/tests/test_config_file.py index ed59ca3f9..6a30b1fa7 100644 --- a/tests/test_config_file.py +++ b/tests/test_config_file.py @@ -210,6 +210,40 @@ def test_multiple_config_files_raises_exception(tmp_path: pathlib.Path) -> None: config.find_home_config_files() +def test_find_home_config_files_filetype_yaml_only(tmp_path: pathlib.Path) -> None: + """When filetype=['yaml'], only .yaml is returned even if .json exists.""" + (tmp_path / ".vcspull.yaml").touch() + (tmp_path / ".vcspull.json").touch() + with EnvironmentVarGuard() as env: + env.set("HOME", str(tmp_path)) + # Should NOT raise MultipleConfigWarning because json is filtered out + results = config.find_home_config_files(filetype=["yaml"]) + assert len(results) == 1 + assert results[0].suffix == ".yaml" + + +def test_find_home_config_files_filetype_json_only(tmp_path: pathlib.Path) -> None: + """When filetype=['json'], only .json is returned even if .yaml exists.""" + (tmp_path / 
".vcspull.yaml").touch() + (tmp_path / ".vcspull.json").touch() + with EnvironmentVarGuard() as env: + env.set("HOME", str(tmp_path)) + results = config.find_home_config_files(filetype=["json"]) + assert len(results) == 1 + assert results[0].suffix == ".json" + + +def test_find_home_config_files_both_types_still_raises( + tmp_path: pathlib.Path, +) -> None: + """Default filetype still raises MultipleConfigWarning when both exist.""" + (tmp_path / ".vcspull.yaml").touch() + (tmp_path / ".vcspull.json").touch() + with EnvironmentVarGuard() as env, pytest.raises(exc.MultipleConfigWarning): + env.set("HOME", str(tmp_path)) + config.find_home_config_files() + + def test_in_dir( config_path: pathlib.Path, yaml_config: pathlib.Path, From ee0c301a2590608cb5366fe35730621b07beb033 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 14 Feb 2026 13:56:03 -0600 Subject: [PATCH 062/109] fix(cli/import[config-resolve]) Catch MultipleConfigWarning why: MultipleConfigWarning inherits from VCSPullException, not ValueError, so the except clause at _resolve_config_file missed it and the exception propagated as an unhandled crash. 
what: - Add MultipleConfigWarning to the except clause - Import MultipleConfigWarning from vcspull.exc - Add test that monkeypatches _resolve_config_file to raise the warning --- src/vcspull/cli/import_repos.py | 5 ++- tests/cli/test_import_repos.py | 65 +++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+), 2 deletions(-) diff --git a/src/vcspull/cli/import_repos.py b/src/vcspull/cli/import_repos.py index da47e62dd..d01253c1e 100644 --- a/src/vcspull/cli/import_repos.py +++ b/src/vcspull/cli/import_repos.py @@ -32,6 +32,7 @@ save_config_yaml, workspace_root_label, ) +from vcspull.exc import MultipleConfigWarning from ._colors import Colors, get_color_mode from ._output import OutputFormatter, get_output_mode @@ -448,8 +449,8 @@ def import_repos( # Resolve config file try: config_file_path = _resolve_config_file(config_path_str) - except ValueError as exc: - log.error("%s✗%s %s", Fore.RED, Style.RESET_ALL, exc) # noqa: TRY400 + except (ValueError, MultipleConfigWarning) as exc_: + log.error("%s✗%s %s", Fore.RED, Style.RESET_ALL, exc_) # noqa: TRY400 return display_config_path = str(PrivatePath(config_file_path)) diff --git a/tests/cli/test_import_repos.py b/tests/cli/test_import_repos.py index 20832aa04..9a301d20d 100644 --- a/tests/cli/test_import_repos.py +++ b/tests/cli/test_import_repos.py @@ -1628,6 +1628,71 @@ def fetch_repos( assert "Only YAML config files are supported" in caplog.text +def test_import_repos_catches_multiple_config_warning( + tmp_path: pathlib.Path, + monkeypatch: MonkeyPatch, + caplog: pytest.LogCaptureFixture, +) -> None: + """Test import_repos logs error instead of crashing on MultipleConfigWarning.""" + from vcspull.exc import MultipleConfigWarning + + caplog.set_level(logging.ERROR) + + monkeypatch.setenv("HOME", str(tmp_path)) + workspace = tmp_path / "repos" + workspace.mkdir() + + class MockImporter: + service_name = "MockService" + + def fetch_repos( + self, + options: ImportOptions, + ) -> t.Iterator[RemoteRepo]: + 
yield _make_repo("repo1") + + monkeypatch.setattr( + import_repos_mod, + "_get_importer", + lambda *args, **kwargs: MockImporter(), + ) + + # Mock _resolve_config_file to raise MultipleConfigWarning + def raise_multiple_config(_: str | None) -> pathlib.Path: + raise MultipleConfigWarning(MultipleConfigWarning.message) + + monkeypatch.setattr( + import_repos_mod, + "_resolve_config_file", + raise_multiple_config, + ) + + import_repos( + service="github", + target="testuser", + workspace=str(workspace), + mode="user", + base_url=None, + token=None, + region=None, + profile=None, + language=None, + topics=None, + min_stars=0, + include_archived=False, + include_forks=False, + limit=100, + config_path_str=None, + dry_run=False, + yes=True, + output_json=False, + output_ndjson=False, + color="never", + ) + + assert "Multiple configs" in caplog.text + + def test_import_repos_rejects_non_dict_config( tmp_path: pathlib.Path, monkeypatch: MonkeyPatch, From 0036f8e6a2dc0d4117a50d586f9d5a9b9f81f0fa Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 14 Feb 2026 13:56:48 -0600 Subject: [PATCH 063/109] fix(cli/import[ImportOptions]) Catch ValueError for invalid limit why: ImportOptions.__post_init__ validates limit >= 1 but the construction was outside any try/except, so --limit 0 produced an unhandled traceback. 
what: - Wrap ImportOptions construction in try/except ValueError - Log the error and return early on invalid options - Add test verifying error message for limit=0 --- src/vcspull/cli/import_repos.py | 28 +++++++++-------- tests/cli/test_import_repos.py | 53 +++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+), 12 deletions(-) diff --git a/src/vcspull/cli/import_repos.py b/src/vcspull/cli/import_repos.py index d01253c1e..1410ca79a 100644 --- a/src/vcspull/cli/import_repos.py +++ b/src/vcspull/cli/import_repos.py @@ -418,18 +418,22 @@ def import_repos( else [] ) - options = ImportOptions( - mode=import_mode, - target=target, - base_url=base_url, - token=token, - include_forks=include_forks, - include_archived=include_archived, - language=language, - topics=topic_list, - min_stars=min_stars, - limit=limit, - ) + try: + options = ImportOptions( + mode=import_mode, + target=target, + base_url=base_url, + token=token, + include_forks=include_forks, + include_archived=include_archived, + language=language, + topics=topic_list, + min_stars=min_stars, + limit=limit, + ) + except ValueError as exc_: + log.error("%s✗%s %s", Fore.RED, Style.RESET_ALL, exc_) # noqa: TRY400 + return # Warn if --language is used with services that don't return language info if options.language and normalized_service in ("gitlab", "codecommit"): diff --git a/tests/cli/test_import_repos.py b/tests/cli/test_import_repos.py index 9a301d20d..53ea4b399 100644 --- a/tests/cli/test_import_repos.py +++ b/tests/cli/test_import_repos.py @@ -1693,6 +1693,59 @@ def raise_multiple_config(_: str | None) -> pathlib.Path: assert "Multiple configs" in caplog.text +def test_import_repos_invalid_limit( + tmp_path: pathlib.Path, + monkeypatch: MonkeyPatch, + caplog: pytest.LogCaptureFixture, +) -> None: + """Test import_repos logs error for invalid limit (e.g. 
0).""" + caplog.set_level(logging.ERROR) + + monkeypatch.setenv("HOME", str(tmp_path)) + workspace = tmp_path / "repos" + workspace.mkdir() + + class MockImporter: + service_name = "MockService" + + def fetch_repos( + self, + options: ImportOptions, + ) -> t.Iterator[RemoteRepo]: + yield _make_repo("repo1") + + monkeypatch.setattr( + import_repos_mod, + "_get_importer", + lambda *args, **kwargs: MockImporter(), + ) + + import_repos( + service="github", + target="testuser", + workspace=str(workspace), + mode="user", + base_url=None, + token=None, + region=None, + profile=None, + language=None, + topics=None, + min_stars=0, + include_archived=False, + include_forks=False, + limit=0, + config_path_str=str(tmp_path / "config.yaml"), + dry_run=False, + yes=True, + output_json=False, + output_ndjson=False, + color="never", + ) + + assert "limit must be >= 1" in caplog.text + + def test_import_repos_rejects_non_dict_config( tmp_path: pathlib.Path, monkeypatch: MonkeyPatch, From 9a48f563beb879659b688cb3c31f8ecebe10757f Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 14 Feb 2026 13:58:53 -0600 Subject: [PATCH 064/109] style(cli/import[colors]) Route color output through Colors helper why: The Colors helper was instantiated but only used 2 times; 13+ raw Fore/Style references bypassed --color=never and NO_COLOR support. 
what: - Replace all direct Fore.*/Style.RESET_ALL with colors.*() calls - Remove unused `from colorama import Fore, Style` import --- src/vcspull/cli/import_repos.py | 139 ++++++++++++-------------------- 1 file changed, 52 insertions(+), 87 deletions(-) diff --git a/src/vcspull/cli/import_repos.py b/src/vcspull/cli/import_repos.py index 1410ca79a..53ed00ae9 100644 --- a/src/vcspull/cli/import_repos.py +++ b/src/vcspull/cli/import_repos.py @@ -8,8 +8,6 @@ import sys import typing as t -from colorama import Fore, Style - from vcspull._internal.private_path import PrivatePath from vcspull._internal.remotes import ( AuthenticationError, @@ -393,19 +391,18 @@ def import_repos( profile=profile, ) except ValueError as exc: - log.error("%s✗%s %s", Fore.RED, Style.RESET_ALL, exc) # noqa: TRY400 + log.error("%s %s", colors.error("✗"), exc) # noqa: TRY400 return except DependencyError as exc: - log.error("%s✗%s %s", Fore.RED, Style.RESET_ALL, exc) # noqa: TRY400 + log.error("%s %s", colors.error("✗"), exc) # noqa: TRY400 return # Validate target for non-CodeCommit services normalized_service = SERVICE_ALIASES.get(service.lower(), service.lower()) if normalized_service != "codecommit" and not target: log.error( - "%s✗%s TARGET is required for %s", - Fore.RED, - Style.RESET_ALL, + "%s TARGET is required for %s", + colors.error("✗"), service, ) return @@ -432,16 +429,15 @@ def import_repos( limit=limit, ) except ValueError as exc_: - log.error("%s✗%s %s", Fore.RED, Style.RESET_ALL, exc_) # noqa: TRY400 + log.error("%s %s", colors.error("✗"), exc_) # noqa: TRY400 return # Warn if --language is used with services that don't return language info if options.language and normalized_service in ("gitlab", "codecommit"): log.warning( - "%s!%s %s does not return language metadata; " + "%s %s does not return language metadata; " "--language filter may exclude all results", - Fore.YELLOW, - Style.RESET_ALL, + colors.warning("!"), importer.service_name, ) @@ -454,19 +450,16 @@ def 
import_repos( try: config_file_path = _resolve_config_file(config_path_str) except (ValueError, MultipleConfigWarning) as exc_: - log.error("%s✗%s %s", Fore.RED, Style.RESET_ALL, exc_) # noqa: TRY400 + log.error("%s %s", colors.error("✗"), exc_) # noqa: TRY400 return display_config_path = str(PrivatePath(config_file_path)) # Fetch repositories if output_mode.value == "human": log.info( - "%s→%s Fetching repositories from %s%s%s...", - Fore.CYAN, - Style.RESET_ALL, - Fore.MAGENTA, - importer.service_name, - Style.RESET_ALL, + "%s Fetching repositories from %s...", + colors.info("→"), + colors.highlight(importer.service_name), ) repos: list[RemoteRepo] = [] @@ -480,65 +473,58 @@ def import_repos( # Log progress for human output if output_mode.value == "human" and len(repos) % 10 == 0: log.info( - "%s•%s Fetched %s%d%s repositories...", - Fore.BLUE, - Style.RESET_ALL, - Fore.CYAN, - len(repos), - Style.RESET_ALL, + "%s Fetched %s repositories...", + colors.muted("•"), + colors.info(str(len(repos))), ) except AuthenticationError as exc: log.error( # noqa: TRY400 - "%s✗%s Authentication error: %s", Fore.RED, Style.RESET_ALL, exc + "%s Authentication error: %s", colors.error("✗"), exc ) formatter.finalize() return except RateLimitError as exc: log.error( # noqa: TRY400 - "%s✗%s Rate limit exceeded: %s", Fore.RED, Style.RESET_ALL, exc + "%s Rate limit exceeded: %s", colors.error("✗"), exc ) formatter.finalize() return except NotFoundError as exc: - log.error("%s✗%s Not found: %s", Fore.RED, Style.RESET_ALL, exc) # noqa: TRY400 + log.error("%s Not found: %s", colors.error("✗"), exc) # noqa: TRY400 formatter.finalize() return except ServiceUnavailableError as exc: log.error( # noqa: TRY400 - "%s✗%s Service unavailable: %s", Fore.RED, Style.RESET_ALL, exc + "%s Service unavailable: %s", colors.error("✗"), exc ) formatter.finalize() return except ConfigurationError as exc: log.error( # noqa: TRY400 - "%s✗%s Configuration error: %s", Fore.RED, Style.RESET_ALL, exc + "%s 
Configuration error: %s", colors.error("✗"), exc ) formatter.finalize() return except RemoteImportError as exc: - log.error("%s✗%s Error: %s", Fore.RED, Style.RESET_ALL, exc) # noqa: TRY400 + log.error("%s Error: %s", colors.error("✗"), exc) # noqa: TRY400 formatter.finalize() return if not repos: if output_mode.value == "human": log.info( - "%s!%s No repositories found matching criteria.", - Fore.YELLOW, - Style.RESET_ALL, + "%s No repositories found matching criteria.", + colors.warning("!"), ) formatter.finalize() return if output_mode.value == "human": log.info( - "\n%s✓%s Found %s%d%s repositories", - Fore.GREEN, - Style.RESET_ALL, - Fore.CYAN, - len(repos), - Style.RESET_ALL, + "\n%s Found %s repositories", + colors.success("✓"), + colors.info(str(len(repos))), ) # Show preview in human mode @@ -547,23 +533,17 @@ def import_repos( stars_str = f" ★{repo.stars}" if repo.stars > 0 else "" lang_str = f" [{repo.language}]" if repo.language else "" log.info( - " %s+%s %s%s%s%s%s", - Fore.GREEN, - Style.RESET_ALL, - Fore.CYAN, - repo.name, - Style.RESET_ALL, + " %s %s%s%s", + colors.success("+"), + colors.info(repo.name), colors.muted(lang_str), colors.muted(stars_str), ) if len(repos) > 10: log.info( - " %s...%s and %s%d%s more", - Fore.BLUE, - Style.RESET_ALL, - Fore.CYAN, - len(repos) - 10, - Style.RESET_ALL, + " %s and %s more", + colors.muted("..."), + colors.info(str(len(repos) - 10)), ) formatter.finalize() @@ -571,12 +551,9 @@ def import_repos( # Handle dry-run if dry_run: log.info( - "\n%s→%s Dry run complete. Would write to %s%s%s", - Fore.YELLOW, - Style.RESET_ALL, - Fore.BLUE, - display_config_path, - Style.RESET_ALL, + "\n%s Dry run complete. 
Would write to %s", + colors.warning("→"), + colors.muted(display_config_path), ) return @@ -584,20 +561,19 @@ def import_repos( if not yes and output_mode.value == "human": if not sys.stdin.isatty(): log.info( - "%s✗%s Non-interactive mode: use --yes to skip confirmation.", - Fore.RED, - Style.RESET_ALL, + "%s Non-interactive mode: use --yes to skip confirmation.", + colors.error("✗"), ) return try: confirm = input( - f"\n{Fore.CYAN}Import {len(repos)} repositories to " - f"{display_config_path}? [y/N]: {Style.RESET_ALL}", + f"\n{colors.info('Import')} {len(repos)} repositories to " + f"{display_config_path}? [y/N]: ", ).lower() except EOFError: confirm = "" if confirm not in {"y", "yes"}: - log.info("%s✗%s Aborted by user.", Fore.RED, Style.RESET_ALL) + log.info("%s Aborted by user.", colors.error("✗")) return # Load existing config or create new @@ -614,9 +590,8 @@ def import_repos( if not isinstance(raw_config, dict): log.error( - "%s✗%s Config file is not a valid YAML mapping: %s", - Fore.RED, - Style.RESET_ALL, + "%s Config file is not a valid YAML mapping: %s", + colors.error("✗"), display_config_path, ) return @@ -658,9 +633,8 @@ def import_repos( raw_config[repo_workspace_label], dict ): log.error( - "%s✗%s Workspace section '%s' is not a mapping in config", - Fore.RED, - Style.RESET_ALL, + "%s Workspace section '%s' is not a mapping in config", + colors.error("✗"), repo_workspace_label, ) checked_labels.add(repo_workspace_label) @@ -684,9 +658,8 @@ def import_repos( if added_count == 0: log.info( - "%s✓%s All repositories already exist in config. Nothing to add.", - Fore.GREEN, - Style.RESET_ALL, + "%s All repositories already exist in config. 
Nothing to add.", + colors.success("✓"), ) return @@ -694,24 +667,16 @@ def import_repos( try: save_config_yaml(config_file_path, raw_config) log.info( - "%s✓%s Added %s%d%s repositories to %s%s%s", - Fore.GREEN, - Style.RESET_ALL, - Fore.CYAN, - added_count, - Style.RESET_ALL, - Fore.BLUE, - display_config_path, - Style.RESET_ALL, + "%s Added %s repositories to %s", + colors.success("✓"), + colors.info(str(added_count)), + colors.muted(display_config_path), ) if skipped_count > 0: log.info( - "%s!%s Skipped %s%d%s existing repositories", - Fore.YELLOW, - Style.RESET_ALL, - Fore.CYAN, - skipped_count, - Style.RESET_ALL, + "%s Skipped %s existing repositories", + colors.warning("!"), + colors.info(str(skipped_count)), ) except OSError: log.exception("Error saving config to %s", display_config_path) From 6febb654e8101282ee25a31c00def5bd246c4a26 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 14 Feb 2026 14:01:31 -0600 Subject: [PATCH 065/109] fix(cli[import-exit-code]) Return non-zero exit code on import errors why: All error paths returned None and the CLI dispatcher ignored the return value, so `vcspull import unknownservice` exited with code 0. 
what: - Change import_repos return type to int (0=success, 1=error) - Return 1 on all error paths, 0 on success/abort/no-results - Wire up SystemExit in cli dispatcher when result is non-zero - Add tests for both error (non-zero) and success (zero) returns --- src/vcspull/cli/__init__.py | 4 +- src/vcspull/cli/import_repos.py | 46 ++++++++++------- tests/cli/test_import_repos.py | 91 +++++++++++++++++++++++++++++++++ 3 files changed, 121 insertions(+), 20 deletions(-) diff --git a/src/vcspull/cli/__init__.py b/src/vcspull/cli/__init__.py index f6e39bad2..ba5886320 100644 --- a/src/vcspull/cli/__init__.py +++ b/src/vcspull/cli/__init__.py @@ -504,7 +504,7 @@ def cli(_args: list[str] | None = None) -> None: if args.service is None or args.workspace is None: _import_parser.print_help() return - import_repos( + result = import_repos( service=args.service, target=args.target, workspace=args.workspace, @@ -528,3 +528,5 @@ def cli(_args: list[str] | None = None) -> None: use_https=getattr(args, "use_https", False), flatten_groups=getattr(args, "flatten_groups", False), ) + if result: + raise SystemExit(result) diff --git a/src/vcspull/cli/import_repos.py b/src/vcspull/cli/import_repos.py index 53ed00ae9..f84e7ac5a 100644 --- a/src/vcspull/cli/import_repos.py +++ b/src/vcspull/cli/import_repos.py @@ -327,7 +327,7 @@ def import_repos( color: str, use_https: bool = False, flatten_groups: bool = False, -) -> None: +) -> int: """Import repositories from a remote service. 
Parameters @@ -376,6 +376,11 @@ def import_repos( Use HTTPS clone URLs instead of SSH (default: False, i.e., SSH) flatten_groups : bool For GitLab org imports, flatten subgroup paths into base workspace + + Returns + ------- + int + 0 on success, 1 on error """ output_mode = get_output_mode(output_json, output_ndjson) formatter = OutputFormatter(output_mode) @@ -392,10 +397,10 @@ def import_repos( ) except ValueError as exc: log.error("%s %s", colors.error("✗"), exc) # noqa: TRY400 - return + return 1 except DependencyError as exc: log.error("%s %s", colors.error("✗"), exc) # noqa: TRY400 - return + return 1 # Validate target for non-CodeCommit services normalized_service = SERVICE_ALIASES.get(service.lower(), service.lower()) @@ -405,7 +410,7 @@ def import_repos( colors.error("✗"), service, ) - return + return 1 # Build import options import_mode = ImportMode(mode) @@ -430,7 +435,7 @@ def import_repos( ) except ValueError as exc_: log.error("%s %s", colors.error("✗"), exc_) # noqa: TRY400 - return + return 1 # Warn if --language is used with services that don't return language info if options.language and normalized_service in ("gitlab", "codecommit"): @@ -451,7 +456,7 @@ def import_repos( config_file_path = _resolve_config_file(config_path_str) except (ValueError, MultipleConfigWarning) as exc_: log.error("%s %s", colors.error("✗"), exc_) # noqa: TRY400 - return + return 1 display_config_path = str(PrivatePath(config_file_path)) # Fetch repositories @@ -483,33 +488,33 @@ def import_repos( "%s Authentication error: %s", colors.error("✗"), exc ) formatter.finalize() - return + return 1 except RateLimitError as exc: log.error( # noqa: TRY400 "%s Rate limit exceeded: %s", colors.error("✗"), exc ) formatter.finalize() - return + return 1 except NotFoundError as exc: log.error("%s Not found: %s", colors.error("✗"), exc) # noqa: TRY400 formatter.finalize() - return + return 1 except ServiceUnavailableError as exc: log.error( # noqa: TRY400 "%s Service unavailable: %s", 
colors.error("✗"), exc ) formatter.finalize() - return + return 1 except ConfigurationError as exc: log.error( # noqa: TRY400 "%s Configuration error: %s", colors.error("✗"), exc ) formatter.finalize() - return + return 1 except RemoteImportError as exc: log.error("%s Error: %s", colors.error("✗"), exc) # noqa: TRY400 formatter.finalize() - return + return 1 if not repos: if output_mode.value == "human": @@ -518,7 +523,7 @@ def import_repos( colors.warning("!"), ) formatter.finalize() - return + return 0 if output_mode.value == "human": log.info( @@ -555,7 +560,7 @@ def import_repos( colors.warning("→"), colors.muted(display_config_path), ) - return + return 0 # Confirm with user if not yes and output_mode.value == "human": @@ -564,7 +569,7 @@ def import_repos( "%s Non-interactive mode: use --yes to skip confirmation.", colors.error("✗"), ) - return + return 0 try: confirm = input( f"\n{colors.info('Import')} {len(repos)} repositories to " @@ -574,7 +579,7 @@ def import_repos( confirm = "" if confirm not in {"y", "yes"}: log.info("%s Aborted by user.", colors.error("✗")) - return + return 0 # Load existing config or create new raw_config: dict[str, t.Any] @@ -586,7 +591,7 @@ def import_repos( raw_config = yaml.safe_load(f) or {} except (yaml.YAMLError, OSError): log.exception("Error loading config file") - return + return 1 if not isinstance(raw_config, dict): log.error( @@ -594,7 +599,7 @@ def import_repos( colors.error("✗"), display_config_path, ) - return + return 1 else: raw_config = {} @@ -661,7 +666,7 @@ def import_repos( "%s All repositories already exist in config. 
Nothing to add.", colors.success("✓"), ) - return + return 0 # Save config try: @@ -680,3 +685,6 @@ def import_repos( ) except OSError: log.exception("Error saving config to %s", display_config_path) + return 1 + + return 0 diff --git a/tests/cli/test_import_repos.py b/tests/cli/test_import_repos.py index 53ea4b399..9ed6151d2 100644 --- a/tests/cli/test_import_repos.py +++ b/tests/cli/test_import_repos.py @@ -1746,6 +1746,97 @@ def fetch_repos( assert "limit must be >= 1" in caplog.text +def test_import_repos_returns_nonzero_on_error( + tmp_path: pathlib.Path, + monkeypatch: MonkeyPatch, + caplog: pytest.LogCaptureFixture, +) -> None: + """Test import_repos returns non-zero exit code on error.""" + caplog.set_level(logging.ERROR) + + monkeypatch.setenv("HOME", str(tmp_path)) + workspace = tmp_path / "repos" + workspace.mkdir() + + result = import_repos( + service="unknownservice", + target="testuser", + workspace=str(workspace), + mode="user", + base_url=None, + token=None, + region=None, + profile=None, + language=None, + topics=None, + min_stars=0, + include_archived=False, + include_forks=False, + limit=100, + config_path_str=str(tmp_path / "config.yaml"), + dry_run=False, + yes=True, + output_json=False, + output_ndjson=False, + color="never", + ) + + assert result != 0 + + +def test_import_repos_returns_zero_on_success( + tmp_path: pathlib.Path, + monkeypatch: MonkeyPatch, + caplog: pytest.LogCaptureFixture, +) -> None: + """Test import_repos returns 0 on success.""" + caplog.set_level(logging.INFO) + + monkeypatch.setenv("HOME", str(tmp_path)) + workspace = tmp_path / "repos" + workspace.mkdir() + + class MockImporter: + service_name = "MockService" + + def fetch_repos( + self, + options: ImportOptions, + ) -> t.Iterator[RemoteRepo]: + yield _make_repo("repo1") + + monkeypatch.setattr( + import_repos_mod, + "_get_importer", + lambda *args, **kwargs: MockImporter(), + ) + + result = import_repos( + service="github", + target="testuser", + 
workspace=str(workspace), + mode="user", + base_url=None, + token=None, + region=None, + profile=None, + language=None, + topics=None, + min_stars=0, + include_archived=False, + include_forks=False, + limit=100, + config_path_str=str(tmp_path / "config.yaml"), + dry_run=False, + yes=True, + output_json=False, + output_ndjson=False, + color="never", + ) + + assert result == 0 + + def test_import_repos_rejects_non_dict_config( tmp_path: pathlib.Path, monkeypatch: MonkeyPatch, From 4c2350602309d6605fc486f91fa07b9423dd6ac3 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 14 Feb 2026 14:02:24 -0600 Subject: [PATCH 066/109] fix(remotes[codecommit-pagination]) Handle nextToken in list-repositories why: The code made a single list-repositories call without handling the nextToken response field, so AWS CLI v1 users would silently lose repositories beyond the first page. what: - Replace single call with pagination loop that follows nextToken - Add test_fetch_repos_pagination verifying two pages are consumed --- src/vcspull/_internal/remotes/codecommit.py | 15 ++++-- tests/_internal/remotes/test_codecommit.py | 56 +++++++++++++++++++++ 2 files changed, 68 insertions(+), 3 deletions(-) diff --git a/src/vcspull/_internal/remotes/codecommit.py b/src/vcspull/_internal/remotes/codecommit.py index 380c62208..ba22856c0 100644 --- a/src/vcspull/_internal/remotes/codecommit.py +++ b/src/vcspull/_internal/remotes/codecommit.py @@ -196,9 +196,18 @@ def fetch_repos(self, options: ImportOptions) -> t.Iterator[RemoteRepo]: DependencyError When AWS CLI is not installed """ - # List all repositories - data = self._run_aws_command("codecommit", "list-repositories") - repositories = data.get("repositories", []) + # List all repositories (paginate over nextToken) + repositories: list[dict[str, t.Any]] = [] + next_token: str | None = None + while True: + cmd_args = ["codecommit", "list-repositories"] + if next_token: + cmd_args.extend(["--next-token", next_token]) + data = 
self._run_aws_command(*cmd_args) + repositories.extend(data.get("repositories", [])) + next_token = data.get("nextToken") + if not next_token: + break if not repositories: return diff --git a/tests/_internal/remotes/test_codecommit.py b/tests/_internal/remotes/test_codecommit.py index ffcf1c758..4dfef1461 100644 --- a/tests/_internal/remotes/test_codecommit.py +++ b/tests/_internal/remotes/test_codecommit.py @@ -446,6 +446,62 @@ def mock_run(cmd: list[str], **kwargs: t.Any) -> subprocess.CompletedProcess[str assert len(batch_get_calls[1]) == 5 +def test_fetch_repos_pagination(monkeypatch: pytest.MonkeyPatch) -> None: + """Test fetch_repos handles nextToken pagination across list-repositories calls.""" + call_count = 0 + list_calls: list[list[str]] = [] + + def mock_run(cmd: list[str], **kwargs: t.Any) -> subprocess.CompletedProcess[str]: + nonlocal call_count + call_count += 1 + if call_count == 1: + return _aws_ok("aws-cli/2.x") + if "list-repositories" in cmd: + list_calls.append(cmd) + if "--next-token" not in cmd: + # First page: return 2 repos + nextToken + return _aws_ok( + json.dumps( + { + "repositories": [ + {"repositoryName": "page1-repo1"}, + {"repositoryName": "page1-repo2"}, + ], + "nextToken": "token-page2", + } + ) + ) + # Second page: return 1 repo, no nextToken + return _aws_ok( + json.dumps( + { + "repositories": [ + {"repositoryName": "page2-repo1"}, + ], + } + ) + ) + if "batch-get-repositories" in cmd: + names_idx = cmd.index("--repository-names") + 1 + repo_names = cmd[names_idx:] + repos = [_make_cc_repo(name) for name in repo_names] + return _aws_ok(json.dumps({"repositories": repos})) + return _aws_err(stderr="unknown command") + + monkeypatch.setattr("subprocess.run", mock_run) + importer = CodeCommitImporter() + options = ImportOptions() + repos = list(importer.fetch_repos(options)) + + # Should have consumed both pages + assert len(repos) == 3 + assert {r.name for r in repos} == {"page1-repo1", "page1-repo2", "page2-repo1"} + # Should 
have made 2 list-repositories calls + assert len(list_calls) == 2 + assert "--next-token" not in list_calls[0] + assert "--next-token" in list_calls[1] + + # --------------------------------------------------------------------------- # _parse_repo — region extraction # --------------------------------------------------------------------------- From a009a04001e82b93ea466a2ea697d9c006f9fc83 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 14 Feb 2026 14:02:55 -0600 Subject: [PATCH 067/109] test(cli/import[resolve-config]) Add .yml extension test fixture why: RESOLVE_CONFIG_FIXTURES only tested .yaml; .yml is also accepted by _resolve_config_file but had no test coverage. what: - Add yml-extension-accepted fixture to RESOLVE_CONFIG_FIXTURES --- tests/cli/test_import_repos.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/cli/test_import_repos.py b/tests/cli/test_import_repos.py index 9ed6151d2..749a0f646 100644 --- a/tests/cli/test_import_repos.py +++ b/tests/cli/test_import_repos.py @@ -353,6 +353,12 @@ class ResolveConfigFixture(t.NamedTuple): home_configs=[], expected_suffix=".vcspull.yaml", ), + ResolveConfigFixture( + test_id="yml-extension-accepted", + config_path_str="/custom/config.yml", + home_configs=[], + expected_suffix="config.yml", + ), ] From 3d205b9ea8701198b456c4e35299eeb32cbb238b Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 14 Feb 2026 14:03:53 -0600 Subject: [PATCH 068/109] fix(cli/import[unsupported-filters]) Warn about --topics/--min-stars on CodeCommit why: CodeCommit's API doesn't support topic or star-based filtering, but the CLI flags were accepted without any indication they would silently exclude all results. 
what: - Add warnings for --topics and --min-stars when service is codecommit - Add parametrized tests covering codecommit and github scenarios --- src/vcspull/cli/import_repos.py | 14 ++++ tests/cli/test_import_repos.py | 119 ++++++++++++++++++++++++++++++++ 2 files changed, 133 insertions(+) diff --git a/src/vcspull/cli/import_repos.py b/src/vcspull/cli/import_repos.py index f84e7ac5a..82d8b0d93 100644 --- a/src/vcspull/cli/import_repos.py +++ b/src/vcspull/cli/import_repos.py @@ -445,6 +445,20 @@ def import_repos( colors.warning("!"), importer.service_name, ) + if options.topics and normalized_service == "codecommit": + log.warning( + "%s %s does not support topic filtering; " + "--topics filter may exclude all results", + colors.warning("!"), + importer.service_name, + ) + if options.min_stars > 0 and normalized_service == "codecommit": + log.warning( + "%s %s does not track star counts; " + "--min-stars filter may exclude all results", + colors.warning("!"), + importer.service_name, + ) # Resolve workspace path workspace_path = pathlib.Path(workspace).expanduser().resolve() diff --git a/tests/cli/test_import_repos.py b/tests/cli/test_import_repos.py index 749a0f646..469038b9b 100644 --- a/tests/cli/test_import_repos.py +++ b/tests/cli/test_import_repos.py @@ -2193,3 +2193,122 @@ def fetch_repos( assert "does not return language metadata" in caplog.text else: assert "does not return language metadata" not in caplog.text + + +class UnsupportedFilterFixture(t.NamedTuple): + """Fixture for unsupported CodeCommit filter warning test cases.""" + + test_id: str + service: str + topics: str | None + min_stars: int + expect_topics_warning: bool + expect_stars_warning: bool + + +UNSUPPORTED_FILTER_FIXTURES: list[UnsupportedFilterFixture] = [ + UnsupportedFilterFixture( + test_id="codecommit-with-topics-warns", + service="codecommit", + topics="python,cli", + min_stars=0, + expect_topics_warning=True, + expect_stars_warning=False, + ), + UnsupportedFilterFixture( + 
test_id="codecommit-with-min-stars-warns", + service="codecommit", + topics=None, + min_stars=10, + expect_topics_warning=False, + expect_stars_warning=True, + ), + UnsupportedFilterFixture( + test_id="codecommit-with-both-warns", + service="codecommit", + topics="python", + min_stars=5, + expect_topics_warning=True, + expect_stars_warning=True, + ), + UnsupportedFilterFixture( + test_id="github-with-topics-no-warning", + service="github", + topics="python,cli", + min_stars=10, + expect_topics_warning=False, + expect_stars_warning=False, + ), +] + + +@pytest.mark.parametrize( + list(UnsupportedFilterFixture._fields), + UNSUPPORTED_FILTER_FIXTURES, + ids=[f.test_id for f in UNSUPPORTED_FILTER_FIXTURES], +) +def test_import_repos_unsupported_filter_warning( + test_id: str, + service: str, + topics: str | None, + min_stars: int, + expect_topics_warning: bool, + expect_stars_warning: bool, + tmp_path: pathlib.Path, + monkeypatch: MonkeyPatch, + caplog: pytest.LogCaptureFixture, +) -> None: + """Test that --topics/--min-stars warn for CodeCommit.""" + caplog.set_level(logging.WARNING) + + monkeypatch.setenv("HOME", str(tmp_path)) + workspace = tmp_path / "repos" + workspace.mkdir() + + class MockImporter: + service_name = "CodeCommit" if service == "codecommit" else "GitHub" + + def fetch_repos( + self, + options: ImportOptions, + ) -> t.Iterator[RemoteRepo]: + return iter([]) + + monkeypatch.setattr( + import_repos_mod, + "_get_importer", + lambda *args, **kwargs: MockImporter(), + ) + + import_repos( + service=service, + target="testuser" if service != "codecommit" else "", + workspace=str(workspace), + mode="user", + base_url=None, + token=None, + region="us-east-1" if service == "codecommit" else None, + profile=None, + language=None, + topics=topics, + min_stars=min_stars, + include_archived=False, + include_forks=False, + limit=100, + config_path_str=str(tmp_path / "config.yaml"), + dry_run=True, + yes=True, + output_json=False, + output_ndjson=False, + 
color="never", + ) + + if expect_topics_warning: + assert "does not support topic filtering" in caplog.text + else: + assert "does not support topic filtering" not in caplog.text + + if expect_stars_warning: + assert "does not track star counts" in caplog.text + else: + assert "does not track star counts" not in caplog.text From fc85ee8aa1389c2897690ef8cbb36954b0628bd8 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 14 Feb 2026 14:04:43 -0600 Subject: [PATCH 069/109] fix(remotes[github-search-cap]) Warn when search exceeds 1000-result limit why: GitHub's search API returns at most 1000 results, but the code silently stopped paginating without informing the user that results were truncated. what: - Check total_count on first page; warn if > 1000 --- src/vcspull/_internal/remotes/github.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/vcspull/_internal/remotes/github.py b/src/vcspull/_internal/remotes/github.py index 0e5298f35..e088cab20 100644 --- a/src/vcspull/_internal/remotes/github.py +++ b/src/vcspull/_internal/remotes/github.py @@ -181,6 +181,14 @@ def _fetch_search(self, options: ImportOptions) -> t.Iterator[RemoteRepo]: self._log_rate_limit(headers) + total_count = data.get("total_count", 0) + if page == 1 and total_count > 1000: + log.warning( + "GitHub search returned %d total results but API limits " + "to 1000; consider narrowing your query", + total_count, + ) + items = data.get("items", []) if not items: break From cdf3b05bcccebb33c2bcb29c3337302047a3f006 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 14 Feb 2026 17:38:45 -0600 Subject: [PATCH 070/109] docs(doctests[remotes,output]): Add doctests to feasible public methods why: CLAUDE.md requires all functions/methods to have working doctests. 
what: - Add doctests to RemoteImportError.__init__, ImportOptions.__post_init__, HTTPClient.__init__, HTTPClient._build_headers in base.py - Add doctests to GitHubImporter.__init__ and is_authenticated - Add doctests to GitLabImporter.__init__ and is_authenticated - Add doctests to GiteaImporter.__init__ and is_authenticated - Add doctests to PlanEntry.to_payload, PlanSummary.total, PlanSummary.to_payload, OutputFormatter.__init__, get_output_mode - Skip infeasible methods (network calls, subprocess, stdout writes) --- src/vcspull/_internal/remotes/base.py | 39 ++++++++++++++++- src/vcspull/_internal/remotes/gitea.py | 11 +++++ src/vcspull/_internal/remotes/github.py | 11 +++++ src/vcspull/_internal/remotes/gitlab.py | 11 +++++ src/vcspull/cli/_output.py | 56 +++++++++++++++++++++++-- 5 files changed, 124 insertions(+), 4 deletions(-) diff --git a/src/vcspull/_internal/remotes/base.py b/src/vcspull/_internal/remotes/base.py index a826b4ebd..cb888c2c3 100644 --- a/src/vcspull/_internal/remotes/base.py +++ b/src/vcspull/_internal/remotes/base.py @@ -35,6 +35,14 @@ def __init__(self, message: str, service: str | None = None) -> None: Error message service : str | None Name of the service that raised the error + + Examples + -------- + >>> err = RemoteImportError("connection failed", service="GitHub") + >>> str(err) + 'connection failed' + >>> err.service + 'GitHub' """ super().__init__(message) self.service = service @@ -235,7 +243,19 @@ class ImportOptions: limit: int = 100 def __post_init__(self) -> None: - """Validate options after initialization.""" + """Validate options after initialization. + + Examples + -------- + >>> opts = ImportOptions(limit=10) + >>> opts.limit + 10 + + >>> ImportOptions(limit=0) + Traceback (most recent call last): + ... 
+ ValueError: limit must be >= 1, got 0 + """ if self.limit < 1: msg = f"limit must be >= 1, got {self.limit}" raise ValueError(msg) @@ -270,6 +290,12 @@ def __init__( User-Agent header value timeout : int Request timeout in seconds + + Examples + -------- + >>> client = HTTPClient("https://api.example.com/") + >>> client.base_url + 'https://api.example.com' """ self.base_url = base_url.rstrip("/") self.token = token @@ -285,6 +311,17 @@ def _build_headers(self) -> dict[str, str]: ------- dict[str, str] Request headers + + Examples + -------- + >>> client = HTTPClient("https://api.example.com", token="tok123") + >>> headers = client._build_headers() + >>> headers["Authorization"] + 'Bearer tok123' + + >>> client = HTTPClient("https://api.example.com") + >>> "Authorization" not in client._build_headers() + True """ headers = { "User-Agent": self.user_agent, diff --git a/src/vcspull/_internal/remotes/gitea.py b/src/vcspull/_internal/remotes/gitea.py index 690980e92..4f64ae1f7 100644 --- a/src/vcspull/_internal/remotes/gitea.py +++ b/src/vcspull/_internal/remotes/gitea.py @@ -52,6 +52,12 @@ def __init__( base_url : str | None Base URL for the Gitea instance. Required for generic Gitea. Defaults to Codeberg if not specified. + + Examples + -------- + >>> importer = GiteaImporter(token="fake", base_url="https://codeberg.org") + >>> importer.service_name + 'Gitea' """ self._base_url = (base_url or CODEBERG_API_URL).rstrip("/") @@ -86,6 +92,11 @@ def is_authenticated(self) -> bool: ------- bool True if a token is configured + + Examples + -------- + >>> GiteaImporter(token="fake", base_url="https://codeberg.org").is_authenticated + True """ return self._token is not None diff --git a/src/vcspull/_internal/remotes/github.py b/src/vcspull/_internal/remotes/github.py index e088cab20..ec2e9ed02 100644 --- a/src/vcspull/_internal/remotes/github.py +++ b/src/vcspull/_internal/remotes/github.py @@ -51,6 +51,12 @@ def __init__( GitHub API token. 
If not provided, will try GITHUB_TOKEN env var. base_url : str | None Base URL for GitHub Enterprise. Defaults to api.github.com. + + Examples + -------- + >>> importer = GitHubImporter(token="fake") + >>> importer.service_name + 'GitHub' """ self._token = token or get_token_from_env("GITHUB_TOKEN", "GH_TOKEN") self._base_url = (base_url or GITHUB_API_URL).rstrip("/") @@ -70,6 +76,11 @@ def is_authenticated(self) -> bool: ------- bool True if a token is configured + + Examples + -------- + >>> GitHubImporter(token="fake").is_authenticated + True """ return self._token is not None diff --git a/src/vcspull/_internal/remotes/gitlab.py b/src/vcspull/_internal/remotes/gitlab.py index c523718ac..5f2d6d15e 100644 --- a/src/vcspull/_internal/remotes/gitlab.py +++ b/src/vcspull/_internal/remotes/gitlab.py @@ -52,6 +52,12 @@ def __init__( GitLab API token. If not provided, will try GITLAB_TOKEN env var. base_url : str | None Base URL for self-hosted GitLab instances. Defaults to gitlab.com. + + Examples + -------- + >>> importer = GitLabImporter(token="fake") + >>> importer.service_name + 'GitLab' """ self._token = token or get_token_from_env("GITLAB_TOKEN", "GL_TOKEN") self._base_url = (base_url or GITLAB_API_URL).rstrip("/") @@ -71,6 +77,11 @@ def is_authenticated(self) -> bool: ------- bool True if a token is configured + + Examples + -------- + >>> GitLabImporter(token="fake").is_authenticated + True """ return self._token is not None diff --git a/src/vcspull/cli/_output.py b/src/vcspull/cli/_output.py index 4e92fa0a3..c2add1d2d 100644 --- a/src/vcspull/cli/_output.py +++ b/src/vcspull/cli/_output.py @@ -48,7 +48,25 @@ class PlanEntry: diagnostics: list[str] = field(default_factory=list) def to_payload(self) -> dict[str, t.Any]: - """Convert the plan entry into a serialisable payload.""" + """Convert the plan entry into a serialisable payload. + + Examples + -------- + >>> entry = PlanEntry( + ... name="myrepo", + ... path="/home/user/repos/myrepo", + ... 
workspace_root="/home/user/repos", + ... action=PlanAction.CLONE, + ... url="git+https://github.com/user/myrepo.git", + ... ) + >>> payload = entry.to_payload() + >>> payload["name"] + 'myrepo' + >>> payload["action"] + 'clone' + >>> payload["format_version"] + '1' + """ payload: dict[str, t.Any] = { "format_version": "1", "type": "operation", @@ -94,11 +112,28 @@ class PlanSummary: duration_ms: int | None = None def total(self) -> int: - """Return the total number of repositories accounted for.""" + """Return the total number of repositories accounted for. + + Examples + -------- + >>> summary = PlanSummary(clone=2, update=3, unchanged=1) + >>> summary.total() + 6 + """ return self.clone + self.update + self.unchanged + self.blocked + self.errors def to_payload(self) -> dict[str, t.Any]: - """Convert the summary to a serialisable payload.""" + """Convert the summary to a serialisable payload. + + Examples + -------- + >>> summary = PlanSummary(clone=1, update=2) + >>> payload = summary.to_payload() + >>> payload["type"] + 'summary' + >>> payload["total"] + 3 + """ payload: dict[str, t.Any] = { "format_version": "1", "type": "summary", @@ -143,6 +178,12 @@ def __init__(self, mode: OutputMode = OutputMode.HUMAN) -> None: ---------- mode : OutputMode The output mode to use (human, json, ndjson) + + Examples + -------- + >>> formatter = OutputFormatter(OutputMode.JSON) + >>> formatter.mode + """ self.mode = mode self._json_buffer: list[dict[str, t.Any]] = [] @@ -204,6 +245,15 @@ def get_output_mode(json_flag: bool, ndjson_flag: bool) -> OutputMode: ------- OutputMode The determined output mode (NDJSON takes precedence over JSON) + + Examples + -------- + >>> get_output_mode(json_flag=False, ndjson_flag=False) + + >>> get_output_mode(json_flag=True, ndjson_flag=False) + + >>> get_output_mode(json_flag=True, ndjson_flag=True) + """ if ndjson_flag: return OutputMode.NDJSON From ab77872cfe07a4be9266c476f54741061e8b4631 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: 
Sat, 14 Feb 2026 17:39:32 -0600 Subject: [PATCH 071/109] test(cli/import[config-write]) Replace yaml.dump with save_config_yaml why: CLAUDE.md requires using project helper save_config_yaml instead of direct yaml.dump for config creation in tests. what: - Replace 2 yaml.dump calls with save_config_yaml in test_import_repos_skips_existing and test_import_repos_all_existing - Add save_config_yaml import from vcspull.config - Remove unused yaml import from test_import_repos_all_existing (auto-fixed by ruff) --- tests/cli/test_import_repos.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tests/cli/test_import_repos.py b/tests/cli/test_import_repos.py index 469038b9b..7632381be 100644 --- a/tests/cli/test_import_repos.py +++ b/tests/cli/test_import_repos.py @@ -26,7 +26,7 @@ _resolve_config_file, import_repos, ) -from vcspull.config import workspace_root_label +from vcspull.config import save_config_yaml, workspace_root_label # Get the actual module (not the function from __init__.py) import_repos_mod = sys.modules["vcspull.cli.import_repos"] @@ -929,7 +929,7 @@ def test_import_repos_skips_existing( "repo1": {"repo": "git+https://github.com/testuser/repo1.git"}, } } - config_file.write_text(yaml.dump(existing_config), encoding="utf-8") + save_config_yaml(config_file, existing_config) # Mock the importer to return repo1 (existing) and repo2 (new) class MockImporter: @@ -987,8 +987,6 @@ def test_import_repos_all_existing( caplog: pytest.LogCaptureFixture, ) -> None: """Test import_repos handles all repos already existing.""" - import yaml - caplog.set_level(logging.INFO) monkeypatch.setenv("HOME", str(tmp_path)) @@ -1002,7 +1000,7 @@ def test_import_repos_all_existing( "repo1": {"repo": "git+https://github.com/testuser/repo1.git"}, } } - config_file.write_text(yaml.dump(existing_config), encoding="utf-8") + save_config_yaml(config_file, existing_config) # Mock the importer to return only repo1 (existing) class MockImporter: From 
221e4b1219b2d7648adaf7e4cc8f810bb5e2c918 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 14 Feb 2026 17:45:54 -0600 Subject: [PATCH 072/109] test(mocks[documentation]): Add comments to all monkeypatch.setattr calls why: CLAUDE.md requires documenting every mock with comments explaining WHAT is being mocked and WHY. what: - Add WHAT+WHY comments to ~19 monkeypatch.setattr calls in test_codecommit.py - Add WHAT+WHY comments to ~6 monkeypatch.setattr calls in test_gitlab.py - Add WHAT+WHY comments to ~3 monkeypatch.setattr calls in test_pagination_duplicates.py - Add WHAT+WHY comment to 1 monkeypatch.setattr call in conftest.py - Add WHAT+WHY comments to ~21 monkeypatch.setattr calls in test_import_repos.py --- tests/_internal/remotes/conftest.py | 1 + tests/_internal/remotes/test_codecommit.py | 28 ++++++++++++++++ tests/_internal/remotes/test_gitlab.py | 6 ++++ .../remotes/test_pagination_duplicates.py | 3 ++ tests/cli/test_import_repos.py | 33 ++++++++++++++++--- 5 files changed, 67 insertions(+), 4 deletions(-) diff --git a/tests/_internal/remotes/conftest.py b/tests/_internal/remotes/conftest.py index 5d94f55ef..8a902740e 100644 --- a/tests/_internal/remotes/conftest.py +++ b/tests/_internal/remotes/conftest.py @@ -85,6 +85,7 @@ def urlopen_side_effect( call_count += 1 return MockHTTPResponse(body, headers, status) + # Mock urlopen: return pre-configured responses to avoid real HTTP requests monkeypatch.setattr("urllib.request.urlopen", urlopen_side_effect) return _mock diff --git a/tests/_internal/remotes/test_codecommit.py b/tests/_internal/remotes/test_codecommit.py index 4dfef1461..cc0cb7817 100644 --- a/tests/_internal/remotes/test_codecommit.py +++ b/tests/_internal/remotes/test_codecommit.py @@ -72,6 +72,7 @@ def mock_run(cmd: list[str], **kwargs: t.Any) -> subprocess.CompletedProcess[str msg = "aws" raise FileNotFoundError(msg) + # Mock subprocess.run: simulate aws binary not found (FileNotFoundError) monkeypatch.setattr("subprocess.run", mock_run) 
with pytest.raises(DependencyError, match="not installed"): @@ -82,6 +83,7 @@ def test_check_aws_cli_nonzero_returncode(monkeypatch: pytest.MonkeyPatch) -> No """Test _check_aws_cli raises DependencyError for non-zero returncode.""" from vcspull._internal.remotes.base import DependencyError + # Mock subprocess.run: simulate aws CLI returning non-zero exit code monkeypatch.setattr("subprocess.run", lambda cmd, **kw: _aws_err()) with pytest.raises(DependencyError, match="not installed"): @@ -95,6 +97,7 @@ def test_check_aws_cli_nonzero_returncode(monkeypatch: pytest.MonkeyPatch) -> No def test_build_aws_command_no_flags(monkeypatch: pytest.MonkeyPatch) -> None: """Test _build_aws_command with no region/profile.""" + # Mock subprocess.run: allow CodeCommitImporter construction (aws --version check) monkeypatch.setattr("subprocess.run", lambda cmd, **kw: _aws_ok("aws-cli/2.x")) importer = CodeCommitImporter() @@ -104,6 +107,7 @@ def test_build_aws_command_no_flags(monkeypatch: pytest.MonkeyPatch) -> None: def test_build_aws_command_with_region(monkeypatch: pytest.MonkeyPatch) -> None: """Test _build_aws_command appends --region.""" + # Mock subprocess.run: allow CodeCommitImporter construction (aws --version check) monkeypatch.setattr("subprocess.run", lambda cmd, **kw: _aws_ok("aws-cli/2.x")) importer = CodeCommitImporter(region="eu-west-1") @@ -121,6 +125,7 @@ def test_build_aws_command_with_region(monkeypatch: pytest.MonkeyPatch) -> None: def test_build_aws_command_with_profile(monkeypatch: pytest.MonkeyPatch) -> None: """Test _build_aws_command appends --profile.""" + # Mock subprocess.run: allow CodeCommitImporter construction (aws --version check) monkeypatch.setattr("subprocess.run", lambda cmd, **kw: _aws_ok("aws-cli/2.x")) importer = CodeCommitImporter(profile="myprofile") @@ -140,6 +145,7 @@ def test_build_aws_command_with_region_and_profile( monkeypatch: pytest.MonkeyPatch, ) -> None: """Test _build_aws_command with both region and profile.""" + # Mock 
subprocess.run: allow CodeCommitImporter construction (aws --version check) monkeypatch.setattr("subprocess.run", lambda cmd, **kw: _aws_ok("aws-cli/2.x")) importer = CodeCommitImporter(region="ap-south-1", profile="prod") @@ -225,6 +231,8 @@ def mock_run(cmd: list[str], **kwargs: t.Any) -> subprocess.CompletedProcess[str # Subsequent calls fail with the test error return _aws_err(stderr=stderr) + # Mock subprocess.run: first call passes aws --version, subsequent calls fail + # with the specific AWS CLI error under test monkeypatch.setattr("subprocess.run", mock_run) importer = CodeCommitImporter() @@ -246,6 +254,7 @@ def mock_run(cmd: list[str], **kwargs: t.Any) -> subprocess.CompletedProcess[str return _aws_ok("aws-cli/2.x") return _aws_ok(stdout="not valid json {{{") + # Mock subprocess.run: first call passes aws --version, second returns invalid JSON monkeypatch.setattr("subprocess.run", mock_run) importer = CodeCommitImporter() @@ -267,6 +276,8 @@ def mock_run(cmd: list[str], **kwargs: t.Any) -> subprocess.CompletedProcess[str msg = "aws" raise FileNotFoundError(msg) + # Mock subprocess.run: first call passes aws --version, second raises + # FileNotFoundError to simulate aws binary disappearing mid-session monkeypatch.setattr("subprocess.run", mock_run) importer = CodeCommitImporter() @@ -306,6 +317,8 @@ def mock_run(cmd: list[str], **kwargs: t.Any) -> subprocess.CompletedProcess[str return _aws_ok(json.dumps({"repositories": repos_data})) return _aws_err(stderr="unknown command") + # Mock subprocess.run: simulate aws --version, list-repositories, and + # batch-get-repositories responses to test the full fetch pipeline monkeypatch.setattr("subprocess.run", mock_run) importer = CodeCommitImporter() options = ImportOptions() @@ -329,6 +342,7 @@ def mock_run(cmd: list[str], **kwargs: t.Any) -> subprocess.CompletedProcess[str return _aws_ok(json.dumps({"repositories": []})) return _aws_err(stderr="unknown command") + # Mock subprocess.run: simulate aws --version 
and empty list-repositories response monkeypatch.setattr("subprocess.run", mock_run) importer = CodeCommitImporter() options = ImportOptions() @@ -366,6 +380,8 @@ def mock_run(cmd: list[str], **kwargs: t.Any) -> subprocess.CompletedProcess[str return _aws_ok(json.dumps({"repositories": repos_data})) return _aws_err(stderr="unknown command") + # Mock subprocess.run: simulate aws --version, list-repositories with + # multiple repos, and batch-get for only the name-filtered subset monkeypatch.setattr("subprocess.run", mock_run) importer = CodeCommitImporter() options = ImportOptions(target="django") @@ -396,6 +412,7 @@ def mock_run(cmd: list[str], **kwargs: t.Any) -> subprocess.CompletedProcess[str return _aws_ok(json.dumps({"repositories": repos_data})) return _aws_err(stderr="unknown command") + # Mock subprocess.run: simulate full pipeline to verify limit is respected monkeypatch.setattr("subprocess.run", mock_run) importer = CodeCommitImporter() options = ImportOptions(limit=2) @@ -435,6 +452,7 @@ def mock_run(cmd: list[str], **kwargs: t.Any) -> subprocess.CompletedProcess[str return _aws_ok(json.dumps({"repositories": repos})) return _aws_err(stderr="unknown command") + # Mock subprocess.run: simulate 30 repos to verify batch-get splits at 25 monkeypatch.setattr("subprocess.run", mock_run) importer = CodeCommitImporter() options = ImportOptions(limit=100) @@ -488,6 +506,8 @@ def mock_run(cmd: list[str], **kwargs: t.Any) -> subprocess.CompletedProcess[str return _aws_ok(json.dumps({"repositories": repos})) return _aws_err(stderr="unknown command") + # Mock subprocess.run: simulate paginated list-repositories with nextToken + # to verify the importer follows pagination tokens across pages monkeypatch.setattr("subprocess.run", mock_run) importer = CodeCommitImporter() options = ImportOptions() @@ -509,6 +529,7 @@ def mock_run(cmd: list[str], **kwargs: t.Any) -> subprocess.CompletedProcess[str def test_parse_repo_region_from_clone_url(monkeypatch: pytest.MonkeyPatch) 
-> None: """Test _parse_repo extracts region from clone URL when not set.""" + # Mock subprocess.run: allow CodeCommitImporter construction (aws --version check) monkeypatch.setattr("subprocess.run", lambda cmd, **kw: _aws_ok("aws-cli/2.x")) # No region set — should extract from clone URL @@ -522,6 +543,7 @@ def test_parse_repo_region_from_clone_url(monkeypatch: pytest.MonkeyPatch) -> No def test_parse_repo_region_explicit(monkeypatch: pytest.MonkeyPatch) -> None: """Test _parse_repo uses explicit region when set.""" + # Mock subprocess.run: allow CodeCommitImporter construction (aws --version check) monkeypatch.setattr("subprocess.run", lambda cmd, **kw: _aws_ok("aws-cli/2.x")) importer = CodeCommitImporter(region="eu-central-1") @@ -534,6 +556,7 @@ def test_parse_repo_region_explicit(monkeypatch: pytest.MonkeyPatch) -> None: def test_parse_repo_fallback_region(monkeypatch: pytest.MonkeyPatch) -> None: """Test _parse_repo falls back to us-east-1 when no region info available.""" + # Mock subprocess.run: allow CodeCommitImporter construction (aws --version check) monkeypatch.setattr("subprocess.run", lambda cmd, **kw: _aws_ok("aws-cli/2.x")) importer = CodeCommitImporter(region=None) @@ -551,6 +574,7 @@ def test_parse_repo_fallback_region(monkeypatch: pytest.MonkeyPatch) -> None: def test_parse_repo_fields(monkeypatch: pytest.MonkeyPatch) -> None: """Test _parse_repo maps all fields correctly.""" + # Mock subprocess.run: allow CodeCommitImporter construction (aws --version check) monkeypatch.setattr("subprocess.run", lambda cmd, **kw: _aws_ok("aws-cli/2.x")) importer = CodeCommitImporter(region="us-east-1") @@ -597,6 +621,8 @@ def mock_run(cmd: list[str], **kwargs: t.Any) -> subprocess.CompletedProcess[str ) ) + # Mock subprocess.run: first call passes aws --version, second returns + # successful sts get-caller-identity to confirm credentials are valid monkeypatch.setattr("subprocess.run", mock_run) importer = CodeCommitImporter() @@ -615,6 +641,8 @@ def 
mock_run(cmd: list[str], **kwargs: t.Any) -> subprocess.CompletedProcess[str # sts get-caller-identity fails with credential error return _aws_err(stderr="Unable to locate credentials") + # Mock subprocess.run: first call passes aws --version, second fails + # sts get-caller-identity with credential error to simulate missing credentials monkeypatch.setattr("subprocess.run", mock_run) importer = CodeCommitImporter() diff --git a/tests/_internal/remotes/test_gitlab.py b/tests/_internal/remotes/test_gitlab.py index 90bf7a96a..6a5033d02 100644 --- a/tests/_internal/remotes/test_gitlab.py +++ b/tests/_internal/remotes/test_gitlab.py @@ -290,6 +290,7 @@ def urlopen_capture( captured_urls.append(request.full_url) return MockHTTPResponse(json.dumps(response_json).encode()) + # Mock urlopen: capture request URLs to verify subgroup path encoding monkeypatch.setattr("urllib.request.urlopen", urlopen_capture) importer = GitLabImporter() @@ -338,6 +339,7 @@ def urlopen_capture( captured_urls.append(request.full_url) return MockHTTPResponse(json.dumps(response_json).encode()) + # Mock urlopen: capture request URLs to verify deep nesting path encoding monkeypatch.setattr("urllib.request.urlopen", urlopen_capture) importer = GitLabImporter() @@ -421,6 +423,7 @@ def urlopen_capture( captured_urls.append(request.full_url) return MockHTTPResponse(json.dumps(response_json).encode()) + # Mock urlopen: capture request URLs to verify archived param is omitted monkeypatch.setattr("urllib.request.urlopen", urlopen_capture) importer = GitLabImporter() @@ -463,6 +466,7 @@ def urlopen_capture( captured_urls.append(request.full_url) return MockHTTPResponse(json.dumps(response_json).encode()) + # Mock urlopen: capture request URLs to verify archived=false is included monkeypatch.setattr("urllib.request.urlopen", urlopen_capture) importer = GitLabImporter() @@ -504,6 +508,7 @@ def urlopen_capture( captured_urls.append(request.full_url) return 
MockHTTPResponse(json.dumps(search_response).encode()) + # Mock urlopen: capture request URLs to verify search archived=false param monkeypatch.setattr("urllib.request.urlopen", urlopen_capture) importer = GitLabImporter(token="test-token") @@ -547,6 +552,7 @@ def urlopen_capture( captured_urls.append(request.full_url) return MockHTTPResponse(json.dumps(search_response).encode()) + # Mock urlopen: capture request URLs to verify archived param is omitted in search monkeypatch.setattr("urllib.request.urlopen", urlopen_capture) importer = GitLabImporter(token="test-token") diff --git a/tests/_internal/remotes/test_pagination_duplicates.py b/tests/_internal/remotes/test_pagination_duplicates.py index dd7f4637f..01164cbea 100644 --- a/tests/_internal/remotes/test_pagination_duplicates.py +++ b/tests/_internal/remotes/test_pagination_duplicates.py @@ -156,6 +156,7 @@ def urlopen_capture( call_count += 1 return MockHTTPResponse(body, headers, status) + # Mock urlopen: capture paginated requests to verify consistent per_page monkeypatch.setattr("urllib.request.urlopen", urlopen_capture) importer = GitHubImporter() @@ -227,6 +228,7 @@ def urlopen_capture( call_count += 1 return MockHTTPResponse(body, headers, status) + # Mock urlopen: capture paginated requests to verify consistent limit param monkeypatch.setattr("urllib.request.urlopen", urlopen_capture) importer = GiteaImporter(base_url="https://codeberg.org") @@ -297,6 +299,7 @@ def urlopen_capture( call_count += 1 return MockHTTPResponse(body, headers, status) + # Mock urlopen: capture paginated requests to verify consistent per_page monkeypatch.setattr("urllib.request.urlopen", urlopen_capture) importer = GitLabImporter() diff --git a/tests/cli/test_import_repos.py b/tests/cli/test_import_repos.py index 7632381be..c659996d3 100644 --- a/tests/cli/test_import_repos.py +++ b/tests/cli/test_import_repos.py @@ -385,6 +385,8 @@ def test_resolve_config_file( cfg_path.touch() full_paths.append(cfg_path) + # Mock 
find_home_config_files: return pre-created config file paths + # instead of scanning the real home directory monkeypatch.setattr( import_repos_mod, "find_home_config_files", @@ -600,6 +602,7 @@ def fetch_repos( raise mock_error yield from mock_repos + # Mock _get_importer: return MockImporter to avoid real API/network calls monkeypatch.setattr( import_repos_mod, "_get_importer", @@ -734,8 +737,9 @@ def test_import_repos_user_abort( workspace.mkdir() config_file = tmp_path / ".vcspull.yaml" - # Mock user input and ensure isatty returns True so we reach input() + # Mock builtins.input: simulate user typing "n" to decline confirmation monkeypatch.setattr("builtins.input", lambda _: "n") + # Mock sys.stdin: fake TTY so the confirmation prompt is shown monkeypatch.setattr( "sys.stdin", type("FakeTTY", (), {"isatty": lambda self: True})() ) @@ -750,6 +754,7 @@ def fetch_repos( ) -> t.Iterator[RemoteRepo]: yield _make_repo("repo1") + # Mock _get_importer: return MockImporter to avoid real API calls monkeypatch.setattr( import_repos_mod, "_get_importer", @@ -800,8 +805,9 @@ def test_import_repos_eoferror_aborts( def raise_eof(_: str) -> str: raise EOFError + # Mock builtins.input: simulate EOFError from piped/closed stdin monkeypatch.setattr("builtins.input", raise_eof) - # Ensure isatty returns True so we reach input() + # Mock sys.stdin: fake TTY so the confirmation prompt path is exercised monkeypatch.setattr( "sys.stdin", type("FakeTTY", (), {"isatty": lambda self: True})() ) @@ -815,6 +821,7 @@ def fetch_repos( ) -> t.Iterator[RemoteRepo]: yield _make_repo("repo1") + # Mock _get_importer: return MockImporter to avoid real API calls monkeypatch.setattr( import_repos_mod, "_get_importer", @@ -861,7 +868,7 @@ def test_import_repos_non_tty_aborts( workspace.mkdir() config_file = tmp_path / ".vcspull.yaml" - # Mock stdin.isatty() to return False + # Mock sys.stdin: fake non-TTY to test non-interactive abort path monkeypatch.setattr( "sys.stdin", type("FakeNonTTY", (), 
{"isatty": lambda self: False})() ) @@ -875,6 +882,7 @@ def fetch_repos( ) -> t.Iterator[RemoteRepo]: yield _make_repo("repo1") + # Mock _get_importer: return MockImporter to avoid real API calls monkeypatch.setattr( import_repos_mod, "_get_importer", @@ -942,6 +950,7 @@ def fetch_repos( yield _make_repo("repo1") yield _make_repo("repo2") + # Mock _get_importer: return MockImporter with both existing and new repos monkeypatch.setattr( import_repos_mod, "_get_importer", @@ -1012,6 +1021,7 @@ def fetch_repos( ) -> t.Iterator[RemoteRepo]: yield _make_repo("repo1") + # Mock _get_importer: return MockImporter with only already-existing repos monkeypatch.setattr( import_repos_mod, "_get_importer", @@ -1064,6 +1074,7 @@ def fetch_repos( ) -> t.Iterator[RemoteRepo]: yield _make_repo("repo1", stars=50) + # Mock _get_importer: return MockImporter to test JSON output format monkeypatch.setattr( import_repos_mod, "_get_importer", @@ -1123,6 +1134,7 @@ def fetch_repos( yield _make_repo("repo1") yield _make_repo("repo2") + # Mock _get_importer: return MockImporter to test NDJSON output format monkeypatch.setattr( import_repos_mod, "_get_importer", @@ -1184,6 +1196,7 @@ def fetch_repos( received_options.append(options) return iter([]) + # Mock _get_importer: capture ImportOptions to verify filter passthrough monkeypatch.setattr( import_repos_mod, "_get_importer", @@ -1244,6 +1257,7 @@ def fetch_repos( ) -> t.Iterator[RemoteRepo]: yield _make_repo("aws-repo") + # Mock _get_importer: return MockImporter to verify CodeCommit allows empty target monkeypatch.setattr( import_repos_mod, "_get_importer", @@ -1303,6 +1317,7 @@ def fetch_repos( ) -> t.Iterator[RemoteRepo]: yield from many_repos + # Mock _get_importer: return MockImporter with 15 repos to test truncated preview monkeypatch.setattr( import_repos_mod, "_get_importer", @@ -1361,6 +1376,7 @@ def fetch_repos( ) -> t.Iterator[RemoteRepo]: yield _make_repo("repo1") + # Mock _get_importer: return MockImporter to test config load 
error handling monkeypatch.setattr( import_repos_mod, "_get_importer", @@ -1445,6 +1461,7 @@ def fetch_repos( ) -> t.Iterator[RemoteRepo]: yield _make_repo("myrepo") + # Mock _get_importer: return MockImporter to verify default SSH URL output monkeypatch.setattr( import_repos_mod, "_get_importer", @@ -1506,6 +1523,7 @@ def fetch_repos( ) -> t.Iterator[RemoteRepo]: yield _make_repo("myrepo") + # Mock _get_importer: return MockImporter to verify HTTPS URL output monkeypatch.setattr( import_repos_mod, "_get_importer", @@ -1600,6 +1618,7 @@ def fetch_repos( ) -> t.Iterator[RemoteRepo]: yield _make_repo("repo1") + # Mock _get_importer: return MockImporter to test non-YAML config rejection monkeypatch.setattr( import_repos_mod, "_get_importer", @@ -1655,13 +1674,14 @@ def fetch_repos( ) -> t.Iterator[RemoteRepo]: yield _make_repo("repo1") + # Mock _get_importer: return MockImporter to isolate config resolution logic monkeypatch.setattr( import_repos_mod, "_get_importer", lambda *args, **kwargs: MockImporter(), ) - # Mock _resolve_config_file to raise MultipleConfigWarning + # Mock _resolve_config_file: raise MultipleConfigWarning to test error handling def raise_multiple_config(_: str | None) -> pathlib.Path: raise MultipleConfigWarning(MultipleConfigWarning.message) @@ -1718,6 +1738,7 @@ def fetch_repos( ) -> t.Iterator[RemoteRepo]: yield _make_repo("repo1") + # Mock _get_importer: return MockImporter to test invalid limit error handling monkeypatch.setattr( import_repos_mod, "_get_importer", @@ -1809,6 +1830,7 @@ def fetch_repos( ) -> t.Iterator[RemoteRepo]: yield _make_repo("repo1") + # Mock _get_importer: return MockImporter to test successful exit code monkeypatch.setattr( import_repos_mod, "_get_importer", @@ -1865,6 +1887,7 @@ def fetch_repos( ) -> t.Iterator[RemoteRepo]: yield _make_repo("repo1") + # Mock _get_importer: return MockImporter to test non-dict config rejection monkeypatch.setattr( import_repos_mod, "_get_importer", @@ -2158,6 +2181,7 @@ def 
fetch_repos( ) -> t.Iterator[RemoteRepo]: return iter([]) + # Mock _get_importer: return MockImporter to test language warning behavior monkeypatch.setattr( import_repos_mod, "_get_importer", @@ -2272,6 +2296,7 @@ def fetch_repos( ) -> t.Iterator[RemoteRepo]: return iter([]) + # Mock _get_importer: return MockImporter to test unsupported filter warnings monkeypatch.setattr( import_repos_mod, "_get_importer", From ceb5628af5bb159e3e4a2f59632da96f5b3108bb Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 14 Feb 2026 17:51:27 -0600 Subject: [PATCH 073/109] docs(CHANGES[import]): Restructure changelog for v1.55.x release why: Headline feature was buried below sub-features, missing user-facing behaviors, and long commands were hard to scan on one line. what: - Reorder: main "New command: vcspull import" entry now leads - Add missing behaviors: confirmation prompt, duplicate skip, exit codes - Add bug fix: atomic config writes via temp-file-then-rename - Split all CLI examples with \ continuation for readability --- CHANGES | 114 +++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 71 insertions(+), 43 deletions(-) diff --git a/CHANGES b/CHANGES index f58d20a32..96ebd227a 100644 --- a/CHANGES +++ b/CHANGES @@ -35,90 +35,118 @@ _Notes on upcoming releases will be added here_ ### New features -#### `vcspull import`: Default to SSH clone URLs (#510) +#### New command: `vcspull import` (#510) -`vcspull import` now uses SSH clone URLs by default. Use `--https` -to get HTTPS URLs instead: +Import repositories from GitHub, GitLab, Codeberg/Gitea/Forgejo, and AWS +CodeCommit directly into your vcspull configuration. 
+ +Import a user's repositories: ```console -$ vcspull import github torvalds -w ~/repos/linux --mode user +$ vcspull import github torvalds \ + -w ~/repos/linux \ + --mode user ``` +Import an organization's repositories: + ```console -$ vcspull import github torvalds -w ~/repos/linux --mode user --https +$ vcspull import github django \ + -w ~/study/python \ + --mode org ``` -#### `vcspull import`: GitLab subgroups map to workspace roots (#510) +Search and import repositories: -For GitLab organization/group imports, subgroup namespaces are now preserved -under the selected workspace root by default. +```console +$ vcspull import github "machine learning" \ + -w ~/ml-repos \ + --mode search \ + --min-stars 1000 +``` -Example: +Use with self-hosted GitLab: ```console -$ vcspull import gitlab vcs-python-group-test -w ~/projects/python --mode org +$ vcspull import gitlab myuser \ + -w ~/work \ + --url https://gitlab.company.com ``` -This writes repositories into workspace sections like: +Import from AWS CodeCommit: -- `~/projects/python/` -- `~/projects/python//` -- `~/projects/python///` +```console +$ vcspull import codecommit \ + -w ~/work/aws \ + --region us-east-1 +``` -Use `--flatten-groups` to import all subgroup repositories into a single -workspace root: +Preview without writing (dry run): ```console -$ vcspull import gitlab vcs-python-group-test -w ~/projects/python --mode org --flatten-groups +$ vcspull import codeberg user \ + -w ~/oss \ + --dry-run ``` -#### `vcspull import` command for remote repository discovery (#510) - -Import repositories from GitHub, GitLab, Codeberg/Gitea/Forgejo, and AWS -CodeCommit directly into your vcspull configuration. 
+**Key features:** -Import a user's repositories: +- Service aliases: `gh`, `gl`, `cb`, `cc`, `aws` +- Filtering: `--language`, `--topics`, `--min-stars`, `--archived`, `--forks` +- Output modes: human-readable (default), `--json`, `--ndjson` +- Interactive confirmation before writing; use `--yes`/`-y` to skip +- Repositories already in the config are detected and skipped +- Non-zero exit code on errors (for CI/automation) +- No new dependencies (uses stdlib `urllib` for HTTP) -```console -$ vcspull import github torvalds -w ~/repos/linux --mode user -``` +#### `vcspull import`: SSH clone URLs by default (#510) -Import an organization's repositories: +Clone URLs default to SSH. Use `--https` to get HTTPS URLs instead: ```console -$ vcspull import github django -w ~/study/python --mode org +$ vcspull import github torvalds \ + -w ~/repos/linux \ + --mode user ``` -Search and import repositories: - ```console -$ vcspull import github "machine learning" -w ~/ml-repos --mode search --min-stars 1000 +$ vcspull import github torvalds \ + -w ~/repos/linux \ + --mode user \ + --https ``` -Use with self-hosted GitLab: +#### `vcspull import`: GitLab subgroups map to workspace roots (#510) + +For GitLab organization/group imports, subgroup namespaces are preserved +under the workspace root by default: ```console -$ vcspull import gitlab myuser -w ~/work --url https://gitlab.company.com +$ vcspull import gitlab vcs-python-group-test \ + -w ~/projects/python \ + --mode org ``` -Preview without writing (dry run): +This writes repositories into workspace sections like: -```console -$ vcspull import codeberg user -w ~/oss --dry-run -``` +- `~/projects/python/` +- `~/projects/python//` +- `~/projects/python///` -Import from AWS CodeCommit: +Use `--flatten-groups` to collapse subgroup repositories into a single +workspace root: ```console -$ vcspull import codecommit -w ~/work/aws --region us-east-1 +$ vcspull import gitlab vcs-python-group-test \ + -w ~/projects/python \ + --mode 
org \ + --flatten-groups ``` -Features: +### Bug fixes -- Service aliases: `gh`, `gl`, `cb`, `cc`, `aws` -- Filtering: `--language`, `--topics`, `--min-stars`, `--archived`, `--forks` -- Output modes: human-readable (default), `--json`, `--ndjson` -- No new dependencies (uses stdlib `urllib` for HTTP) +- Config writes now use atomic temp-file-then-rename to prevent data loss + during interrupted writes (#510) ### Tests From d43aac9ab67ee12d62fc6499b44a3bae3a89d363 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 14 Feb 2026 18:03:44 -0600 Subject: [PATCH 074/109] docs(cli/import[pages]): Add CLI and API documentation for vcspull import why: Every other CLI command has user-facing and API doc pages; import was missing. what: - Add docs/cli/import.md with usage, services, modes, filtering, output - Add docs/api/cli/import.md with automodule for vcspull.cli.import_repos - Add import to toctrees in docs/cli/index.md and docs/api/cli/index.md - Update docs/cli/add.md migration note to distinguish old import (v1.36-v1.39) from new import (v1.55+) and link to cli-import --- docs/api/cli/import.md | 8 ++ docs/api/cli/index.md | 1 + docs/cli/add.md | 20 ++-- docs/cli/import.md | 213 +++++++++++++++++++++++++++++++++++++++++ docs/cli/index.md | 3 +- 5 files changed, 237 insertions(+), 8 deletions(-) create mode 100644 docs/api/cli/import.md create mode 100644 docs/cli/import.md diff --git a/docs/api/cli/import.md b/docs/api/cli/import.md new file mode 100644 index 000000000..34a0643fc --- /dev/null +++ b/docs/api/cli/import.md @@ -0,0 +1,8 @@ +# vcspull import - `vcspull.cli.import_repos` + +```{eval-rst} +.. 
automodule:: vcspull.cli.import_repos + :members: + :show-inheritance: + :undoc-members: +``` diff --git a/docs/api/cli/index.md b/docs/api/cli/index.md index 8ca2beb71..5093c2fe5 100644 --- a/docs/api/cli/index.md +++ b/docs/api/cli/index.md @@ -10,6 +10,7 @@ sync add +import discover list search diff --git a/docs/cli/add.md b/docs/cli/add.md index df96dc0f8..3b80801b1 100644 --- a/docs/cli/add.md +++ b/docs/cli/add.md @@ -8,8 +8,9 @@ merges duplicate workspace roots by default, and prompts before writing unless you pass `--yes`. ```{note} -This command replaces the manual import functionality from `vcspull import`. -For bulk scanning of existing repositories, see {ref}`cli-discover`. +This command replaces the old `vcspull import ` from v1.36--v1.39. +For bulk scanning of local repositories, see {ref}`cli-discover`. +For bulk import from remote services (GitHub, GitLab, etc.), see {ref}`cli-import`. ``` ## Command @@ -114,10 +115,10 @@ a summary of the merge. Prefer to inspect duplicates yourself? Add 2. Run `vcspull list` to verify the new entry (see {ref}`cli-list`). 3. Run `vcspull sync` to clone or update the working tree (see {ref}`cli-sync`). -## Migration from vcspull import +## Migration from the old vcspull import -If you previously used `vcspull import `, switch to the path-first -workflow: +The `vcspull import ` command from v1.36--v1.39 has been replaced +by `vcspull add`: ```diff - $ vcspull import flask https://github.com/pallets/flask.git -c ~/.vcspull.yaml @@ -126,10 +127,15 @@ workflow: Key differences: -- `vcspull add` now derives the name from the filesystem unless you pass - `--name`. +- `vcspull add` derives the name from the filesystem unless you pass `--name`. - The parent directory becomes the workspace automatically; use `--workspace` to override. - Use `--url` to record a remote when the checkout does not have one. 
+```{note} +Starting with v1.55, `vcspull import` is a *different* command that bulk-imports +repositories from remote services (GitHub, GitLab, etc.). See {ref}`cli-import` +for details. +``` + [pip vcs url]: https://pip.pypa.io/en/stable/topics/vcs-support/ diff --git a/docs/cli/import.md b/docs/cli/import.md new file mode 100644 index 000000000..a0ea2656a --- /dev/null +++ b/docs/cli/import.md @@ -0,0 +1,213 @@ +(cli-import)= + +# vcspull import + +The `vcspull import` command bulk-imports repositories from remote hosting +services into your vcspull configuration. It connects to the service API, +fetches a list of repositories, and writes them to your config file in a single +step. + +Supported services: **GitHub**, **GitLab**, **Codeberg**, **Gitea**, +**Forgejo**, and **AWS CodeCommit**. + +## Command + +```{eval-rst} +.. argparse:: + :module: vcspull.cli + :func: create_parser + :prog: vcspull + :path: import +``` + +## Basic usage + +Import all repositories for a GitHub user into a workspace: + +```vcspull-console +$ vcspull import github myuser -w ~/code/ +→ Fetching repositories from GitHub... +✓ Found 12 repositories + + project-a [Python] + + project-b [Rust] ★42 + + dotfiles + ... and 9 more +Import 12 repositories to ~/.vcspull.yaml? [y/N]: y +✓ Added 12 repositories to ~/.vcspull.yaml +``` + +## Supported services + +| Service | Aliases | Self-hosted | Auth env var | +|------------|------------------|-------------|--------------------------| +| GitHub | `github`, `gh` | `--url` | `GITHUB_TOKEN` | +| GitLab | `gitlab`, `gl` | `--url` | `GITLAB_TOKEN` | +| Codeberg | `codeberg`, `cb` | No | `CODEBERG_TOKEN` | +| Gitea | `gitea` | `--url` (required) | `GITEA_TOKEN` | +| Forgejo | `forgejo` | `--url` (required) | `FORGEJO_TOKEN` | +| CodeCommit | `codecommit`, `cc`, `aws` | N/A | AWS credentials | + +For Gitea and Forgejo, `--url` is required because there is no default +instance. 
+ +## Import modes + +### User mode (default) + +Fetch all repositories owned by a user: + +```console +$ vcspull import gh myuser -w ~/code/ +``` + +### Organization mode + +Fetch repositories belonging to an organization or group: + +```console +$ vcspull import gh my-org --mode org -w ~/code/ +``` + +For GitLab, subgroups are supported with slash notation: + +```console +$ vcspull import gl my-group/sub-group --mode org -w ~/code/ +``` + +### Search mode + +Search for repositories matching a query: + +```console +$ vcspull import gh django --mode search -w ~/code/ --min-stars 100 +``` + +## Filtering + +Narrow results with filtering flags: + +```console +$ vcspull import gh myuser -w ~/code/ --language python +``` + +```console +$ vcspull import gh myuser -w ~/code/ --topics cli,automation +``` + +```console +$ vcspull import gh django --mode search -w ~/code/ --min-stars 50 +``` + +Include archived or forked repositories (excluded by default): + +```console +$ vcspull import gh myuser -w ~/code/ --archived --forks +``` + +Limit the number of repositories fetched: + +```console +$ vcspull import gh myuser -w ~/code/ --limit 50 +``` + +```{note} +Not all filters work with every service. For example, `--language` may not +return results for GitLab or CodeCommit because those APIs don't expose +language metadata. vcspull warns when a filter is unlikely to work. 
+``` + +## Output formats + +Human-readable output (default): + +```console +$ vcspull import gh myuser -w ~/code/ +``` + +JSON for automation: + +```console +$ vcspull import gh myuser -w ~/code/ --json +``` + +NDJSON for streaming: + +```console +$ vcspull import gh myuser -w ~/code/ --ndjson +``` + +## Dry runs and confirmation + +Preview what would be imported without writing to the config file: + +```console +$ vcspull import gh myuser -w ~/code/ --dry-run +``` + +Skip the confirmation prompt (useful for scripts): + +```console +$ vcspull import gh myuser -w ~/code/ --yes +``` + +## Configuration file selection + +vcspull writes to `~/.vcspull.yaml` by default. Override with `-f/--file`: + +```console +$ vcspull import gh myuser -w ~/code/ -f ~/configs/github.yaml +``` + +## Protocol selection + +SSH clone URLs are used by default. Switch to HTTPS with `--https`: + +```console +$ vcspull import gh myuser -w ~/code/ --https +``` + +## Group flattening + +When importing a GitLab group with `--mode org`, vcspull preserves subgroup +structure as nested workspace directories by default. Use `--flatten-groups` to +place all repositories directly in the base workspace: + +```console +$ vcspull import gl my-group --mode org -w ~/code/ --flatten-groups +``` + +## AWS CodeCommit + +CodeCommit does not require a target argument. Use `--region` and `--profile` +to select the AWS environment: + +```console +$ vcspull import codecommit -w ~/code/ --region us-east-1 --profile work +``` + +## Self-hosted instances + +Point to a self-hosted GitHub Enterprise, GitLab, Gitea, or Forgejo instance +with `--url`: + +```console +$ vcspull import gitea myuser -w ~/code/ --url https://git.example.com +``` + +## Authentication + +vcspull reads API tokens from environment variables by default. You can also +pass a token directly with `--token`, though environment variables are preferred +for security: + +```console +$ export GITHUB_TOKEN=ghp_... 
+$ vcspull import gh myuser -w ~/code/ +``` + +## After importing + +1. Run `vcspull fmt --write` to normalize and sort the configuration (see + {ref}`cli-fmt`). +2. Run `vcspull list` to verify the imported entries (see {ref}`cli-list`). +3. Run `vcspull sync` to clone the repositories (see {ref}`cli-sync`). diff --git a/docs/cli/index.md b/docs/cli/index.md index 977483710..d1d571d15 100644 --- a/docs/cli/index.md +++ b/docs/cli/index.md @@ -8,6 +8,7 @@ sync add +import discover list search @@ -36,5 +37,5 @@ completion :nosubcommands: subparser_name : @replace - See :ref:`cli-sync`, :ref:`cli-add`, :ref:`cli-discover`, :ref:`cli-list`, :ref:`cli-search`, :ref:`cli-status`, :ref:`cli-fmt` + See :ref:`cli-sync`, :ref:`cli-add`, :ref:`cli-import`, :ref:`cli-discover`, :ref:`cli-list`, :ref:`cli-search`, :ref:`cli-status`, :ref:`cli-fmt` ``` From 1f4287982130a141e0cfe490d6259e7e5dd4c5be Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 14 Feb 2026 18:15:21 -0600 Subject: [PATCH 075/109] docs(import[docstrings]): Add auth details to importer docstrings why: Users reading API reference docs need to know which tokens, scopes, and env vars each importer requires. 
what: - Add Notes sections to GitHubImporter, GitLabImporter, GiteaImporter, and CodeCommitImporter __init__ docstrings with token types, required scopes, env var names, and creation URLs - Expand import_repos.py module docstring to list supported services and mention authentication mechanism --- src/vcspull/_internal/remotes/codecommit.py | 9 +++++++++ src/vcspull/_internal/remotes/gitea.py | 13 +++++++++++++ src/vcspull/_internal/remotes/github.py | 11 +++++++++++ src/vcspull/_internal/remotes/gitlab.py | 9 +++++++++ src/vcspull/cli/import_repos.py | 7 ++++++- 5 files changed, 48 insertions(+), 1 deletion(-) diff --git a/src/vcspull/_internal/remotes/codecommit.py b/src/vcspull/_internal/remotes/codecommit.py index ba22856c0..5814cf6bc 100644 --- a/src/vcspull/_internal/remotes/codecommit.py +++ b/src/vcspull/_internal/remotes/codecommit.py @@ -47,6 +47,15 @@ def __init__( AWS region. If not provided, uses AWS CLI default. profile : str | None AWS profile name. If not provided, uses default profile. + + Notes + ----- + Uses AWS CLI credentials (``aws configure``). No token environment + variable is used. IAM policy must include + ``codecommit:ListRepositories`` (resource ``*``) and + ``codecommit:BatchGetRepositories``. + + Requires AWS CLI: ``pip install awscli``. """ self._region = region self._profile = profile diff --git a/src/vcspull/_internal/remotes/gitea.py b/src/vcspull/_internal/remotes/gitea.py index 4f64ae1f7..45b416911 100644 --- a/src/vcspull/_internal/remotes/gitea.py +++ b/src/vcspull/_internal/remotes/gitea.py @@ -53,6 +53,19 @@ def __init__( Base URL for the Gitea instance. Required for generic Gitea. Defaults to Codeberg if not specified. 
+ Notes + ----- + Token lookup is hostname-aware: + + - Codeberg (codeberg.org): ``CODEBERG_TOKEN``, falls back to + ``GITEA_TOKEN`` + - Forgejo (hostname contains "forgejo"): ``FORGEJO_TOKEN``, falls back + to ``GITEA_TOKEN`` + - Other Gitea instances: ``GITEA_TOKEN`` + + Create a scoped token with at least ``read:repository`` permission at + ``https:///user/settings/applications``. + Examples -------- >>> importer = GiteaImporter(token="fake", base_url="https://codeberg.org") diff --git a/src/vcspull/_internal/remotes/github.py b/src/vcspull/_internal/remotes/github.py index ec2e9ed02..ed43addb9 100644 --- a/src/vcspull/_internal/remotes/github.py +++ b/src/vcspull/_internal/remotes/github.py @@ -52,6 +52,17 @@ def __init__( base_url : str | None Base URL for GitHub Enterprise. Defaults to api.github.com. + Notes + ----- + Authentication is optional for public repositories. For private + repositories or higher rate limits, set ``GITHUB_TOKEN`` or ``GH_TOKEN``. + + Classic PAT: no scopes needed for public repos; ``repo`` scope for + private. Fine-grained PAT: "Metadata: Read-only" for public; add + "Contents: Read-only" for private repos. + + Create a token at https://github.com/settings/tokens. + Examples -------- >>> importer = GitHubImporter(token="fake") diff --git a/src/vcspull/_internal/remotes/gitlab.py b/src/vcspull/_internal/remotes/gitlab.py index 5f2d6d15e..aea8c4403 100644 --- a/src/vcspull/_internal/remotes/gitlab.py +++ b/src/vcspull/_internal/remotes/gitlab.py @@ -53,6 +53,15 @@ def __init__( base_url : str | None Base URL for self-hosted GitLab instances. Defaults to gitlab.com. + Notes + ----- + Set ``GITLAB_TOKEN`` or ``GL_TOKEN`` for authentication. A token with + the ``read_api`` scope is the minimum for listing projects. Search mode + **requires** authentication. + + Create a token at + https://gitlab.com/-/user_settings/personal_access_tokens. 
+ Examples -------- >>> importer = GitLabImporter(token="fake") diff --git a/src/vcspull/cli/import_repos.py b/src/vcspull/cli/import_repos.py index 82d8b0d93..89ac0424d 100644 --- a/src/vcspull/cli/import_repos.py +++ b/src/vcspull/cli/import_repos.py @@ -1,4 +1,9 @@ -"""Import repositories from remote services for vcspull.""" +"""Import repositories from remote services for vcspull. + +Supports GitHub, GitLab, Codeberg, Gitea, Forgejo, and AWS CodeCommit. +Authentication is via environment variables (``GITHUB_TOKEN``, ``GITLAB_TOKEN``, +etc.) or the ``--token`` CLI flag. See each importer class for required scopes. +""" from __future__ import annotations From 5d5e1413f996400c1fa776a133ba3a53da40d662 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 14 Feb 2026 18:15:28 -0600 Subject: [PATCH 076/109] docs(cli/import[auth]): Expand authentication guide with per-service details why: The existing auth section only showed a single GITHUB_TOKEN example. Users need to know env vars, token types, scopes, and creation URLs for each supported service. what: - Add per-service auth subsections for GitHub, GitLab, Codeberg, Gitea, Forgejo, and AWS CodeCommit with env vars, token types, required scopes, creation URLs, and example commands - Add summary table comparing all services at a glance - Update supported services table to show fallback env vars --- docs/cli/import.md | 108 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 97 insertions(+), 11 deletions(-) diff --git a/docs/cli/import.md b/docs/cli/import.md index a0ea2656a..021d6569f 100644 --- a/docs/cli/import.md +++ b/docs/cli/import.md @@ -38,14 +38,14 @@ Import 12 repositories to ~/.vcspull.yaml? 
[y/N]: y ## Supported services -| Service | Aliases | Self-hosted | Auth env var | -|------------|------------------|-------------|--------------------------| -| GitHub | `github`, `gh` | `--url` | `GITHUB_TOKEN` | -| GitLab | `gitlab`, `gl` | `--url` | `GITLAB_TOKEN` | -| Codeberg | `codeberg`, `cb` | No | `CODEBERG_TOKEN` | -| Gitea | `gitea` | `--url` (required) | `GITEA_TOKEN` | -| Forgejo | `forgejo` | `--url` (required) | `FORGEJO_TOKEN` | -| CodeCommit | `codecommit`, `cc`, `aws` | N/A | AWS credentials | +| Service | Aliases | Self-hosted | Auth env var(s) | +|------------|------------------|----------------------|-----------------------------------| +| GitHub | `github`, `gh` | `--url` | `GITHUB_TOKEN` / `GH_TOKEN` | +| GitLab | `gitlab`, `gl` | `--url` | `GITLAB_TOKEN` / `GL_TOKEN` | +| Codeberg | `codeberg`, `cb` | No | `CODEBERG_TOKEN` / `GITEA_TOKEN` | +| Gitea | `gitea` | `--url` (required) | `GITEA_TOKEN` | +| Forgejo | `forgejo` | `--url` (required) | `FORGEJO_TOKEN` / `GITEA_TOKEN` | +| CodeCommit | `codecommit`, `cc`, `aws` | N/A | AWS CLI credentials | For Gitea and Forgejo, `--url` is required because there is no default instance. @@ -196,15 +196,101 @@ $ vcspull import gitea myuser -w ~/code/ --url https://git.example.com ## Authentication -vcspull reads API tokens from environment variables by default. You can also -pass a token directly with `--token`, though environment variables are preferred -for security: +vcspull reads API tokens from environment variables. Use `--token` to override. +Environment variables are preferred for security. 
+ +### GitHub + +- **Env vars**: `GITHUB_TOKEN` (primary), `GH_TOKEN` (fallback) +- **Token type**: Personal access token (classic) or fine-grained PAT +- **Permissions**: + - Classic PAT: no scopes needed for public repos; `repo` scope for private + repos; `read:org` for org repos + - Fine-grained PAT: "Metadata: Read-only" for public; add "Contents: + Read-only" for private +- **Create at**: ```console $ export GITHUB_TOKEN=ghp_... $ vcspull import gh myuser -w ~/code/ ``` +### GitLab + +- **Env vars**: `GITLAB_TOKEN` (primary), `GL_TOKEN` (fallback) +- **Token type**: Personal access token +- **Scope**: `read_api` (minimum for listing projects; **required** for search + mode) +- **Create at**: + (self-hosted: `https:///-/user_settings/personal_access_tokens`) + +```console +$ export GITLAB_TOKEN=glpat-... +$ vcspull import gl myuser -w ~/code/ +``` + +### Codeberg + +- **Env vars**: `CODEBERG_TOKEN` (primary), `GITEA_TOKEN` (fallback) +- **Token type**: API token +- **Scope**: no scopes needed for public repos; token required for private repos +- **Create at**: + +```console +$ export CODEBERG_TOKEN=... +$ vcspull import codeberg myuser -w ~/code/ +``` + +### Gitea + +- **Env var**: `GITEA_TOKEN` +- **Token type**: API token with scoped permissions +- **Scope**: `read:repository` (minimum for listing repos) +- **Create at**: `https:///user/settings/applications` + +```console +$ export GITEA_TOKEN=... +$ vcspull import gitea myuser -w ~/code/ --url https://git.example.com +``` + +### Forgejo + +- **Env vars**: `FORGEJO_TOKEN` (primary; matched when hostname contains + "forgejo"), `GITEA_TOKEN` (fallback) +- **Token type**: API token +- **Scope**: `read:repository` +- **Create at**: `https:///user/settings/applications` + +```console +$ export FORGEJO_TOKEN=... 
+$ vcspull import forgejo myuser -w ~/code/ --url https://forgejo.example.com +``` + +### AWS CodeCommit + +- **Auth**: AWS CLI credentials (`aws configure`) — no token env var +- **CLI args**: `--region`, `--profile` +- **IAM permissions required**: + - `codecommit:ListRepositories` (resource: `*`) + - `codecommit:BatchGetRepositories` (resource: repo ARNs or `*`) +- **Dependency**: AWS CLI must be installed (`pip install awscli`) + +```console +$ aws configure +$ vcspull import codecommit -w ~/code/ --region us-east-1 +``` + +### Summary + +| Service | Env var(s) | Token type | Min scope / permissions | +|------------|----------------------------------|-----------------------|------------------------------------------------------------------| +| GitHub | `GITHUB_TOKEN` / `GH_TOKEN` | PAT (classic or fine) | None (public), `repo` (private) | +| GitLab | `GITLAB_TOKEN` / `GL_TOKEN` | PAT | `read_api` | +| Codeberg | `CODEBERG_TOKEN` / `GITEA_TOKEN` | API token | None (public), any token (private) | +| Gitea | `GITEA_TOKEN` | API token | `read:repository` | +| Forgejo | `FORGEJO_TOKEN` / `GITEA_TOKEN` | API token | `read:repository` | +| CodeCommit | AWS CLI credentials | IAM access key | `codecommit:ListRepositories`, `codecommit:BatchGetRepositories` | + ## After importing 1. Run `vcspull fmt --write` to normalize and sort the configuration (see From c0a054d2edb6c682e9c124c5868189acdd8f0e81 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 14 Feb 2026 18:18:45 -0600 Subject: [PATCH 077/109] docs(cli/import[help]): Protect --flatten-groups with backticks in description why: RST smart typography converts bare -- to an en-dash in Sphinx docs. 
what: - Wrap --flatten-groups in double backticks to render as inline literal --- src/vcspull/cli/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/vcspull/cli/__init__.py b/src/vcspull/cli/__init__.py index ba5886320..0c5cf0e96 100644 --- a/src/vcspull/cli/__init__.py +++ b/src/vcspull/cli/__init__.py @@ -252,7 +252,7 @@ def build_description( For GitLab, you can specify subgroups using slash notation (e.g., parent/child). In org mode, subgroup paths are preserved under the workspace root by - default; use --flatten-groups to collapse them into a single workspace. + default; use ``--flatten-groups`` to collapse them into a single workspace. """, ( ( From 443f6473d34d068814853aac71674a1b626f2db6 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 14 Feb 2026 18:49:10 -0600 Subject: [PATCH 078/109] refactor(cli/import[common]): Extract shared parsers and _run_import to import_cmd/_common why: Preparing for per-service subparsers by extracting shared infrastructure into its own module. This is the foundation that all service handlers will delegate to. what: - Create import_cmd package with _common.py containing parent parser factories - Extract _resolve_config_file and _run_import from import_repos.py - Define Importer Protocol for structural typing of service importers - Parent parsers: shared (workspace/filtering/output), token, mode, target --- src/vcspull/cli/import_cmd/__init__.py | 38 ++ src/vcspull/cli/import_cmd/_common.py | 627 +++++++++++++++++++++++++ 2 files changed, 665 insertions(+) create mode 100644 src/vcspull/cli/import_cmd/__init__.py create mode 100644 src/vcspull/cli/import_cmd/_common.py diff --git a/src/vcspull/cli/import_cmd/__init__.py b/src/vcspull/cli/import_cmd/__init__.py new file mode 100644 index 000000000..d3c1dd9bf --- /dev/null +++ b/src/vcspull/cli/import_cmd/__init__.py @@ -0,0 +1,38 @@ +"""``vcspull import`` subcommand package. 
+ +Each supported service (GitHub, GitLab, Codeberg, Gitea, Forgejo, +CodeCommit) is registered as a proper argparse subcommand so that +``vcspull import --help`` shows only the flags relevant to +that service. +""" + +from __future__ import annotations + +import argparse + +from .codeberg import create_codeberg_subparser +from .codecommit import create_codecommit_subparser +from .forgejo import create_forgejo_subparser +from .gitea import create_gitea_subparser +from .github import create_github_subparser +from .gitlab import create_gitlab_subparser + +__all__ = ["create_import_subparser"] + + +def create_import_subparser(parser: argparse.ArgumentParser) -> None: + """Wire per-service subparsers into the ``vcspull import`` parser. + + Parameters + ---------- + parser : argparse.ArgumentParser + The ``import`` parser to attach service subcommands to. + """ + service_subparsers = parser.add_subparsers(dest="import_service") + + create_github_subparser(service_subparsers) + create_gitlab_subparser(service_subparsers) + create_codeberg_subparser(service_subparsers) + create_gitea_subparser(service_subparsers) + create_forgejo_subparser(service_subparsers) + create_codecommit_subparser(service_subparsers) diff --git a/src/vcspull/cli/import_cmd/_common.py b/src/vcspull/cli/import_cmd/_common.py new file mode 100644 index 000000000..32bc64426 --- /dev/null +++ b/src/vcspull/cli/import_cmd/_common.py @@ -0,0 +1,627 @@ +"""Shared infrastructure for the ``vcspull import`` subcommand tree. + +Provides parent argparse parsers (for flag composition via ``parents=[]``) +and the ``_run_import()`` function that all per-service handlers delegate to. 
+""" + +from __future__ import annotations + +import argparse +import logging +import pathlib +import sys +import typing as t + +from vcspull._internal.private_path import PrivatePath +from vcspull._internal.remotes import ( + AuthenticationError, + ConfigurationError, + ImportMode, + ImportOptions, + NotFoundError, + RateLimitError, + RemoteImportError, + RemoteRepo, + ServiceUnavailableError, +) +from vcspull.config import ( + find_home_config_files, + save_config_yaml, + workspace_root_label, +) +from vcspull.exc import MultipleConfigWarning + +from .._colors import Colors, get_color_mode +from .._output import OutputFormatter, get_output_mode + +log = logging.getLogger(__name__) + + +class Importer(t.Protocol): + """Structural type for any remote service importer.""" + + service_name: str + + def fetch_repos(self, options: ImportOptions) -> t.Iterator[RemoteRepo]: + """Yield repositories matching *options*.""" + ... + + +# --------------------------------------------------------------------------- +# Parent parser factories +# --------------------------------------------------------------------------- + + +def _create_shared_parent() -> argparse.ArgumentParser: + """Create parent parser with workspace, filtering, and output flags. + + Returns + ------- + argparse.ArgumentParser + Parent parser (``add_help=False``) carrying flags shared by all + import service subcommands. 
+ """ + parent = argparse.ArgumentParser(add_help=False) + parent.add_argument( + "-w", + "--workspace", + dest="workspace", + metavar="DIR", + default=None, + help="Workspace root directory (REQUIRED)", + ) + + # Filtering options + filter_group = parent.add_argument_group("filtering") + filter_group.add_argument( + "-l", + "--language", + dest="language", + metavar="LANG", + help="Filter by programming language", + ) + filter_group.add_argument( + "--topics", + dest="topics", + metavar="TOPICS", + help="Filter by topics (comma-separated)", + ) + filter_group.add_argument( + "--min-stars", + dest="min_stars", + type=int, + default=0, + metavar="N", + help="Minimum stars (for search mode)", + ) + filter_group.add_argument( + "--archived", + dest="include_archived", + action="store_true", + help="Include archived repositories", + ) + filter_group.add_argument( + "--forks", + dest="include_forks", + action="store_true", + help="Include forked repositories", + ) + filter_group.add_argument( + "--limit", + dest="limit", + type=int, + default=100, + metavar="N", + help="Maximum repositories to fetch (default: 100)", + ) + + # Output options + output_group = parent.add_argument_group("output") + output_group.add_argument( + "-f", + "--file", + dest="config", + metavar="FILE", + help="Config file to write to (default: ~/.vcspull.yaml)", + ) + output_group.add_argument( + "--dry-run", + "-n", + action="store_true", + help="Preview without writing to config file", + ) + output_group.add_argument( + "--yes", + "-y", + action="store_true", + help="Skip confirmation prompt", + ) + output_group.add_argument( + "--json", + action="store_true", + dest="output_json", + help="Output as JSON", + ) + output_group.add_argument( + "--ndjson", + action="store_true", + dest="output_ndjson", + help="Output as NDJSON (one JSON per line)", + ) + output_group.add_argument( + "--https", + action="store_true", + dest="use_https", + help="Use HTTPS clone URLs instead of SSH (default: SSH)", + ) 
+ output_group.add_argument( + "--color", + choices=["auto", "always", "never"], + default="auto", + help="When to use colors (default: auto)", + ) + return parent + + +def _create_token_parent() -> argparse.ArgumentParser: + """Create parent parser with the ``--token`` flag. + + Returns + ------- + argparse.ArgumentParser + Parent parser carrying ``--token``. + """ + parent = argparse.ArgumentParser(add_help=False) + parent.add_argument( + "--token", + dest="token", + metavar="TOKEN", + help="API token (overrides env var; prefer env var for security)", + ) + return parent + + +def _create_mode_parent() -> argparse.ArgumentParser: + """Create parent parser with the ``-m/--mode`` flag. + + Returns + ------- + argparse.ArgumentParser + Parent parser carrying ``-m/--mode``. + """ + parent = argparse.ArgumentParser(add_help=False) + parent.add_argument( + "-m", + "--mode", + dest="mode", + choices=["user", "org", "search"], + default="user", + help="Import mode: user (default), org, or search", + ) + return parent + + +def _create_target_parent() -> argparse.ArgumentParser: + """Create parent parser with the required ``target`` positional. + + Returns + ------- + argparse.ArgumentParser + Parent parser carrying the ``target`` positional argument. + """ + parent = argparse.ArgumentParser(add_help=False) + parent.add_argument( + "target", + metavar="TARGET", + help=( + "User, org name, or search query. " + "For GitLab, supports subgroups with slash notation (e.g., parent/child)." + ), + ) + return parent + + +# --------------------------------------------------------------------------- +# Config resolution +# --------------------------------------------------------------------------- + + +def _resolve_config_file(config_path_str: str | None) -> pathlib.Path: + """Resolve the config file path. 
+ + Parameters + ---------- + config_path_str : str | None + Config file path from user, or None for default + + Returns + ------- + pathlib.Path + Resolved config file path + """ + if config_path_str: + path = pathlib.Path(config_path_str).expanduser().resolve() + if path.suffix.lower() not in {".yaml", ".yml"}: + msg = f"Only YAML config files are supported, got: {path.suffix}" + raise ValueError(msg) + return path + + home_configs = find_home_config_files(filetype=["yaml"]) + if home_configs: + return home_configs[0] + + return pathlib.Path.home() / ".vcspull.yaml" + + +# --------------------------------------------------------------------------- +# Core import logic +# --------------------------------------------------------------------------- + + +def _run_import( + importer: Importer, + *, + service_name: str, + target: str, + workspace: str, + mode: str, + language: str | None, + topics: str | None, + min_stars: int, + include_archived: bool, + include_forks: bool, + limit: int, + config_path_str: str | None, + dry_run: bool, + yes: bool, + output_json: bool, + output_ndjson: bool, + color: str, + use_https: bool = False, + flatten_groups: bool = False, +) -> int: + """Run the import workflow for a single service. + + This is the core fetch / preview / confirm / write logic shared by every + per-service handler. The caller is responsible for constructing the + *importer* instance; this function only orchestrates the import flow. + + Parameters + ---------- + importer : Importer + Already-constructed importer instance (any object satisfying + the :class:`Importer` protocol) + service_name : str + Canonical service name (e.g. 
``"github"``, ``"gitlab"``, ``"codecommit"``) + target : str + User, org, or search query + workspace : str + Workspace root directory + mode : str + Import mode (user, org, search) + language : str | None + Language filter + topics : str | None + Topics filter (comma-separated) + min_stars : int + Minimum stars filter + include_archived : bool + Include archived repositories + include_forks : bool + Include forked repositories + limit : int + Maximum repositories to fetch + config_path_str : str | None + Config file path + dry_run : bool + Preview without writing + yes : bool + Skip confirmation + output_json : bool + Output as JSON + output_ndjson : bool + Output as NDJSON + color : str + Color mode + use_https : bool + Use HTTPS clone URLs instead of SSH (default: False, i.e., SSH) + flatten_groups : bool + For GitLab org imports, flatten subgroup paths into base workspace + + Returns + ------- + int + 0 on success, 1 on error + """ + output_mode = get_output_mode(output_json, output_ndjson) + formatter = OutputFormatter(output_mode) + colors = Colors(get_color_mode(color)) + + # Build import options + import_mode = ImportMode(mode) + topic_list = ( + [topic.strip() for topic in topics.split(",") if topic.strip()] + if topics + else [] + ) + + try: + options = ImportOptions( + mode=import_mode, + target=target, + include_forks=include_forks, + include_archived=include_archived, + language=language, + topics=topic_list, + min_stars=min_stars, + limit=limit, + ) + except ValueError as exc_: + log.error("%s %s", colors.error("✗"), exc_) # noqa: TRY400 + return 1 + + # Warn if --language is used with services that don't return language info + if options.language and service_name in ("gitlab", "codecommit"): + log.warning( + "%s %s does not return language metadata; " + "--language filter may exclude all results", + colors.warning("!"), + importer.service_name, + ) + if options.topics and service_name == "codecommit": + log.warning( + "%s %s does not support topic 
filtering; " + "--topics filter may exclude all results", + colors.warning("!"), + importer.service_name, + ) + if options.min_stars > 0 and service_name == "codecommit": + log.warning( + "%s %s does not track star counts; " + "--min-stars filter may exclude all results", + colors.warning("!"), + importer.service_name, + ) + + # Resolve workspace path + workspace_path = pathlib.Path(workspace).expanduser().resolve() + cwd = pathlib.Path.cwd() + home = pathlib.Path.home() + + # Resolve config file + try: + config_file_path = _resolve_config_file(config_path_str) + except (ValueError, MultipleConfigWarning) as exc_: + log.error("%s %s", colors.error("✗"), exc_) # noqa: TRY400 + return 1 + display_config_path = str(PrivatePath(config_file_path)) + + # Fetch repositories + if output_mode.value == "human": + log.info( + "%s Fetching repositories from %s...", + colors.info("→"), + colors.highlight(importer.service_name), + ) + + repos: list[RemoteRepo] = [] + try: + for repo in importer.fetch_repos(options): + repos.append(repo) + + # Emit for JSON/NDJSON output + formatter.emit(repo.to_dict()) + + # Log progress for human output + if output_mode.value == "human" and len(repos) % 10 == 0: + log.info( + "%s Fetched %s repositories...", + colors.muted("•"), + colors.info(str(len(repos))), + ) + + except AuthenticationError as exc: + log.error( # noqa: TRY400 + "%s Authentication error: %s", colors.error("✗"), exc + ) + formatter.finalize() + return 1 + except RateLimitError as exc: + log.error( # noqa: TRY400 + "%s Rate limit exceeded: %s", colors.error("✗"), exc + ) + formatter.finalize() + return 1 + except NotFoundError as exc: + log.error("%s Not found: %s", colors.error("✗"), exc) # noqa: TRY400 + formatter.finalize() + return 1 + except ServiceUnavailableError as exc: + log.error( # noqa: TRY400 + "%s Service unavailable: %s", colors.error("✗"), exc + ) + formatter.finalize() + return 1 + except ConfigurationError as exc: + log.error( # noqa: TRY400 + "%s 
Configuration error: %s", colors.error("✗"), exc + ) + formatter.finalize() + return 1 + except RemoteImportError as exc: + log.error("%s Error: %s", colors.error("✗"), exc) # noqa: TRY400 + formatter.finalize() + return 1 + + if not repos: + if output_mode.value == "human": + log.info( + "%s No repositories found matching criteria.", + colors.warning("!"), + ) + formatter.finalize() + return 0 + + if output_mode.value == "human": + log.info( + "\n%s Found %s repositories", + colors.success("✓"), + colors.info(str(len(repos))), + ) + + # Show preview in human mode + if output_mode.value == "human": + for repo in repos[:10]: # Show first 10 + stars_str = f" ★{repo.stars}" if repo.stars > 0 else "" + lang_str = f" [{repo.language}]" if repo.language else "" + log.info( + " %s %s%s%s", + colors.success("+"), + colors.info(repo.name), + colors.muted(lang_str), + colors.muted(stars_str), + ) + if len(repos) > 10: + log.info( + " %s and %s more", + colors.muted("..."), + colors.info(str(len(repos) - 10)), + ) + + formatter.finalize() + + # Handle dry-run + if dry_run: + log.info( + "\n%s Dry run complete. Would write to %s", + colors.warning("→"), + colors.muted(display_config_path), + ) + return 0 + + # Confirm with user + if not yes and output_mode.value == "human": + if not sys.stdin.isatty(): + log.info( + "%s Non-interactive mode: use --yes to skip confirmation.", + colors.error("✗"), + ) + return 0 + try: + confirm = input( + f"\n{colors.info('Import')} {len(repos)} repositories to " + f"{display_config_path}? 
[y/N]: ", + ).lower() + except EOFError: + confirm = "" + if confirm not in {"y", "yes"}: + log.info("%s Aborted by user.", colors.error("✗")) + return 0 + + # Load existing config or create new + raw_config: dict[str, t.Any] + if config_file_path.exists(): + import yaml + + try: + with config_file_path.open() as f: + raw_config = yaml.safe_load(f) or {} + except (yaml.YAMLError, OSError): + log.exception("Error loading config file") + return 1 + + if not isinstance(raw_config, dict): + log.error( + "%s Config file is not a valid YAML mapping: %s", + colors.error("✗"), + display_config_path, + ) + return 1 + else: + raw_config = {} + + # Add repositories to config + checked_labels: set[str] = set() + added_count = 0 + skipped_count = 0 + + for repo in repos: + # Determine workspace for this repo + repo_workspace_path = workspace_path + + preserve_group_structure = ( + service_name == "gitlab" + and options.mode == ImportMode.ORG + and not flatten_groups + ) + if preserve_group_structure and repo.owner.startswith(options.target): + # Check if it is a subdirectory + if repo.owner == options.target: + subpath = "" + elif repo.owner.startswith(options.target + "/"): + subpath = repo.owner[len(options.target) + 1 :] + else: + subpath = "" + + if subpath: + repo_workspace_path = workspace_path / subpath + + repo_workspace_label = workspace_root_label( + repo_workspace_path, cwd=cwd, home=home + ) + + if repo_workspace_label not in checked_labels: + if repo_workspace_label in raw_config and not isinstance( + raw_config[repo_workspace_label], dict + ): + log.error( + "%s Workspace section '%s' is not a mapping in config", + colors.error("✗"), + repo_workspace_label, + ) + checked_labels.add(repo_workspace_label) + + if repo_workspace_label in raw_config and not isinstance( + raw_config[repo_workspace_label], dict + ): + continue + + if repo_workspace_label not in raw_config: + raw_config[repo_workspace_label] = {} + + if repo.name in raw_config[repo_workspace_label]: + 
skipped_count += 1 + continue + + raw_config[repo_workspace_label][repo.name] = { + "repo": repo.to_vcspull_url(use_ssh=not use_https), + } + added_count += 1 + + if added_count == 0: + log.info( + "%s All repositories already exist in config. Nothing to add.", + colors.success("✓"), + ) + return 0 + + # Save config + try: + save_config_yaml(config_file_path, raw_config) + log.info( + "%s Added %s repositories to %s", + colors.success("✓"), + colors.info(str(added_count)), + colors.muted(display_config_path), + ) + if skipped_count > 0: + log.info( + "%s Skipped %s existing repositories", + colors.warning("!"), + colors.info(str(skipped_count)), + ) + except OSError: + log.exception("Error saving config to %s", display_config_path) + return 1 + + return 0 From 1df8882f67c1793160cd9658d57400a0afb8704c Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 14 Feb 2026 18:49:18 -0600 Subject: [PATCH 079/109] refactor(cli/import[services]): Add per-service subparser modules why: Each service now has its own module with only the flags it supports, making --help self-documenting per service. 
what: - github.py: --url (optional, GHE), aliases [gh] - gitlab.py: --url (optional), --flatten-groups, aliases [gl] - codeberg.py: fixed base_url, aliases [cb] - gitea.py: --url (required), no aliases - forgejo.py: --url (required), no aliases - codecommit.py: --region, --profile, optional target, aliases [cc, aws] --- src/vcspull/cli/import_cmd/codeberg.py | 87 +++++++++++++++++++ src/vcspull/cli/import_cmd/codecommit.py | 104 +++++++++++++++++++++++ src/vcspull/cli/import_cmd/forgejo.py | 94 ++++++++++++++++++++ src/vcspull/cli/import_cmd/gitea.py | 94 ++++++++++++++++++++ src/vcspull/cli/import_cmd/github.py | 96 +++++++++++++++++++++ src/vcspull/cli/import_cmd/gitlab.py | 104 +++++++++++++++++++++++ 6 files changed, 579 insertions(+) create mode 100644 src/vcspull/cli/import_cmd/codeberg.py create mode 100644 src/vcspull/cli/import_cmd/codecommit.py create mode 100644 src/vcspull/cli/import_cmd/forgejo.py create mode 100644 src/vcspull/cli/import_cmd/gitea.py create mode 100644 src/vcspull/cli/import_cmd/github.py create mode 100644 src/vcspull/cli/import_cmd/gitlab.py diff --git a/src/vcspull/cli/import_cmd/codeberg.py b/src/vcspull/cli/import_cmd/codeberg.py new file mode 100644 index 000000000..542ce0dfc --- /dev/null +++ b/src/vcspull/cli/import_cmd/codeberg.py @@ -0,0 +1,87 @@ +"""``vcspull import codeberg`` subcommand.""" + +from __future__ import annotations + +import argparse +import logging + +from vcspull._internal.remotes import GiteaImporter + +from .._formatter import VcspullHelpFormatter +from ._common import ( + _create_mode_parent, + _create_shared_parent, + _create_target_parent, + _create_token_parent, + _run_import, +) + +log = logging.getLogger(__name__) + + +def create_codeberg_subparser( + subparsers: argparse._SubParsersAction[argparse.ArgumentParser], +) -> None: + """Register the ``codeberg`` (alias ``cb``) service subcommand. 
+ + Parameters + ---------- + subparsers : argparse._SubParsersAction + The subparsers action from the ``import`` parser. + """ + subparsers.add_parser( + "codeberg", + aliases=["cb"], + help="import from Codeberg", + parents=[ + _create_shared_parent(), + _create_token_parent(), + _create_mode_parent(), + _create_target_parent(), + ], + formatter_class=VcspullHelpFormatter, + description="Import repositories from Codeberg.", + ).set_defaults(import_handler=handle_codeberg) + + +def handle_codeberg(args: argparse.Namespace) -> int: + """Handle ``vcspull import codeberg``. + + Parameters + ---------- + args : argparse.Namespace + Parsed CLI arguments. + + Returns + ------- + int + Exit code (0 = success). + """ + if args.workspace is None: + msg = "-w/--workspace is required" + raise SystemExit(msg) + + importer = GiteaImporter( + token=getattr(args, "token", None), + base_url="https://codeberg.org", + ) + return _run_import( + importer, + service_name="codeberg", + target=args.target, + workspace=args.workspace, + mode=args.mode, + language=getattr(args, "language", None), + topics=getattr(args, "topics", None), + min_stars=getattr(args, "min_stars", 0), + include_archived=getattr(args, "include_archived", False), + include_forks=getattr(args, "include_forks", False), + limit=getattr(args, "limit", 100), + config_path_str=getattr(args, "config", None), + dry_run=getattr(args, "dry_run", False), + yes=getattr(args, "yes", False), + output_json=getattr(args, "output_json", False), + output_ndjson=getattr(args, "output_ndjson", False), + color=getattr(args, "color", "auto"), + use_https=getattr(args, "use_https", False), + ) diff --git a/src/vcspull/cli/import_cmd/codecommit.py b/src/vcspull/cli/import_cmd/codecommit.py new file mode 100644 index 000000000..1dd343079 --- /dev/null +++ b/src/vcspull/cli/import_cmd/codecommit.py @@ -0,0 +1,104 @@ +"""``vcspull import codecommit`` subcommand.""" + +from __future__ import annotations + +import argparse +import logging + 
+from vcspull._internal.remotes import CodeCommitImporter, DependencyError + +from .._colors import Colors, get_color_mode +from .._formatter import VcspullHelpFormatter +from ._common import _create_shared_parent, _run_import + +log = logging.getLogger(__name__) + + +def create_codecommit_subparser( + subparsers: argparse._SubParsersAction[argparse.ArgumentParser], +) -> None: + """Register the ``codecommit`` (aliases ``cc``, ``aws``) service subcommand. + + Parameters + ---------- + subparsers : argparse._SubParsersAction + The subparsers action from the ``import`` parser. + """ + parser = subparsers.add_parser( + "codecommit", + aliases=["cc", "aws"], + help="import from AWS CodeCommit", + parents=[_create_shared_parent()], + formatter_class=VcspullHelpFormatter, + description="Import repositories from AWS CodeCommit.", + ) + parser.add_argument( + "target", + metavar="TARGET", + nargs="?", + default="", + help="Optional filter prefix for repository names", + ) + parser.add_argument( + "--region", + dest="region", + metavar="REGION", + help="AWS region for CodeCommit", + ) + parser.add_argument( + "--profile", + dest="profile", + metavar="PROFILE", + help="AWS profile for CodeCommit", + ) + parser.set_defaults(import_handler=handle_codecommit) + + +def handle_codecommit(args: argparse.Namespace) -> int: + """Handle ``vcspull import codecommit``. + + Parameters + ---------- + args : argparse.Namespace + Parsed CLI arguments. + + Returns + ------- + int + Exit code (0 = success). 
+ """ + if args.workspace is None: + msg = "-w/--workspace is required" + raise SystemExit(msg) + + colors = Colors(get_color_mode(getattr(args, "color", "auto"))) + + try: + importer = CodeCommitImporter( + region=getattr(args, "region", None), + profile=getattr(args, "profile", None), + ) + except DependencyError as exc: + log.error("%s %s", colors.error("\u2717"), exc) # noqa: TRY400 + return 1 + + return _run_import( + importer, + service_name="codecommit", + target=getattr(args, "target", "") or "", + workspace=args.workspace, + mode="user", + language=getattr(args, "language", None), + topics=getattr(args, "topics", None), + min_stars=getattr(args, "min_stars", 0), + include_archived=getattr(args, "include_archived", False), + include_forks=getattr(args, "include_forks", False), + limit=getattr(args, "limit", 100), + config_path_str=getattr(args, "config", None), + dry_run=getattr(args, "dry_run", False), + yes=getattr(args, "yes", False), + output_json=getattr(args, "output_json", False), + output_ndjson=getattr(args, "output_ndjson", False), + color=getattr(args, "color", "auto"), + use_https=getattr(args, "use_https", False), + ) diff --git a/src/vcspull/cli/import_cmd/forgejo.py b/src/vcspull/cli/import_cmd/forgejo.py new file mode 100644 index 000000000..580ce31d5 --- /dev/null +++ b/src/vcspull/cli/import_cmd/forgejo.py @@ -0,0 +1,94 @@ +"""``vcspull import forgejo`` subcommand.""" + +from __future__ import annotations + +import argparse +import logging + +from vcspull._internal.remotes import GiteaImporter + +from .._formatter import VcspullHelpFormatter +from ._common import ( + _create_mode_parent, + _create_shared_parent, + _create_target_parent, + _create_token_parent, + _run_import, +) + +log = logging.getLogger(__name__) + + +def create_forgejo_subparser( + subparsers: argparse._SubParsersAction[argparse.ArgumentParser], +) -> None: + """Register the ``forgejo`` service subcommand. 
+ + Parameters + ---------- + subparsers : argparse._SubParsersAction + The subparsers action from the ``import`` parser. + """ + parser = subparsers.add_parser( + "forgejo", + help="import from a Forgejo instance", + parents=[ + _create_shared_parent(), + _create_token_parent(), + _create_mode_parent(), + _create_target_parent(), + ], + formatter_class=VcspullHelpFormatter, + description="Import repositories from a Forgejo instance.", + ) + parser.add_argument( + "--url", + dest="base_url", + metavar="URL", + required=True, + help="Base URL of the Forgejo instance (required)", + ) + parser.set_defaults(import_handler=handle_forgejo) + + +def handle_forgejo(args: argparse.Namespace) -> int: + """Handle ``vcspull import forgejo``. + + Parameters + ---------- + args : argparse.Namespace + Parsed CLI arguments. + + Returns + ------- + int + Exit code (0 = success). + """ + if args.workspace is None: + msg = "-w/--workspace is required" + raise SystemExit(msg) + + importer = GiteaImporter( + token=getattr(args, "token", None), + base_url=args.base_url, + ) + return _run_import( + importer, + service_name="forgejo", + target=args.target, + workspace=args.workspace, + mode=args.mode, + language=getattr(args, "language", None), + topics=getattr(args, "topics", None), + min_stars=getattr(args, "min_stars", 0), + include_archived=getattr(args, "include_archived", False), + include_forks=getattr(args, "include_forks", False), + limit=getattr(args, "limit", 100), + config_path_str=getattr(args, "config", None), + dry_run=getattr(args, "dry_run", False), + yes=getattr(args, "yes", False), + output_json=getattr(args, "output_json", False), + output_ndjson=getattr(args, "output_ndjson", False), + color=getattr(args, "color", "auto"), + use_https=getattr(args, "use_https", False), + ) diff --git a/src/vcspull/cli/import_cmd/gitea.py b/src/vcspull/cli/import_cmd/gitea.py new file mode 100644 index 000000000..6bf2039a8 --- /dev/null +++ b/src/vcspull/cli/import_cmd/gitea.py @@ -0,0 
+1,94 @@ +"""``vcspull import gitea`` subcommand.""" + +from __future__ import annotations + +import argparse +import logging + +from vcspull._internal.remotes import GiteaImporter + +from .._formatter import VcspullHelpFormatter +from ._common import ( + _create_mode_parent, + _create_shared_parent, + _create_target_parent, + _create_token_parent, + _run_import, +) + +log = logging.getLogger(__name__) + + +def create_gitea_subparser( + subparsers: argparse._SubParsersAction[argparse.ArgumentParser], +) -> None: + """Register the ``gitea`` service subcommand. + + Parameters + ---------- + subparsers : argparse._SubParsersAction + The subparsers action from the ``import`` parser. + """ + parser = subparsers.add_parser( + "gitea", + help="import from a Gitea instance", + parents=[ + _create_shared_parent(), + _create_token_parent(), + _create_mode_parent(), + _create_target_parent(), + ], + formatter_class=VcspullHelpFormatter, + description="Import repositories from a Gitea instance.", + ) + parser.add_argument( + "--url", + dest="base_url", + metavar="URL", + required=True, + help="Base URL of the Gitea instance (required)", + ) + parser.set_defaults(import_handler=handle_gitea) + + +def handle_gitea(args: argparse.Namespace) -> int: + """Handle ``vcspull import gitea``. + + Parameters + ---------- + args : argparse.Namespace + Parsed CLI arguments. + + Returns + ------- + int + Exit code (0 = success). 
+ """ + if args.workspace is None: + msg = "-w/--workspace is required" + raise SystemExit(msg) + + importer = GiteaImporter( + token=getattr(args, "token", None), + base_url=args.base_url, + ) + return _run_import( + importer, + service_name="gitea", + target=args.target, + workspace=args.workspace, + mode=args.mode, + language=getattr(args, "language", None), + topics=getattr(args, "topics", None), + min_stars=getattr(args, "min_stars", 0), + include_archived=getattr(args, "include_archived", False), + include_forks=getattr(args, "include_forks", False), + limit=getattr(args, "limit", 100), + config_path_str=getattr(args, "config", None), + dry_run=getattr(args, "dry_run", False), + yes=getattr(args, "yes", False), + output_json=getattr(args, "output_json", False), + output_ndjson=getattr(args, "output_ndjson", False), + color=getattr(args, "color", "auto"), + use_https=getattr(args, "use_https", False), + ) diff --git a/src/vcspull/cli/import_cmd/github.py b/src/vcspull/cli/import_cmd/github.py new file mode 100644 index 000000000..f90dc4cc6 --- /dev/null +++ b/src/vcspull/cli/import_cmd/github.py @@ -0,0 +1,96 @@ +"""``vcspull import github`` subcommand.""" + +from __future__ import annotations + +import argparse +import logging + +from vcspull._internal.remotes import GitHubImporter + +from .._formatter import VcspullHelpFormatter +from ._common import ( + _create_mode_parent, + _create_shared_parent, + _create_target_parent, + _create_token_parent, + _run_import, +) + +log = logging.getLogger(__name__) + + +def create_github_subparser( + subparsers: argparse._SubParsersAction[argparse.ArgumentParser], +) -> None: + """Register the ``github`` (alias ``gh``) service subcommand. + + Parameters + ---------- + subparsers : argparse._SubParsersAction + The subparsers action from the ``import`` parser. 
+ """ + parser = subparsers.add_parser( + "github", + aliases=["gh"], + help="import from GitHub", + parents=[ + _create_shared_parent(), + _create_token_parent(), + _create_mode_parent(), + _create_target_parent(), + ], + formatter_class=VcspullHelpFormatter, + description=( + "Import repositories from GitHub (github.com or GitHub Enterprise)." + ), + ) + parser.add_argument( + "--url", + dest="base_url", + metavar="URL", + help="Base URL for GitHub Enterprise (optional)", + ) + parser.set_defaults(import_handler=handle_github) + + +def handle_github(args: argparse.Namespace) -> int: + """Handle ``vcspull import github``. + + Parameters + ---------- + args : argparse.Namespace + Parsed CLI arguments. + + Returns + ------- + int + Exit code (0 = success). + """ + if args.workspace is None: + msg = "-w/--workspace is required" + raise SystemExit(msg) + + importer = GitHubImporter( + token=getattr(args, "token", None), + base_url=getattr(args, "base_url", None), + ) + return _run_import( + importer, + service_name="github", + target=args.target, + workspace=args.workspace, + mode=args.mode, + language=getattr(args, "language", None), + topics=getattr(args, "topics", None), + min_stars=getattr(args, "min_stars", 0), + include_archived=getattr(args, "include_archived", False), + include_forks=getattr(args, "include_forks", False), + limit=getattr(args, "limit", 100), + config_path_str=getattr(args, "config", None), + dry_run=getattr(args, "dry_run", False), + yes=getattr(args, "yes", False), + output_json=getattr(args, "output_json", False), + output_ndjson=getattr(args, "output_ndjson", False), + color=getattr(args, "color", "auto"), + use_https=getattr(args, "use_https", False), + ) diff --git a/src/vcspull/cli/import_cmd/gitlab.py b/src/vcspull/cli/import_cmd/gitlab.py new file mode 100644 index 000000000..fce1c610f --- /dev/null +++ b/src/vcspull/cli/import_cmd/gitlab.py @@ -0,0 +1,104 @@ +"""``vcspull import gitlab`` subcommand.""" + +from __future__ import 
annotations + +import argparse +import logging + +from vcspull._internal.remotes import GitLabImporter + +from .._formatter import VcspullHelpFormatter +from ._common import ( + _create_mode_parent, + _create_shared_parent, + _create_target_parent, + _create_token_parent, + _run_import, +) + +log = logging.getLogger(__name__) + + +def create_gitlab_subparser( + subparsers: argparse._SubParsersAction[argparse.ArgumentParser], +) -> None: + """Register the ``gitlab`` (alias ``gl``) service subcommand. + + Parameters + ---------- + subparsers : argparse._SubParsersAction + The subparsers action from the ``import`` parser. + """ + parser = subparsers.add_parser( + "gitlab", + aliases=["gl"], + help="import from GitLab", + parents=[ + _create_shared_parent(), + _create_token_parent(), + _create_mode_parent(), + _create_target_parent(), + ], + formatter_class=VcspullHelpFormatter, + description="Import repositories from GitLab (gitlab.com or self-hosted).", + ) + parser.add_argument( + "--url", + dest="base_url", + metavar="URL", + help="Base URL for self-hosted GitLab (optional)", + ) + parser.add_argument( + "--flatten-groups", + action="store_true", + dest="flatten_groups", + help=( + "For ``--mode org``, flatten subgroup repositories into the base " + "workspace instead of preserving subgroup paths" + ), + ) + parser.set_defaults(import_handler=handle_gitlab) + + +def handle_gitlab(args: argparse.Namespace) -> int: + """Handle ``vcspull import gitlab``. + + Parameters + ---------- + args : argparse.Namespace + Parsed CLI arguments. + + Returns + ------- + int + Exit code (0 = success). 
+ """ + if args.workspace is None: + msg = "-w/--workspace is required" + raise SystemExit(msg) + + importer = GitLabImporter( + token=getattr(args, "token", None), + base_url=getattr(args, "base_url", None), + ) + return _run_import( + importer, + service_name="gitlab", + target=args.target, + workspace=args.workspace, + mode=args.mode, + language=getattr(args, "language", None), + topics=getattr(args, "topics", None), + min_stars=getattr(args, "min_stars", 0), + include_archived=getattr(args, "include_archived", False), + include_forks=getattr(args, "include_forks", False), + limit=getattr(args, "limit", 100), + config_path_str=getattr(args, "config", None), + dry_run=getattr(args, "dry_run", False), + yes=getattr(args, "yes", False), + output_json=getattr(args, "output_json", False), + output_ndjson=getattr(args, "output_ndjson", False), + color=getattr(args, "color", "auto"), + use_https=getattr(args, "use_https", False), + flatten_groups=getattr(args, "flatten_groups", False), + ) From 4ab197d205aaf5e9fed6917dc103a096fe958980 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 14 Feb 2026 18:49:25 -0600 Subject: [PATCH 080/109] refactor(cli/import[dispatch]): Wire import_cmd subparsers into CLI dispatch why: Switch the import command from monolithic import_repos to the new per-service subparser architecture. 
what: - Change import path from .import_repos to .import_cmd - Replace 25-line import_repos() call with handler dispatch via set_defaults - Simplify IMPORT_DESCRIPTION to overview listing available services --- src/vcspull/cli/__init__.py | 52 +++++++++++-------------------------- 1 file changed, 15 insertions(+), 37 deletions(-) diff --git a/src/vcspull/cli/__init__.py b/src/vcspull/cli/__init__.py index 0c5cf0e96..7fb589173 100644 --- a/src/vcspull/cli/__init__.py +++ b/src/vcspull/cli/__init__.py @@ -17,7 +17,7 @@ from .add import add_repo, create_add_subparser, handle_add_command from .discover import create_discover_subparser, discover_repos from .fmt import create_fmt_subparser, format_config_file -from .import_repos import create_import_subparser, import_repos +from .import_cmd import create_import_subparser from .list import create_list_subparser, list_repos from .search import create_search_subparser, search_repos from .status import create_status_subparser, status_repos @@ -247,22 +247,23 @@ def build_description( """ Import repositories from remote services. - Fetches repository lists from GitHub, GitLab, Codeberg/Gitea/Forgejo, - or AWS CodeCommit and adds them to the vcspull configuration. + Fetches repository lists from a remote hosting service and adds them to + the vcspull configuration. Choose a service subcommand for details: - For GitLab, you can specify subgroups using slash notation (e.g., parent/child). - In org mode, subgroup paths are preserved under the workspace root by - default; use ``--flatten-groups`` to collapse them into a single workspace. 
+ github (gh) GitHub or GitHub Enterprise + gitlab (gl) GitLab (gitlab.com or self-hosted) + codeberg (cb) Codeberg + gitea Self-hosted Gitea instance + forgejo Self-hosted Forgejo instance + codecommit (cc) AWS CodeCommit """, ( ( None, [ - "vcspull import github torvalds -w ~/repos/linux --mode user", - "vcspull import github django -w ~/study/python --mode org", - "vcspull import gitlab gitlab-org/ci-cd -w ~/work --mode org", - "vcspull import gitlab myuser -w ~/work --url https://gitlab.company.com", - "vcspull import codeberg user -w ~/oss --dry-run", + "vcspull import github torvalds -w ~/repos/linux", + "vcspull import gh django -w ~/study/python --mode org", + "vcspull import gitlab mygroup -w ~/work --mode org", "vcspull import codecommit -w ~/work/aws --region us-east-1", ], ), @@ -500,33 +501,10 @@ def cli(_args: list[str] | None = None) -> None: merge_roots=args.merge_roots, ) elif args.subparser_name == "import": - # Show help if required arguments are missing - if args.service is None or args.workspace is None: + handler = getattr(args, "import_handler", None) + if handler is None: _import_parser.print_help() return - result = import_repos( - service=args.service, - target=args.target, - workspace=args.workspace, - mode=args.mode, - base_url=getattr(args, "base_url", None), - token=getattr(args, "token", None), - region=getattr(args, "region", None), - profile=getattr(args, "profile", None), - language=getattr(args, "language", None), - topics=getattr(args, "topics", None), - min_stars=getattr(args, "min_stars", 0), - include_archived=getattr(args, "include_archived", False), - include_forks=getattr(args, "include_forks", False), - limit=getattr(args, "limit", 100), - config_path_str=getattr(args, "config", None), - dry_run=getattr(args, "dry_run", False), - yes=getattr(args, "yes", False), - output_json=getattr(args, "output_json", False), - output_ndjson=getattr(args, "output_ndjson", False), - color=getattr(args, "color", "auto"), - 
use_https=getattr(args, "use_https", False), - flatten_groups=getattr(args, "flatten_groups", False), - ) + result = handler(args) if result: raise SystemExit(result) From f09ea85b9a27f821f67b815c427c13af8d9891f2 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 14 Feb 2026 18:49:29 -0600 Subject: [PATCH 081/109] refactor(cli/import[cleanup]): Remove old import_repos.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit why: All logic has been distributed to import_cmd/ package modules. what: - Delete import_repos.py (709 lines) — replaced by import_cmd/_common.py and per-service modules --- src/vcspull/cli/import_repos.py | 709 -------------------------------- 1 file changed, 709 deletions(-) delete mode 100644 src/vcspull/cli/import_repos.py diff --git a/src/vcspull/cli/import_repos.py b/src/vcspull/cli/import_repos.py deleted file mode 100644 index 89ac0424d..000000000 --- a/src/vcspull/cli/import_repos.py +++ /dev/null @@ -1,709 +0,0 @@ -"""Import repositories from remote services for vcspull. - -Supports GitHub, GitLab, Codeberg, Gitea, Forgejo, and AWS CodeCommit. -Authentication is via environment variables (``GITHUB_TOKEN``, ``GITLAB_TOKEN``, -etc.) or the ``--token`` CLI flag. See each importer class for required scopes. 
-""" - -from __future__ import annotations - -import argparse -import logging -import pathlib -import sys -import typing as t - -from vcspull._internal.private_path import PrivatePath -from vcspull._internal.remotes import ( - AuthenticationError, - CodeCommitImporter, - ConfigurationError, - DependencyError, - GiteaImporter, - GitHubImporter, - GitLabImporter, - ImportMode, - ImportOptions, - NotFoundError, - RateLimitError, - RemoteImportError, - RemoteRepo, - ServiceUnavailableError, -) -from vcspull.config import ( - find_home_config_files, - save_config_yaml, - workspace_root_label, -) -from vcspull.exc import MultipleConfigWarning - -from ._colors import Colors, get_color_mode -from ._output import OutputFormatter, get_output_mode - -log = logging.getLogger(__name__) - -SERVICE_ALIASES: dict[str, str] = { - "github": "github", - "gh": "github", - "gitlab": "gitlab", - "gl": "gitlab", - "codeberg": "codeberg", - "cb": "codeberg", - "gitea": "gitea", - "forgejo": "forgejo", - "codecommit": "codecommit", - "cc": "codecommit", - "aws": "codecommit", -} - - -def create_import_subparser(parser: argparse.ArgumentParser) -> None: - """Create ``vcspull import`` argument subparser. - - Parameters - ---------- - parser : argparse.ArgumentParser - The parser to configure - """ - parser.add_argument( - "service", - metavar="SERVICE", - nargs="?", - default=None, - help="Remote service: github, gitlab, codeberg, gitea, forgejo, codecommit", - ) - parser.add_argument( - "target", - metavar="TARGET", - nargs="?", - default="", - help=( - "User, org name, or search query (optional for codecommit). " - "For GitLab, supports subgroups with slash notation (e.g., parent/child)." 
- ), - ) - parser.add_argument( - "-w", - "--workspace", - dest="workspace", - metavar="DIR", - default=None, - help="Workspace root directory (REQUIRED)", - ) - parser.add_argument( - "-m", - "--mode", - dest="mode", - choices=["user", "org", "search"], - default="user", - help="Import mode: user (default), org, or search", - ) - parser.add_argument( - "--url", - dest="base_url", - metavar="URL", - help="Base URL for self-hosted instances (required for gitea/forgejo)", - ) - parser.add_argument( - "--token", - dest="token", - metavar="TOKEN", - help="API token (overrides env var; prefer env var for security)", - ) - parser.add_argument( - "--region", - dest="region", - metavar="REGION", - help="AWS region for CodeCommit", - ) - parser.add_argument( - "--profile", - dest="profile", - metavar="PROFILE", - help="AWS profile for CodeCommit", - ) - - # Filtering options - filter_group = parser.add_argument_group("filtering") - filter_group.add_argument( - "-l", - "--language", - dest="language", - metavar="LANG", - help="Filter by programming language", - ) - filter_group.add_argument( - "--topics", - dest="topics", - metavar="TOPICS", - help="Filter by topics (comma-separated)", - ) - filter_group.add_argument( - "--min-stars", - dest="min_stars", - type=int, - default=0, - metavar="N", - help="Minimum stars (for search mode)", - ) - filter_group.add_argument( - "--archived", - dest="include_archived", - action="store_true", - help="Include archived repositories", - ) - filter_group.add_argument( - "--forks", - dest="include_forks", - action="store_true", - help="Include forked repositories", - ) - filter_group.add_argument( - "--limit", - dest="limit", - type=int, - default=100, - metavar="N", - help="Maximum repositories to fetch (default: 100)", - ) - - # Output options - output_group = parser.add_argument_group("output") - output_group.add_argument( - "-f", - "--file", - dest="config", - metavar="FILE", - help="Config file to write to (default: ~/.vcspull.yaml)", 
- ) - output_group.add_argument( - "--dry-run", - "-n", - action="store_true", - help="Preview without writing to config file", - ) - output_group.add_argument( - "--yes", - "-y", - action="store_true", - help="Skip confirmation prompt", - ) - output_group.add_argument( - "--json", - action="store_true", - dest="output_json", - help="Output as JSON", - ) - output_group.add_argument( - "--ndjson", - action="store_true", - dest="output_ndjson", - help="Output as NDJSON (one JSON per line)", - ) - output_group.add_argument( - "--https", - action="store_true", - dest="use_https", - help="Use HTTPS clone URLs instead of SSH (default: SSH)", - ) - output_group.add_argument( - "--flatten-groups", - action="store_true", - dest="flatten_groups", - help=( - "For GitLab --mode org, flatten subgroup repositories into the base " - "workspace instead of preserving subgroup paths" - ), - ) - output_group.add_argument( - "--color", - choices=["auto", "always", "never"], - default="auto", - help="When to use colors (default: auto)", - ) - - -def _get_importer( - service: str, - *, - token: str | None, - base_url: str | None, - region: str | None, - profile: str | None, -) -> GitHubImporter | GitLabImporter | GiteaImporter | CodeCommitImporter: - """Create the appropriate importer for the service. 
- - Parameters - ---------- - service : str - Service name - token : str | None - API token - base_url : str | None - Base URL for self-hosted instances - region : str | None - AWS region (for CodeCommit) - profile : str | None - AWS profile (for CodeCommit) - - Returns - ------- - Importer instance - - Raises - ------ - ValueError - When service is unknown or missing required arguments - """ - normalized = SERVICE_ALIASES.get(service.lower()) - if normalized is None: - msg = f"Unknown service: {service}" - raise ValueError(msg) - - if normalized == "github": - return GitHubImporter(token=token, base_url=base_url) - - if normalized == "gitlab": - return GitLabImporter(token=token, base_url=base_url) - - if normalized == "codeberg": - return GiteaImporter(token=token, base_url=base_url or "https://codeberg.org") - - if normalized in ("gitea", "forgejo"): - if not base_url: - msg = f"--url is required for {normalized}" - raise ValueError(msg) - return GiteaImporter(token=token, base_url=base_url) - - if normalized == "codecommit": - return CodeCommitImporter(region=region, profile=profile) - - msg = f"Unknown service: {service}" - raise ValueError(msg) - - -def _resolve_config_file(config_path_str: str | None) -> pathlib.Path: - """Resolve the config file path. 
- - Parameters - ---------- - config_path_str : str | None - Config file path from user, or None for default - - Returns - ------- - pathlib.Path - Resolved config file path - """ - if config_path_str: - path = pathlib.Path(config_path_str).expanduser().resolve() - if path.suffix.lower() not in {".yaml", ".yml"}: - msg = f"Only YAML config files are supported, got: {path.suffix}" - raise ValueError(msg) - return path - - home_configs = find_home_config_files(filetype=["yaml"]) - if home_configs: - return home_configs[0] - - return pathlib.Path.home() / ".vcspull.yaml" - - -def import_repos( - service: str, - target: str, - workspace: str, - mode: str, - base_url: str | None, - token: str | None, - region: str | None, - profile: str | None, - language: str | None, - topics: str | None, - min_stars: int, - include_archived: bool, - include_forks: bool, - limit: int, - config_path_str: str | None, - dry_run: bool, - yes: bool, - output_json: bool, - output_ndjson: bool, - color: str, - use_https: bool = False, - flatten_groups: bool = False, -) -> int: - """Import repositories from a remote service. 
- - Parameters - ---------- - service : str - Remote service name - target : str - User, org, or search query - workspace : str - Workspace root directory - mode : str - Import mode (user, org, search) - base_url : str | None - Base URL for self-hosted instances - token : str | None - API token - region : str | None - AWS region (for CodeCommit) - profile : str | None - AWS profile (for CodeCommit) - language : str | None - Language filter - topics : str | None - Topics filter (comma-separated) - min_stars : int - Minimum stars filter - include_archived : bool - Include archived repositories - include_forks : bool - Include forked repositories - limit : int - Maximum repositories to fetch - config_path_str : str | None - Config file path - dry_run : bool - Preview without writing - yes : bool - Skip confirmation - output_json : bool - Output as JSON - output_ndjson : bool - Output as NDJSON - color : str - Color mode - use_https : bool - Use HTTPS clone URLs instead of SSH (default: False, i.e., SSH) - flatten_groups : bool - For GitLab org imports, flatten subgroup paths into base workspace - - Returns - ------- - int - 0 on success, 1 on error - """ - output_mode = get_output_mode(output_json, output_ndjson) - formatter = OutputFormatter(output_mode) - colors = Colors(get_color_mode(color)) - - # Validate service and create importer - try: - importer = _get_importer( - service, - token=token, - base_url=base_url, - region=region, - profile=profile, - ) - except ValueError as exc: - log.error("%s %s", colors.error("✗"), exc) # noqa: TRY400 - return 1 - except DependencyError as exc: - log.error("%s %s", colors.error("✗"), exc) # noqa: TRY400 - return 1 - - # Validate target for non-CodeCommit services - normalized_service = SERVICE_ALIASES.get(service.lower(), service.lower()) - if normalized_service != "codecommit" and not target: - log.error( - "%s TARGET is required for %s", - colors.error("✗"), - service, - ) - return 1 - - # Build import options - import_mode 
= ImportMode(mode) - topic_list = ( - [topic.strip() for topic in topics.split(",") if topic.strip()] - if topics - else [] - ) - - try: - options = ImportOptions( - mode=import_mode, - target=target, - base_url=base_url, - token=token, - include_forks=include_forks, - include_archived=include_archived, - language=language, - topics=topic_list, - min_stars=min_stars, - limit=limit, - ) - except ValueError as exc_: - log.error("%s %s", colors.error("✗"), exc_) # noqa: TRY400 - return 1 - - # Warn if --language is used with services that don't return language info - if options.language and normalized_service in ("gitlab", "codecommit"): - log.warning( - "%s %s does not return language metadata; " - "--language filter may exclude all results", - colors.warning("!"), - importer.service_name, - ) - if options.topics and normalized_service == "codecommit": - log.warning( - "%s %s does not support topic filtering; " - "--topics filter may exclude all results", - colors.warning("!"), - importer.service_name, - ) - if options.min_stars > 0 and normalized_service == "codecommit": - log.warning( - "%s %s does not track star counts; " - "--min-stars filter may exclude all results", - colors.warning("!"), - importer.service_name, - ) - - # Resolve workspace path - workspace_path = pathlib.Path(workspace).expanduser().resolve() - cwd = pathlib.Path.cwd() - home = pathlib.Path.home() - - # Resolve config file - try: - config_file_path = _resolve_config_file(config_path_str) - except (ValueError, MultipleConfigWarning) as exc_: - log.error("%s %s", colors.error("✗"), exc_) # noqa: TRY400 - return 1 - display_config_path = str(PrivatePath(config_file_path)) - - # Fetch repositories - if output_mode.value == "human": - log.info( - "%s Fetching repositories from %s...", - colors.info("→"), - colors.highlight(importer.service_name), - ) - - repos: list[RemoteRepo] = [] - try: - for repo in importer.fetch_repos(options): - repos.append(repo) - - # Emit for JSON/NDJSON output - 
formatter.emit(repo.to_dict()) - - # Log progress for human output - if output_mode.value == "human" and len(repos) % 10 == 0: - log.info( - "%s Fetched %s repositories...", - colors.muted("•"), - colors.info(str(len(repos))), - ) - - except AuthenticationError as exc: - log.error( # noqa: TRY400 - "%s Authentication error: %s", colors.error("✗"), exc - ) - formatter.finalize() - return 1 - except RateLimitError as exc: - log.error( # noqa: TRY400 - "%s Rate limit exceeded: %s", colors.error("✗"), exc - ) - formatter.finalize() - return 1 - except NotFoundError as exc: - log.error("%s Not found: %s", colors.error("✗"), exc) # noqa: TRY400 - formatter.finalize() - return 1 - except ServiceUnavailableError as exc: - log.error( # noqa: TRY400 - "%s Service unavailable: %s", colors.error("✗"), exc - ) - formatter.finalize() - return 1 - except ConfigurationError as exc: - log.error( # noqa: TRY400 - "%s Configuration error: %s", colors.error("✗"), exc - ) - formatter.finalize() - return 1 - except RemoteImportError as exc: - log.error("%s Error: %s", colors.error("✗"), exc) # noqa: TRY400 - formatter.finalize() - return 1 - - if not repos: - if output_mode.value == "human": - log.info( - "%s No repositories found matching criteria.", - colors.warning("!"), - ) - formatter.finalize() - return 0 - - if output_mode.value == "human": - log.info( - "\n%s Found %s repositories", - colors.success("✓"), - colors.info(str(len(repos))), - ) - - # Show preview in human mode - if output_mode.value == "human": - for repo in repos[:10]: # Show first 10 - stars_str = f" ★{repo.stars}" if repo.stars > 0 else "" - lang_str = f" [{repo.language}]" if repo.language else "" - log.info( - " %s %s%s%s", - colors.success("+"), - colors.info(repo.name), - colors.muted(lang_str), - colors.muted(stars_str), - ) - if len(repos) > 10: - log.info( - " %s and %s more", - colors.muted("..."), - colors.info(str(len(repos) - 10)), - ) - - formatter.finalize() - - # Handle dry-run - if dry_run: - 
log.info( - "\n%s Dry run complete. Would write to %s", - colors.warning("→"), - colors.muted(display_config_path), - ) - return 0 - - # Confirm with user - if not yes and output_mode.value == "human": - if not sys.stdin.isatty(): - log.info( - "%s Non-interactive mode: use --yes to skip confirmation.", - colors.error("✗"), - ) - return 0 - try: - confirm = input( - f"\n{colors.info('Import')} {len(repos)} repositories to " - f"{display_config_path}? [y/N]: ", - ).lower() - except EOFError: - confirm = "" - if confirm not in {"y", "yes"}: - log.info("%s Aborted by user.", colors.error("✗")) - return 0 - - # Load existing config or create new - raw_config: dict[str, t.Any] - if config_file_path.exists(): - import yaml - - try: - with config_file_path.open() as f: - raw_config = yaml.safe_load(f) or {} - except (yaml.YAMLError, OSError): - log.exception("Error loading config file") - return 1 - - if not isinstance(raw_config, dict): - log.error( - "%s Config file is not a valid YAML mapping: %s", - colors.error("✗"), - display_config_path, - ) - return 1 - else: - raw_config = {} - - # Add repositories to config - checked_labels: set[str] = set() - added_count = 0 - skipped_count = 0 - - for repo in repos: - # Determine workspace for this repo - repo_workspace_path = workspace_path - - preserve_group_structure = ( - normalized_service == "gitlab" - and options.mode == ImportMode.ORG - and not flatten_groups - ) - if preserve_group_structure and repo.owner.startswith(options.target): - # Check if it is a subdirectory - if repo.owner == options.target: - subpath = "" - elif repo.owner.startswith(options.target + "/"): - subpath = repo.owner[len(options.target) + 1 :] - else: - subpath = "" - - if subpath: - repo_workspace_path = workspace_path / subpath - - repo_workspace_label = workspace_root_label( - repo_workspace_path, cwd=cwd, home=home - ) - - if repo_workspace_label not in checked_labels: - if repo_workspace_label in raw_config and not isinstance( - 
raw_config[repo_workspace_label], dict - ): - log.error( - "%s Workspace section '%s' is not a mapping in config", - colors.error("✗"), - repo_workspace_label, - ) - checked_labels.add(repo_workspace_label) - - if repo_workspace_label in raw_config and not isinstance( - raw_config[repo_workspace_label], dict - ): - continue - - if repo_workspace_label not in raw_config: - raw_config[repo_workspace_label] = {} - - if repo.name in raw_config[repo_workspace_label]: - skipped_count += 1 - continue - - raw_config[repo_workspace_label][repo.name] = { - "repo": repo.to_vcspull_url(use_ssh=not use_https), - } - added_count += 1 - - if added_count == 0: - log.info( - "%s All repositories already exist in config. Nothing to add.", - colors.success("✓"), - ) - return 0 - - # Save config - try: - save_config_yaml(config_file_path, raw_config) - log.info( - "%s Added %s repositories to %s", - colors.success("✓"), - colors.info(str(added_count)), - colors.muted(display_config_path), - ) - if skipped_count > 0: - log.info( - "%s Skipped %s existing repositories", - colors.warning("!"), - colors.info(str(skipped_count)), - ) - except OSError: - log.exception("Error saving config to %s", display_config_path) - return 1 - - return 0 From e148ad2bd4d0be01ca6cc6b56e6a3a0cc18fc53f Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 14 Feb 2026 18:49:35 -0600 Subject: [PATCH 082/109] style(cli/_formatter): Add import-command flags to colorization sets why: Ensure import-specific flags get proper syntax highlighting in --help. 
what: - OPTIONS_EXPECTING_VALUE: add --region, --profile, --token, -m, --mode, -l, --language, --topics, --min-stars, --limit - OPTIONS_FLAG_ONLY: add --flatten-groups, --archived, --forks, --https --- src/vcspull/cli/_formatter.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/vcspull/cli/_formatter.py b/src/vcspull/cli/_formatter.py index 8cbf01828..4f96fc31c 100644 --- a/src/vcspull/cli/_formatter.py +++ b/src/vcspull/cli/_formatter.py @@ -19,6 +19,16 @@ "--max-concurrent", "--name", "--url", + "--region", + "--profile", + "--token", + "-m", + "--mode", + "-l", + "--language", + "--topics", + "--min-stars", + "--limit", } OPTIONS_FLAG_ONLY = { @@ -62,6 +72,10 @@ "--sequential", "--no-merge", "--verbose", + "--flatten-groups", + "--archived", + "--forks", + "--https", } From f0206155bf158079ada09635ae44568fda3db5e1 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 14 Feb 2026 18:49:44 -0600 Subject: [PATCH 083/109] test(cli/import[subparsers]): Update tests for per-service subparser architecture MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit why: Tests must target the new import_cmd module paths and _run_import API. 
what: - Rewrite imports: import_repos → import_cmd._common - Call _run_import() directly with MockImporter instead of import_repos() - Remove tests for deleted _get_importer and SERVICE_ALIASES - Add alias parsing tests (gh, gl, cb, cc, aws) - Add service-specific flag isolation tests (--flatten-groups gitlab-only, --region codecommit-only, --url required for gitea/forgejo) - Update test_log.py logger name expectations for import_cmd subpackage --- tests/cli/test_import_repos.py | 1227 +++++++++----------------------- tests/test_log.py | 9 +- 2 files changed, 340 insertions(+), 896 deletions(-) diff --git a/tests/cli/test_import_repos.py b/tests/cli/test_import_repos.py index c659996d3..5572ba83f 100644 --- a/tests/cli/test_import_repos.py +++ b/tests/cli/test_import_repos.py @@ -5,7 +5,6 @@ import json import logging import pathlib -import subprocess import sys import typing as t @@ -20,16 +19,14 @@ RemoteRepo, ServiceUnavailableError, ) -from vcspull.cli.import_repos import ( - SERVICE_ALIASES, - _get_importer, +from vcspull.cli.import_cmd._common import ( _resolve_config_file, - import_repos, + _run_import, ) from vcspull.config import save_config_yaml, workspace_root_label -# Get the actual module (not the function from __init__.py) -import_repos_mod = sys.modules["vcspull.cli.import_repos"] +# Get the actual _common module for monkeypatching +import_common_mod = sys.modules["vcspull.cli.import_cmd._common"] if t.TYPE_CHECKING: from _pytest.monkeypatch import MonkeyPatch @@ -58,265 +55,28 @@ def _make_repo( ) -class GetImporterFixture(t.NamedTuple): - """Fixture for _get_importer test cases.""" - - test_id: str - service: str - token: str | None - base_url: str | None - region: str | None - profile: str | None - expected_type_name: str - expected_error: str | None - - -GET_IMPORTER_FIXTURES: list[GetImporterFixture] = [ - GetImporterFixture( - test_id="github-direct", - service="github", - token=None, - base_url=None, - region=None, - profile=None, - 
expected_type_name="GitHubImporter", - expected_error=None, - ), - GetImporterFixture( - test_id="github-alias-gh", - service="gh", - token=None, - base_url=None, - region=None, - profile=None, - expected_type_name="GitHubImporter", - expected_error=None, - ), - GetImporterFixture( - test_id="gitlab-direct", - service="gitlab", - token="test-token", - base_url=None, - region=None, - profile=None, - expected_type_name="GitLabImporter", - expected_error=None, - ), - GetImporterFixture( - test_id="gitlab-alias-gl", - service="gl", - token=None, - base_url=None, - region=None, - profile=None, - expected_type_name="GitLabImporter", - expected_error=None, - ), - GetImporterFixture( - test_id="codeberg-direct", - service="codeberg", - token=None, - base_url=None, - region=None, - profile=None, - expected_type_name="GiteaImporter", - expected_error=None, - ), - GetImporterFixture( - test_id="codeberg-alias-cb", - service="cb", - token=None, - base_url=None, - region=None, - profile=None, - expected_type_name="GiteaImporter", - expected_error=None, - ), - GetImporterFixture( - test_id="codeberg-custom-url", - service="codeberg", - token=None, - base_url="https://my-codeberg-mirror.example.com", - region=None, - profile=None, - expected_type_name="GiteaImporter", - expected_error=None, - ), - GetImporterFixture( - test_id="gitea-with-url", - service="gitea", - token=None, - base_url="https://gitea.example.com", - region=None, - profile=None, - expected_type_name="GiteaImporter", - expected_error=None, - ), - GetImporterFixture( - test_id="gitea-without-url-fails", - service="gitea", - token=None, - base_url=None, - region=None, - profile=None, - expected_type_name="", - expected_error="--url is required for gitea", - ), - GetImporterFixture( - test_id="forgejo-with-url", - service="forgejo", - token=None, - base_url="https://forgejo.example.com", - region=None, - profile=None, - expected_type_name="GiteaImporter", - expected_error=None, - ), - GetImporterFixture( - 
test_id="forgejo-without-url-fails", - service="forgejo", - token=None, - base_url=None, - region=None, - profile=None, - expected_type_name="", - expected_error="--url is required for forgejo", - ), - GetImporterFixture( - test_id="codecommit-direct", - service="codecommit", - token=None, - base_url=None, - region="us-east-1", - profile=None, - expected_type_name="CodeCommitImporter", - expected_error=None, - ), - GetImporterFixture( - test_id="codecommit-alias-cc", - service="cc", - token=None, - base_url=None, - region=None, - profile="myprofile", - expected_type_name="CodeCommitImporter", - expected_error=None, - ), - GetImporterFixture( - test_id="codecommit-alias-aws", - service="aws", - token=None, - base_url=None, - region=None, - profile=None, - expected_type_name="CodeCommitImporter", - expected_error=None, - ), - GetImporterFixture( - test_id="unknown-service-fails", - service="unknown", - token=None, - base_url=None, - region=None, - profile=None, - expected_type_name="", - expected_error="Unknown service: unknown", - ), -] - - -@pytest.mark.parametrize( - list(GetImporterFixture._fields), - GET_IMPORTER_FIXTURES, - ids=[f.test_id for f in GET_IMPORTER_FIXTURES], -) -def test_get_importer( - test_id: str, - service: str, - token: str | None, - base_url: str | None, - region: str | None, - profile: str | None, - expected_type_name: str, - expected_error: str | None, - monkeypatch: MonkeyPatch, -) -> None: - """Test _get_importer creates the correct importer type.""" - # Mock subprocess.run for CodeCommit tests (aws --version check) - if service in ("codecommit", "cc", "aws"): - monkeypatch.setattr( - "subprocess.run", - lambda cmd, **kwargs: subprocess.CompletedProcess( - cmd, 0, stdout="aws-cli/2.x", stderr="" - ), - ) - - if expected_error: - with pytest.raises(ValueError, match=expected_error): - _get_importer( - service, - token=token, - base_url=base_url, - region=region, - profile=profile, - ) - else: - importer = _get_importer( - service, - 
token=token, - base_url=base_url, - region=region, - profile=profile, - ) - assert type(importer).__name__ == expected_type_name +class MockImporter: + """Reusable mock importer for tests.""" + def __init__( + self, + *, + service_name: str = "MockService", + repos: list[RemoteRepo] | None = None, + error: Exception | None = None, + ) -> None: + self.service_name = service_name + self._repos = repos or [] + self._error = error -def test_codeberg_custom_url_used() -> None: - """Test that Codeberg importer uses custom base_url when provided.""" - from vcspull._internal.remotes.gitea import GiteaImporter - - importer = _get_importer( - "codeberg", - token=None, - base_url="https://my-codeberg.example.com", - region=None, - profile=None, - ) - assert isinstance(importer, GiteaImporter) - assert importer._client.base_url == "https://my-codeberg.example.com/api/v1" - - -def test_codeberg_default_url_used() -> None: - """Test that Codeberg importer uses default URL when no base_url.""" - from vcspull._internal.remotes.gitea import GiteaImporter - - importer = _get_importer( - "codeberg", - token=None, - base_url=None, - region=None, - profile=None, - ) - assert isinstance(importer, GiteaImporter) - assert importer._client.base_url == "https://codeberg.org/api/v1" - - -def test_service_aliases_coverage() -> None: - """Test that SERVICE_ALIASES covers expected services.""" - expected_aliases = { - "github", - "gh", - "gitlab", - "gl", - "codeberg", - "cb", - "gitea", - "forgejo", - "codecommit", - "cc", - "aws", - } - assert set(SERVICE_ALIASES.keys()) == expected_aliases + def fetch_repos( + self, + options: ImportOptions, + ) -> t.Iterator[RemoteRepo]: + """Yield mock repos or raise a mock error.""" + if self._error: + raise self._error + yield from self._repos class ResolveConfigFixture(t.NamedTuple): @@ -388,7 +148,7 @@ def test_resolve_config_file( # Mock find_home_config_files: return pre-created config file paths # instead of scanning the real home directory 
monkeypatch.setattr( - import_repos_mod, + import_common_mod, "find_home_config_files", lambda filetype=None: full_paths, ) @@ -398,13 +158,12 @@ def test_resolve_config_file( class ImportReposFixture(t.NamedTuple): - """Fixture for import_repos test cases.""" + """Fixture for _run_import test cases.""" test_id: str - service: str + service_name: str target: str mode: str - base_url: str | None dry_run: bool yes: bool output_json: bool @@ -417,10 +176,9 @@ class ImportReposFixture(t.NamedTuple): IMPORT_REPOS_FIXTURES: list[ImportReposFixture] = [ ImportReposFixture( test_id="basic-github-user-dry-run", - service="github", + service_name="github", target="testuser", mode="user", - base_url=None, dry_run=True, yes=True, output_json=False, @@ -431,10 +189,9 @@ class ImportReposFixture(t.NamedTuple): ), ImportReposFixture( test_id="github-user-writes-config", - service="github", + service_name="github", target="testuser", mode="user", - base_url=None, dry_run=False, yes=True, output_json=False, @@ -445,10 +202,9 @@ class ImportReposFixture(t.NamedTuple): ), ImportReposFixture( test_id="no-repos-found", - service="github", + service_name="github", target="emptyuser", mode="user", - base_url=None, dry_run=False, yes=True, output_json=False, @@ -459,10 +215,9 @@ class ImportReposFixture(t.NamedTuple): ), ImportReposFixture( test_id="authentication-error", - service="github", + service_name="github", target="testuser", mode="user", - base_url=None, dry_run=False, yes=True, output_json=False, @@ -473,10 +228,9 @@ class ImportReposFixture(t.NamedTuple): ), ImportReposFixture( test_id="rate-limit-error", - service="github", + service_name="github", target="testuser", mode="user", - base_url=None, dry_run=False, yes=True, output_json=False, @@ -487,10 +241,9 @@ class ImportReposFixture(t.NamedTuple): ), ImportReposFixture( test_id="not-found-error", - service="github", + service_name="github", target="nosuchuser", mode="user", - base_url=None, dry_run=False, yes=True, 
output_json=False, @@ -501,10 +254,9 @@ class ImportReposFixture(t.NamedTuple): ), ImportReposFixture( test_id="service-unavailable-error", - service="github", + service_name="github", target="testuser", mode="user", - base_url=None, dry_run=False, yes=True, output_json=False, @@ -515,10 +267,9 @@ class ImportReposFixture(t.NamedTuple): ), ImportReposFixture( test_id="configuration-error", - service="codecommit", + service_name="codecommit", target="", mode="user", - base_url=None, dry_run=False, yes=True, output_json=False, @@ -529,10 +280,9 @@ class ImportReposFixture(t.NamedTuple): ), ImportReposFixture( test_id="gitlab-org-mode", - service="gitlab", + service_name="gitlab", target="testgroup", mode="org", - base_url=None, dry_run=True, yes=True, output_json=False, @@ -543,10 +293,9 @@ class ImportReposFixture(t.NamedTuple): ), ImportReposFixture( test_id="codeberg-search-mode", - service="codeberg", + service_name="codeberg", target="python cli", mode="search", - base_url=None, dry_run=True, yes=True, output_json=False, @@ -565,10 +314,9 @@ class ImportReposFixture(t.NamedTuple): ) def test_import_repos( test_id: str, - service: str, + service_name: str, target: str, mode: str, - base_url: str | None, dry_run: bool, yes: bool, output_json: bool, @@ -580,7 +328,7 @@ def test_import_repos( monkeypatch: MonkeyPatch, caplog: pytest.LogCaptureFixture, ) -> None: - """Test import_repos with various scenarios.""" + """Test _run_import with various scenarios.""" caplog.set_level(logging.INFO) monkeypatch.setenv("HOME", str(tmp_path)) @@ -590,34 +338,14 @@ def test_import_repos( workspace.mkdir() config_file = tmp_path / ".vcspull.yaml" - # Mock the importer - class MockImporter: - service_name = "MockService" + importer = MockImporter(repos=mock_repos, error=mock_error) - def fetch_repos( - self, - options: ImportOptions, - ) -> t.Iterator[RemoteRepo]: - if mock_error: - raise mock_error - yield from mock_repos - - # Mock _get_importer: return MockImporter to avoid 
real API/network calls - monkeypatch.setattr( - import_repos_mod, - "_get_importer", - lambda *args, **kwargs: MockImporter(), - ) - - import_repos( - service=service, + _run_import( + importer, + service_name=service_name, target=target, workspace=str(workspace), mode=mode, - base_url=base_url, - token=None, - region=None, - profile=None, language=None, topics=None, min_stars=0, @@ -648,88 +376,12 @@ def fetch_repos( assert total_repos == expected_config_repos -def test_import_repos_missing_target( - tmp_path: pathlib.Path, - monkeypatch: MonkeyPatch, - caplog: pytest.LogCaptureFixture, -) -> None: - """Test import_repos fails when target is missing for non-codecommit.""" - caplog.set_level(logging.ERROR) - - monkeypatch.setenv("HOME", str(tmp_path)) - workspace = tmp_path / "repos" - workspace.mkdir() - - import_repos( - service="github", - target="", # Empty target - workspace=str(workspace), - mode="user", - base_url=None, - token=None, - region=None, - profile=None, - language=None, - topics=None, - min_stars=0, - include_archived=False, - include_forks=False, - limit=100, - config_path_str=str(tmp_path / "config.yaml"), - dry_run=False, - yes=True, - output_json=False, - output_ndjson=False, - color="never", - ) - - assert "TARGET is required" in caplog.text - - -def test_import_repos_unknown_service( - tmp_path: pathlib.Path, - monkeypatch: MonkeyPatch, - caplog: pytest.LogCaptureFixture, -) -> None: - """Test import_repos fails for unknown service.""" - caplog.set_level(logging.ERROR) - - monkeypatch.setenv("HOME", str(tmp_path)) - workspace = tmp_path / "repos" - workspace.mkdir() - - import_repos( - service="unknownservice", - target="testuser", - workspace=str(workspace), - mode="user", - base_url=None, - token=None, - region=None, - profile=None, - language=None, - topics=None, - min_stars=0, - include_archived=False, - include_forks=False, - limit=100, - config_path_str=str(tmp_path / "config.yaml"), - dry_run=False, - yes=True, - output_json=False, - 
output_ndjson=False, - color="never", - ) - - assert "Unknown service" in caplog.text - - def test_import_repos_user_abort( tmp_path: pathlib.Path, monkeypatch: MonkeyPatch, caplog: pytest.LogCaptureFixture, ) -> None: - """Test import_repos aborts when user declines confirmation.""" + """Test _run_import aborts when user declines confirmation.""" caplog.set_level(logging.INFO) monkeypatch.setenv("HOME", str(tmp_path)) @@ -744,32 +396,14 @@ def test_import_repos_user_abort( "sys.stdin", type("FakeTTY", (), {"isatty": lambda self: True})() ) - # Mock the importer - class MockImporter: - service_name = "MockService" + importer = MockImporter(repos=[_make_repo("repo1")]) - def fetch_repos( - self, - options: ImportOptions, - ) -> t.Iterator[RemoteRepo]: - yield _make_repo("repo1") - - # Mock _get_importer: return MockImporter to avoid real API calls - monkeypatch.setattr( - import_repos_mod, - "_get_importer", - lambda *args, **kwargs: MockImporter(), - ) - - import_repos( - service="github", + _run_import( + importer, + service_name="github", target="testuser", workspace=str(workspace), mode="user", - base_url=None, - token=None, - region=None, - profile=None, language=None, topics=None, min_stars=0, @@ -793,7 +427,7 @@ def test_import_repos_eoferror_aborts( monkeypatch: MonkeyPatch, caplog: pytest.LogCaptureFixture, ) -> None: - """Test import_repos aborts gracefully on EOFError from input().""" + """Test _run_import aborts gracefully on EOFError from input().""" caplog.set_level(logging.INFO) monkeypatch.setenv("HOME", str(tmp_path)) @@ -812,31 +446,14 @@ def raise_eof(_: str) -> str: "sys.stdin", type("FakeTTY", (), {"isatty": lambda self: True})() ) - class MockImporter: - service_name = "MockService" + importer = MockImporter(repos=[_make_repo("repo1")]) - def fetch_repos( - self, - options: ImportOptions, - ) -> t.Iterator[RemoteRepo]: - yield _make_repo("repo1") - - # Mock _get_importer: return MockImporter to avoid real API calls - monkeypatch.setattr( - 
import_repos_mod, - "_get_importer", - lambda *args, **kwargs: MockImporter(), - ) - - import_repos( - service="github", + _run_import( + importer, + service_name="github", target="testuser", workspace=str(workspace), mode="user", - base_url=None, - token=None, - region=None, - profile=None, language=None, topics=None, min_stars=0, @@ -860,7 +477,7 @@ def test_import_repos_non_tty_aborts( monkeypatch: MonkeyPatch, caplog: pytest.LogCaptureFixture, ) -> None: - """Test import_repos aborts when stdin is not a TTY.""" + """Test _run_import aborts when stdin is not a TTY.""" caplog.set_level(logging.INFO) monkeypatch.setenv("HOME", str(tmp_path)) @@ -873,31 +490,14 @@ def test_import_repos_non_tty_aborts( "sys.stdin", type("FakeNonTTY", (), {"isatty": lambda self: False})() ) - class MockImporter: - service_name = "MockService" - - def fetch_repos( - self, - options: ImportOptions, - ) -> t.Iterator[RemoteRepo]: - yield _make_repo("repo1") - - # Mock _get_importer: return MockImporter to avoid real API calls - monkeypatch.setattr( - import_repos_mod, - "_get_importer", - lambda *args, **kwargs: MockImporter(), - ) + importer = MockImporter(repos=[_make_repo("repo1")]) - import_repos( - service="github", + _run_import( + importer, + service_name="github", target="testuser", workspace=str(workspace), mode="user", - base_url=None, - token=None, - region=None, - profile=None, language=None, topics=None, min_stars=0, @@ -921,7 +521,7 @@ def test_import_repos_skips_existing( monkeypatch: MonkeyPatch, caplog: pytest.LogCaptureFixture, ) -> None: - """Test import_repos skips repositories already in config.""" + """Test _run_import skips repositories already in config.""" import yaml caplog.set_level(logging.INFO) @@ -939,33 +539,14 @@ def test_import_repos_skips_existing( } save_config_yaml(config_file, existing_config) - # Mock the importer to return repo1 (existing) and repo2 (new) - class MockImporter: - service_name = "MockService" - - def fetch_repos( - self, - options: 
ImportOptions, - ) -> t.Iterator[RemoteRepo]: - yield _make_repo("repo1") - yield _make_repo("repo2") - - # Mock _get_importer: return MockImporter with both existing and new repos - monkeypatch.setattr( - import_repos_mod, - "_get_importer", - lambda *args, **kwargs: MockImporter(), - ) + importer = MockImporter(repos=[_make_repo("repo1"), _make_repo("repo2")]) - import_repos( - service="github", + _run_import( + importer, + service_name="github", target="testuser", workspace=str(workspace), mode="user", - base_url=None, - token=None, - region=None, - profile=None, language=None, topics=None, min_stars=0, @@ -995,7 +576,7 @@ def test_import_repos_all_existing( monkeypatch: MonkeyPatch, caplog: pytest.LogCaptureFixture, ) -> None: - """Test import_repos handles all repos already existing.""" + """Test _run_import handles all repos already existing.""" caplog.set_level(logging.INFO) monkeypatch.setenv("HOME", str(tmp_path)) @@ -1011,32 +592,14 @@ def test_import_repos_all_existing( } save_config_yaml(config_file, existing_config) - # Mock the importer to return only repo1 (existing) - class MockImporter: - service_name = "MockService" - - def fetch_repos( - self, - options: ImportOptions, - ) -> t.Iterator[RemoteRepo]: - yield _make_repo("repo1") - - # Mock _get_importer: return MockImporter with only already-existing repos - monkeypatch.setattr( - import_repos_mod, - "_get_importer", - lambda *args, **kwargs: MockImporter(), - ) + importer = MockImporter(repos=[_make_repo("repo1")]) - import_repos( - service="github", + _run_import( + importer, + service_name="github", target="testuser", workspace=str(workspace), mode="user", - base_url=None, - token=None, - region=None, - profile=None, language=None, topics=None, min_stars=0, @@ -1059,37 +622,19 @@ def test_import_repos_json_output( monkeypatch: MonkeyPatch, capsys: pytest.CaptureFixture[str], ) -> None: - """Test import_repos JSON output format.""" + """Test _run_import JSON output format.""" 
monkeypatch.setenv("HOME", str(tmp_path)) workspace = tmp_path / "repos" workspace.mkdir() - # Mock the importer - class MockImporter: - service_name = "MockService" + importer = MockImporter(repos=[_make_repo("repo1", stars=50)]) - def fetch_repos( - self, - options: ImportOptions, - ) -> t.Iterator[RemoteRepo]: - yield _make_repo("repo1", stars=50) - - # Mock _get_importer: return MockImporter to test JSON output format - monkeypatch.setattr( - import_repos_mod, - "_get_importer", - lambda *args, **kwargs: MockImporter(), - ) - - import_repos( - service="github", + _run_import( + importer, + service_name="github", target="testuser", workspace=str(workspace), mode="user", - base_url=None, - token=None, - region=None, - profile=None, language=None, topics=None, min_stars=0, @@ -1118,38 +663,19 @@ def test_import_repos_ndjson_output( monkeypatch: MonkeyPatch, capsys: pytest.CaptureFixture[str], ) -> None: - """Test import_repos NDJSON output format.""" + """Test _run_import NDJSON output format.""" monkeypatch.setenv("HOME", str(tmp_path)) workspace = tmp_path / "repos" workspace.mkdir() - # Mock the importer - class MockImporter: - service_name = "MockService" + importer = MockImporter(repos=[_make_repo("repo1"), _make_repo("repo2")]) - def fetch_repos( - self, - options: ImportOptions, - ) -> t.Iterator[RemoteRepo]: - yield _make_repo("repo1") - yield _make_repo("repo2") - - # Mock _get_importer: return MockImporter to test NDJSON output format - monkeypatch.setattr( - import_repos_mod, - "_get_importer", - lambda *args, **kwargs: MockImporter(), - ) - - import_repos( - service="github", + _run_import( + importer, + service_name="github", target="testuser", workspace=str(workspace), mode="user", - base_url=None, - token=None, - region=None, - profile=None, language=None, topics=None, min_stars=0, @@ -1177,7 +703,7 @@ def test_import_repos_topics_filter( monkeypatch: MonkeyPatch, caplog: pytest.LogCaptureFixture, ) -> None: - """Test import_repos passes topics 
filter correctly.""" + """Test _run_import passes topics filter correctly.""" caplog.set_level(logging.INFO) monkeypatch.setenv("HOME", str(tmp_path)) @@ -1186,7 +712,7 @@ def test_import_repos_topics_filter( received_options: list[ImportOptions] = [] - class MockImporter: + class CapturingImporter: service_name = "MockService" def fetch_repos( @@ -1196,22 +722,12 @@ def fetch_repos( received_options.append(options) return iter([]) - # Mock _get_importer: capture ImportOptions to verify filter passthrough - monkeypatch.setattr( - import_repos_mod, - "_get_importer", - lambda *args, **kwargs: MockImporter(), - ) - - import_repos( - service="github", + _run_import( + CapturingImporter(), + service_name="github", target="testuser", workspace=str(workspace), mode="user", - base_url=None, - token=None, - region=None, - profile=None, language="Python", topics="cli,tool,python", min_stars=50, @@ -1241,38 +757,24 @@ def test_import_repos_codecommit_no_target_required( monkeypatch: MonkeyPatch, caplog: pytest.LogCaptureFixture, ) -> None: - """Test import_repos allows empty target for codecommit.""" + """Test _run_import allows empty target for codecommit.""" caplog.set_level(logging.INFO) monkeypatch.setenv("HOME", str(tmp_path)) workspace = tmp_path / "repos" workspace.mkdir() - class MockImporter: - service_name = "CodeCommit" - - def fetch_repos( - self, - options: ImportOptions, - ) -> t.Iterator[RemoteRepo]: - yield _make_repo("aws-repo") - - # Mock _get_importer: return MockImporter to verify CodeCommit allows empty target - monkeypatch.setattr( - import_repos_mod, - "_get_importer", - lambda *args, **kwargs: MockImporter(), + importer = MockImporter( + service_name="CodeCommit", + repos=[_make_repo("aws-repo")], ) - import_repos( - service="codecommit", + _run_import( + importer, + service_name="codecommit", target="", # Empty target is OK for CodeCommit workspace=str(workspace), mode="user", - base_url=None, - token=None, - region="us-east-1", - profile=None, 
language=None, topics=None, min_stars=0, @@ -1298,7 +800,7 @@ def test_import_repos_many_repos_truncates_preview( monkeypatch: MonkeyPatch, caplog: pytest.LogCaptureFixture, ) -> None: - """Test import_repos shows '...and X more' when many repos.""" + """Test _run_import shows '...and X more' when many repos.""" caplog.set_level(logging.INFO) monkeypatch.setenv("HOME", str(tmp_path)) @@ -1308,31 +810,14 @@ def test_import_repos_many_repos_truncates_preview( # Create 15 repos many_repos = [_make_repo(f"repo{i}") for i in range(15)] - class MockImporter: - service_name = "MockService" - - def fetch_repos( - self, - options: ImportOptions, - ) -> t.Iterator[RemoteRepo]: - yield from many_repos + importer = MockImporter(repos=many_repos) - # Mock _get_importer: return MockImporter with 15 repos to test truncated preview - monkeypatch.setattr( - import_repos_mod, - "_get_importer", - lambda *args, **kwargs: MockImporter(), - ) - - import_repos( - service="github", + _run_import( + importer, + service_name="github", target="testuser", workspace=str(workspace), mode="user", - base_url=None, - token=None, - region=None, - profile=None, language=None, topics=None, min_stars=0, @@ -1356,7 +841,7 @@ def test_import_repos_config_load_error( monkeypatch: MonkeyPatch, caplog: pytest.LogCaptureFixture, ) -> None: - """Test import_repos handles config load errors.""" + """Test _run_import handles config load errors.""" caplog.set_level(logging.ERROR) monkeypatch.setenv("HOME", str(tmp_path)) @@ -1367,31 +852,14 @@ def test_import_repos_config_load_error( config_file = tmp_path / ".vcspull.yaml" config_file.write_text("invalid: yaml: content: [", encoding="utf-8") - class MockImporter: - service_name = "MockService" - - def fetch_repos( - self, - options: ImportOptions, - ) -> t.Iterator[RemoteRepo]: - yield _make_repo("repo1") - - # Mock _get_importer: return MockImporter to test config load error handling - monkeypatch.setattr( - import_repos_mod, - "_get_importer", - lambda 
*args, **kwargs: MockImporter(), - ) + importer = MockImporter(repos=[_make_repo("repo1")]) - import_repos( - service="github", + _run_import( + importer, + service_name="github", target="testuser", workspace=str(workspace), mode="user", - base_url=None, - token=None, - region=None, - profile=None, language=None, topics=None, min_stars=0, @@ -1410,31 +878,14 @@ def fetch_repos( def test_import_no_args_shows_help(capsys: pytest.CaptureFixture[str]) -> None: - """Test that 'vcspull import' without args shows help (like --help).""" + """Test that 'vcspull import' without args shows help.""" from vcspull.cli import cli - # Call cli with just "import" - should show help and not error cli(["import"]) captured = capsys.readouterr() - # Verify help is shown (usage line and description) assert "usage: vcspull import" in captured.out assert "Import repositories from remote services" in captured.out - assert "positional arguments:" in captured.out - assert "SERVICE" in captured.out - - -def test_import_only_service_shows_help(capsys: pytest.CaptureFixture[str]) -> None: - """Test that 'vcspull import github' without workspace shows help.""" - from vcspull.cli import cli - - # Call cli with just "import github" - missing workspace - cli(["import", "github"]) - - captured = capsys.readouterr() - # Verify help is shown - assert "usage: vcspull import" in captured.out - assert "-w, --workspace DIR" in captured.out def test_import_repos_defaults_to_ssh_urls( @@ -1442,7 +893,7 @@ def test_import_repos_defaults_to_ssh_urls( monkeypatch: MonkeyPatch, caplog: pytest.LogCaptureFixture, ) -> None: - """Test import_repos writes SSH URLs to config by default.""" + """Test _run_import writes SSH URLs to config by default.""" import yaml caplog.set_level(logging.INFO) @@ -1452,31 +903,14 @@ def test_import_repos_defaults_to_ssh_urls( workspace.mkdir() config_file = tmp_path / ".vcspull.yaml" - class MockImporter: - service_name = "MockService" + importer = 
MockImporter(repos=[_make_repo("myrepo")]) - def fetch_repos( - self, - options: ImportOptions, - ) -> t.Iterator[RemoteRepo]: - yield _make_repo("myrepo") - - # Mock _get_importer: return MockImporter to verify default SSH URL output - monkeypatch.setattr( - import_repos_mod, - "_get_importer", - lambda *args, **kwargs: MockImporter(), - ) - - import_repos( - service="github", + _run_import( + importer, + service_name="github", target="testuser", workspace=str(workspace), mode="user", - base_url=None, - token=None, - region=None, - profile=None, language=None, topics=None, min_stars=0, @@ -1504,7 +938,7 @@ def test_import_repos_https_flag( monkeypatch: MonkeyPatch, caplog: pytest.LogCaptureFixture, ) -> None: - """Test import_repos writes HTTPS URLs when use_https=True.""" + """Test _run_import writes HTTPS URLs when use_https=True.""" import yaml caplog.set_level(logging.INFO) @@ -1514,31 +948,14 @@ def test_import_repos_https_flag( workspace.mkdir() config_file = tmp_path / ".vcspull.yaml" - class MockImporter: - service_name = "MockService" - - def fetch_repos( - self, - options: ImportOptions, - ) -> t.Iterator[RemoteRepo]: - yield _make_repo("myrepo") - - # Mock _get_importer: return MockImporter to verify HTTPS URL output - monkeypatch.setattr( - import_repos_mod, - "_get_importer", - lambda *args, **kwargs: MockImporter(), - ) + importer = MockImporter(repos=[_make_repo("myrepo")]) - import_repos( - service="github", + _run_import( + importer, + service_name="github", target="testuser", workspace=str(workspace), mode="user", - base_url=None, - token=None, - region=None, - profile=None, language=None, topics=None, min_stars=0, @@ -1562,7 +979,7 @@ def fetch_repos( assert repo_url == "git+https://github.com/testuser/myrepo.git" -def test_import_https_flag_via_cli(capsys: pytest.CaptureFixture[str]) -> None: +def test_import_https_flag_via_cli() -> None: """Test that --https flag is recognized by the CLI parser.""" from vcspull.cli import create_parser @@ 
-1571,23 +988,19 @@ def test_import_https_flag_via_cli(capsys: pytest.CaptureFixture[str]) -> None: ["import", "github", "testuser", "-w", "/tmp/repos", "--https"] ) assert args.use_https is True - assert args.flatten_groups is False -def test_import_ssh_default_via_cli(capsys: pytest.CaptureFixture[str]) -> None: +def test_import_ssh_default_via_cli() -> None: """Test that SSH is the default (no --https flag).""" from vcspull.cli import create_parser parser = create_parser(return_subparsers=False) args = parser.parse_args(["import", "github", "testuser", "-w", "/tmp/repos"]) assert args.use_https is False - assert args.flatten_groups is False -def test_import_flatten_groups_flag_via_cli( - capsys: pytest.CaptureFixture[str], -) -> None: - """Test that --flatten-groups flag is recognized by the CLI parser.""" +def test_import_flatten_groups_flag_via_cli() -> None: + """Test that --flatten-groups flag is recognized by the GitLab subparser.""" from vcspull.cli import create_parser parser = create_parser(return_subparsers=False) @@ -1602,38 +1015,21 @@ def test_import_repos_rejects_non_yaml_config( monkeypatch: MonkeyPatch, caplog: pytest.LogCaptureFixture, ) -> None: - """Test import_repos rejects non-YAML config file paths.""" + """Test _run_import rejects non-YAML config file paths.""" caplog.set_level(logging.ERROR) monkeypatch.setenv("HOME", str(tmp_path)) workspace = tmp_path / "repos" workspace.mkdir() - class MockImporter: - service_name = "MockService" - - def fetch_repos( - self, - options: ImportOptions, - ) -> t.Iterator[RemoteRepo]: - yield _make_repo("repo1") - - # Mock _get_importer: return MockImporter to test non-YAML config rejection - monkeypatch.setattr( - import_repos_mod, - "_get_importer", - lambda *args, **kwargs: MockImporter(), - ) + importer = MockImporter(repos=[_make_repo("repo1")]) - import_repos( - service="github", + _run_import( + importer, + service_name="github", target="testuser", workspace=str(workspace), mode="user", - 
base_url=None, - token=None, - region=None, - profile=None, language=None, topics=None, min_stars=0, @@ -1656,7 +1052,7 @@ def test_import_repos_catches_multiple_config_warning( monkeypatch: MonkeyPatch, caplog: pytest.LogCaptureFixture, ) -> None: - """Test import_repos logs error instead of crashing on MultipleConfigWarning.""" + """Test _run_import logs error instead of crashing on MultipleConfigWarning.""" from vcspull.exc import MultipleConfigWarning caplog.set_level(logging.ERROR) @@ -1665,41 +1061,24 @@ def test_import_repos_catches_multiple_config_warning( workspace = tmp_path / "repos" workspace.mkdir() - class MockImporter: - service_name = "MockService" - - def fetch_repos( - self, - options: ImportOptions, - ) -> t.Iterator[RemoteRepo]: - yield _make_repo("repo1") - - # Mock _get_importer: return MockImporter to isolate config resolution logic - monkeypatch.setattr( - import_repos_mod, - "_get_importer", - lambda *args, **kwargs: MockImporter(), - ) + importer = MockImporter(repos=[_make_repo("repo1")]) # Mock _resolve_config_file: raise MultipleConfigWarning to test error handling def raise_multiple_config(_: str | None) -> pathlib.Path: raise MultipleConfigWarning(MultipleConfigWarning.message) monkeypatch.setattr( - import_repos_mod, + import_common_mod, "_resolve_config_file", raise_multiple_config, ) - import_repos( - service="github", + _run_import( + importer, + service_name="github", target="testuser", workspace=str(workspace), mode="user", - base_url=None, - token=None, - region=None, - profile=None, language=None, topics=None, min_stars=0, @@ -1722,38 +1101,21 @@ def test_import_repos_invalid_limit( monkeypatch: MonkeyPatch, caplog: pytest.LogCaptureFixture, ) -> None: - """Test import_repos logs error for invalid limit (e.g. 0).""" + """Test _run_import logs error for invalid limit (e.g. 
0).""" caplog.set_level(logging.ERROR) monkeypatch.setenv("HOME", str(tmp_path)) workspace = tmp_path / "repos" workspace.mkdir() - class MockImporter: - service_name = "MockService" - - def fetch_repos( - self, - options: ImportOptions, - ) -> t.Iterator[RemoteRepo]: - yield _make_repo("repo1") + importer = MockImporter(repos=[_make_repo("repo1")]) - # Mock _get_importer: return MockImporter to test invalid limit error handling - monkeypatch.setattr( - import_repos_mod, - "_get_importer", - lambda *args, **kwargs: MockImporter(), - ) - - import_repos( - service="github", + _run_import( + importer, + service_name="github", target="testuser", workspace=str(workspace), mode="user", - base_url=None, - token=None, - region=None, - profile=None, language=None, topics=None, min_stars=0, @@ -1776,22 +1138,21 @@ def test_import_repos_returns_nonzero_on_error( monkeypatch: MonkeyPatch, caplog: pytest.LogCaptureFixture, ) -> None: - """Test import_repos returns non-zero exit code on error.""" + """Test _run_import returns non-zero exit code on error.""" caplog.set_level(logging.ERROR) monkeypatch.setenv("HOME", str(tmp_path)) workspace = tmp_path / "repos" workspace.mkdir() - result = import_repos( - service="unknownservice", + importer = MockImporter(error=AuthenticationError("Bad credentials")) + + result = _run_import( + importer, + service_name="github", target="testuser", workspace=str(workspace), mode="user", - base_url=None, - token=None, - region=None, - profile=None, language=None, topics=None, min_stars=0, @@ -1814,38 +1175,21 @@ def test_import_repos_returns_zero_on_success( monkeypatch: MonkeyPatch, caplog: pytest.LogCaptureFixture, ) -> None: - """Test import_repos returns 0 on success.""" + """Test _run_import returns 0 on success.""" caplog.set_level(logging.INFO) monkeypatch.setenv("HOME", str(tmp_path)) workspace = tmp_path / "repos" workspace.mkdir() - class MockImporter: - service_name = "MockService" + importer = MockImporter(repos=[_make_repo("repo1")]) 
- def fetch_repos( - self, - options: ImportOptions, - ) -> t.Iterator[RemoteRepo]: - yield _make_repo("repo1") - - # Mock _get_importer: return MockImporter to test successful exit code - monkeypatch.setattr( - import_repos_mod, - "_get_importer", - lambda *args, **kwargs: MockImporter(), - ) - - result = import_repos( - service="github", + result = _run_import( + importer, + service_name="github", target="testuser", workspace=str(workspace), mode="user", - base_url=None, - token=None, - region=None, - profile=None, language=None, topics=None, min_stars=0, @@ -1868,7 +1212,7 @@ def test_import_repos_rejects_non_dict_config( monkeypatch: MonkeyPatch, caplog: pytest.LogCaptureFixture, ) -> None: - """Test import_repos rejects config that is a YAML list instead of dict.""" + """Test _run_import rejects config that is a YAML list instead of dict.""" caplog.set_level(logging.ERROR) monkeypatch.setenv("HOME", str(tmp_path)) @@ -1878,31 +1222,14 @@ def test_import_repos_rejects_non_dict_config( # Write a YAML list instead of a mapping config_file.write_text("- item1\n- item2\n", encoding="utf-8") - class MockImporter: - service_name = "MockService" - - def fetch_repos( - self, - options: ImportOptions, - ) -> t.Iterator[RemoteRepo]: - yield _make_repo("repo1") - - # Mock _get_importer: return MockImporter to test non-dict config rejection - monkeypatch.setattr( - import_repos_mod, - "_get_importer", - lambda *args, **kwargs: MockImporter(), - ) + importer = MockImporter(repos=[_make_repo("repo1")]) - import_repos( - service="github", + _run_import( + importer, + service_name="github", target="testuser", workspace=str(workspace), mode="user", - base_url=None, - token=None, - region=None, - profile=None, language=None, topics=None, min_stars=0, @@ -2057,28 +1384,14 @@ def test_import_nested_groups( workspace.mkdir(parents=True) config_file = tmp_path / ".vcspull.yaml" - class MockImporter: - service_name = "GitLab" + importer = MockImporter(service_name="GitLab", 
repos=mock_repos) - def fetch_repos(self, options: ImportOptions) -> t.Iterator[RemoteRepo]: - yield from mock_repos - - # Mock the importer factory so import_repos() exercises only workspace mapping. - monkeypatch.setattr( - import_repos_mod, - "_get_importer", - lambda *args, **kwargs: MockImporter(), - ) - - import_repos( - service="gitlab", + _run_import( + importer, + service_name="gitlab", target=target, workspace=str(workspace), mode=mode, - base_url=None, - token=None, - region=None, - profile=None, language=None, topics=None, min_stars=0, @@ -2116,7 +1429,7 @@ class LanguageWarningFixture(t.NamedTuple): """Fixture for --language warning test cases.""" test_id: str - service: str + service_name: str language: str | None expect_warning: bool @@ -2124,25 +1437,25 @@ class LanguageWarningFixture(t.NamedTuple): LANGUAGE_WARNING_FIXTURES: list[LanguageWarningFixture] = [ LanguageWarningFixture( test_id="gitlab-with-language-warns", - service="gitlab", + service_name="gitlab", language="Python", expect_warning=True, ), LanguageWarningFixture( test_id="codecommit-with-language-warns", - service="codecommit", + service_name="codecommit", language="Python", expect_warning=True, ), LanguageWarningFixture( test_id="github-with-language-no-warning", - service="github", + service_name="github", language="Python", expect_warning=False, ), LanguageWarningFixture( test_id="gitlab-without-language-no-warning", - service="gitlab", + service_name="gitlab", language=None, expect_warning=False, ), @@ -2156,7 +1469,7 @@ class LanguageWarningFixture(t.NamedTuple): ) def test_import_repos_language_warning( test_id: str, - service: str, + service_name: str, language: str | None, expect_warning: bool, tmp_path: pathlib.Path, @@ -2170,33 +1483,17 @@ def test_import_repos_language_warning( workspace = tmp_path / "repos" workspace.mkdir() - class MockImporter: - service_name = {"gitlab": "GitLab", "codecommit": "CodeCommit"}.get( - service, "GitHub" - ) - - def fetch_repos( - self, - 
options: ImportOptions, - ) -> t.Iterator[RemoteRepo]: - return iter([]) - - # Mock _get_importer: return MockImporter to test language warning behavior - monkeypatch.setattr( - import_repos_mod, - "_get_importer", - lambda *args, **kwargs: MockImporter(), + display_name = {"gitlab": "GitLab", "codecommit": "CodeCommit"}.get( + service_name, "GitHub" ) + importer = MockImporter(service_name=display_name) - import_repos( - service=service, - target="testuser" if service != "codecommit" else "", + _run_import( + importer, + service_name=service_name, + target="testuser" if service_name != "codecommit" else "", workspace=str(workspace), mode="user", - base_url=None, - token=None, - region="us-east-1" if service == "codecommit" else None, - profile=None, language=language, topics=None, min_stars=0, @@ -2221,7 +1518,7 @@ class UnsupportedFilterFixture(t.NamedTuple): """Fixture for unsupported CodeCommit filter warning test cases.""" test_id: str - service: str + service_name: str topics: str | None min_stars: int expect_topics_warning: bool @@ -2231,7 +1528,7 @@ class UnsupportedFilterFixture(t.NamedTuple): UNSUPPORTED_FILTER_FIXTURES: list[UnsupportedFilterFixture] = [ UnsupportedFilterFixture( test_id="codecommit-with-topics-warns", - service="codecommit", + service_name="codecommit", topics="python,cli", min_stars=0, expect_topics_warning=True, @@ -2239,7 +1536,7 @@ class UnsupportedFilterFixture(t.NamedTuple): ), UnsupportedFilterFixture( test_id="codecommit-with-min-stars-warns", - service="codecommit", + service_name="codecommit", topics=None, min_stars=10, expect_topics_warning=False, @@ -2247,7 +1544,7 @@ class UnsupportedFilterFixture(t.NamedTuple): ), UnsupportedFilterFixture( test_id="codecommit-with-both-warns", - service="codecommit", + service_name="codecommit", topics="python", min_stars=5, expect_topics_warning=True, @@ -2255,7 +1552,7 @@ class UnsupportedFilterFixture(t.NamedTuple): ), UnsupportedFilterFixture( test_id="github-with-topics-no-warning", - 
service="github", + service_name="github", topics="python,cli", min_stars=10, expect_topics_warning=False, @@ -2271,7 +1568,7 @@ class UnsupportedFilterFixture(t.NamedTuple): ) def test_import_repos_unsupported_filter_warning( test_id: str, - service: str, + service_name: str, topics: str | None, min_stars: int, expect_topics_warning: bool, @@ -2287,31 +1584,15 @@ def test_import_repos_unsupported_filter_warning( workspace = tmp_path / "repos" workspace.mkdir() - class MockImporter: - service_name = "CodeCommit" if service == "codecommit" else "GitHub" + display_name = "CodeCommit" if service_name == "codecommit" else "GitHub" + importer = MockImporter(service_name=display_name) - def fetch_repos( - self, - options: ImportOptions, - ) -> t.Iterator[RemoteRepo]: - return iter([]) - - # Mock _get_importer: return MockImporter to test unsupported filter warnings - monkeypatch.setattr( - import_repos_mod, - "_get_importer", - lambda *args, **kwargs: MockImporter(), - ) - - import_repos( - service=service, - target="testuser" if service != "codecommit" else "", + _run_import( + importer, + service_name=service_name, + target="testuser" if service_name != "codecommit" else "", workspace=str(workspace), mode="user", - base_url=None, - token=None, - region="us-east-1" if service == "codecommit" else None, - profile=None, language=None, topics=topics, min_stars=min_stars, @@ -2335,3 +1616,159 @@ def fetch_repos( assert "does not track star counts" in caplog.text else: assert "does not track star counts" not in caplog.text + + +# ── New tests for per-service subparser architecture ── + + +def test_alias_parsing_gh() -> None: + """Test that 'import gh' resolves the same as 'import github'.""" + from vcspull.cli import create_parser + + parser = create_parser(return_subparsers=False) + args = parser.parse_args(["import", "gh", "myuser", "-w", "/tmp/repos"]) + assert args.import_service in ("github", "gh") + assert hasattr(args, "import_handler") + + +def 
test_alias_parsing_gl() -> None: + """Test that 'import gl' resolves the same as 'import gitlab'.""" + from vcspull.cli import create_parser + + parser = create_parser(return_subparsers=False) + args = parser.parse_args(["import", "gl", "myuser", "-w", "/tmp/repos"]) + assert args.import_service in ("gitlab", "gl") + assert hasattr(args, "import_handler") + + +def test_alias_parsing_cb() -> None: + """Test that 'import cb' resolves the same as 'import codeberg'.""" + from vcspull.cli import create_parser + + parser = create_parser(return_subparsers=False) + args = parser.parse_args(["import", "cb", "myuser", "-w", "/tmp/repos"]) + assert args.import_service in ("codeberg", "cb") + assert hasattr(args, "import_handler") + + +def test_alias_parsing_cc() -> None: + """Test that 'import cc' resolves the same as 'import codecommit'.""" + from vcspull.cli import create_parser + + parser = create_parser(return_subparsers=False) + args = parser.parse_args(["import", "cc", "-w", "/tmp/repos"]) + assert args.import_service in ("codecommit", "cc") + assert hasattr(args, "import_handler") + + +def test_alias_parsing_aws() -> None: + """Test that 'import aws' resolves the same as 'import codecommit'.""" + from vcspull.cli import create_parser + + parser = create_parser(return_subparsers=False) + args = parser.parse_args(["import", "aws", "-w", "/tmp/repos"]) + assert args.import_service in ("codecommit", "aws") + assert hasattr(args, "import_handler") + + +def test_flatten_groups_only_on_gitlab() -> None: + """Test that --flatten-groups is only available on the gitlab subparser.""" + from vcspull.cli import create_parser + + parser = create_parser(return_subparsers=False) + + # Should work for gitlab + args = parser.parse_args( + ["import", "gitlab", "mygroup", "-w", "/tmp/repos", "--flatten-groups"] + ) + assert args.flatten_groups is True + + # Should fail for github + with pytest.raises(SystemExit): + parser.parse_args( + ["import", "github", "myuser", "-w", "/tmp/repos", 
"--flatten-groups"] + ) + + +def test_region_only_on_codecommit() -> None: + """Test that --region is only available on the codecommit subparser.""" + from vcspull.cli import create_parser + + parser = create_parser(return_subparsers=False) + + # Should work for codecommit + args = parser.parse_args( + ["import", "codecommit", "-w", "/tmp/repos", "--region", "us-east-1"] + ) + assert args.region == "us-east-1" + + # Should fail for github + with pytest.raises(SystemExit): + parser.parse_args( + ["import", "github", "myuser", "-w", "/tmp/repos", "--region", "us-east-1"] + ) + + +def test_url_required_for_gitea() -> None: + """Test that --url is required for the gitea subparser.""" + from vcspull.cli import create_parser + + parser = create_parser(return_subparsers=False) + + # Should fail without --url + with pytest.raises(SystemExit): + parser.parse_args(["import", "gitea", "myuser", "-w", "/tmp/repos"]) + + # Should work with --url + args = parser.parse_args( + [ + "import", + "gitea", + "myuser", + "-w", + "/tmp/repos", + "--url", + "https://git.example.com", + ] + ) + assert args.base_url == "https://git.example.com" + + +def test_url_required_for_forgejo() -> None: + """Test that --url is required for the forgejo subparser.""" + from vcspull.cli import create_parser + + parser = create_parser(return_subparsers=False) + + # Should fail without --url + with pytest.raises(SystemExit): + parser.parse_args(["import", "forgejo", "myuser", "-w", "/tmp/repos"]) + + # Should work with --url + args = parser.parse_args( + [ + "import", + "forgejo", + "myuser", + "-w", + "/tmp/repos", + "--url", + "https://forgejo.example.com", + ] + ) + assert args.base_url == "https://forgejo.example.com" + + +def test_codecommit_target_is_optional() -> None: + """Test that target is optional for the codecommit subparser.""" + from vcspull.cli import create_parser + + parser = create_parser(return_subparsers=False) + + # Should work without target + args = parser.parse_args(["import", 
"codecommit", "-w", "/tmp/repos"]) + assert args.target == "" + + # Should work with target + args = parser.parse_args(["import", "codecommit", "myprefix", "-w", "/tmp/repos"]) + assert args.target == "myprefix" diff --git a/tests/test_log.py b/tests/test_log.py index 672a843ca..2ccf26ce6 100644 --- a/tests/test_log.py +++ b/tests/test_log.py @@ -432,7 +432,14 @@ def test_get_cli_logger_names_includes_base() -> None: "vcspull.cli.add", "vcspull.cli.discover", "vcspull.cli.fmt", - "vcspull.cli.import_repos", + "vcspull.cli.import_cmd", + "vcspull.cli.import_cmd._common", + "vcspull.cli.import_cmd.codeberg", + "vcspull.cli.import_cmd.codecommit", + "vcspull.cli.import_cmd.forgejo", + "vcspull.cli.import_cmd.gitea", + "vcspull.cli.import_cmd.github", + "vcspull.cli.import_cmd.gitlab", "vcspull.cli.list", "vcspull.cli.search", "vcspull.cli.status", From 89dcd2f3fcc3012bc033697a0584959717ec22f5 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 14 Feb 2026 18:49:50 -0600 Subject: [PATCH 084/109] docs(cli/import): Update API docs for import_cmd package why: The autodoc reference pointed to the deleted import_repos module. what: - Change automodule from vcspull.cli.import_repos to vcspull.cli.import_cmd - Add automodule for vcspull.cli.import_cmd._common --- docs/api/cli/import.md | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/docs/api/cli/import.md b/docs/api/cli/import.md index 34a0643fc..ab9bbeef2 100644 --- a/docs/api/cli/import.md +++ b/docs/api/cli/import.md @@ -1,7 +1,14 @@ -# vcspull import - `vcspull.cli.import_repos` +# vcspull import - `vcspull.cli.import_cmd` ```{eval-rst} -.. automodule:: vcspull.cli.import_repos +.. automodule:: vcspull.cli.import_cmd + :members: + :show-inheritance: + :undoc-members: +``` + +```{eval-rst} +.. 
automodule:: vcspull.cli.import_cmd._common :members: :show-inheritance: :undoc-members: From c686b1a2b0fbc0cf57cf5c5b230f39dd1bf00da7 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 14 Feb 2026 19:08:04 -0600 Subject: [PATCH 085/109] docs(cli/import[pages]): Split import docs into per-service nested pages why: Each service subcommand deserves its own page mirroring the CLI structure. what: - Replace docs/cli/import.md with docs/cli/import/index.md - Create per-service pages: github, gitlab, codeberg, gitea, forgejo, codecommit - Move service-specific auth, group flattening, and CodeCommit usage to respective pages - Use :nosubcommands: and :nodescription: to keep index argparse output clean - Add linked service list and hidden toctree on index page - Update docs/cli/index.md toctree entry to import/index --- docs/cli/import/codeberg.md | 27 +++++ docs/cli/import/codecommit.md | 38 +++++++ docs/cli/import/forgejo.md | 28 ++++++ docs/cli/import/gitea.md | 27 +++++ docs/cli/import/github.md | 31 ++++++ docs/cli/import/gitlab.md | 39 ++++++++ docs/cli/{import.md => import/index.md} | 128 +++++------------------- docs/cli/index.md | 2 +- 8 files changed, 216 insertions(+), 104 deletions(-) create mode 100644 docs/cli/import/codeberg.md create mode 100644 docs/cli/import/codecommit.md create mode 100644 docs/cli/import/forgejo.md create mode 100644 docs/cli/import/gitea.md create mode 100644 docs/cli/import/github.md create mode 100644 docs/cli/import/gitlab.md rename docs/cli/{import.md => import/index.md} (65%) diff --git a/docs/cli/import/codeberg.md b/docs/cli/import/codeberg.md new file mode 100644 index 000000000..78b3737e7 --- /dev/null +++ b/docs/cli/import/codeberg.md @@ -0,0 +1,27 @@ +(cli-import-codeberg)= + +# vcspull import codeberg + +Import repositories from Codeberg. + +## Command + +```{eval-rst} +.. 
argparse::
+    :module: vcspull.cli
+    :func: create_parser
+    :prog: vcspull
+    :path: import codeberg
+```
+
+## Authentication
+
+- **Env vars**: `CODEBERG_TOKEN` (primary), `GITEA_TOKEN` (fallback)
+- **Token type**: API token
+- **Scope**: no scopes needed for public repos; token required for private repos
+- **Create at**: <https://codeberg.org/user/settings/applications>
+
+```console
+$ export CODEBERG_TOKEN=...
+$ vcspull import codeberg myuser -w ~/code/
+```
diff --git a/docs/cli/import/codecommit.md b/docs/cli/import/codecommit.md
new file mode 100644
index 000000000..7f6d91b16
--- /dev/null
+++ b/docs/cli/import/codecommit.md
@@ -0,0 +1,38 @@
+(cli-import-codecommit)=
+
+# vcspull import codecommit
+
+Import repositories from AWS CodeCommit.
+
+## Command
+
+```{eval-rst}
+.. argparse::
+    :module: vcspull.cli
+    :func: create_parser
+    :prog: vcspull
+    :path: import codecommit
+```
+
+## Usage
+
+CodeCommit does not require a target argument. Use `--region` and `--profile`
+to select the AWS environment:
+
+```console
+$ vcspull import codecommit -w ~/code/ --region us-east-1 --profile work
+```
+
+## Authentication
+
+- **Auth**: AWS CLI credentials (`aws configure`) — no token env var
+- **CLI args**: `--region`, `--profile`
+- **IAM permissions required**:
+  - `codecommit:ListRepositories` (resource: `*`)
+  - `codecommit:BatchGetRepositories` (resource: repo ARNs or `*`)
+- **Dependency**: AWS CLI must be installed (`pip install awscli`)
+
+```console
+$ aws configure
+$ vcspull import codecommit -w ~/code/ --region us-east-1
+```
diff --git a/docs/cli/import/forgejo.md b/docs/cli/import/forgejo.md
new file mode 100644
index 000000000..4480f48fe
--- /dev/null
+++ b/docs/cli/import/forgejo.md
@@ -0,0 +1,28 @@
+(cli-import-forgejo)=
+
+# vcspull import forgejo
+
+Import repositories from a self-hosted Forgejo instance.
+
+## Command
+
+```{eval-rst}
+.. argparse::
+    :module: vcspull.cli
+    :func: create_parser
+    :prog: vcspull
+    :path: import forgejo
+```
+
+## Authentication
+
+- **Env vars**: `FORGEJO_TOKEN` (primary; matched when hostname contains
+  "forgejo"), `GITEA_TOKEN` (fallback)
+- **Token type**: API token
+- **Scope**: `read:repository`
+- **Create at**: `https://<instance>/user/settings/applications`
+
+```console
+$ export FORGEJO_TOKEN=...
+$ vcspull import forgejo myuser -w ~/code/ --url https://forgejo.example.com
+```
diff --git a/docs/cli/import/gitea.md b/docs/cli/import/gitea.md
new file mode 100644
index 000000000..bceb9a644
--- /dev/null
+++ b/docs/cli/import/gitea.md
@@ -0,0 +1,27 @@
+(cli-import-gitea)=
+
+# vcspull import gitea
+
+Import repositories from a self-hosted Gitea instance.
+
+## Command
+
+```{eval-rst}
+.. argparse::
+    :module: vcspull.cli
+    :func: create_parser
+    :prog: vcspull
+    :path: import gitea
+```
+
+## Authentication
+
+- **Env var**: `GITEA_TOKEN`
+- **Token type**: API token with scoped permissions
+- **Scope**: `read:repository` (minimum for listing repos)
+- **Create at**: `https://<instance>/user/settings/applications`
+
+```console
+$ export GITEA_TOKEN=...
+$ vcspull import gitea myuser -w ~/code/ --url https://git.example.com
+```
diff --git a/docs/cli/import/github.md b/docs/cli/import/github.md
new file mode 100644
index 000000000..c476fd7e9
--- /dev/null
+++ b/docs/cli/import/github.md
@@ -0,0 +1,31 @@
+(cli-import-github)=
+
+# vcspull import github
+
+Import repositories from GitHub or GitHub Enterprise.
+
+## Command
+
+```{eval-rst}
+.. argparse::
+    :module: vcspull.cli
+    :func: create_parser
+    :prog: vcspull
+    :path: import github
+```
+
+## Authentication
+
+- **Env vars**: `GITHUB_TOKEN` (primary), `GH_TOKEN` (fallback)
+- **Token type**: Personal access token (classic) or fine-grained PAT
+- **Permissions**:
+  - Classic PAT: no scopes needed for public repos; `repo` scope for private
+    repos; `read:org` for org repos
+  - Fine-grained PAT: "Metadata: Read-only" for public; add "Contents:
+    Read-only" for private
+- **Create at**: <https://github.com/settings/tokens>
+
+```console
+$ export GITHUB_TOKEN=ghp_...
+$ vcspull import gh myuser -w ~/code/
+```
diff --git a/docs/cli/import/gitlab.md b/docs/cli/import/gitlab.md
new file mode 100644
index 000000000..776bcba0f
--- /dev/null
+++ b/docs/cli/import/gitlab.md
@@ -0,0 +1,39 @@
+(cli-import-gitlab)=
+
+# vcspull import gitlab
+
+Import repositories from GitLab or a self-hosted GitLab instance.
+
+## Command
+
+```{eval-rst}
+.. argparse::
+    :module: vcspull.cli
+    :func: create_parser
+    :prog: vcspull
+    :path: import gitlab
+```
+
+## Group flattening
+
+When importing a GitLab group with `--mode org`, vcspull preserves subgroup
+structure as nested workspace directories by default. Use `--flatten-groups` to
+place all repositories directly in the base workspace:
+
+```console
+$ vcspull import gl my-group --mode org -w ~/code/ --flatten-groups
+```
+
+## Authentication
+
+- **Env vars**: `GITLAB_TOKEN` (primary), `GL_TOKEN` (fallback)
+- **Token type**: Personal access token
+- **Scope**: `read_api` (minimum for listing projects; **required** for search
+  mode)
+- **Create at**: <https://gitlab.com/-/user_settings/personal_access_tokens>
+  (self-hosted: `https://<instance>/-/user_settings/personal_access_tokens`)
+
+```console
+$ export GITLAB_TOKEN=glpat-...
+$ vcspull import gl myuser -w ~/code/ +``` diff --git a/docs/cli/import.md b/docs/cli/import/index.md similarity index 65% rename from docs/cli/import.md rename to docs/cli/import/index.md index 021d6569f..5890c2d90 100644 --- a/docs/cli/import.md +++ b/docs/cli/import/index.md @@ -18,8 +18,19 @@ Supported services: **GitHub**, **GitLab**, **Codeberg**, **Gitea**, :func: create_parser :prog: vcspull :path: import + :nosubcommands: + :nodescription: ``` +Choose a service subcommand for details: + +- {ref}`cli-import-github` — GitHub or GitHub Enterprise +- {ref}`cli-import-gitlab` — GitLab (gitlab.com or self-hosted) +- {ref}`cli-import-codeberg` — Codeberg +- {ref}`cli-import-gitea` — Self-hosted Gitea instance +- {ref}`cli-import-forgejo` — Self-hosted Forgejo instance +- {ref}`cli-import-codecommit` — AWS CodeCommit + ## Basic usage Import all repositories for a GitHub user into a workspace: @@ -50,6 +61,18 @@ Import 12 repositories to ~/.vcspull.yaml? [y/N]: y For Gitea and Forgejo, `--url` is required because there is no default instance. +```{toctree} +:maxdepth: 1 +:hidden: + +github +gitlab +codeberg +gitea +forgejo +codecommit +``` + ## Import modes ### User mode (default) @@ -166,25 +189,6 @@ SSH clone URLs are used by default. Switch to HTTPS with `--https`: $ vcspull import gh myuser -w ~/code/ --https ``` -## Group flattening - -When importing a GitLab group with `--mode org`, vcspull preserves subgroup -structure as nested workspace directories by default. Use `--flatten-groups` to -place all repositories directly in the base workspace: - -```console -$ vcspull import gl my-group --mode org -w ~/code/ --flatten-groups -``` - -## AWS CodeCommit - -CodeCommit does not require a target argument. 
Use `--region` and `--profile` -to select the AWS environment: - -```console -$ vcspull import codecommit -w ~/code/ --region us-east-1 --profile work -``` - ## Self-hosted instances Point to a self-hosted GitHub Enterprise, GitLab, Gitea, or Forgejo instance @@ -197,90 +201,8 @@ $ vcspull import gitea myuser -w ~/code/ --url https://git.example.com ## Authentication vcspull reads API tokens from environment variables. Use `--token` to override. -Environment variables are preferred for security. - -### GitHub - -- **Env vars**: `GITHUB_TOKEN` (primary), `GH_TOKEN` (fallback) -- **Token type**: Personal access token (classic) or fine-grained PAT -- **Permissions**: - - Classic PAT: no scopes needed for public repos; `repo` scope for private - repos; `read:org` for org repos - - Fine-grained PAT: "Metadata: Read-only" for public; add "Contents: - Read-only" for private -- **Create at**: - -```console -$ export GITHUB_TOKEN=ghp_... -$ vcspull import gh myuser -w ~/code/ -``` - -### GitLab - -- **Env vars**: `GITLAB_TOKEN` (primary), `GL_TOKEN` (fallback) -- **Token type**: Personal access token -- **Scope**: `read_api` (minimum for listing projects; **required** for search - mode) -- **Create at**: - (self-hosted: `https:///-/user_settings/personal_access_tokens`) - -```console -$ export GITLAB_TOKEN=glpat-... -$ vcspull import gl myuser -w ~/code/ -``` - -### Codeberg - -- **Env vars**: `CODEBERG_TOKEN` (primary), `GITEA_TOKEN` (fallback) -- **Token type**: API token -- **Scope**: no scopes needed for public repos; token required for private repos -- **Create at**: - -```console -$ export CODEBERG_TOKEN=... -$ vcspull import codeberg myuser -w ~/code/ -``` - -### Gitea - -- **Env var**: `GITEA_TOKEN` -- **Token type**: API token with scoped permissions -- **Scope**: `read:repository` (minimum for listing repos) -- **Create at**: `https:///user/settings/applications` - -```console -$ export GITEA_TOKEN=... 
-$ vcspull import gitea myuser -w ~/code/ --url https://git.example.com -``` - -### Forgejo - -- **Env vars**: `FORGEJO_TOKEN` (primary; matched when hostname contains - "forgejo"), `GITEA_TOKEN` (fallback) -- **Token type**: API token -- **Scope**: `read:repository` -- **Create at**: `https:///user/settings/applications` - -```console -$ export FORGEJO_TOKEN=... -$ vcspull import forgejo myuser -w ~/code/ --url https://forgejo.example.com -``` - -### AWS CodeCommit - -- **Auth**: AWS CLI credentials (`aws configure`) — no token env var -- **CLI args**: `--region`, `--profile` -- **IAM permissions required**: - - `codecommit:ListRepositories` (resource: `*`) - - `codecommit:BatchGetRepositories` (resource: repo ARNs or `*`) -- **Dependency**: AWS CLI must be installed (`pip install awscli`) - -```console -$ aws configure -$ vcspull import codecommit -w ~/code/ --region us-east-1 -``` - -### Summary +Environment variables are preferred for security. See each service page for +details. | Service | Env var(s) | Token type | Min scope / permissions | |------------|----------------------------------|-----------------------|------------------------------------------------------------------| diff --git a/docs/cli/index.md b/docs/cli/index.md index d1d571d15..1cb579205 100644 --- a/docs/cli/index.md +++ b/docs/cli/index.md @@ -8,7 +8,7 @@ sync add -import +import/index discover list search From bddfd9908ae909348027dbddb5cf72cf60fa55ce Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 14 Feb 2026 20:03:52 -0600 Subject: [PATCH 086/109] fix(cli/import[traversal]) Reject subgroup paths that escape workspace why: GitLab subgroup owner paths containing ".." bypass the startswith() prefix guard and resolve outside the workspace directory. what: - Validate resolved subpath stays within workspace via is_relative_to() - Fall back to base workspace (flatten) when traversal detected - Add test fixture with "../.." 
in owner path --- src/vcspull/cli/import_cmd/_common.py | 11 ++++++++++- tests/cli/test_import_repos.py | 15 +++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/src/vcspull/cli/import_cmd/_common.py b/src/vcspull/cli/import_cmd/_common.py index 32bc64426..10fa086e6 100644 --- a/src/vcspull/cli/import_cmd/_common.py +++ b/src/vcspull/cli/import_cmd/_common.py @@ -564,7 +564,16 @@ def _run_import( subpath = "" if subpath: - repo_workspace_path = workspace_path / subpath + candidate = (workspace_path / subpath).resolve() + if not candidate.is_relative_to(workspace_path.resolve()): + log.warning( + "%s Ignoring subgroup path that escapes workspace: %s", + colors.warning("⚠"), + repo.owner, + ) + subpath = "" + else: + repo_workspace_path = workspace_path / subpath repo_workspace_label = workspace_root_label( repo_workspace_path, cwd=cwd, home=home diff --git a/tests/cli/test_import_repos.py b/tests/cli/test_import_repos.py index 5572ba83f..9525c689c 100644 --- a/tests/cli/test_import_repos.py +++ b/tests/cli/test_import_repos.py @@ -1323,6 +1323,21 @@ class NestedGroupImportFixture(t.NamedTuple): "": ("outside",), }, ), + NestedGroupImportFixture( + test_id="traversal-in-owner-flattened-to-base", + target="a/b", + mode="org", + flatten_groups=False, + workspace_relpath="repos", + mock_repos=[ + _make_repo("evil", owner="a/b/../../escape"), + _make_repo("safe", owner="a/b/c"), + ], + expected_sections={ + "": ("evil",), + "c": ("safe",), + }, + ), NestedGroupImportFixture( test_id="flatten-groups-flag-uses-single-workspace", target="a/b", From df942bfab1068b260f8092319e884b1cb7060923 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 14 Feb 2026 20:05:18 -0600 Subject: [PATCH 087/109] fix(config[atomic_write]) Preserve file permissions on atomic write why: mkstemp() creates temp files with 0600 permissions. The rename overwrites the original file's permissions (e.g. 0644) with 0600. 
what: - Capture original file mode before writing - Restore mode on temp file before replace() - Add test asserting 0644 is preserved after save --- src/vcspull/config.py | 6 ++++++ tests/test_config_writer.py | 14 ++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/src/vcspull/config.py b/src/vcspull/config.py index 69ec6db36..455bbf52f 100644 --- a/src/vcspull/config.py +++ b/src/vcspull/config.py @@ -475,6 +475,10 @@ def _atomic_write(target: pathlib.Path, content: str) -> None: content : str Content to write """ + original_mode: int | None = None + if target.exists(): + original_mode = target.stat().st_mode + fd, tmp_path = tempfile.mkstemp( dir=target.parent, prefix=f".{target.name}.", @@ -483,6 +487,8 @@ def _atomic_write(target: pathlib.Path, content: str) -> None: try: with os.fdopen(fd, "w", encoding="utf-8") as f: f.write(content) + if original_mode is not None: + pathlib.Path(tmp_path).chmod(original_mode) pathlib.Path(tmp_path).replace(target) except BaseException: # Clean up the temp file on any failure diff --git a/tests/test_config_writer.py b/tests/test_config_writer.py index 21a2f2bfd..849de6d74 100644 --- a/tests/test_config_writer.py +++ b/tests/test_config_writer.py @@ -75,6 +75,20 @@ def test_save_config_yaml_atomic_write( assert tmp_files == [config_path] +def test_save_config_yaml_atomic_preserves_permissions( + tmp_path: pathlib.Path, +) -> None: + """Test that save_config_yaml preserves original file permissions.""" + config_path = tmp_path / ".vcspull.yaml" + config_path.write_text("~/code/: {}\n", encoding="utf-8") + config_path.chmod(0o644) + + data = {"~/code/": {"myrepo": {"repo": "git+https://example.com/repo.git"}}} + save_config_yaml(config_path, data) + + assert config_path.stat().st_mode & 0o777 == 0o644 + + def test_save_config_yaml_atomic_preserves_existing_on_error( tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch, From dcfed9281bd237496dc590d9fdbc895ab467ea75 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: 
Sat, 14 Feb 2026 20:08:39 -0600 Subject: [PATCH 088/109] fix(cli/import[json]) Accept JSON config files in import command why: vcspull supports JSON configs project-wide but the import command hardcoded YAML-only suffix validation, rejecting .json files. what: - Accept .json in _resolve_config_file suffix check - Search for JSON in home config auto-discovery - Add save_config_json() and dispatch by suffix on write - Update test to use .toml as the rejected extension - Add json-extension-accepted test fixture --- src/vcspull/cli/import_cmd/_common.py | 14 +++++++++----- src/vcspull/config.py | 18 ++++++++++++++++++ tests/cli/test_import_repos.py | 16 +++++++++++----- 3 files changed, 38 insertions(+), 10 deletions(-) diff --git a/src/vcspull/cli/import_cmd/_common.py b/src/vcspull/cli/import_cmd/_common.py index 10fa086e6..69a79e523 100644 --- a/src/vcspull/cli/import_cmd/_common.py +++ b/src/vcspull/cli/import_cmd/_common.py @@ -26,6 +26,7 @@ ) from vcspull.config import ( find_home_config_files, + save_config_json, save_config_yaml, workspace_root_label, ) @@ -241,12 +242,12 @@ def _resolve_config_file(config_path_str: str | None) -> pathlib.Path: """ if config_path_str: path = pathlib.Path(config_path_str).expanduser().resolve() - if path.suffix.lower() not in {".yaml", ".yml"}: - msg = f"Only YAML config files are supported, got: {path.suffix}" + if path.suffix.lower() not in {".yaml", ".yml", ".json"}: + msg = f"Unsupported config file type: {path.suffix}" raise ValueError(msg) return path - home_configs = find_home_config_files(filetype=["yaml"]) + home_configs = find_home_config_files(filetype=["yaml", "json"]) if home_configs: return home_configs[0] @@ -532,7 +533,7 @@ def _run_import( if not isinstance(raw_config, dict): log.error( - "%s Config file is not a valid YAML mapping: %s", + "%s Config file is not a valid mapping: %s", colors.error("✗"), display_config_path, ) @@ -616,7 +617,10 @@ def _run_import( # Save config try: - 
save_config_yaml(config_file_path, raw_config) + if config_file_path.suffix.lower() == ".json": + save_config_json(config_file_path, raw_config) + else: + save_config_yaml(config_file_path, raw_config) log.info( "%s Added %s repositories to %s", colors.success("✓"), diff --git a/src/vcspull/config.py b/src/vcspull/config.py index 455bbf52f..8a94f32ec 100644 --- a/src/vcspull/config.py +++ b/src/vcspull/config.py @@ -515,6 +515,24 @@ def save_config_yaml(config_file_path: pathlib.Path, data: dict[t.Any, t.Any]) - _atomic_write(config_file_path, yaml_content) +def save_config_json(config_file_path: pathlib.Path, data: dict[t.Any, t.Any]) -> None: + """Save configuration data to a JSON file. + + Parameters + ---------- + config_file_path : pathlib.Path + Path to the configuration file to write + data : dict + Configuration data to save + """ + json_content = ConfigReader._dump( + fmt="json", + content=data, + indent=2, + ) + _atomic_write(config_file_path, json_content) + + def save_config_yaml_with_items( config_file_path: pathlib.Path, items: list[tuple[str, t.Any]], diff --git a/tests/cli/test_import_repos.py b/tests/cli/test_import_repos.py index 9525c689c..50fe6efcf 100644 --- a/tests/cli/test_import_repos.py +++ b/tests/cli/test_import_repos.py @@ -119,6 +119,12 @@ class ResolveConfigFixture(t.NamedTuple): home_configs=[], expected_suffix="config.yml", ), + ResolveConfigFixture( + test_id="json-extension-accepted", + config_path_str="/custom/config.json", + home_configs=[], + expected_suffix="config.json", + ), ] @@ -1010,12 +1016,12 @@ def test_import_flatten_groups_flag_via_cli() -> None: assert args.flatten_groups is True -def test_import_repos_rejects_non_yaml_config( +def test_import_repos_rejects_unsupported_config_type( tmp_path: pathlib.Path, monkeypatch: MonkeyPatch, caplog: pytest.LogCaptureFixture, ) -> None: - """Test _run_import rejects non-YAML config file paths.""" + """Test _run_import rejects unsupported config file types.""" 
caplog.set_level(logging.ERROR) monkeypatch.setenv("HOME", str(tmp_path)) @@ -1036,7 +1042,7 @@ def test_import_repos_rejects_non_yaml_config( include_archived=False, include_forks=False, limit=100, - config_path_str=str(tmp_path / "config.json"), + config_path_str=str(tmp_path / "config.toml"), dry_run=False, yes=True, output_json=False, @@ -1044,7 +1050,7 @@ def test_import_repos_rejects_non_yaml_config( color="never", ) - assert "Only YAML config files are supported" in caplog.text + assert "Unsupported config file type" in caplog.text def test_import_repos_catches_multiple_config_warning( @@ -1244,7 +1250,7 @@ def test_import_repos_rejects_non_dict_config( color="never", ) - assert "not a valid YAML mapping" in caplog.text + assert "not a valid mapping" in caplog.text class NestedGroupImportFixture(t.NamedTuple): From 160f8b8a9bce873349dfdf6a9a40ff3cbad7609f Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 14 Feb 2026 20:10:22 -0600 Subject: [PATCH 089/109] fix(remotes/github[ghe]) Normalize GitHub Enterprise URL to /api/v3 why: GitHub Enterprise requires /api/v3 path prefix but the importer used the base URL as-is, unlike Gitea which correctly appends /api/v1. 
what: - Auto-append /api/v3 when base_url is provided and lacks /api/ path - Skip normalization for default api.github.com and pre-suffixed URLs - Add tests for GHE normalization, idempotency, and public URL --- src/vcspull/_internal/remotes/github.py | 8 +++++++- tests/_internal/remotes/test_github.py | 18 ++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/src/vcspull/_internal/remotes/github.py b/src/vcspull/_internal/remotes/github.py index ed43addb9..8face72ac 100644 --- a/src/vcspull/_internal/remotes/github.py +++ b/src/vcspull/_internal/remotes/github.py @@ -71,8 +71,14 @@ def __init__( """ self._token = token or get_token_from_env("GITHUB_TOKEN", "GH_TOKEN") self._base_url = (base_url or GITHUB_API_URL).rstrip("/") + + # GitHub Enterprise needs /api/v3; public api.github.com does not + api_url = self._base_url + if base_url and "/api/" not in self._base_url: + api_url = f"{self._base_url}/api/v3" + self._client = HTTPClient( - self._base_url, + api_url, token=self._token, auth_header="Authorization", auth_prefix="Bearer", diff --git a/tests/_internal/remotes/test_github.py b/tests/_internal/remotes/test_github.py index 126d3aeb7..db76231ca 100644 --- a/tests/_internal/remotes/test_github.py +++ b/tests/_internal/remotes/test_github.py @@ -344,6 +344,24 @@ def test_github_importer_service_name() -> None: assert importer.service_name == "GitHub" +def test_github_enterprise_url_normalized() -> None: + """Test that GitHub Enterprise URLs get /api/v3 appended.""" + importer = GitHubImporter(token="fake", base_url="https://ghe.example.com") + assert importer._client.base_url == "https://ghe.example.com/api/v3" + + +def test_github_enterprise_url_already_has_api() -> None: + """Test that GHE URLs with /api/v3 are not double-suffixed.""" + importer = GitHubImporter(token="fake", base_url="https://ghe.example.com/api/v3") + assert importer._client.base_url == "https://ghe.example.com/api/v3" + + +def 
test_github_public_url_not_modified() -> None: + """Test that default api.github.com URL is not modified.""" + importer = GitHubImporter(token="fake") + assert importer._client.base_url == "https://api.github.com" + + def test_github_handles_null_topics( mock_urlopen: t.Callable[..., None], ) -> None: From c20a2819d7bb6b81eaa919ebfb2f1665ac15519a Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 14 Feb 2026 20:11:46 -0600 Subject: [PATCH 090/109] fix(cli/import[exit_code]) Return non-zero exit for non-mapping workspace why: When a workspace section in the config is not a dict, the import loop logged an error but returned exit 0 with a misleading success message ("All repositories already exist"). what: - Track workspace sections that fail validation in error_labels set - Return exit 1 before the "all exist" message when errors occurred - Add test asserting non-mapping workspace returns exit code 1 --- src/vcspull/cli/import_cmd/_common.py | 5 ++++ tests/cli/test_import_repos.py | 42 +++++++++++++++++++++++++++ 2 files changed, 47 insertions(+) diff --git a/src/vcspull/cli/import_cmd/_common.py b/src/vcspull/cli/import_cmd/_common.py index 69a79e523..8b494685c 100644 --- a/src/vcspull/cli/import_cmd/_common.py +++ b/src/vcspull/cli/import_cmd/_common.py @@ -543,6 +543,7 @@ def _run_import( # Add repositories to config checked_labels: set[str] = set() + error_labels: set[str] = set() added_count = 0 skipped_count = 0 @@ -589,6 +590,7 @@ def _run_import( colors.error("✗"), repo_workspace_label, ) + error_labels.add(repo_workspace_label) checked_labels.add(repo_workspace_label) if repo_workspace_label in raw_config and not isinstance( @@ -608,6 +610,9 @@ def _run_import( } added_count += 1 + if error_labels: + return 1 + if added_count == 0: log.info( "%s All repositories already exist in config. 
Nothing to add.", diff --git a/tests/cli/test_import_repos.py b/tests/cli/test_import_repos.py index 50fe6efcf..1fea4147f 100644 --- a/tests/cli/test_import_repos.py +++ b/tests/cli/test_import_repos.py @@ -1253,6 +1253,48 @@ def test_import_repos_rejects_non_dict_config( assert "not a valid mapping" in caplog.text +def test_import_repos_non_mapping_workspace_returns_error( + tmp_path: pathlib.Path, + monkeypatch: MonkeyPatch, + caplog: pytest.LogCaptureFixture, +) -> None: + """Test _run_import returns non-zero when a workspace section is not a mapping.""" + caplog.set_level(logging.ERROR) + + monkeypatch.setenv("HOME", str(tmp_path)) + workspace = tmp_path / "repos" + workspace.mkdir() + config_file = tmp_path / ".vcspull.yaml" + # Workspace section is a string, not a mapping + label = workspace_root_label(workspace, cwd=pathlib.Path.cwd(), home=tmp_path) + config_file.write_text(f"{label}: invalid_string\n", encoding="utf-8") + + importer = MockImporter(repos=[_make_repo("repo1")]) + + result = _run_import( + importer, + service_name="github", + target="testuser", + workspace=str(workspace), + mode="user", + language=None, + topics=None, + min_stars=0, + include_archived=False, + include_forks=False, + limit=100, + config_path_str=str(config_file), + dry_run=False, + yes=True, + output_json=False, + output_ndjson=False, + color="never", + ) + + assert result == 1 + assert "not a mapping in config" in caplog.text + + class NestedGroupImportFixture(t.NamedTuple): """Fixture for nested-group workspace persistence cases.""" From 97100b08c1f1c0b76c7bbedf9f7971c389d72d5f Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 15 Feb 2026 06:04:44 -0600 Subject: [PATCH 091/109] fix(cli/import[exit_code]) Return non-zero exit for non-interactive abort why: When stdin is not a TTY and --yes is not provided, _run_import returned 0 (success) even though no import occurred. CI/automation scripts chaining on exit codes would incorrectly proceed. 
what: - Change return 0 to return 1 at the non-interactive abort path - Add return value assertion to test_import_repos_non_tty_aborts --- src/vcspull/cli/import_cmd/_common.py | 2 +- tests/cli/test_import_repos.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/vcspull/cli/import_cmd/_common.py b/src/vcspull/cli/import_cmd/_common.py index 8b494685c..9bb122c2b 100644 --- a/src/vcspull/cli/import_cmd/_common.py +++ b/src/vcspull/cli/import_cmd/_common.py @@ -507,7 +507,7 @@ def _run_import( "%s Non-interactive mode: use --yes to skip confirmation.", colors.error("✗"), ) - return 0 + return 1 try: confirm = input( f"\n{colors.info('Import')} {len(repos)} repositories to " diff --git a/tests/cli/test_import_repos.py b/tests/cli/test_import_repos.py index 1fea4147f..5220caaaf 100644 --- a/tests/cli/test_import_repos.py +++ b/tests/cli/test_import_repos.py @@ -498,7 +498,7 @@ def test_import_repos_non_tty_aborts( importer = MockImporter(repos=[_make_repo("repo1")]) - _run_import( + result = _run_import( importer, service_name="github", target="testuser", @@ -518,6 +518,7 @@ def test_import_repos_non_tty_aborts( color="never", ) + assert result == 1, "Non-interactive abort must return non-zero exit code" assert "Non-interactive mode" in caplog.text assert not config_file.exists() From dbe3627748de7fcd5a1608368ca9c11d9bbf8302 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 15 Feb 2026 06:06:38 -0600 Subject: [PATCH 092/109] fix(remotes[null_owner]) Guard against null owner/namespace in API responses why: dict.get("key", {}) returns None when the key exists with JSON null value, causing AttributeError on subsequent .get() calls. APIs may return null for deleted accounts, system repos, or self-hosted edge cases. 
what: - Change data.get("namespace", {}) to data.get("namespace") or {} in gitlab.py - Change data.get("owner", {}) to (data.get("owner") or {}) in github.py - Change data.get("owner", {}) to data.get("owner") or {} in gitea.py - Add test_github_parse_repo_null_owner - Add test_gitlab_parse_repo_null_namespace - Add test_gitea_parse_repo_null_owner --- src/vcspull/_internal/remotes/gitea.py | 2 +- src/vcspull/_internal/remotes/github.py | 2 +- src/vcspull/_internal/remotes/gitlab.py | 2 +- tests/_internal/remotes/test_gitea.py | 32 ++++++++++++++++++++ tests/_internal/remotes/test_github.py | 40 +++++++++++++++++++++++++ tests/_internal/remotes/test_gitlab.py | 30 +++++++++++++++++++ 6 files changed, 105 insertions(+), 3 deletions(-) diff --git a/src/vcspull/_internal/remotes/gitea.py b/src/vcspull/_internal/remotes/gitea.py index 45b416911..67b1487cb 100644 --- a/src/vcspull/_internal/remotes/gitea.py +++ b/src/vcspull/_internal/remotes/gitea.py @@ -304,7 +304,7 @@ def _parse_repo(self, data: dict[str, t.Any]) -> RemoteRepo: RemoteRepo Parsed repository information """ - owner_data = data.get("owner", {}) + owner_data = data.get("owner") or {} return RemoteRepo( name=data.get("name", ""), diff --git a/src/vcspull/_internal/remotes/github.py b/src/vcspull/_internal/remotes/github.py index 8face72ac..e7a788dc0 100644 --- a/src/vcspull/_internal/remotes/github.py +++ b/src/vcspull/_internal/remotes/github.py @@ -319,7 +319,7 @@ def _parse_repo(self, data: dict[str, t.Any]) -> RemoteRepo: is_fork=data.get("fork", False), is_archived=data.get("archived", False), default_branch=data.get("default_branch", "main"), - owner=data.get("owner", {}).get("login", ""), + owner=(data.get("owner") or {}).get("login", ""), ) def _log_rate_limit(self, headers: dict[str, str]) -> None: diff --git a/src/vcspull/_internal/remotes/gitlab.py b/src/vcspull/_internal/remotes/gitlab.py index aea8c4403..799d23561 100644 --- a/src/vcspull/_internal/remotes/gitlab.py +++ 
b/src/vcspull/_internal/remotes/gitlab.py @@ -310,7 +310,7 @@ def _parse_repo(self, data: dict[str, t.Any]) -> RemoteRepo: name = data.get("path", data.get("name", "")) # Prefer the full namespace path for subgroup-aware import behavior. - namespace = data.get("namespace", {}) + namespace = data.get("namespace") or {} owner = namespace.get("full_path") if not owner: path_with_namespace = data.get("path_with_namespace") diff --git a/tests/_internal/remotes/test_gitea.py b/tests/_internal/remotes/test_gitea.py index d3ba212de..8ad9b6f76 100644 --- a/tests/_internal/remotes/test_gitea.py +++ b/tests/_internal/remotes/test_gitea.py @@ -227,3 +227,35 @@ def test_gitea_filters_by_language( repos = list(importer.fetch_repos(options)) assert len(repos) == 1 assert repos[0].name == "rust-repo" + + +def test_gitea_parse_repo_null_owner( + mock_urlopen: t.Callable[..., None], +) -> None: + """Test Gitea _parse_repo handles null owner without crashing. + + Gitea/Forgejo APIs may return ``"owner": null`` for system repositories. + The importer must not raise AttributeError when this happens. 
+ """ + response_json = [ + { + "name": "sys-repo", + "clone_url": "https://codeberg.org/sys-repo.git", + "ssh_url": "git@codeberg.org:sys-repo.git", + "html_url": "https://codeberg.org/sys-repo", + "description": "System repo", + "language": "Go", + "topics": [], + "stars_count": 0, + "fork": False, + "archived": False, + "default_branch": "main", + "owner": None, + } + ] + mock_urlopen([(json.dumps(response_json).encode(), {}, 200)]) + importer = GiteaImporter(base_url="https://codeberg.org") + options = ImportOptions(mode=ImportMode.USER, target="testuser") + repos = list(importer.fetch_repos(options)) + assert len(repos) == 1 + assert repos[0].owner == "" diff --git a/tests/_internal/remotes/test_github.py b/tests/_internal/remotes/test_github.py index db76231ca..b5fa79647 100644 --- a/tests/_internal/remotes/test_github.py +++ b/tests/_internal/remotes/test_github.py @@ -549,3 +549,43 @@ def test_github_parse_repo_missing_keys( assert repos[0].clone_url == "" assert repos[0].html_url == "" assert repos[0].ssh_url == "" + + +def test_github_parse_repo_null_owner( + mock_urlopen: t.Callable[..., None], +) -> None: + """Test GitHub _parse_repo handles null owner without crashing. + + JSON APIs may return ``"owner": null`` for deleted/suspended accounts. + The importer must not raise AttributeError when this happens. 
+ """ + response_json = [ + { + "name": "repo", + "clone_url": "https://github.com/ghost/repo.git", + "ssh_url": "git@github.com:ghost/repo.git", + "html_url": "https://github.com/ghost/repo", + "description": "Orphaned repo", + "language": "Python", + "topics": [], + "stargazers_count": 1, + "fork": False, + "archived": False, + "default_branch": "main", + "owner": None, + } + ] + mock_urlopen( + [ + ( + json.dumps(response_json).encode(), + {"x-ratelimit-remaining": "100", "x-ratelimit-limit": "60"}, + 200, + ) + ] + ) + importer = GitHubImporter() + options = ImportOptions(mode=ImportMode.USER, target="ghost") + repos = list(importer.fetch_repos(options)) + assert len(repos) == 1 + assert repos[0].owner == "" diff --git a/tests/_internal/remotes/test_gitlab.py b/tests/_internal/remotes/test_gitlab.py index 6a5033d02..3a65e0bc0 100644 --- a/tests/_internal/remotes/test_gitlab.py +++ b/tests/_internal/remotes/test_gitlab.py @@ -565,3 +565,33 @@ def urlopen_capture( assert "archived=" not in captured_urls[0], ( f"Expected no 'archived' param in search URL, got: {captured_urls[0]}" ) + + +def test_gitlab_parse_repo_null_namespace( + mock_urlopen: t.Callable[..., None], +) -> None: + """Test GitLab _parse_repo handles null namespace without crashing. + + Self-hosted GitLab instances may return ``"namespace": null`` for + system-level projects. The importer must not raise AttributeError. 
+ """ + response_json = [ + { + "path": "my-project", + "name": "my-project", + "http_url_to_repo": "https://gitlab.example.com/my-project.git", + "ssh_url_to_repo": "git@gitlab.example.com:my-project.git", + "web_url": "https://gitlab.example.com/my-project", + "description": "Orphaned project", + "star_count": 0, + "namespace": None, + "path_with_namespace": "my-project", + } + ] + mock_urlopen([(json.dumps(response_json).encode(), {}, 200)]) + importer = GitLabImporter() + options = ImportOptions(mode=ImportMode.USER, target="testuser") + repos = list(importer.fetch_repos(options)) + assert len(repos) == 1 + assert repos[0].name == "my-project" + assert repos[0].owner == "" From b1db6c0c8e25a2e290b391144b7dece37e15f182 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 15 Feb 2026 06:07:20 -0600 Subject: [PATCH 093/109] fix(cli/import[codecommit]) Correct help text from prefix to substring filter why: Help said "prefix filter" but the implementation uses substring matching (the `in` operator), which matches anywhere in the name. 
what: - Change help text from "prefix" to "substring" at codecommit.py --- src/vcspull/cli/import_cmd/codecommit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/vcspull/cli/import_cmd/codecommit.py b/src/vcspull/cli/import_cmd/codecommit.py index 1dd343079..2a5efdf9b 100644 --- a/src/vcspull/cli/import_cmd/codecommit.py +++ b/src/vcspull/cli/import_cmd/codecommit.py @@ -37,7 +37,7 @@ def create_codecommit_subparser( metavar="TARGET", nargs="?", default="", - help="Optional filter prefix for repository names", + help="Optional substring filter for repository names", ) parser.add_argument( "--region", From 44945724690a89e60b61894d17d369941e459117 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 15 Feb 2026 06:08:34 -0600 Subject: [PATCH 094/109] fix(remotes[http_client]) Use urlsplit/urlunsplit for query param merging why: Naive f"{url}?{urlencode(params)}" would produce a malformed URL with double question marks if the endpoint already contained query parameters. 
what: - Replace string concatenation with urllib.parse.urlsplit/urlunsplit to properly merge existing and new query parameters - Add test_http_client_get_merges_query_params to verify correct behavior --- src/vcspull/_internal/remotes/base.py | 10 +++++- tests/_internal/remotes/test_base.py | 47 +++++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 1 deletion(-) diff --git a/src/vcspull/_internal/remotes/base.py b/src/vcspull/_internal/remotes/base.py index cb888c2c3..b15d8f50a 100644 --- a/src/vcspull/_internal/remotes/base.py +++ b/src/vcspull/_internal/remotes/base.py @@ -371,7 +371,15 @@ def get( url = f"{self.base_url}{endpoint}" if params: - url = f"{url}?{urllib.parse.urlencode(params)}" + parts = urllib.parse.urlsplit(url) + existing_qs = urllib.parse.parse_qs(parts.query) + existing_qs.update({k: [str(v)] for k, v in params.items()}) + new_query = urllib.parse.urlencode( + {k: v[0] for k, v in existing_qs.items()}, + ) + url = urllib.parse.urlunsplit( + (parts.scheme, parts.netloc, parts.path, new_query, parts.fragment), + ) headers = self._build_headers() request = urllib.request.Request(url, headers=headers) diff --git a/tests/_internal/remotes/test_base.py b/tests/_internal/remotes/test_base.py index 4ff8e8919..02b6c5245 100644 --- a/tests/_internal/remotes/test_base.py +++ b/tests/_internal/remotes/test_base.py @@ -529,3 +529,50 @@ def test_handle_http_error( client._handle_http_error(exc, "TestService") assert expected_message_contains.lower() in str(exc_info.value).lower() + + +def test_http_client_get_merges_query_params( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Test HTTPClient.get properly merges params into URLs with existing query strings. + + Naive f"{url}?{params}" would produce a double-? URL when the endpoint + already contains query parameters. The implementation should use + urllib.parse to merge them correctly. 
+ """ + import io + import json + import urllib.request + + from vcspull._internal.remotes.base import HTTPClient + + captured_urls: list[str] = [] + + def mock_urlopen( + request: urllib.request.Request, + **kwargs: t.Any, + ) -> io.BytesIO: + captured_urls.append(request.full_url) + resp = io.BytesIO(json.dumps({"ok": True}).encode()) + resp.getheaders = lambda: [] # type: ignore[attr-defined] + resp.read = lambda: json.dumps({"ok": True}).encode() # type: ignore[assignment] + return resp + + # Mock urlopen: capture the request URL to verify query param merging + monkeypatch.setattr("urllib.request.urlopen", mock_urlopen) + + client = HTTPClient("https://api.example.com") + + # Endpoint already has a query string; additional params should merge + client.get( + "/search?q=test", + params={"page": 1, "per_page": 10}, + service_name="TestService", + ) + + assert len(captured_urls) == 1 + url = captured_urls[0] + assert "??" not in url, f"Double question mark in URL: {url}" + assert "q=test" in url + assert "page=1" in url + assert "per_page=10" in url From 4c81a326f00951d811fa940c10cc6d334bc6e736 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 15 Feb 2026 06:09:39 -0600 Subject: [PATCH 095/109] fix(remotes[http_client]) Warn when auth token sent over non-HTTPS why: Authorization tokens sent via HTTP are visible to network observers. Users who provide http:// URLs with --url should be warned about the security risk. 
what: - Add warning log in HTTPClient.__init__ when token + non-HTTPS base URL - Add test_http_client_warns_on_non_https_with_token - Add test_http_client_no_warning_on_https_with_token --- src/vcspull/_internal/remotes/base.py | 6 ++++++ tests/_internal/remotes/test_base.py | 31 +++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/src/vcspull/_internal/remotes/base.py b/src/vcspull/_internal/remotes/base.py index b15d8f50a..b750444e6 100644 --- a/src/vcspull/_internal/remotes/base.py +++ b/src/vcspull/_internal/remotes/base.py @@ -299,6 +299,12 @@ def __init__( """ self.base_url = base_url.rstrip("/") self.token = token + if token and not self.base_url.startswith("https://"): + log.warning( + "Authentication token will be sent over non-HTTPS connection " + "to %s — consider using HTTPS to protect credentials", + self.base_url, + ) self.auth_header = auth_header self.auth_prefix = auth_prefix self.user_agent = user_agent diff --git a/tests/_internal/remotes/test_base.py b/tests/_internal/remotes/test_base.py index 02b6c5245..9bb3a665f 100644 --- a/tests/_internal/remotes/test_base.py +++ b/tests/_internal/remotes/test_base.py @@ -576,3 +576,34 @@ def mock_urlopen( assert "q=test" in url assert "page=1" in url assert "per_page=10" in url + + +def test_http_client_warns_on_non_https_with_token( + caplog: pytest.LogCaptureFixture, +) -> None: + """Test HTTPClient logs a warning when token is sent over non-HTTPS.""" + import logging + + from vcspull._internal.remotes.base import HTTPClient + + caplog.set_level(logging.WARNING) + + HTTPClient("http://insecure.example.com", token="secret-token") + + assert "non-HTTPS" in caplog.text + assert "insecure.example.com" in caplog.text + + +def test_http_client_no_warning_on_https_with_token( + caplog: pytest.LogCaptureFixture, +) -> None: + """Test HTTPClient does not warn when token is sent over HTTPS.""" + import logging + + from vcspull._internal.remotes.base import HTTPClient + + 
caplog.set_level(logging.WARNING) + + HTTPClient("https://secure.example.com", token="secret-token") + + assert "non-HTTPS" not in caplog.text From 6091c5126af758fc409c79a82ac98e0cf3e458f5 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 15 Feb 2026 06:10:58 -0600 Subject: [PATCH 096/109] test(config[json]) Add tests for save_config_json and JSON import path why: save_config_json had zero test coverage, and no integration test exercised the JSON config write path through _run_import. what: - Add test_save_config_json_write_and_readback - Add test_save_config_json_atomic_write - Add test_save_config_json_atomic_preserves_permissions - Add test_import_repos_json_config_write integration test --- tests/cli/test_import_repos.py | 43 ++++++++++++++++++++++++++++ tests/test_config_writer.py | 51 +++++++++++++++++++++++++++++++++- 2 files changed, 93 insertions(+), 1 deletion(-) diff --git a/tests/cli/test_import_repos.py b/tests/cli/test_import_repos.py index 5220caaaf..553d5a667 100644 --- a/tests/cli/test_import_repos.py +++ b/tests/cli/test_import_repos.py @@ -1214,6 +1214,49 @@ def test_import_repos_returns_zero_on_success( assert result == 0 +def test_import_repos_json_config_write( + tmp_path: pathlib.Path, + monkeypatch: MonkeyPatch, + caplog: pytest.LogCaptureFixture, +) -> None: + """Test _run_import writes valid JSON when config path has .json extension.""" + caplog.set_level(logging.INFO) + + monkeypatch.setenv("HOME", str(tmp_path)) + workspace = tmp_path / "repos" + workspace.mkdir() + config_file = tmp_path / ".vcspull.json" + + importer = MockImporter(repos=[_make_repo("repo1")]) + + result = _run_import( + importer, + service_name="github", + target="testuser", + workspace=str(workspace), + mode="user", + language=None, + topics=None, + min_stars=0, + include_archived=False, + include_forks=False, + limit=100, + config_path_str=str(config_file), + dry_run=False, + yes=True, + output_json=False, + output_ndjson=False, + color="never", + ) + + assert 
result == 0 + assert config_file.exists() + loaded = json.loads(config_file.read_text(encoding="utf-8")) + assert isinstance(loaded, dict) + total_repos = sum(len(repos) for repos in loaded.values()) + assert total_repos == 1 + + def test_import_repos_rejects_non_dict_config( tmp_path: pathlib.Path, monkeypatch: MonkeyPatch, diff --git a/tests/test_config_writer.py b/tests/test_config_writer.py index 849de6d74..fbc7c7fb5 100644 --- a/tests/test_config_writer.py +++ b/tests/test_config_writer.py @@ -7,7 +7,11 @@ import pytest -from vcspull.config import save_config_yaml, save_config_yaml_with_items +from vcspull.config import ( + save_config_json, + save_config_yaml, + save_config_yaml_with_items, +) if t.TYPE_CHECKING: import pathlib @@ -122,3 +126,48 @@ def failing_replace(self: _pathlib.Path, target: t.Any) -> _pathlib.Path: f for f in tmp_path.iterdir() if f.name.startswith(".") and f != config_path ] assert tmp_files == [] + + +def test_save_config_json_write_and_readback( + tmp_path: pathlib.Path, +) -> None: + """Test that save_config_json writes valid JSON that round-trips.""" + import json + + config_path = tmp_path / ".vcspull.json" + data = {"~/code/": {"myrepo": {"repo": "git+https://example.com/repo.git"}}} + + save_config_json(config_path, data) + + assert config_path.exists() + loaded = json.loads(config_path.read_text(encoding="utf-8")) + assert loaded == data + + +def test_save_config_json_atomic_write( + tmp_path: pathlib.Path, +) -> None: + """Test that save_config_json uses atomic write (no temp files left).""" + config_path = tmp_path / ".vcspull.json" + data = {"~/code/": {"myrepo": {"repo": "git+https://example.com/repo.git"}}} + + save_config_json(config_path, data) + + assert config_path.exists() + # No stray temp files should be left in the directory + tmp_files = [f for f in tmp_path.iterdir() if f.name.startswith(".")] + assert tmp_files == [config_path] + + +def test_save_config_json_atomic_preserves_permissions( + tmp_path: 
pathlib.Path, +) -> None: + """Test that save_config_json preserves original file permissions.""" + config_path = tmp_path / ".vcspull.json" + config_path.write_text("{}", encoding="utf-8") + config_path.chmod(0o644) + + data = {"~/code/": {"myrepo": {"repo": "git+https://example.com/repo.git"}}} + save_config_json(config_path, data) + + assert config_path.stat().st_mode & 0o777 == 0o644 From d6208b0e45d1e69a6f54721e181cc000ebe1c7f9 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 15 Feb 2026 06:12:46 -0600 Subject: [PATCH 097/109] fix(remotes/github[search]) Cap search pagination at 1000 results why: GitHub search API returns HTTP 422 when requesting results beyond offset 1000. Without a guard, the pagination loop would crash after partial progress when --limit exceeds 1000. what: - Add SEARCH_MAX_RESULTS = 1000 constant - Break pagination when page * DEFAULT_PER_PAGE >= SEARCH_MAX_RESULTS - Add test_github_search_caps_at_1000_results --- src/vcspull/_internal/remotes/github.py | 6 +++ tests/_internal/remotes/test_github.py | 64 +++++++++++++++++++++++++ 2 files changed, 70 insertions(+) diff --git a/src/vcspull/_internal/remotes/github.py b/src/vcspull/_internal/remotes/github.py index e7a788dc0..74f529d06 100644 --- a/src/vcspull/_internal/remotes/github.py +++ b/src/vcspull/_internal/remotes/github.py @@ -19,6 +19,8 @@ GITHUB_API_URL = "https://api.github.com" DEFAULT_PER_PAGE = 100 +# GitHub search API limits results to 1000; exceeding this causes HTTP 422. 
+SEARCH_MAX_RESULTS = 1000 class GitHubImporter: @@ -234,6 +236,10 @@ def _fetch_search(self, options: ImportOptions) -> t.Iterator[RemoteRepo]: if len(items) < DEFAULT_PER_PAGE: break + # GitHub search API caps at 1000 results + if page * DEFAULT_PER_PAGE >= SEARCH_MAX_RESULTS: + break + page += 1 def _paginate_repos( diff --git a/tests/_internal/remotes/test_github.py b/tests/_internal/remotes/test_github.py index b5fa79647..3bb022b8a 100644 --- a/tests/_internal/remotes/test_github.py +++ b/tests/_internal/remotes/test_github.py @@ -589,3 +589,67 @@ def test_github_parse_repo_null_owner( repos = list(importer.fetch_repos(options)) assert len(repos) == 1 assert repos[0].owner == "" + + +def test_github_search_caps_at_1000_results( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Test GitHub search stops paginating at 1000 results. + + GitHub's search API returns HTTP 422 beyond offset 1000. + The importer must stop before requesting page 11 (with per_page=100). + """ + from tests._internal.remotes.conftest import MockHTTPResponse + + call_count = 0 + + def make_search_page() -> dict[str, t.Any]: + """Create a full page of 100 search results.""" + return { + "total_count": 5000, + "items": [ + { + "name": f"repo-{i}", + "clone_url": f"https://github.com/user/repo-{i}.git", + "ssh_url": f"git@github.com:user/repo-{i}.git", + "html_url": f"https://github.com/user/repo-{i}", + "description": f"Repo {i}", + "language": "Python", + "topics": [], + "stargazers_count": 100, + "fork": False, + "archived": False, + "default_branch": "main", + "owner": {"login": "user"}, + } + for i in range(100) + ], + } + + def urlopen_side_effect( + request: t.Any, + timeout: int | None = None, + ) -> MockHTTPResponse: + nonlocal call_count + call_count += 1 + page_data = make_search_page() + return MockHTTPResponse( + json.dumps(page_data).encode(), + {"x-ratelimit-remaining": "100", "x-ratelimit-limit": "60"}, + 200, + ) + + # Mock urlopen: track how many API requests are made + 
monkeypatch.setattr("urllib.request.urlopen", urlopen_side_effect) + + importer = GitHubImporter() + options = ImportOptions( + mode=ImportMode.SEARCH, + target="test", + limit=5000, + ) + repos = list(importer.fetch_repos(options)) + + # Should have fetched at most 10 pages (1000 results) + assert call_count <= 10, f"Expected at most 10 API calls, got {call_count}" + assert len(repos) <= 1000 From b10c8fd3e9b07cd9fe7a3f9803b69c167fe9db41 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 15 Feb 2026 06:14:31 -0600 Subject: [PATCH 098/109] fix(remotes/codecommit[timeout]) Add subprocess timeout to AWS CLI calls why: subprocess.run without timeout blocks indefinitely if the AWS CLI hangs due to network issues or broken credential providers. HTTP-based importers already have a 30-second timeout via HTTPClient. what: - Add timeout=60 to subprocess.run in _run_aws_command - Catch subprocess.TimeoutExpired and raise ServiceUnavailableError - Add ServiceUnavailableError to imports - Add test_codecommit_timeout_raises_service_unavailable --- src/vcspull/_internal/remotes/codecommit.py | 5 ++++ tests/_internal/remotes/test_codecommit.py | 30 +++++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/src/vcspull/_internal/remotes/codecommit.py b/src/vcspull/_internal/remotes/codecommit.py index 5814cf6bc..455e4e5b3 100644 --- a/src/vcspull/_internal/remotes/codecommit.py +++ b/src/vcspull/_internal/remotes/codecommit.py @@ -13,6 +13,7 @@ DependencyError, ImportOptions, RemoteRepo, + ServiceUnavailableError, filter_repo, ) @@ -136,10 +137,14 @@ def _run_aws_command(self, *args: str) -> dict[str, t.Any]: capture_output=True, text=True, check=False, + timeout=60, ) except FileNotFoundError as exc: msg = "AWS CLI not found" raise DependencyError(msg, service=self.service_name) from exc + except subprocess.TimeoutExpired as exc: + msg = "AWS CLI command timed out" + raise ServiceUnavailableError(msg, service=self.service_name) from exc if result.returncode != 0: 
stderr = result.stderr.lower() diff --git a/tests/_internal/remotes/test_codecommit.py b/tests/_internal/remotes/test_codecommit.py index cc0cb7817..28624fcda 100644 --- a/tests/_internal/remotes/test_codecommit.py +++ b/tests/_internal/remotes/test_codecommit.py @@ -647,3 +647,33 @@ def mock_run(cmd: list[str], **kwargs: t.Any) -> subprocess.CompletedProcess[str importer = CodeCommitImporter() assert importer.is_authenticated is False + + +def test_codecommit_timeout_raises_service_unavailable( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Test _run_aws_command raises ServiceUnavailableError on timeout. + + If the AWS CLI hangs (broken credential provider, network issue), + subprocess.run should time out and the error should propagate as + ServiceUnavailableError rather than blocking indefinitely. + """ + from vcspull._internal.remotes.base import ServiceUnavailableError + + call_count = 0 + + def mock_run(*args: t.Any, **kwargs: t.Any) -> subprocess.CompletedProcess[str]: + nonlocal call_count + call_count += 1 + if call_count == 1: + # _check_aws_cli: aws --version succeeds + return _aws_ok("aws-cli/2.x") + # Mock subprocess.run: second call (actual command) raises + # TimeoutExpired to simulate a hung AWS CLI process + raise subprocess.TimeoutExpired(cmd="aws", timeout=60) + + monkeypatch.setattr("subprocess.run", mock_run) + importer = CodeCommitImporter() + + with pytest.raises(ServiceUnavailableError, match="timed out"): + importer._run_aws_command("codecommit", "list-repositories") From d395ddf55ea6132a446f19b65eda2e5d698bcf7d Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 15 Feb 2026 06:15:31 -0600 Subject: [PATCH 099/109] refactor(cli/import[logging]) Remove unused logger from 5 CLI handler files why: Each file defined log = logging.getLogger(__name__) but never used it. The logging import and log variable are dead code. 
what: - Remove import logging and log variable from github.py, gitlab.py, codeberg.py, forgejo.py, and gitea.py CLI handlers --- src/vcspull/cli/import_cmd/codeberg.py | 3 --- src/vcspull/cli/import_cmd/forgejo.py | 3 --- src/vcspull/cli/import_cmd/gitea.py | 3 --- src/vcspull/cli/import_cmd/github.py | 3 --- src/vcspull/cli/import_cmd/gitlab.py | 3 --- 5 files changed, 15 deletions(-) diff --git a/src/vcspull/cli/import_cmd/codeberg.py b/src/vcspull/cli/import_cmd/codeberg.py index 542ce0dfc..82f12e99b 100644 --- a/src/vcspull/cli/import_cmd/codeberg.py +++ b/src/vcspull/cli/import_cmd/codeberg.py @@ -3,7 +3,6 @@ from __future__ import annotations import argparse -import logging from vcspull._internal.remotes import GiteaImporter @@ -16,8 +15,6 @@ _run_import, ) -log = logging.getLogger(__name__) - def create_codeberg_subparser( subparsers: argparse._SubParsersAction[argparse.ArgumentParser], diff --git a/src/vcspull/cli/import_cmd/forgejo.py b/src/vcspull/cli/import_cmd/forgejo.py index 580ce31d5..b0d3f23b1 100644 --- a/src/vcspull/cli/import_cmd/forgejo.py +++ b/src/vcspull/cli/import_cmd/forgejo.py @@ -3,7 +3,6 @@ from __future__ import annotations import argparse -import logging from vcspull._internal.remotes import GiteaImporter @@ -16,8 +15,6 @@ _run_import, ) -log = logging.getLogger(__name__) - def create_forgejo_subparser( subparsers: argparse._SubParsersAction[argparse.ArgumentParser], diff --git a/src/vcspull/cli/import_cmd/gitea.py b/src/vcspull/cli/import_cmd/gitea.py index 6bf2039a8..492a471db 100644 --- a/src/vcspull/cli/import_cmd/gitea.py +++ b/src/vcspull/cli/import_cmd/gitea.py @@ -3,7 +3,6 @@ from __future__ import annotations import argparse -import logging from vcspull._internal.remotes import GiteaImporter @@ -16,8 +15,6 @@ _run_import, ) -log = logging.getLogger(__name__) - def create_gitea_subparser( subparsers: argparse._SubParsersAction[argparse.ArgumentParser], diff --git a/src/vcspull/cli/import_cmd/github.py 
b/src/vcspull/cli/import_cmd/github.py index f90dc4cc6..bbf87e61b 100644 --- a/src/vcspull/cli/import_cmd/github.py +++ b/src/vcspull/cli/import_cmd/github.py @@ -3,7 +3,6 @@ from __future__ import annotations import argparse -import logging from vcspull._internal.remotes import GitHubImporter @@ -16,8 +15,6 @@ _run_import, ) -log = logging.getLogger(__name__) - def create_github_subparser( subparsers: argparse._SubParsersAction[argparse.ArgumentParser], diff --git a/src/vcspull/cli/import_cmd/gitlab.py b/src/vcspull/cli/import_cmd/gitlab.py index fce1c610f..086d040d7 100644 --- a/src/vcspull/cli/import_cmd/gitlab.py +++ b/src/vcspull/cli/import_cmd/gitlab.py @@ -3,7 +3,6 @@ from __future__ import annotations import argparse -import logging from vcspull._internal.remotes import GitLabImporter @@ -16,8 +15,6 @@ _run_import, ) -log = logging.getLogger(__name__) - def create_gitlab_subparser( subparsers: argparse._SubParsersAction[argparse.ArgumentParser], From 5086392e6bd363841f70c3c2a859a19dbfe797a0 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 15 Feb 2026 06:16:25 -0600 Subject: [PATCH 100/109] fix(cli/import[config_load]) Use json.loads for JSON config files why: yaml.safe_load was used for all config files regardless of extension. While YAML is a superset of JSON, dispatching on file extension is semantically correct and produces more specific error messages for JSON parse failures. 
what: - Dispatch on config file suffix: json.loads for .json, yaml.safe_load for .yaml/.yml - Use broad except to catch both json.JSONDecodeError and yaml.YAMLError --- src/vcspull/cli/import_cmd/_common.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/src/vcspull/cli/import_cmd/_common.py b/src/vcspull/cli/import_cmd/_common.py index 9bb122c2b..73cb6352d 100644 --- a/src/vcspull/cli/import_cmd/_common.py +++ b/src/vcspull/cli/import_cmd/_common.py @@ -522,12 +522,22 @@ def _run_import( # Load existing config or create new raw_config: dict[str, t.Any] if config_file_path.exists(): - import yaml - try: - with config_file_path.open() as f: - raw_config = yaml.safe_load(f) or {} - except (yaml.YAMLError, OSError): + if config_file_path.suffix.lower() == ".json": + import json as _json + + raw_config = ( + _json.loads( + config_file_path.read_text(encoding="utf-8"), + ) + or {} + ) + else: + import yaml + + with config_file_path.open() as f: + raw_config = yaml.safe_load(f) or {} + except Exception: log.exception("Error loading config file") return 1 From b0e2e3a91c7499232bab776609e048d55506db3f Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 15 Feb 2026 06:17:23 -0600 Subject: [PATCH 101/109] fix(test[mypy]) Use MockHTTPResponse in URL merge test why: The lambda-based mock caused a mypy type inference error. what: - Replace inline io.BytesIO mock with shared MockHTTPResponse fixture --- tests/_internal/remotes/test_base.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/tests/_internal/remotes/test_base.py b/tests/_internal/remotes/test_base.py index 9bb3a665f..bb6dc25b0 100644 --- a/tests/_internal/remotes/test_base.py +++ b/tests/_internal/remotes/test_base.py @@ -540,10 +540,10 @@ def test_http_client_get_merges_query_params( already contains query parameters. The implementation should use urllib.parse to merge them correctly. 
""" - import io import json import urllib.request + from tests._internal.remotes.conftest import MockHTTPResponse from vcspull._internal.remotes.base import HTTPClient captured_urls: list[str] = [] @@ -551,12 +551,9 @@ def test_http_client_get_merges_query_params( def mock_urlopen( request: urllib.request.Request, **kwargs: t.Any, - ) -> io.BytesIO: + ) -> MockHTTPResponse: captured_urls.append(request.full_url) - resp = io.BytesIO(json.dumps({"ok": True}).encode()) - resp.getheaders = lambda: [] # type: ignore[attr-defined] - resp.read = lambda: json.dumps({"ok": True}).encode() # type: ignore[assignment] - return resp + return MockHTTPResponse(json.dumps({"ok": True}).encode(), {}, 200) # Mock urlopen: capture the request URL to verify query param merging monkeypatch.setattr("urllib.request.urlopen", mock_urlopen) From b49033e14f7f7203d89d78c5cf4e06dc5b15f821 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 15 Feb 2026 06:32:14 -0600 Subject: [PATCH 102/109] refactor(cli/import[config_load]) Use ConfigReader._from_file() for config loading why: The inline JSON/YAML dispatch duplicated what ConfigReader._from_file() already provides, creating an asymmetry with the save path that already uses ConfigReader._dump() via save_config_yaml/save_config_json. 
what: - Replace 12-line inline JSON/YAML dispatch block with ConfigReader._from_file() - Remove lazy imports of json and yaml that were only needed for inline dispatch --- src/vcspull/cli/import_cmd/_common.py | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/src/vcspull/cli/import_cmd/_common.py b/src/vcspull/cli/import_cmd/_common.py index 73cb6352d..1ae4820e7 100644 --- a/src/vcspull/cli/import_cmd/_common.py +++ b/src/vcspull/cli/import_cmd/_common.py @@ -12,6 +12,7 @@ import sys import typing as t +from vcspull._internal.config_reader import ConfigReader from vcspull._internal.private_path import PrivatePath from vcspull._internal.remotes import ( AuthenticationError, @@ -523,20 +524,7 @@ def _run_import( raw_config: dict[str, t.Any] if config_file_path.exists(): try: - if config_file_path.suffix.lower() == ".json": - import json as _json - - raw_config = ( - _json.loads( - config_file_path.read_text(encoding="utf-8"), - ) - or {} - ) - else: - import yaml - - with config_file_path.open() as f: - raw_config = yaml.safe_load(f) or {} + raw_config = ConfigReader._from_file(config_file_path) or {} except Exception: log.exception("Error loading config file") return 1 From d6f87edc9f5424f1b7c10d2c3d3a895629116fef Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 15 Feb 2026 06:43:26 -0600 Subject: [PATCH 103/109] docs(cli/import[auth]) Split multi-command code blocks in service pages why: Project style guide requires one command per code block for copyability. 
what: - Split combined auth+import code blocks into separate blocks in 6 files - Add explanatory text between the blocks (github, gitlab, codeberg, gitea, forgejo, codecommit) --- docs/cli/import/codeberg.md | 7 +++++++ docs/cli/import/codecommit.md | 7 +++++++ docs/cli/import/forgejo.md | 7 +++++++ docs/cli/import/gitea.md | 7 +++++++ docs/cli/import/github.md | 7 +++++++ docs/cli/import/gitlab.md | 7 +++++++ 6 files changed, 42 insertions(+) diff --git a/docs/cli/import/codeberg.md b/docs/cli/import/codeberg.md index 78b3737e7..fe42e24cf 100644 --- a/docs/cli/import/codeberg.md +++ b/docs/cli/import/codeberg.md @@ -21,7 +21,14 @@ Import repositories from Codeberg. - **Scope**: no scopes needed for public repos; token required for private repos - **Create at**: +Set the token: + ```console $ export CODEBERG_TOKEN=... +``` + +Then import: + +```console $ vcspull import codeberg myuser -w ~/code/ ``` diff --git a/docs/cli/import/codecommit.md b/docs/cli/import/codecommit.md index 7f6d91b16..4887cd8b3 100644 --- a/docs/cli/import/codecommit.md +++ b/docs/cli/import/codecommit.md @@ -32,7 +32,14 @@ $ vcspull import codecommit -w ~/code/ --region us-east-1 --profile work - `codecommit:BatchGetRepositories` (resource: repo ARNs or `*`) - **Dependency**: AWS CLI must be installed (`pip install awscli`) +Configure your AWS credentials: + ```console $ aws configure +``` + +Then import: + +```console $ vcspull import codecommit -w ~/code/ --region us-east-1 ``` diff --git a/docs/cli/import/forgejo.md b/docs/cli/import/forgejo.md index 4480f48fe..1d01a0052 100644 --- a/docs/cli/import/forgejo.md +++ b/docs/cli/import/forgejo.md @@ -22,7 +22,14 @@ Import repositories from a self-hosted Forgejo instance. - **Scope**: `read:repository` - **Create at**: `https:///user/settings/applications` +Set the token: + ```console $ export FORGEJO_TOKEN=... 
+``` + +Then import: + +```console $ vcspull import forgejo myuser -w ~/code/ --url https://forgejo.example.com ``` diff --git a/docs/cli/import/gitea.md b/docs/cli/import/gitea.md index bceb9a644..05f94f999 100644 --- a/docs/cli/import/gitea.md +++ b/docs/cli/import/gitea.md @@ -21,7 +21,14 @@ Import repositories from a self-hosted Gitea instance. - **Scope**: `read:repository` (minimum for listing repos) - **Create at**: `https:///user/settings/applications` +Set the token: + ```console $ export GITEA_TOKEN=... +``` + +Then import: + +```console $ vcspull import gitea myuser -w ~/code/ --url https://git.example.com ``` diff --git a/docs/cli/import/github.md b/docs/cli/import/github.md index c476fd7e9..ad45230e6 100644 --- a/docs/cli/import/github.md +++ b/docs/cli/import/github.md @@ -25,7 +25,14 @@ Import repositories from GitHub or GitHub Enterprise. Read-only" for private - **Create at**: +Set the token: + ```console $ export GITHUB_TOKEN=ghp_... +``` + +Then import: + +```console $ vcspull import gh myuser -w ~/code/ ``` diff --git a/docs/cli/import/gitlab.md b/docs/cli/import/gitlab.md index 776bcba0f..a37c8554e 100644 --- a/docs/cli/import/gitlab.md +++ b/docs/cli/import/gitlab.md @@ -33,7 +33,14 @@ $ vcspull import gl my-group --mode org -w ~/code/ --flatten-groups - **Create at**: (self-hosted: `https:///-/user_settings/personal_access_tokens`) +Set the token: + ```console $ export GITLAB_TOKEN=glpat-... +``` + +Then import: + +```console $ vcspull import gl myuser -w ~/code/ ``` From 741cac674909525b5740bc5076826bb1ba33cce9 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 15 Feb 2026 06:43:29 -0600 Subject: [PATCH 104/109] docs(CHANGES[import]) Add labels between SSH and HTTPS examples why: Consecutive code blocks without explanatory text leave the reader guessing. 
what: - Add "SSH (default):" label before the first block - Add "Use --https for HTTPS clone URLs:" before the second block --- CHANGES | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGES b/CHANGES index 96ebd227a..81b2feea5 100644 --- a/CHANGES +++ b/CHANGES @@ -103,12 +103,16 @@ $ vcspull import codeberg user \ Clone URLs default to SSH. Use `--https` to get HTTPS URLs instead: +SSH (default): + ```console $ vcspull import github torvalds \ -w ~/repos/linux \ --mode user ``` +Use `--https` for HTTPS clone URLs: + ```console $ vcspull import github torvalds \ -w ~/repos/linux \ From a7865119f39119ef6ae851ab4656385b93535020 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 15 Feb 2026 06:43:34 -0600 Subject: [PATCH 105/109] docs(README[import]) Add vcspull import section and mention why: README omitted vcspull import despite it being a major v1.55 feature. what: - Add vcspull import to the config-creation sentence at line 71 - Add "Import from remote services" subsection with example commands --- README.md | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index cb194e5df..a536a3df6 100644 --- a/README.md +++ b/README.md @@ -68,7 +68,7 @@ You can test the unpublished version of vcspull before its released. ## Configuration Add your repos to `~/.vcspull.yaml`. You can edit the file by hand or let -`vcspull add` or `vcspull discover` create entries for you. +`vcspull add`, `vcspull discover`, or `vcspull import` create entries for you. ```yaml ~/code/: @@ -123,6 +123,24 @@ The scan shows each repository before import unless you opt into `--yes`. Add than the default `~/.vcspull.yaml`. Duplicate workspace roots are merged by default; include `--no-merge` to keep them separate while you review the log. 
+### Import from remote services + +Pull repository lists from GitHub, GitLab, Codeberg, Gitea, Forgejo, or AWS +CodeCommit directly into your configuration: + +```console +$ vcspull import github myuser -w ~/code/ --mode user +``` + +```console +$ vcspull import gitlab my-group -w ~/work/ --mode org +``` + +Use `--dry-run` to preview changes, `--https` for HTTPS clone URLs, and +`--language`/`--topics`/`--min-stars` to filter results. See the +[import documentation](https://vcspull.git-pull.com/cli/import/) for all +supported services and options. + ### Inspect configured repositories List what vcspull already knows about without mutating anything: From 76944a1706c657a85fa2eab3bb731f4c816f6de3 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 15 Feb 2026 06:51:35 -0600 Subject: [PATCH 106/109] docs(CLAUDE[code-blocks]) Prefer longform flags and split multi-flag commands why: Shortform flags are cryptic in user-facing docs; multi-flag one-liners are hard to scan and copy-paste. what: - Add "Prefer longform flags" rule to Documentation Standards - Add "Split multi-flag commands" rule with \-continuation style - Include Good/Bad examples showing both rules together --- AGENTS.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/AGENTS.md b/AGENTS.md index 89b1c38d0..d5776277b 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -332,6 +332,24 @@ $ vcspull search django $ vcspull search "name:flask" ``` +**Prefer longform flags** — use `--workspace` not `-w`, `--file` not `-f`. + +**Split multi-flag commands** — when a command has 2+ flags/options, place each on its own `\`-continuation line, indented by 4 spaces. 
+ +Good: + +```console +$ vcspull import gh my-org \ + --mode org \ + --workspace ~/code/ +``` + +Bad: + +```console +$ vcspull import gh my-org --mode org -w ~/code/ +``` + ## Debugging Tips When stuck in debugging loops: From 596be9f7e876752789a5dc569c9248a80b414580 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 15 Feb 2026 06:55:53 -0600 Subject: [PATCH 107/109] docs(cli[flags]) Use longform flags and split multi-flag commands why: Shortform flags (-w, -f, -S, -v) are cryptic in user-facing docs; multi-flag one-liners are hard to scan and copy-paste. what: - Replace -w with --workspace in all doc code blocks - Replace -f with --file in all doc code blocks - Replace -S with --smart-case and -v with --invert-match in search docs - Split multi-flag commands onto \-continuation lines - Update prose references to prefer longform names - Remove redundant "Short form" examples from fmt.md --- CHANGES | 20 +++++----- README.md | 16 +++++--- docs/cli/add.md | 5 ++- docs/cli/discover.md | 10 +++-- docs/cli/fmt.md | 18 ++------- docs/cli/import/codeberg.md | 2 +- docs/cli/import/codecommit.md | 9 ++++- docs/cli/import/forgejo.md | 4 +- docs/cli/import/gitea.md | 4 +- docs/cli/import/github.md | 2 +- docs/cli/import/gitlab.md | 7 +++- docs/cli/import/index.md | 69 ++++++++++++++++++++++++++--------- docs/cli/list.md | 4 +- docs/cli/search.md | 4 +- docs/cli/status.md | 4 +- docs/cli/sync.md | 4 +- docs/quickstart.md | 2 +- 17 files changed, 114 insertions(+), 70 deletions(-) diff --git a/CHANGES b/CHANGES index 81b2feea5..1aa6f551e 100644 --- a/CHANGES +++ b/CHANGES @@ -44,7 +44,7 @@ Import a user's repositories: ```console $ vcspull import github torvalds \ - -w ~/repos/linux \ + --workspace ~/repos/linux \ --mode user ``` @@ -52,7 +52,7 @@ Import an organization's repositories: ```console $ vcspull import github django \ - -w ~/study/python \ + --workspace ~/study/python \ --mode org ``` @@ -60,7 +60,7 @@ Search and import repositories: ```console $ vcspull import 
github "machine learning" \ - -w ~/ml-repos \ + --workspace ~/ml-repos \ --mode search \ --min-stars 1000 ``` @@ -69,7 +69,7 @@ Use with self-hosted GitLab: ```console $ vcspull import gitlab myuser \ - -w ~/work \ + --workspace ~/work \ --url https://gitlab.company.com ``` @@ -77,7 +77,7 @@ Import from AWS CodeCommit: ```console $ vcspull import codecommit \ - -w ~/work/aws \ + --workspace ~/work/aws \ --region us-east-1 ``` @@ -85,7 +85,7 @@ Preview without writing (dry run): ```console $ vcspull import codeberg user \ - -w ~/oss \ + --workspace ~/oss \ --dry-run ``` @@ -107,7 +107,7 @@ SSH (default): ```console $ vcspull import github torvalds \ - -w ~/repos/linux \ + --workspace ~/repos/linux \ --mode user ``` @@ -115,7 +115,7 @@ Use `--https` for HTTPS clone URLs: ```console $ vcspull import github torvalds \ - -w ~/repos/linux \ + --workspace ~/repos/linux \ --mode user \ --https ``` @@ -127,7 +127,7 @@ under the workspace root by default: ```console $ vcspull import gitlab vcs-python-group-test \ - -w ~/projects/python \ + --workspace ~/projects/python \ --mode org ``` @@ -142,7 +142,7 @@ workspace root: ```console $ vcspull import gitlab vcs-python-group-test \ - -w ~/projects/python \ + --workspace ~/projects/python \ --mode org \ --flatten-groups ``` diff --git a/README.md b/README.md index a536a3df6..fea8acaf2 100644 --- a/README.md +++ b/README.md @@ -119,7 +119,7 @@ $ vcspull discover ~/code --recursive ``` The scan shows each repository before import unless you opt into `--yes`. Add -`-w ~/code/` to pin the resulting workspace root or `-f` to write somewhere other +`--workspace ~/code/` to pin the resulting workspace root or `-f/--file` to write somewhere other than the default `~/.vcspull.yaml`. Duplicate workspace roots are merged by default; include `--no-merge` to keep them separate while you review the log. 
@@ -129,11 +129,15 @@ Pull repository lists from GitHub, GitLab, Codeberg, Gitea, Forgejo, or AWS CodeCommit directly into your configuration: ```console -$ vcspull import github myuser -w ~/code/ --mode user +$ vcspull import github myuser \ + --workspace ~/code/ \ + --mode user ``` ```console -$ vcspull import gitlab my-group -w ~/work/ --mode org +$ vcspull import gitlab my-group \ + --workspace ~/work/ \ + --mode org ``` Use `--dry-run` to preview changes, `--https` for HTTPS clone URLs, and @@ -182,7 +186,9 @@ After importing or editing by hand, run the formatter to tidy up keys, merge duplicate workspace sections, and keep entries sorted: ```console -$ vcspull fmt -f ~/.vcspull.yaml --write +$ vcspull fmt \ + --file ~/.vcspull.yaml \ + --write ``` Use `vcspull fmt --all --write` to format every YAML file that vcspull can @@ -223,7 +229,7 @@ or svn project with a git dependency: Clone / update repos via config file: ```console -$ vcspull sync -f external_deps.yaml '*' +$ vcspull sync --file external_deps.yaml '*' ``` See the [Quickstart](https://vcspull.git-pull.com/quickstart.html) for diff --git a/docs/cli/add.md b/docs/cli/add.md index 3b80801b1..aebe9c36d 100644 --- a/docs/cli/add.md +++ b/docs/cli/add.md @@ -98,7 +98,8 @@ vcspull searches for configuration files in this order: Specify a file explicitly with `-f/--file`: ```console -$ vcspull add ~/study/python/pytest-docker -f ~/configs/python.yaml +$ vcspull add ~/study/python/pytest-docker \ + --file ~/configs/python.yaml ``` ## Handling duplicates @@ -122,7 +123,7 @@ by `vcspull add`: ```diff - $ vcspull import flask https://github.com/pallets/flask.git -c ~/.vcspull.yaml -+ $ vcspull add ~/code/flask --url https://github.com/pallets/flask.git -f ~/.vcspull.yaml ++ $ vcspull add ~/code/flask --url https://github.com/pallets/flask.git --file ~/.vcspull.yaml ``` Key differences: diff --git a/docs/cli/discover.md b/docs/cli/discover.md index 865c02b25..6df754d78 100644 --- a/docs/cli/discover.md +++ 
b/docs/cli/discover.md @@ -129,7 +129,9 @@ $ vcspull discover ~ --recursive --workspace-root ~/code/ --yes Specify a custom config file with `-f/--file`: ```console -$ vcspull discover ~/company --recursive -f ~/company/.vcspull.yaml +$ vcspull discover ~/company \ + --recursive \ + --file ~/company/.vcspull.yaml ``` If the config file doesn't exist, it will be created. @@ -195,7 +197,7 @@ Scan to specific config: $ vcspull discover ~/company/repos \ --recursive \ --yes \ - -f ~/company/.vcspull.yaml + --file ~/company/.vcspull.yaml ``` ## After discovering repositories @@ -229,7 +231,7 @@ If you previously used `vcspull import --scan`: ```diff - $ vcspull import --scan ~/code --recursive -c ~/.vcspull.yaml --yes -+ $ vcspull discover ~/code --recursive -f ~/.vcspull.yaml --yes ++ $ vcspull discover ~/code --recursive --file ~/.vcspull.yaml --yes ``` Changes: @@ -273,7 +275,7 @@ $ vcspull discover ~/projects --recursive --yes ```console $ vcspull discover ~/company \ --recursive \ - -f ~/company/.vcspull.yaml \ + --file ~/company/.vcspull.yaml \ --workspace-root ~/work/ \ --yes ``` diff --git a/docs/cli/fmt.md b/docs/cli/fmt.md index b28d06161..fade66757 100644 --- a/docs/cli/fmt.md +++ b/docs/cli/fmt.md @@ -59,22 +59,12 @@ Run the formatter in dry-run mode first to preview the adjustments: $ vcspull fmt --file ~/.vcspull.yaml ``` -Then add `--write` (or `-w`) to persist them back to disk: +Then add `--write` to persist them back to disk: ```console -$ vcspull fmt --file ~/.vcspull.yaml --write -``` - -Short form for preview: - -```console -$ vcspull fmt -f ~/.vcspull.yaml -``` - -Short form to apply: - -```console -$ vcspull fmt -f ~/.vcspull.yaml -w +$ vcspull fmt \ + --file ~/.vcspull.yaml \ + --write ``` Use `--all` to iterate over the default search locations: the current working diff --git a/docs/cli/import/codeberg.md b/docs/cli/import/codeberg.md index fe42e24cf..f0a0c4c25 100644 --- a/docs/cli/import/codeberg.md +++ b/docs/cli/import/codeberg.md @@ -30,5 
+30,5 @@ $ export CODEBERG_TOKEN=... Then import: ```console -$ vcspull import codeberg myuser -w ~/code/ +$ vcspull import codeberg myuser --workspace ~/code/ ``` diff --git a/docs/cli/import/codecommit.md b/docs/cli/import/codecommit.md index 4887cd8b3..ef7285cb5 100644 --- a/docs/cli/import/codecommit.md +++ b/docs/cli/import/codecommit.md @@ -20,7 +20,10 @@ CodeCommit does not require a target argument. Use `--region` and `--profile` to select the AWS environment: ```console -$ vcspull import codecommit -w ~/code/ --region us-east-1 --profile work +$ vcspull import codecommit \ + --workspace ~/code/ \ + --region us-east-1 \ + --profile work ``` ## Authentication @@ -41,5 +44,7 @@ $ aws configure Then import: ```console -$ vcspull import codecommit -w ~/code/ --region us-east-1 +$ vcspull import codecommit \ + --workspace ~/code/ \ + --region us-east-1 ``` diff --git a/docs/cli/import/forgejo.md b/docs/cli/import/forgejo.md index 1d01a0052..bdedc8a2b 100644 --- a/docs/cli/import/forgejo.md +++ b/docs/cli/import/forgejo.md @@ -31,5 +31,7 @@ $ export FORGEJO_TOKEN=... Then import: ```console -$ vcspull import forgejo myuser -w ~/code/ --url https://forgejo.example.com +$ vcspull import forgejo myuser \ + --workspace ~/code/ \ + --url https://forgejo.example.com ``` diff --git a/docs/cli/import/gitea.md b/docs/cli/import/gitea.md index 05f94f999..be33aa21e 100644 --- a/docs/cli/import/gitea.md +++ b/docs/cli/import/gitea.md @@ -30,5 +30,7 @@ $ export GITEA_TOKEN=... Then import: ```console -$ vcspull import gitea myuser -w ~/code/ --url https://git.example.com +$ vcspull import gitea myuser \ + --workspace ~/code/ \ + --url https://git.example.com ``` diff --git a/docs/cli/import/github.md b/docs/cli/import/github.md index ad45230e6..49be1ebab 100644 --- a/docs/cli/import/github.md +++ b/docs/cli/import/github.md @@ -34,5 +34,5 @@ $ export GITHUB_TOKEN=ghp_... 
Then import: ```console -$ vcspull import gh myuser -w ~/code/ +$ vcspull import gh myuser --workspace ~/code/ ``` diff --git a/docs/cli/import/gitlab.md b/docs/cli/import/gitlab.md index a37c8554e..cb23522bd 100644 --- a/docs/cli/import/gitlab.md +++ b/docs/cli/import/gitlab.md @@ -21,7 +21,10 @@ structure as nested workspace directories by default. Use `--flatten-groups` to place all repositories directly in the base workspace: ```console -$ vcspull import gl my-group --mode org -w ~/code/ --flatten-groups +$ vcspull import gl my-group \ + --mode org \ + --workspace ~/code/ \ + --flatten-groups ``` ## Authentication @@ -42,5 +45,5 @@ $ export GITLAB_TOKEN=glpat-... Then import: ```console -$ vcspull import gl myuser -w ~/code/ +$ vcspull import gl myuser --workspace ~/code/ ``` diff --git a/docs/cli/import/index.md b/docs/cli/import/index.md index 5890c2d90..429261ff2 100644 --- a/docs/cli/import/index.md +++ b/docs/cli/import/index.md @@ -36,7 +36,7 @@ Choose a service subcommand for details: Import all repositories for a GitHub user into a workspace: ```vcspull-console -$ vcspull import github myuser -w ~/code/ +$ vcspull import github myuser --workspace ~/code/ → Fetching repositories from GitHub... 
✓ Found 12 repositories + project-a [Python] @@ -80,7 +80,7 @@ codecommit Fetch all repositories owned by a user: ```console -$ vcspull import gh myuser -w ~/code/ +$ vcspull import gh myuser --workspace ~/code/ ``` ### Organization mode @@ -88,13 +88,17 @@ $ vcspull import gh myuser -w ~/code/ Fetch repositories belonging to an organization or group: ```console -$ vcspull import gh my-org --mode org -w ~/code/ +$ vcspull import gh my-org \ + --mode org \ + --workspace ~/code/ ``` For GitLab, subgroups are supported with slash notation: ```console -$ vcspull import gl my-group/sub-group --mode org -w ~/code/ +$ vcspull import gl my-group/sub-group \ + --mode org \ + --workspace ~/code/ ``` ### Search mode @@ -102,7 +106,10 @@ $ vcspull import gl my-group/sub-group --mode org -w ~/code/ Search for repositories matching a query: ```console -$ vcspull import gh django --mode search -w ~/code/ --min-stars 100 +$ vcspull import gh django \ + --mode search \ + --workspace ~/code/ \ + --min-stars 100 ``` ## Filtering @@ -110,27 +117,39 @@ $ vcspull import gh django --mode search -w ~/code/ --min-stars 100 Narrow results with filtering flags: ```console -$ vcspull import gh myuser -w ~/code/ --language python +$ vcspull import gh myuser \ + --workspace ~/code/ \ + --language python ``` ```console -$ vcspull import gh myuser -w ~/code/ --topics cli,automation +$ vcspull import gh myuser \ + --workspace ~/code/ \ + --topics cli,automation ``` ```console -$ vcspull import gh django --mode search -w ~/code/ --min-stars 50 +$ vcspull import gh django \ + --mode search \ + --workspace ~/code/ \ + --min-stars 50 ``` Include archived or forked repositories (excluded by default): ```console -$ vcspull import gh myuser -w ~/code/ --archived --forks +$ vcspull import gh myuser \ + --workspace ~/code/ \ + --archived \ + --forks ``` Limit the number of repositories fetched: ```console -$ vcspull import gh myuser -w ~/code/ --limit 50 +$ vcspull import gh myuser \ + --workspace ~/code/ 
\ + --limit 50 ``` ```{note} @@ -144,19 +163,23 @@ language metadata. vcspull warns when a filter is unlikely to work. Human-readable output (default): ```console -$ vcspull import gh myuser -w ~/code/ +$ vcspull import gh myuser --workspace ~/code/ ``` JSON for automation: ```console -$ vcspull import gh myuser -w ~/code/ --json +$ vcspull import gh myuser \ + --workspace ~/code/ \ + --json ``` NDJSON for streaming: ```console -$ vcspull import gh myuser -w ~/code/ --ndjson +$ vcspull import gh myuser \ + --workspace ~/code/ \ + --ndjson ``` ## Dry runs and confirmation @@ -164,13 +187,17 @@ $ vcspull import gh myuser -w ~/code/ --ndjson Preview what would be imported without writing to the config file: ```console -$ vcspull import gh myuser -w ~/code/ --dry-run +$ vcspull import gh myuser \ + --workspace ~/code/ \ + --dry-run ``` Skip the confirmation prompt (useful for scripts): ```console -$ vcspull import gh myuser -w ~/code/ --yes +$ vcspull import gh myuser \ + --workspace ~/code/ \ + --yes ``` ## Configuration file selection @@ -178,7 +205,9 @@ $ vcspull import gh myuser -w ~/code/ --yes vcspull writes to `~/.vcspull.yaml` by default. Override with `-f/--file`: ```console -$ vcspull import gh myuser -w ~/code/ -f ~/configs/github.yaml +$ vcspull import gh myuser \ + --workspace ~/code/ \ + --file ~/configs/github.yaml ``` ## Protocol selection @@ -186,7 +215,9 @@ $ vcspull import gh myuser -w ~/code/ -f ~/configs/github.yaml SSH clone URLs are used by default. 
Switch to HTTPS with `--https`: ```console -$ vcspull import gh myuser -w ~/code/ --https +$ vcspull import gh myuser \ + --workspace ~/code/ \ + --https ``` ## Self-hosted instances @@ -195,7 +226,9 @@ Point to a self-hosted GitHub Enterprise, GitLab, Gitea, or Forgejo instance with `--url`: ```console -$ vcspull import gitea myuser -w ~/code/ --url https://git.example.com +$ vcspull import gitea myuser \ + --workspace ~/code/ \ + --url https://git.example.com ``` ## Authentication diff --git a/docs/cli/list.md b/docs/cli/list.md index ad69646eb..ad80161a0 100644 --- a/docs/cli/list.md +++ b/docs/cli/list.md @@ -124,7 +124,7 @@ By default, vcspull searches for config files in standard locations Specify a custom config file with `-f/--file`: ```console -$ vcspull list -f ~/projects/.vcspull.yaml +$ vcspull list --file ~/projects/.vcspull.yaml ``` ## Workspace filtering @@ -132,7 +132,7 @@ $ vcspull list -f ~/projects/.vcspull.yaml Filter repositories by workspace root with `-w/--workspace/--workspace-root`: ```vcspull-console -$ vcspull list -w ~/code/ +$ vcspull list --workspace ~/code/ • flask → ~/code/flask • requests → ~/code/requests ``` diff --git a/docs/cli/search.md b/docs/cli/search.md index b7c186ff3..d7b0b72c9 100644 --- a/docs/cli/search.md +++ b/docs/cli/search.md @@ -55,7 +55,7 @@ $ vcspull search --fixed-strings 'git+https://github.com/org/repo.git' case-insensitively unless your query includes uppercase characters. 
```console -$ vcspull search -S Django +$ vcspull search --smart-case Django ``` ## Boolean matching @@ -69,7 +69,7 @@ $ vcspull search --any django flask Invert matches with `-v/--invert-match`: ```console -$ vcspull search -v --fixed-strings github +$ vcspull search --invert-match --fixed-strings github ``` ## JSON output diff --git a/docs/cli/status.md b/docs/cli/status.md index 6959d3c00..34d4d1614 100644 --- a/docs/cli/status.md +++ b/docs/cli/status.md @@ -180,7 +180,7 @@ $ vcspull status --json > workspace-status-$(date +%Y%m%d).json Specify a custom config file with `-f/--file`: ```console -$ vcspull status -f ~/projects/.vcspull.yaml +$ vcspull status --file ~/projects/.vcspull.yaml ``` ## Workspace filtering @@ -188,7 +188,7 @@ $ vcspull status -f ~/projects/.vcspull.yaml Filter repositories by workspace root (planned feature): ```console -$ vcspull status -w ~/code/ +$ vcspull status --workspace ~/code/ ``` ## Color output diff --git a/docs/cli/sync.md b/docs/cli/sync.md index d3cb26f5d..3b2327e06 100644 --- a/docs/cli/sync.md +++ b/docs/cli/sync.md @@ -88,7 +88,7 @@ Each line is a JSON object representing a sync event, ideal for: Specify a custom config file with `-f/--file`: ```console -$ vcspull sync -f ~/projects/.vcspull.yaml '*' +$ vcspull sync --file ~/projects/.vcspull.yaml '*' ``` By default, vcspull searches for config files in: @@ -101,7 +101,7 @@ By default, vcspull searches for config files in: Filter repositories by workspace root with `-w/--workspace` or `--workspace-root`: ```console -$ vcspull sync -w ~/code/ '*' +$ vcspull sync --workspace ~/code/ '*' ``` This syncs only repositories in the specified workspace root, useful for: diff --git a/docs/quickstart.md b/docs/quickstart.md index 80681a3d5..7ff20143f 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -154,7 +154,7 @@ be any name): Use `-f/--file` to specify a config. 
```console -$ vcspull sync -f .deps.yaml --all +$ vcspull sync --file .deps.yaml --all ``` You can also use [fnmatch] to pull repositories from your config in From 92c468ccc1ca78d6039f92f1342ba9621128e9e0 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 15 Feb 2026 07:50:26 -0600 Subject: [PATCH 108/109] chore(scripts[gitlab]) Remove legacy generate_gitlab scripts why: `vcspull import gitlab` (PR #510) fully replaces these community scripts with built-in pagination, dry-run, filtering, and config merging. what: - Remove scripts/generate_gitlab.py - Remove scripts/generate_gitlab.sh --- scripts/generate_gitlab.py | 125 ------------------------------------- scripts/generate_gitlab.sh | 38 ----------- 2 files changed, 163 deletions(-) delete mode 100755 scripts/generate_gitlab.py delete mode 100755 scripts/generate_gitlab.sh diff --git a/scripts/generate_gitlab.py b/scripts/generate_gitlab.py deleted file mode 100755 index 0274bd26f..000000000 --- a/scripts/generate_gitlab.py +++ /dev/null @@ -1,125 +0,0 @@ -#!/usr/bin/env python -"""Example script for export gitlab organization to vcspull config file.""" - -from __future__ import annotations - -import argparse -import logging -import os -import pathlib -import sys -import typing as t - -import requests -import yaml -from libvcs.sync.git import GitRemote - -from vcspull.cli.sync import CouldNotGuessVCSFromURL, guess_vcs - -if t.TYPE_CHECKING: - from vcspull.types import RawConfig - -log = logging.getLogger(__name__) -logging.basicConfig(level=logging.INFO, format="%(message)s") - -try: - gitlab_token = os.environ["GITLAB_TOKEN"] -except KeyError: - log.info("Please provide the environment variable GITLAB_TOKEN") - sys.exit(1) - -parser = argparse.ArgumentParser( - description="Script to generate vcsconfig for all repositories \ - under the given namespace (needs Gitlab >= 10.3)", -) -parser.add_argument("gitlab_host", type=str, help="url to the gitlab instance") -parser.add_argument( - "gitlab_namespace", - 
type=str, - help="namespace/group in gitlab to generate vcsconfig for", -) -parser.add_argument( - "-c", - type=str, - help="path to the target config file (default: ./vcspull.yaml)", - dest="config_file_name", - required=False, - default="./vcspull.yaml", -) - -args = vars(parser.parse_args()) -gitlab_host = args["gitlab_host"] -gitlab_namespace = args["gitlab_namespace"] -config_filename = pathlib.Path(args["config_file_name"]) - -try: - if config_filename.is_file(): - result = input( - f"The target config file ({config_filename}) already exists, \ - do you want to overwrite it? [y/N] ", - ) - - if result != "y": - log.info( - "Aborting per user request as existing config file (%s) should not be " - "overwritten!", - config_filename, - ) - sys.exit(0) - - config_file = config_filename.open(encoding="utf-8", mode="w") -except OSError: - log.info("File %s not accessible", config_filename) - sys.exit(1) - -response = requests.get( - f"{gitlab_host}/api/v4/groups/{gitlab_namespace}/projects", - params={"include_subgroups": "true", "per_page": "100"}, - headers={"Authorization": f"Bearer {gitlab_token}"}, -) - -if response.status_code != 200: - log.info("Error: %s", response) - sys.exit(1) - -path_prefix = pathlib.Path().cwd() -config: RawConfig = {} - - -for group in response.json(): - url_to_repo = group["ssh_url_to_repo"].replace(":", "/") - namespace_path = group["namespace"]["full_path"] - reponame = group["path"] - - path = f"{path_prefix}/{namespace_path}" - - if path not in config: - config[path] = {} - - # simplified config not working - https://github.com/vcs-python/vcspull/issues/332 - # config[path][reponame] = 'git+ssh://%s' % (url_to_repo) - - vcs = guess_vcs(url_to_repo) - if vcs is None: - raise CouldNotGuessVCSFromURL(url_to_repo) - - config[path][reponame] = { - "name": reponame, - "path": path / reponame, - "url": f"git+ssh://{url_to_repo}", - "remotes": { - "origin": GitRemote( - name="origin", - fetch_url=f"ssh://{url_to_repo}", - 
push_url=f"ssh://{url_to_repo}", - ), - }, - "vcs": vcs, - } - -config_yaml = yaml.dump(config) - -log.info(config_yaml) - -config_file.write(config_yaml) -config_file.close() diff --git a/scripts/generate_gitlab.sh b/scripts/generate_gitlab.sh deleted file mode 100755 index 86068bd9e..000000000 --- a/scripts/generate_gitlab.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env bash - -if [ -z "${GITLAB_TOKEN}" ]; then - echo 'Please provide the environment variable $GITLAB_TOKEN' - exit 1 -fi - -if [ $# -lt 2 ]; then - echo "Usage: $0 []" - exit 1 -fi - -prefix="$(pwd)" -gitlab_host="${1}" -namespace="${2}" -config_file="${3:-./vcspull.yaml}" - -current_namespace_path="" - -curl --silent --show-error --header "Authorization: Bearer ${GITLAB_TOKEN}" "https://${gitlab_host}/api/v4/groups/${namespace}/projects?include_subgroups=true&per_page=100" \ - | jq -r '.[]|.namespace.full_path + " " + .path' \ - | LC_ALL=C sort \ - | while read namespace_path reponame; do - if [ "${current_namespace_path}" != "${namespace_path}" ]; then - current_namespace_path="${namespace_path}" - - echo "${prefix}/${current_namespace_path}:" - fi - - # simplified config not working - https://github.com/vcs-python/vcspull/issues/332 - #echo " ${reponame}: 'git+ssh://git@${gitlab_host}/${current_namespace_path}/${reponame}.git'" - - echo " ${reponame}:" - echo " url: 'git+ssh://git@${gitlab_host}/${current_namespace_path}/${reponame}.git'" - echo " remotes:" - echo " origin: 'ssh://git@${gitlab_host}/${current_namespace_path}/${reponame}.git'" - done \ - | tee "${config_file}" From 90bbd7d289efe5d84078719c2e7bb35209acd9b3 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 15 Feb 2026 07:50:32 -0600 Subject: [PATCH 109/109] docs(configuration[generation]) Replace script listings with import redirect why: The generation page referenced the now-removed gitlab scripts. Redirect readers to `vcspull import` which is the supported approach. 
what: - Replace generation.md content with stub pointing to {ref}cli-import - Update quickstart.md seealso to reference cli-import instead of config-generation - Remove generation toctree entry from configuration/index.md --- docs/configuration/generation.md | 112 +++---------------------------- docs/configuration/index.md | 7 -- docs/quickstart.md | 2 +- 3 files changed, 9 insertions(+), 112 deletions(-) diff --git a/docs/configuration/generation.md b/docs/configuration/generation.md index 781986754..7503c419e 100644 --- a/docs/configuration/generation.md +++ b/docs/configuration/generation.md @@ -2,112 +2,16 @@ # Config generation -As a temporary solution for `vcspull` not being able to generate {ref}`configuration` through scanning directories or fetching them via API (e.g. gitlab, github, etc), you can write scripts to generate configs in the mean time. +The `vcspull import` command can generate configuration by fetching +repositories from remote services. See {ref}`cli-import` for details. -(config-generation-gitlab)= +Supported services: GitHub, GitLab, Codeberg, Gitea, Forgejo, +AWS CodeCommit. -## Collect repos from Gitlab - -Contributed by Andreas Schleifer (a.schleifer@bigpoint.net) - -Limitation on both, no pagination support in either, so only returns the -first page of repos (as of Feb 26th this is 100). - -````{tab} Shell-script - -_Requires [jq] and [curl]._ - -```{literalinclude} ../../scripts/generate_gitlab.sh -:language: shell -``` +Example — import all repos from a GitLab group: ```console -$ env GITLAB_TOKEN=mySecretToken \ - /path/to/generate_gitlab.sh gitlab.mycompany.com desired_namespace -``` - -To be executed from the path where the repos should later be stored. It will use -the current working directory as a "prefix" for the path used in the new config file. 
- -Optional: Set config file output path as additional argument (_will overwrite_) - -```console -$ env GITLAB_TOKEN=mySecretToken \ - /path/to/generate_gitlab.sh gitlab.mycompany.com desired_namespace /path/to/config.yaml -``` - -**Demonstration** - -Assume current directory of _/home/user/workspace/_ and script at _/home/user/workspace/scripts/generate_gitlab.sh_: - -```console -$ ./scripts/generate_gitlab.sh gitlab.com vcs-python -``` - -New file _vcspull.yaml_: - -```yaml -/my/workspace/: - g: - url: "git+ssh://git@gitlab.com/vcs-python/g.git" - remotes: - origin: "ssh://git@gitlab.com/vcs-python/g.git" - libvcs: - url: "git+ssh://git@gitlab.com/vcs-python/libvcs.git" - remotes: - origin: "ssh://git@gitlab.com/vcs-python/libvcs.git" - vcspull: - url: "git+ssh://git@gitlab.com/vcs-python/vcspull.git" - remotes: - origin: "ssh://git@gitlab.com/vcs-python/vcspull.git" +$ vcspull import gitlab my-group \ + --workspace ~/code \ + --mode org ``` - -[jq]: https://stedolan.github.io/jq/ - -[curl]: https://curl.se/ - -```` - -````{tab} Python -_Requires [requests] and [pyyaml]._ - -This confirms file overwrite, if already exists. It also requires passing the protocol/schema -of the gitlab mirror, e.g. `https://gitlab.com` instead of `gitlab.com`. 
- -```{literalinclude} ../../scripts/generate_gitlab.py -:language: python -``` - -**Demonstration** - -Assume current directory of _/home/user/workspace/_ and script at _/home/user/workspace/scripts/generate_gitlab.sh_: - -```console -$ ./scripts/generate_gitlab.py https://gitlab.com vcs-python -``` - -```yaml -/my/workspace/vcs-python: - g: - remotes: - origin: ssh://git@gitlab.com/vcs-python/g.git - url: git+ssh://git@gitlab.com/vcs-python/g.git - libvcs: - remotes: - origin: ssh://git@gitlab.com/vcs-python/libvcs.git - url: git+ssh://git@gitlab.com/vcs-python/libvcs.git - vcspull: - remotes: - origin: ssh://git@gitlab.com/vcs-python/vcspull.git - url: git+ssh://git@gitlab.com/vcs-python/vcspull.git -``` - -[requests]: https://docs.python-requests.org/en/latest/ -[pyyaml]: https://pyyaml.org/ - -```` - -### Contribute your own - -Post yours on or create a PR to add -yours to scripts/ and be featured here diff --git a/docs/configuration/index.md b/docs/configuration/index.md index b966410b0..1fd453ff7 100644 --- a/docs/configuration/index.md +++ b/docs/configuration/index.md @@ -88,13 +88,6 @@ YAML: ```` -```{toctree} -:maxdepth: 2 -:hidden: - -generation -``` - ## Caveats (git-remote-ssh-git)= diff --git a/docs/quickstart.md b/docs/quickstart.md index 7ff20143f..f2a838093 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -102,7 +102,7 @@ via trunk (can break easily): ## Configuration ```{seealso} -{ref}`configuration` and {ref}`config-generation`. +{ref}`configuration` and {ref}`cli-import`. ``` We will check out the source code of [flask][flask] to `~/code/flask`.