diff --git a/src/cli/mod.rs b/src/cli/mod.rs index 4fff93a..70fb02c 100644 --- a/src/cli/mod.rs +++ b/src/cli/mod.rs @@ -5,6 +5,21 @@ use clap_complete::{ }; use std::path::PathBuf; +// Module overview (plain `//` comments: rustc rejects inner `//!` docs placed after a `use` item, E0753) +// # Command-Line Interface Module +// +// This module defines the command-line interface (CLI) for the application. +// It uses the `clap` crate to parse arguments and subcommands, and then +// dispatches to the appropriate handlers in the `core::commands` module. +// +// The main components are: +// - `Cli`: The top-level struct representing the CLI arguments. +// - `Commands`: An enum defining the main subcommands (e.g., `codeowners`, `completion`, `config`). +// - `CodeownersSubcommand`: An enum for subcommands related to CODEOWNERS file management. +// - `CompletionSubcommand`: An enum for generating shell completion scripts. +// - `cli_match()`: The main function that parses CLI input and executes the matched command. +// - `codeowners()`: A helper function to dispatch `CodeownersSubcommand` variants. + use crate::core::{ commands, types::{CacheEncoding, OutputFormat}, @@ -23,17 +38,23 @@ use crate::utils::types::LogLevel; )] //TODO: #[clap(setting = AppSettings::SubcommandRequired)] //TODO: #[clap(global_setting(AppSettings::DeriveDisplayOrder))] +/// Represents the command-line interface arguments for the application. +/// +/// This struct is parsed by `clap` to define the available commands, options, and flags. pub struct Cli { - /// Set a custom config file + /// Specifies a custom configuration file path. + /// If not provided, the application will look for a default configuration file. /// TODO: parse(from_os_str) #[arg(short, long, value_name = "FILE")] pub config: Option, - /// Set a custom config file + /// Enables or disables debug mode. + /// This can affect logging verbosity and other debugging features. 
#[arg(name = "debug", short, long = "debug", value_name = "DEBUG")] pub debug: Option, - /// Set Log Level + /// Sets the logging level for the application. + /// Valid options are typically defined in `LogLevel` enum (e.g., "error", "warn", "info", "debug", "trace"). #[arg( name = "log_level", short, @@ -42,31 +63,47 @@ pub struct Cli { )] pub log_level: Option, - /// Subcommands + /// The subcommand to execute. + /// This field holds one of the variants of the `Commands` enum. #[clap(subcommand)] command: Commands, } +/// Defines the main subcommands available in the CLI. #[derive(Subcommand, Debug)] enum Commands { + /// Subcommands for managing and analyzing CODEOWNERS files. + /// + /// This command group provides tools for parsing, validating, and querying + /// information from CODEOWNERS files. #[clap( name = "codeowners", about = "Manage and analyze CODEOWNERS files", long_about = "Tools for parsing, validating and querying CODEOWNERS files" )] Codeowners { + /// The specific `CodeownersSubcommand` to execute. #[clap(subcommand)] subcommand: CodeownersSubcommand, }, + /// Subcommands for generating shell completion scripts. + /// + /// These commands allow users to generate autocompletion scripts for + /// common shells like Bash, Zsh, and Fish, improving the usability of the CLI. #[clap( name = "completion", about = "Generate completion scripts", long_about = None, )] Completion { + /// The specific `CompletionSubcommand` (shell type) for which to generate the script. #[clap(subcommand)] subcommand: CompletionSubcommand, }, + /// Displays the current application configuration. + /// + /// This command prints the active configuration, which is a result of merging + /// default settings, configuration file values, and command-line arguments. #[clap( name = "config", about = "Show Configuration", @@ -75,106 +112,142 @@ enum Commands { Config, } +/// Defines subcommands for shell completion script generation. 
#[derive(Subcommand, PartialEq, Debug)] enum CompletionSubcommand { + /// Generates the autocompletion script for Bash. #[clap(about = "generate the autocompletion script for bash")] Bash, + /// Generates the autocompletion script for Zsh. #[clap(about = "generate the autocompletion script for zsh")] Zsh, + /// Generates the autocompletion script for Fish. #[clap(about = "generate the autocompletion script for fish")] Fish, } +/// Defines subcommands related to CODEOWNERS file management. #[derive(Subcommand, PartialEq, Debug)] enum CodeownersSubcommand { + /// Parses CODEOWNERS files and builds an ownership map. + /// + /// This command preprocesses CODEOWNERS files found within the specified path, + /// resolves ownership rules, and creates a cache for faster lookups by other commands. #[clap( name = "parse", about = "Preprocess CODEOWNERS files and build ownership map" )] Parse { - /// Directory path to analyze (default: current directory) + /// The directory path to analyze for CODEOWNERS files. Defaults to the current directory. #[arg(default_value = ".")] path: PathBuf, - /// Custom cache file location + /// Specifies a custom location for the cache file. Defaults to `.codeowners.cache`. #[arg(long, value_name = "FILE", default_value = ".codeowners.cache")] cache_file: Option, - /// Output format: json|bincode + /// The format for storing the cache: `json` or `bincode`. Defaults to `bincode`. #[arg(long, value_name = "FORMAT", default_value = "bincode", value_parser = parse_cache_encoding)] format: CacheEncoding, }, + /// Finds and lists files along with their owners based on specified filter criteria. + /// + /// This command queries the ownership information (potentially from a cache) + /// to list files and their associated owners, allowing filtering by tags, owners, + /// or unowned status. 
#[clap( name = "list-files", about = "Find and list files with their owners based on filter criteria" )] ListFiles { - /// Directory path to analyze (default: current directory) + /// The directory path to analyze. Defaults to the current directory. #[arg(default_value = ".")] path: Option, - /// Only show files with specified tags + /// Filters the list to show only files associated with the specified tags (comma-separated). #[arg(long, value_name = "LIST")] tags: Option, - /// Only show files owned by these owners + /// Filters the list to show only files owned by the specified owners (comma-separated). #[arg(long, value_name = "LIST")] owners: Option, - /// Show only unowned files + /// If set, only lists files that have no owners defined in CODEOWNERS. #[arg(long)] unowned: bool, - /// Show all files including unowned/untagged + /// If set, shows all files, including those that are unowned or untagged. #[arg(long)] show_all: bool, - /// Output format: text|json|bincode + /// The output format for the list: `text`, `json`, or `bincode`. Defaults to `text`. #[arg(long, value_name = "FORMAT", default_value = "text", value_parser = parse_output_format)] format: OutputFormat, - /// Custom cache file location + /// Specifies a custom location for the cache file. Defaults to `.codeowners.cache`. #[arg(long, value_name = "FILE", default_value = ".codeowners.cache")] cache_file: Option, }, + /// Displays aggregated statistics and associations for owners. + /// + /// This command provides insights into owner activity, such as the number of files + /// they own or other relevant metrics. #[clap( name = "list-owners", about = "Display aggregated owner statistics and associations" )] ListOwners { - /// Directory path to analyze (default: current directory) + /// The directory path to analyze. Defaults to the current directory. 
#[arg(default_value = ".")] path: Option, - /// Output format: text|json|bincode + /// The output format for the statistics: `text`, `json`, or `bincode`. Defaults to `text`. #[arg(long, value_name = "FORMAT", default_value = "text", value_parser = parse_output_format)] format: OutputFormat, - /// Custom cache file location + /// Specifies a custom location for the cache file. Defaults to `.codeowners.cache`. #[arg(long, value_name = "FILE", default_value = ".codeowners.cache")] cache_file: Option, }, + /// Audits and analyzes the usage of tags across CODEOWNERS files. + /// + /// This command helps in understanding how tags are defined and used, + /// potentially identifying unused or inconsistently applied tags. #[clap( name = "list-tags", about = "Audit and analyze tag usage across CODEOWNERS files" )] ListTags { - /// Directory path to analyze (default: current directory) + /// The directory path to analyze. Defaults to the current directory. #[arg(default_value = ".")] path: Option, - /// Output format: text|json|bincode + /// The output format for the tag analysis: `text`, `json`, or `bincode`. Defaults to `text`. #[arg(long, value_name = "FORMAT", default_value = "text", value_parser = parse_output_format)] format: OutputFormat, - /// Custom cache file location + /// Specifies a custom location for the cache file. Defaults to `.codeowners.cache`. #[arg(long, value_name = "FILE", default_value = ".codeowners.cache")] cache_file: Option, }, } +/// Parses command-line arguments, merges configurations, and executes the appropriate command. +/// +/// This is the main entry point for the CLI logic. It performs the following steps: +/// 1. Parses the raw command-line arguments using `Cli::parse()`. +/// 2. Merges any configuration specified via the `--config` option with `AppConfig`. +/// 3. Retrieves the `clap::Command` instance and its matches. +/// 4. Merges command-line arguments (which might override config file settings) into `AppConfig`. +/// 5. 
Matches the parsed subcommand and dispatches to the corresponding handler function +/// (e.g., `codeowners()` for `Codeowners` subcommands, or generates shell completions). +/// +/// # Returns +/// +/// Returns `Ok(())` on successful execution, or an `Err` variant from `crate::utils::error::Result` +/// if any step fails (e.g., config merging, argument parsing, command execution). pub fn cli_match() -> Result<()> { // Parse the command line arguments let cli = Cli::parse(); @@ -210,7 +283,19 @@ pub fn cli_match() -> Result<()> { Ok(()) } -/// Handle codeowners subcommands +/// Handles the dispatch of `CodeownersSubcommand` variants to their respective command functions. +/// +/// This function takes a reference to a `CodeownersSubcommand` and calls the appropriate +/// function from `crate::core::commands` based on the variant. +/// +/// # Arguments +/// +/// * `subcommand`: A reference to the `CodeownersSubcommand` enum variant to be executed. +/// +/// # Returns +/// +/// Returns `Ok(())` if the subcommand executes successfully, or an `Err` variant +/// from `crate::utils::error::Result` if the command handler encounters an error. pub(crate) fn codeowners(subcommand: &CodeownersSubcommand) -> Result<()> { match subcommand { CodeownersSubcommand::Parse { @@ -248,6 +333,21 @@ pub(crate) fn codeowners(subcommand: &CodeownersSubcommand) -> Result<()> { } } +/// Parses a string slice into an `OutputFormat` enum. +/// +/// This function is used by `clap` as a value parser for arguments +/// that specify an output format. It converts common string representations +/// (case-insensitive "text", "json", "bincode") into their corresponding +/// `OutputFormat` variants. +/// +/// # Arguments +/// +/// * `s`: The string slice to parse. +/// +/// # Returns +/// +/// Returns `Ok(OutputFormat)` if the string is a valid format, otherwise +/// returns `Err(String)` with an error message. 
fn parse_output_format(s: &str) -> std::result::Result { match s.to_lowercase().as_str() { "text" => Ok(OutputFormat::Text), @@ -257,6 +357,21 @@ fn parse_output_format(s: &str) -> std::result::Result { } } +/// Parses a string slice into a `CacheEncoding` enum. +/// +/// This function is used by `clap` as a value parser for arguments +/// that specify a cache encoding format. It converts common string representations +/// (case-insensitive "bincode", "json") into their corresponding +/// `CacheEncoding` variants. +/// +/// # Arguments +/// +/// * `s`: The string slice to parse. +/// +/// # Returns +/// +/// Returns `Ok(CacheEncoding)` if the string is a valid encoding, otherwise +/// returns `Err(String)` with an error message. fn parse_cache_encoding(s: &str) -> std::result::Result { match s.to_lowercase().as_str() { "bincode" => Ok(CacheEncoding::Bincode), diff --git a/src/core/cache.rs b/src/core/cache.rs index 0b13f06..f112bb9 100644 --- a/src/core/cache.rs +++ b/src/core/cache.rs @@ -8,7 +8,37 @@ use super::parse::parse_repo; use super::types::{CacheEncoding, CodeownersCache, CodeownersEntry, FileEntry}; use crate::utils::error::{Error, Result}; -/// Create a cache from parsed CODEOWNERS entries and files +// Module overview (plain `//` comments: rustc rejects inner `//!` docs placed after a `use` item, E0753) +// # CODEOWNERS Cache Management +// +// This module provides functionalities for creating, storing, loading, and synchronizing +// a cache of CODEOWNERS information. The cache helps in speeding up operations +// like listing file owners or tags by avoiding repeated parsing of CODEOWNERS files. +// +// The main operations include: +// - Building a new cache from parsed `CodeownersEntry` items and file lists. +// - Storing a `CodeownersCache` object to a file, with support for different encodings. +// - Loading a `CodeownersCache` from a file, automatically detecting the encoding. +// - Synchronizing the cache, which involves checking if an existing cache is valid +// 
(e.g., by comparing a repository hash) and rebuilding it if necessary. + +/// Creates a `CodeownersCache` from parsed CODEOWNERS entries, a list of files, and a repository hash. +/// +/// This function processes each file to determine its owners and tags based on the provided +/// `CodeownersEntry` list. It also aggregates information about all unique owners and tags +/// found in the entries. +/// +/// # Arguments +/// +/// * `entries`: A vector of `CodeownersEntry` structs, representing the parsed rules from CODEOWNERS files. +/// * `files`: A vector of `PathBuf` pointing to the files in the repository that should be included in the cache. +/// * `hash`: A 32-byte array representing a hash of the repository state (e.g., commit hash or file content hash) +/// to validate cache freshness. +/// +/// # Returns +/// +/// Returns a `Result` containing the newly created `CodeownersCache` on success, +/// or an `Error` if any part of the cache building process fails (e.g., path processing). pub fn build_cache( entries: Vec, files: Vec, hash: [u8; 32], ) -> Result { @@ -61,7 +91,22 @@ pub fn build_cache( }) } -/// Store Cache +/// Stores a `CodeownersCache` object to a specified file path using the given encoding. +/// +/// This function serializes the `CodeownersCache` into either Bincode or JSON format +/// and writes it to the file system. It also ensures that the parent directory for the +/// cache file exists, creating it if necessary. +/// +/// # Arguments +/// +/// * `cache`: A reference to the `CodeownersCache` to be stored. +/// * `path`: The `Path` where the cache file should be saved. +/// * `encoding`: The `CacheEncoding` to use (e.g., `Bincode` or `Json`). +/// +/// # Returns +/// +/// Returns `Ok(())` on successful storage, or an `Error` if directory creation, +/// file creation, serialization, or writing fails. 
pub fn store_cache(cache: &CodeownersCache, path: &Path, encoding: CacheEncoding) -> Result<()> { let parent = path .parent() @@ -87,7 +132,23 @@ pub fn store_cache(cache: &CodeownersCache, path: &Path, encoding: CacheEncoding Ok(()) } -/// Load Cache from file, automatically detecting whether it's JSON or Bincode format +/// Loads a `CodeownersCache` from a specified file path, automatically detecting the encoding. +/// +/// This function attempts to determine if the cache file is in JSON or Bincode format. +/// It first checks if the file starts with `'{'`, which suggests JSON. If so, it tries +/// to deserialize it as JSON. Otherwise, it attempts Bincode deserialization. If Bincode +/// fails and it wasn't identified as JSON initially, it makes a fallback attempt to +/// deserialize as JSON. +/// +/// # Arguments +/// +/// * `path`: The `Path` to the cache file to be loaded. +/// +/// # Returns +/// +/// Returns a `Result` containing the loaded `CodeownersCache` on success, +/// or an `Error` if the file cannot be opened, read, or deserialized in any +/// supported format. pub fn load_cache(path: &Path) -> Result { // Read the first byte to make an educated guess about the format let mut file = std::fs::File::open(path) @@ -132,6 +193,25 @@ pub fn load_cache(path: &Path) -> Result { } } +/// Synchronizes the CODEOWNERS cache for a given repository. +/// +/// This function checks if a valid cache file exists and matches the current state of the +/// repository (verified by a hash). If the cache is missing, outdated, or invalid, +/// it triggers a re-parse of the repository's CODEOWNERS files and rebuilds the cache. +/// +/// The location of the cache file can be specified directly or retrieved from the +/// application configuration. +/// +/// # Arguments +/// +/// * `repo`: A `Path` to the root of the repository to be analyzed. +/// * `cache_file`: An optional `Path` to the cache file. If `None`, the path is +/// determined from `AppConfig`. 
+/// +/// # Returns +/// +/// Returns a `Result` containing the `CodeownersCache` (either loaded or newly built) +/// on success, or an `Error` if loading, parsing, or cache building fails. pub fn sync_cache( repo: &std::path::Path, cache_file: Option<&std::path::Path>, ) -> Result { diff --git a/src/core/commands.rs b/src/core/commands.rs index 8cad3ae..e0d0891 100644 --- a/src/core/commands.rs +++ b/src/core/commands.rs @@ -7,7 +7,32 @@ use super::types::{CacheEncoding, CodeownersEntry, OutputFormat}; use crate::utils::app_config::AppConfig; use crate::utils::error::{Error, Result}; -/// Show the configuration file +// Module overview (plain `//` comments: rustc rejects inner `//!` docs placed after a `use` item, E0753) +// # CLI Command Implementations +// +// This module provides the core logic for the command-line interface (CLI) +// commands offered by the application. Each public function in this module typically +// corresponds to a subcommand that can be invoked by the user. +// +// The functions handle tasks such as: +// - Displaying application configuration. +// - Parsing `CODEOWNERS` files and building a cache. +// - Listing files based on ownership and tag criteria. +// - Listing owners and their associated files. +// - Listing tags and their associated files. +// +// These command handlers often utilize functionalities from other `core` submodules +// like `cache`, `common`, and `parse`, as well as utilities from the `utils` module. + +/// Displays the current application configuration. +/// +/// This function fetches the active `AppConfig` instance, which includes settings +/// merged from default values, configuration files, and command-line arguments, +/// and prints it to the console in a human-readable format. +/// +/// # Returns +/// +/// Returns `Ok(())` on success, or an `Error` if the configuration cannot be fetched. 
pub fn config() -> Result<()> { let config = AppConfig::fetch()?; println!("{:#?}", config); @@ -15,7 +40,28 @@ pub fn config() -> Result<()> { Ok(()) } -/// Preprocess CODEOWNERS files and build ownership map +/// Parses `CODEOWNERS` files within a specified path, builds an ownership cache, and stores it. +/// +/// This command performs the following steps: +/// 1. Determines the target cache file path (either from `cache_file` argument or `AppConfig`). +/// 2. Finds all `CODEOWNERS` files in the given `path`. +/// 3. Parses each `CODEOWNERS` file to extract ownership entries. +/// 4. Finds all files within the `path` to be included in the cache. +/// 5. Calculates a hash of the repository state for cache validation. +/// 6. Builds the `CodeownersCache` using the parsed entries, file list, and hash. +/// 7. Stores the newly built cache to the determined cache file path using the specified `encoding`. +/// 8. Attempts to load the cache back to verify its integrity (primarily for testing). +/// +/// # Arguments +/// +/// * `path`: The root directory path to scan for `CODEOWNERS` files and other project files. +/// * `cache_file`: An optional path to store the generated cache. If `None`, the path from `AppConfig` is used, relative to `path`. +/// * `encoding`: The `CacheEncoding` format (e.g., `Bincode`, `Json`) to use for storing the cache. +/// +/// # Returns +/// +/// Returns `Ok(())` on successful parsing and cache creation, or an `Error` if any step fails +/// (e.g., file I/O, parsing errors, cache storage). pub fn codeowners_parse( path: &std::path::Path, cache_file: Option<&std::path::Path>, encoding: CacheEncoding, ) -> Result<()> { @@ -60,7 +106,36 @@ pub fn codeowners_parse( Ok(()) } -/// Find and list files with their owners based on filter criteria +/// Lists files from the CODEOWNERS cache, applying filters and formatting the output. +/// +/// This command performs the following actions: +/// 1. 
Determines the repository path (defaults to current directory if `repo` is `None`). +/// 2. Synchronizes the cache for the repository using `sync_cache`, loading or rebuilding it as needed. +/// The `cache_file` argument can specify a custom cache location. +/// 3. Filters the files from the cache based on the provided criteria: +/// - `owners`: Comma-separated list of owner identifiers to filter by. +/// - `tags`: Comma-separated list of tag names to filter by. +/// - `unowned`: If `true`, only lists files with no owners. +/// - `show_all`: If `true`, includes unowned/untagged files; otherwise, they are excluded unless `unowned` is also true. +/// 4. Outputs the filtered list of files in the specified `format` (`Text`, `Json`, or `Bincode`). +/// - `Text` format provides a human-readable table with truncated paths and owner/tag lists for better display. +/// - `Json` format outputs a JSON array of the filtered file entries. +/// - `Bincode` format outputs the raw Bincode-serialized data of the filtered file entries. +/// +/// # Arguments +/// +/// * `repo`: An optional path to the repository directory. Defaults to `"."`. +/// * `tags`: An optional comma-separated string of tags to filter by. +/// * `owners`: An optional comma-separated string of owner identifiers to filter by. +/// * `unowned`: A boolean flag; if `true`, only unowned files are listed. +/// * `show_all`: A boolean flag; if `true`, all files (including unowned/untagged) are considered, subject to other filters. +/// * `format`: The `OutputFormat` for the output (e.g., `Text`, `Json`, `Bincode`). +/// * `cache_file`: An optional path to the cache file. If `None`, the default cache path is used. +/// +/// # Returns +/// +/// Returns `Ok(())` on successful listing, or an `Error` if cache synchronization, +/// filtering, or output formatting/writing fails. 
pub fn codeowners_list_files( repo: Option<&std::path::Path>, tags: Option<&str>, owners: Option<&str>, unowned: bool, show_all: bool, format: &OutputFormat, cache_file: Option<&std::path::Path>, @@ -76,7 +151,7 @@ pub fn codeowners_list_files( .files .iter() .filter(|file| { - // Check if we should include this file based on filters + // Check if we should include this file based on owner filters let passes_owner_filter = match owners { Some(owner_filter) => { let owner_patterns: Vec<&str> = owner_filter.split(',').collect(); @@ -86,9 +161,10 @@ pub fn codeowners_list_files( .any(|pattern| owner.identifier.contains(pattern)) }) } - None => true, + None => true, // No owner filter means all files pass this check }; + // Check if we should include this file based on tag filters let passes_tag_filter = match tags { Some(tag_filter) => { let tag_patterns: Vec<&str> = tag_filter.split(',').collect(); @@ -96,22 +172,25 @@ pub fn codeowners_list_files( .iter() .any(|tag| tag_patterns.iter().any(|pattern| tag.0.contains(pattern))) } - None => true, + None => true, // No tag filter means all files pass this check }; + // Check if we should include this file based on the unowned filter let passes_unowned_filter = if unowned { - file.owners.is_empty() + file.owners.is_empty() // Only include if file has no owners } else { - true + true // If not filtering for unowned, all files pass this check }; - // exclude unowned/untagged files unless show_all or unowned is specified + // Determine if the file meets basic ownership/tag requirements + // Exclude unowned/untagged files unless `show_all` or `unowned` is specified. 
let passes_ownership_requirement = if show_all || unowned { - true + true // If showing all or specifically unowned, it passes } else { - !file.owners.is_empty() || !file.tags.is_empty() + !file.owners.is_empty() || !file.tags.is_empty() // Otherwise, must have owners or tags }; + // File is included if all filter conditions are met passes_owner_filter && passes_tag_filter && passes_unowned_filter @@ -122,12 +201,12 @@ pub fn codeowners_list_files( // Output the filtered files in the requested format match format { OutputFormat::Text => { - // Set column widths that work better for most displays - let path_width = 45; // Max width for path display - let owner_width = 26; // More space for owners - let tag_width = 26; // More space for tags + // Define column widths for text output to enhance readability + let path_width = 45; // Max width for displaying file paths + let owner_width = 26; // Width for displaying owners + let tag_width = 26; // Width for displaying tags - // Print header + // Print table header println!( "===============================================================================" ); @@ -144,35 +223,31 @@ pub fn codeowners_list_files( "===============================================================================" ); - // Print each file entry + // Print each filtered file's details for file in &filtered_files { - // Format the path - keep the filename but truncate the path if needed + // Format path for display: truncate if too long, prioritizing filename let path_str = file.path.to_string_lossy(); let path_display = if path_str.len() > path_width { - // Extract filename let filename = file .path .file_name() .map(|f| f.to_string_lossy().to_string()) .unwrap_or_default(); - - // Calculate available space for parent path - let available_space = path_width.saturating_sub(filename.len() + 4); // +4 for ".../" - - if available_space > 5 { - // Show part of the parent path - let parent_path = path_str.to_string(); - let start_pos = 
parent_path.len().saturating_sub(path_width - 3); - format!("...{}", &parent_path[start_pos..]) - } else { - // Just show the filename with ellipsis + // Available space for the parent path part, considering ellipsis and separator + let available_space = path_width.saturating_sub(filename.len() + 4); // ".../" + if available_space > 5 { // Heuristic: show some parent path if space allows + let parent_path_str = path_str.to_string(); + // Take the end of the parent path string + let start_pos = parent_path_str.len().saturating_sub(path_width - 3); // -3 for "..." + format!("...{}", &parent_path_str[start_pos..]) + } else { // Not enough space, just show ellipsis and filename format!(".../{}", filename) } } else { - path_str.to_string() + path_str.to_string() // Path fits, show as is }; - // Format owners with more space + // Format owners list for display, truncate if too long let owners_str = if file.owners.is_empty() { "None".to_string() } else { @@ -182,14 +257,13 @@ pub fn codeowners_list_files( .collect::>() .join(", ") }; - let owners_display = if owners_str.len() > owner_width { format!("{}...", &owners_str[0..owner_width - 3]) } else { owners_str }; - // Format tags with more space + // Format tags list for display, truncate if too long let tags_str = if file.tags.is_empty() { "None".to_string() } else { @@ -199,13 +273,13 @@ pub fn codeowners_list_files( .collect::>() .join(", ") }; - let tags_display = if tags_str.len() > tag_width { format!("{}...", &tags_str[0..tag_width - 3]) } else { tags_str }; + // Print the formatted row println!( " {: { + // Serialize the filtered files list to a pretty JSON string and print println!("{}", serde_json::to_string_pretty(&filtered_files).unwrap()); } OutputFormat::Bincode => { + // Serialize the filtered files list to Bincode format let encoded = bincode::serde::encode_to_vec(&filtered_files, bincode::config::standard()) .map_err(|e| Error::new(&format!("Serialization error: {}", e)))?; - - // Write raw binary bytes 
to stdout + // Write the raw binary bytes to standard output io::stdout() .write_all(&encoded) .map_err(|e| Error::new(&format!("IO error: {}", e)))?; @@ -242,7 +318,27 @@ pub fn codeowners_list_files( Ok(()) } -/// Display aggregated owner statistics and associations +/// Lists owners found in the CODEOWNERS cache along with statistics and associated files. +/// +/// This command performs the following: +/// 1. Determines the repository path (defaults to current directory if `repo` is `None`). +/// 2. Synchronizes the cache for the repository using `sync_cache`. +/// 3. Processes the `owners_map` from the cache, which maps owners to the files they own. +/// 4. Outputs the owner information in the specified `format` (`Text`, `Json`, or `Bincode`). +/// - `Text` format displays a table with owner identifier, type, file count, and a sample of filenames. Owners are sorted by the number of files they own (descending). +/// - `Json` format outputs a JSON array where each object represents an owner and includes their identifier, type, file count, and a full list of associated file paths. +/// - `Bincode` format outputs the raw Bincode-serialized data of the `owners_map`. +/// +/// # Arguments +/// +/// * `repo`: An optional path to the repository directory. Defaults to `"."`. +/// * `format`: The `OutputFormat` for the output (e.g., `Text`, `Json`, `Bincode`). +/// * `cache_file`: An optional path to the cache file. If `None`, the default cache path is used. +/// +/// # Returns +/// +/// Returns `Ok(())` on successful listing, or an `Error` if cache synchronization, +/// processing, or output formatting/writing fails. pub fn codeowners_list_owners( repo: Option<&std::path::Path>, format: &OutputFormat, cache_file: Option<&std::path::Path>, ) -> Result<()> { @@ -371,7 +467,27 @@ pub fn codeowners_list_owners( Ok(()) } -/// Audit and analyze tag usage across CODEOWNERS files +/// Lists tags found in the CODEOWNERS cache along with statistics and associated files. 
+/// +/// This command performs the following: +/// 1. Determines the repository path (defaults to current directory if `repo` is `None`). +/// 2. Synchronizes the cache for the repository using `sync_cache`. +/// 3. Processes the `tags_map` from the cache, which maps tags to the files they are associated with. +/// 4. Outputs the tag information in the specified `format` (`Text`, `Json`, or `Bincode`). +/// - `Text` format displays a table with tag name, file count, and a sample of filenames. Tags are sorted by the number of files they are associated with (descending). +/// - `Json` format outputs a JSON array where each object represents a tag and includes its name, file count, and a full list of associated file paths. +/// - `Bincode` format outputs the raw Bincode-serialized data of the `tags_map`. +/// +/// # Arguments +/// +/// * `repo`: An optional path to the repository directory. Defaults to `"."`. +/// * `format`: The `OutputFormat` for the output (e.g., `Text`, `Json`, `Bincode`). +/// * `cache_file`: An optional path to the cache file. If `None`, the default cache path is used. +/// +/// # Returns +/// +/// Returns `Ok(())` on successful listing, or an `Error` if cache synchronization, +/// processing, or output formatting/writing fails. pub fn codeowners_list_tags( repo: Option<&std::path::Path>, format: &OutputFormat, cache_file: Option<&std::path::Path>, ) -> Result<()> { diff --git a/src/core/common.rs b/src/core/common.rs index ceb0ad2..3a33760 100644 --- a/src/core/common.rs +++ b/src/core/common.rs @@ -9,7 +9,37 @@ use std::path::{Path, PathBuf}; use super::types::{CodeownersEntry, FileEntry, Owner, OwnerType, Tag}; -/// Find CODEOWNERS files recursively in the given directory and its subdirectories +// Module overview (plain `//` comments: rustc rejects inner `//!` docs placed after a `use` item, E0753) +// # Core Common Utilities +// +// This module provides common utility functions used throughout the `core` module +// and potentially other parts of the application. These utilities handle tasks such as: +// +// 
- Discovering `CODEOWNERS` files and other project files within a directory structure. +// - Parsing `CODEOWNERS` file content line by line to extract rules, owners, and tags. +// - Determining the effective owners and tags for a given file path based on the parsed `CODEOWNERS` rules. +// - Aggregating lists of files associated with specific owners or tags. +// - Collecting unique owners and tags from all parsed entries. +// - Calculating a repository hash for cache validation purposes. +// +// The functions here often interact with the file system, parse text data, and apply +// logic to match file paths against `CODEOWNERS` patterns. + +/// Finds all files named `CODEOWNERS` recursively within a given base path. +/// +/// This function walks the directory tree starting from `base_path` and collects +/// the paths of all files that are explicitly named "CODEOWNERS". +/// +/// # Arguments +/// +/// * `base_path`: A generic type `P` that can be converted into a `Path` reference, +/// representing the root directory to start the search from. +/// +/// # Returns +/// +/// Returns a `Result` containing a `Vec` of all found `CODEOWNERS` file paths +/// on success, or an `Error` if there's an issue reading directories (though current +/// implementation returns `Ok` with an empty vec on read errors for a specific dir). pub fn find_codeowners_files>(base_path: P) -> Result> { let mut result = Vec::new(); if let Ok(entries) = std::fs::read_dir(base_path) { @@ -32,10 +62,25 @@ pub fn find_codeowners_files>(base_path: P) -> Result` of all found file paths +/// (excluding "CODEOWNERS") on success. Currently, errors during walk are filtered out, +/// so it effectively always returns `Ok`. 
pub fn find_files>(base_path: P) -> Result> { let result = Walk::new(base_path) - .filter_map(|entry| entry.ok()) + .filter_map(|entry| entry.ok()) // Silently ignore errors from Walk, converting them to None .filter(|e| e.path().is_file()) .filter(|e| e.clone().file_name().to_str().unwrap() != "CODEOWNERS") .map(|entry| entry.into_path()) @@ -44,7 +89,21 @@ pub fn find_files>(base_path: P) -> Result> { Ok(result) } -/// Parse CODEOWNERS +/// Parses a single `CODEOWNERS` file into a list of `CodeownersEntry` structs. +/// +/// This function reads the content of the specified `CODEOWNERS` file, then processes +/// it line by line. Each non-empty, non-comment line is parsed by `parse_line` +/// to create a `CodeownersEntry`. +/// +/// # Arguments +/// +/// * `source_path`: A `Path` reference to the `CODEOWNERS` file to be parsed. +/// +/// # Returns +/// +/// Returns a `Result` containing a `Vec` representing all valid rules +/// found in the file. An `Error` can occur if the file cannot be read or if +/// `parse_line` encounters an issue (though `parse_line` itself aims to be robust). pub fn parse_codeowners(source_path: &Path) -> Result> { let content = std::fs::read_to_string(source_path)?; @@ -139,7 +198,28 @@ fn parse_owner(owner_str: &str) -> Result { }) } -/// Find owners for a specific file based on all parsed CODEOWNERS entries +/// Determines the effective owners for a given file path based on a list of `CodeownersEntry` rules. +/// +/// This function iterates through all provided `CodeownersEntry` items and identifies which +/// entries match the `file_path`. The matching is performed using glob patterns from the entries, +/// anchored to the directory of the `CODEOWNERS` file where the entry originated. +/// +/// The selection logic prioritizes rules based on: +/// 1. **Depth**: Rules from `CODEOWNERS` files closer to the target file (greater depth) take precedence. +/// 2. 
**Source File**: If depths are equal, the specific `CODEOWNERS` file is considered (though typical usage has one per dir). +/// 3. **Line Number**: For rules within the same `CODEOWNERS` file at the same effective depth, +/// the rule appearing later in the file (higher line number) takes precedence. +/// +/// # Arguments +/// +/// * `file_path`: A `Path` reference to the file for which owners are to be determined. +/// * `entries`: A slice of `CodeownersEntry` structs representing all parsed rules from all relevant `CODEOWNERS` files. +/// +/// # Returns +/// +/// Returns a `Result` containing a `Vec` with the owners from the highest priority matching rule. +/// If no rule matches, an empty vector is returned. An `Error` can occur if `file_path` has no parent directory +/// or if pattern matching encounters issues (though many pattern errors are logged to `eprintln!` and skipped). pub fn find_owners_for_file(file_path: &Path, entries: &[CodeownersEntry]) -> Result> { // file directory let target_dir = file_path @@ -153,6 +233,7 @@ pub fn find_owners_for_file(file_path: &Path, entries: &[CodeownersEntry]) -> Re let codeowners_dir = match entry.source_file.parent() { Some(dir) => dir, None => { + // Log and skip if a CODEOWNERS entry's source file path is invalid eprintln!( "CODEOWNERS entry has no parent directory: {}", entry.source_file.display() @@ -161,22 +242,24 @@ pub fn find_owners_for_file(file_path: &Path, entries: &[CodeownersEntry]) -> Re } }; - // Check if the CODEOWNERS directory is an ancestor of the target directory + // Rule applies only if its CODEOWNERS file is in an ancestor directory of the target file if !target_dir.starts_with(codeowners_dir) { continue; } - // Calculate the depth as the number of components in the relative path from codeowners_dir to target_dir + // Calculate depth: more nested CODEOWNERS files are more specific. 
+ // Depth is the number of directory levels between the CODEOWNERS file's directory and the target file's directory. let rel_path = match target_dir.strip_prefix(codeowners_dir) { Ok(p) => p, - Err(_) => continue, // Should not happen due to starts_with check + Err(_) => continue, // Should not happen due to the starts_with check }; let depth = rel_path.components().count(); - // Check if the pattern matches the target file + // Check if the entry's pattern matches the target file let matches = { - let mut builder = OverrideBuilder::new(codeowners_dir); + let mut builder = OverrideBuilder::new(codeowners_dir); // Patterns are relative to the CODEOWNERS file's directory if let Err(e) = builder.add(&entry.pattern) { + // Log and skip invalid patterns eprintln!( "Invalid pattern '{}' in {}: {}", entry.pattern, @@ -189,6 +272,7 @@ pub fn find_owners_for_file(file_path: &Path, entries: &[CodeownersEntry]) -> Re let over: Override = match builder.build() { Ok(o) => o, Err(e) => { + // Log and skip if override builder fails eprintln!( "Failed to build override for pattern '{}': {}", entry.pattern, e @@ -196,6 +280,7 @@ pub fn find_owners_for_file(file_path: &Path, entries: &[CodeownersEntry]) -> Re continue; } }; + // Check if the file path matches the pattern. `is_whitelist()` means it's a match. over.matched(file_path, false).is_whitelist() }; @@ -204,32 +289,58 @@ pub fn find_owners_for_file(file_path: &Path, entries: &[CodeownersEntry]) -> Re } } - // Sort the candidates by depth, source file, and line number + // Sort candidates so that the winning rule ends up first. + // `depth` counts the directory levels from the CODEOWNERS file's directory down to the target file's directory, so depth 0 means the CODEOWNERS file sits in the same directory as the file. + // CODEOWNERS precedence: the nearest CODEOWNERS file wins, and within one file the last matching rule wins. + // How each concern is handled: + // 1. 
Specificity of pattern (glob vs. path component) - not directly handled here, relies on gitignore matching. + // 2. Closeness of CODEOWNERS file: rules from the CODEOWNERS file nearest to the target file take precedence. + // 3. Order within a file: Later rules override earlier ones. + // The sort keys: + // - `a_depth.cmp(&b_depth)`: Ascending depth, so the nearest CODEOWNERS file (smallest depth) comes first. Descending order would let a repository-root CODEOWNERS override nested ones. + // - `b_entry.line_number.cmp(&a_entry.line_number)`: Descending line number. Later lines come first. + // - `a_entry.source_file.cmp(&b_entry.source_file)`: Deterministic tie-break between files. + // `candidates.first()` is therefore the rule that takes precedence. candidates.sort_by(|a, b| { let a_entry = a.0; let a_depth = a.1; let b_entry = b.0; let b_depth = b.1; - // Primary sort by depth (ascending) - a_depth - .cmp(&b_depth) - // Then by source file (to group entries from the same CODEOWNERS file) - .then_with(|| a_entry.source_file.cmp(&b_entry.source_file)) - // Then by line number (descending) to prioritize later entries in the same file - .then_with(|| b_entry.line_number.cmp(&a_entry.line_number)) + a_depth // Sort by depth (ascending: the nearest CODEOWNERS file takes precedence) + .cmp(&b_depth) + .then_with(|| b_entry.line_number.cmp(&a_entry.line_number)) // Then by line number (descending: later rules take precedence) + .then_with(|| a_entry.source_file.cmp(&b_entry.source_file)) // Fallback to source file for stability if depths and lines are same (unlikely for different files) }); - // Extract the owners from the highest priority entry, if any + + // The first candidate after sorting is the one that takes precedence. Ok(candidates .first() .map(|(entry, _)| entry.owners.clone()) .unwrap_or_default()) } -/// Find tags for a specific file based on all parsed CODEOWNERS entries +/// Determines the effective tags for a given file path based on a list of `CodeownersEntry` rules. 
+/// +/// This function operates similarly to `find_owners_for_file`, using the same matching +/// and prioritization logic (depth of `CODEOWNERS` file, line number within the file) +/// to find the highest priority `CodeownersEntry` that matches the `file_path`. +/// It then returns the tags associated with that entry. +/// +/// # Arguments +/// +/// * `file_path`: A `Path` reference to the file for which tags are to be determined. +/// * `entries`: A slice of `CodeownersEntry` structs representing all parsed rules. +/// +/// # Returns +/// +/// Returns a `Result` containing a `Vec` with the tags from the highest priority matching rule. +/// If no rule matches, an empty vector is returned. An `Error` can occur under the same +/// conditions as `find_owners_for_file`. pub fn find_tags_for_file(file_path: &Path, entries: &[CodeownersEntry]) -> Result> { let target_dir = file_path.parent().ok_or_else(|| { + // Using std::io::Error here, but crate::utils::error::Error might be more consistent. 
std::io::Error::new( std::io::ErrorKind::InvalidInput, "file path has no parent directory", @@ -250,19 +361,16 @@ pub fn find_tags_for_file(file_path: &Path, entries: &[CodeownersEntry]) -> Resu } }; - // Check if the CODEOWNERS directory is an ancestor of the target directory if !target_dir.starts_with(codeowners_dir) { continue; } - // Calculate the depth as the number of components in the relative path from codeowners_dir to target_dir let rel_path = match target_dir.strip_prefix(codeowners_dir) { Ok(p) => p, - Err(_) => continue, // Should not happen due to starts_with check + Err(_) => continue, }; let depth = rel_path.components().count(); - // Check if the pattern matches the target file let matches = { let mut builder = OverrideBuilder::new(codeowners_dir); if let Err(e) = builder.add(&entry.pattern) { @@ -292,30 +400,36 @@ pub fn find_tags_for_file(file_path: &Path, entries: &[CodeownersEntry]) -> Resu } } - // Sort the candidates by depth, source file, and line number + // Sort so the winning rule comes first: nearest CODEOWNERS file (smallest depth), then the latest line in that file. 
candidates.sort_by(|a, b| { let a_entry = a.0; let a_depth = a.1; let b_entry = b.0; let b_depth = b.1; - // Primary sort by depth (ascending) - a_depth - .cmp(&b_depth) - // Then by source file (to group entries from the same CODEOWNERS file) + a_depth // Sort by depth (ascending: the nearest CODEOWNERS file takes precedence) + .cmp(&b_depth) + .then_with(|| b_entry.line_number.cmp(&a_entry.line_number)) // Then by line number (descending) .then_with(|| a_entry.source_file.cmp(&b_entry.source_file)) - // Then by line number (descending) to prioritize later entries in the same file - .then_with(|| b_entry.line_number.cmp(&a_entry.line_number)) }); - // Extract the tags from the highest priority entry, if any Ok(candidates .first() .map(|(entry, _)| entry.tags.clone()) .unwrap_or_default()) } -/// Find all files owned by a specific owner +/// Filters a list of `FileEntry` items to find all files owned by a specific `Owner`. 
+/// +/// # Arguments +/// +/// * `files`: A slice of `FileEntry` structs, typically from a `CodeownersCache`. +/// * `owner`: An `Owner` reference to filter by. +/// +/// # Returns +/// +/// Returns a `Vec` containing the paths of all files in the input list +/// that have the specified `owner` in their `owners` list. pub fn find_files_for_owner(files: &[FileEntry], owner: &Owner) -> Vec { files .iter() @@ -324,7 +438,17 @@ pub fn find_files_for_owner(files: &[FileEntry], owner: &Owner) -> Vec .collect() } -/// Find all files tagged with a specific tag +/// Filters a list of `FileEntry` items to find all files tagged with a specific `Tag`. +/// +/// # Arguments +/// +/// * `files`: A slice of `FileEntry` structs, typically from a `CodeownersCache`. +/// * `tag`: A `Tag` reference to filter by. +/// +/// # Returns +/// +/// Returns a `Vec` containing the paths of all files in the input list +/// that have the specified `tag` in their `tags` list. pub fn find_files_for_tag(files: &[FileEntry], tag: &Tag) -> Vec { files .iter() @@ -333,9 +457,21 @@ pub fn find_files_for_tag(files: &[FileEntry], tag: &Tag) -> Vec { .collect() } -/// Collect all unique owners from CODEOWNERS entries +/// Collects all unique `Owner` instances from a list of `CodeownersEntry` items. +/// +/// This function iterates through all provided entries and their associated owners, +/// adding each owner to a `HashSet` to ensure uniqueness. The resulting set is then +/// converted into a vector. +/// +/// # Arguments +/// +/// * `entries`: A slice of `CodeownersEntry` structs. +/// +/// # Returns +/// +/// Returns a `Vec` containing all unique owners found across all entries. 
pub fn collect_owners(entries: &[CodeownersEntry]) -> Vec { - let mut owners = std::collections::HashSet::new(); + let mut owners = std::collections::HashSet::new(); // Use HashSet for automatic deduplication for entry in entries { for owner in &entry.owners { @@ -346,9 +482,21 @@ pub fn collect_owners(entries: &[CodeownersEntry]) -> Vec { owners.into_iter().collect() } -/// Collect all unique tags from CODEOWNERS entries +/// Collects all unique `Tag` instances from a list of `CodeownersEntry` items. +/// +/// This function iterates through all provided entries and their associated tags, +/// adding each tag to a `HashSet` to ensure uniqueness. The resulting set is then +/// converted into a vector. +/// +/// # Arguments +/// +/// * `entries`: A slice of `CodeownersEntry` structs. +/// +/// # Returns +/// +/// Returns a `Vec` containing all unique tags found across all entries. pub fn collect_tags(entries: &[CodeownersEntry]) -> Vec { - let mut tags = std::collections::HashSet::new(); + let mut tags = std::collections::HashSet::new(); // Use HashSet for automatic deduplication for entry in entries { for tag in &entry.tags { @@ -359,10 +507,80 @@ pub fn collect_tags(entries: &[CodeownersEntry]) -> Vec { tags.into_iter().collect() } +/// Calculates a hash representing the state of a Git repository. +/// +/// This hash is intended for cache validation to detect changes in the repository +/// that might invalidate a previously generated `CodeownersCache`. The hash incorporates: +/// 1. The OID (hash) of the commit pointed to by HEAD (or a zero OID if HEAD is unborn). +/// 2. The OID of the tree object representing the Git index (staging area). +/// 3. A hash of the diff between the index and the working directory (unstaged changes), +/// including untracked files. +/// **Note**: The current implementation of hashing unstaged changes has a TODO regarding +/// its correctness and the exclusion of the cache file itself from this hash. 
+/// +/// These components are combined using SHA-256 to produce a final 32-byte hash. +/// +/// # Arguments +/// +/// * `repo_path`: A `Path` reference to the root of the Git repository. +/// +/// # Returns +/// +/// Returns a `Result` containing a 32-byte array (`[u8; 32]`) representing the +/// repository state hash. An `Error` can occur if the repository cannot be opened, +/// the index cannot be accessed, or diffing fails. pub fn get_repo_hash(repo_path: &Path) -> Result<[u8; 32]> { let repo = Repository::open(repo_path) .map_err(|e| Error::with_source("Failed to open repo", Box::new(e)))?; + // 1. Get HEAD commit OID (or zeros if unborn/detached HEAD with no commit) + let head_oid = repo + .head() // Get a reference to HEAD + .and_then(|r| r.resolve()) // Resolve symbolic refs like 'refs/heads/main' to a direct ref + .and_then(|r| Ok(r.target())) // Get the OID of the commit pointed to by the resolved ref + .unwrap_or(None); // If any step fails (e.g., unborn HEAD), default to None + + // 2. Get index/staging area tree OID + let mut index = repo + .index() + .map_err(|e| Error::with_source("Failed to get index", Box::new(e)))?; + + let index_tree_oid = index // OID of the tree object representing the index + .write_tree() // Writes the current index as a tree object to the ODB, returns its OID + .map_err(|e| Error::with_source("Failed to write index tree", Box::new(e)))?; + + // 3. Calculate hash of unstaged changes (workdir vs index), including untracked files. + // TODO: This part needs careful review. + // - Excluding `.codeowners.cache` (or the configured cache file) is crucial to prevent the hash + // from changing simply because the cache was updated by this tool. + // - The method of hashing the diff patch might be sensitive to git configuration (e.g., whitespace). + // A more robust approach might involve hashing specific attributes of diff entries (paths, modes, OIDs of blobs for modified files). 
+ let unstaged_hash = { + let diff = repo + .diff_index_to_workdir(None, Some(DiffOptions::new().include_untracked(true))) // Diff index to workdir + .map_err(|e| Error::with_source("Failed to get diff", Box::new(e)))?; + + let mut hasher = Sha256::new(); + diff.print(DiffFormat::Patch, |_, _, line| { // Iterate over lines in the patch + hasher.update(line.content()); // Add line content to hash + true // Continue processing + }) + .map_err(|e| Error::with_source("Failed to print diff (for hashing)", Box::new(e)))?; + hasher.finalize() + }; + + // 4. Combine all components into a final hash + let mut final_hasher = Sha256::new(); + final_hasher.update(head_oid.unwrap_or(git2::Oid::zero()).as_bytes()); // Use zero OID if head_oid is None + final_hasher.update(index_tree_oid.as_bytes()); + final_hasher.update(&unstaged_hash); + + Ok(final_hasher.finalize().into()) // Convert GenericArray to [u8; 32] +} + +#[cfg(test)] +mod tests { + // 1. Get HEAD commit hash (or zeros if unborn) let head_oid = repo .head() diff --git a/src/core/mod.rs b/src/core/mod.rs index c76acbc..7d16902 100644 --- a/src/core/mod.rs +++ b/src/core/mod.rs @@ -6,6 +6,26 @@ pub mod types; use crate::utils::error::Result; +// Module-level comments +// # Core Logic Module +// +// This module (`core/mod.rs`) serves as the central hub for the application's +// core business logic. It re-exports and organizes functionalities from its submodules: +// +// - `cache`: Handles caching mechanisms, likely for CODEOWNERS data or other processed information. +// - `commands`: Contains the implementations for the various CLI commands. +// - `common`: Provides shared utilities or data structures used across the core module. +// - `parse`: Implements parsing logic, especially for CODEOWNERS files. +// - `types`: Defines core data types and structures used throughout the application. + +/// Placeholder function, currently does nothing. 
+/// +/// This function is intended to be an entry point or initialization routine for +/// core functionalities, but it is not yet implemented. +/// +/// # Returns +/// +/// Returns `Ok(())` indicating successful execution (though it performs no actions). pub fn start() -> Result<()> { // does nothing diff --git a/src/core/parse.rs b/src/core/parse.rs index 8890a64..7a80bb2 100644 --- a/src/core/parse.rs +++ b/src/core/parse.rs @@ -1,3 +1,19 @@ +// Module-level comments +//! # Repository Parsing Orchestration +//! +//! This module is responsible for the high-level orchestration of parsing a repository +//! to gather `CODEOWNERS` information and build a cache. It leverages utility functions +//! from `crate::core::common` for discovering and parsing individual `CODEOWNERS` files, +//! and `crate::core::cache` for building and storing the resulting `CodeownersCache`. +//! +//! The main entry point here, `parse_repo`, encapsulates the end-to-end process of: +//! 1. Finding all `CODEOWNERS` files within the repository. +//! 2. Parsing each of these files into `CodeownersEntry` objects. +//! 3. Discovering all other relevant files in the repository. +//! 4. Calculating a hash of the repository's current state. +//! 5. Building a `CodeownersCache` containing all this information. +//! 6. Storing the cache to a specified file. + use crate::utils::error::Result; use super::{ @@ -6,6 +22,42 @@ use super::{ types::{CacheEncoding, CodeownersCache, CodeownersEntry}, }; +/// Parses all `CODEOWNERS` files in a given repository, builds a cache, and stores it. +/// +/// This function orchestrates the entire process of analyzing a repository for `CODEOWNERS` +/// information. It performs the following steps: +/// +/// 1. **Find `CODEOWNERS` Files**: Recursively searches the `repo` path for all files named `CODEOWNERS` +/// using `find_codeowners_files`. +/// 2. 
**Parse `CODEOWNERS` Files**: Each found `CODEOWNERS` file is parsed into a collection of +/// `CodeownersEntry` structs using `parse_codeowners`. Entries from all files are aggregated. +/// 3. **Find All Project Files**: All other files within the `repo` path (excluding `CODEOWNERS` +/// files themselves) are listed using `find_files`. +/// 4. **Calculate Repository Hash**: A hash representing the current state of the repository +/// (HEAD commit, index, unstaged changes) is calculated using `get_repo_hash`. This hash +/// is stored in the cache for validation purposes. +/// 5. **Build Cache**: A `CodeownersCache` is constructed using `build_cache`, containing the +/// parsed entries, the list of all project files, and the repository hash. +/// 6. **Store Cache**: The newly built cache is serialized (currently using `Bincode` encoding) +/// and written to the location specified by `cache_file` (relative to the `repo` path) +/// using `store_cache`. +/// +/// Informational messages are printed to the console during the process. +/// +/// # Arguments +/// +/// * `repo`: A `Path` reference to the root directory of the repository to be parsed. +/// * `cache_file`: A `Path` reference to the file where the generated cache should be stored. +/// This path is typically relative to the `repo` path (e.g., ".codeowners.cache"). +/// +/// # Returns +/// +/// Returns a `Result` containing the newly built `CodeownersCache` on success. +/// An `Error` is returned if any critical step fails, such as: +/// - Failure to read directories or files. +/// - Errors during the parsing of `CODEOWNERS` files. +/// - Inability to calculate the repository hash. +/// - Errors during cache building or storage. 
pub fn parse_repo(repo: &std::path::Path, cache_file: &std::path::Path) -> Result { println!("Parsing CODEOWNERS files at {}", repo.display()); diff --git a/src/core/types.rs b/src/core/types.rs index 870068d..4b5f0f1 100644 --- a/src/core/types.rs +++ b/src/core/types.rs @@ -2,30 +2,67 @@ use std::path::PathBuf; use serde::{Deserialize, Serialize}; -/// CODEOWNERS entry with source tracking +// Module-level comments +// # Core Data Types +// +// This module defines the primary data structures and enumerations used throughout the +// core logic of the application. These types are essential for representing `CODEOWNERS` +// file rules, ownership information, cached data, and various configuration options +// like output formats and cache encodings. +// +// The main types include: +// - `CodeownersEntry`: Represents a single rule from a `CODEOWNERS` file. +// - `Owner`: Details an owner, including their identifier and type. +// - `OwnerType`: Categorizes owners (e.g., User, Team, Email). +// - `Tag`: Represents a tag associated with a `CODEOWNERS` rule. +// - `FileEntry`: Stores ownership and tag information for a specific file path in the cache. +// - `CodeownersCache`: The main cache structure holding parsed entries, file ownership data, +// and aggregated lookup maps for owners and tags. +// - `OutputFormat`: Enum for specifying how command output should be formatted. +// - `CacheEncoding`: Enum for specifying the serialization format for the cache. + +/// Represents a single parsed entry (rule) from a `CODEOWNERS` file. +/// +/// Each entry links a file pattern to a set of owners and tags, and also tracks +/// its origin (source file and line number) for better traceability and debugging. #[derive(Debug, Serialize, Deserialize)] pub struct CodeownersEntry { + /// The path to the `CODEOWNERS` file from which this entry was parsed. pub source_file: PathBuf, + /// The line number in the `source_file` where this entry was defined. 
pub line_number: usize, + /// The file path pattern (e.g., `*.rs`, `/docs/`) that this rule applies to. pub pattern: String, + /// A list of `Owner`s associated with this pattern. pub owners: Vec, + /// A list of `Tag`s associated with this pattern. pub tags: Vec, } -/// Detailed owner representation +/// Represents an owner, identified by a string and classified by an `OwnerType`. +/// +/// Owners can be individuals (users, emails) or groups (teams). #[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq, Hash)] pub struct Owner { + /// The unique identifier for the owner. + /// Examples: `@username`, `@org/team-name`, `user@example.com`. pub identifier: String, + /// The type of the owner (e.g., User, Team, Email). pub owner_type: OwnerType, } -/// Owner type classification +/// Enumerates the different types of owners that can be specified in a `CODEOWNERS` file. #[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq, Hash)] pub enum OwnerType { + /// A GitHub user, typically prefixed with `@` (e.g., `@username`). User, + /// A GitHub team, typically in the format `@organization/team-name`. Team, + /// An email address. Email, + /// Indicates that a pattern has no designated owner (e.g., using `NOOWNER`). Unowned, + /// The owner type could not be determined from the identifier string. Unknown, } @@ -41,14 +78,26 @@ impl std::fmt::Display for OwnerType { } } -/// Tag representation +/// Represents a tag associated with a `CODEOWNERS` rule. +/// +/// Tags are simple string identifiers, often used for categorization or +/// additional metadata (e.g., `#frontend`, `#security`). The leading `#` +/// is part of the tag's representation in the file but is typically stripped +/// for storage in this struct. #[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq, Hash)] -pub struct Tag(pub String); +pub struct Tag( + /// The name of the tag (e.g., "frontend", "security"). + pub String, +); +/// Specifies the desired format for command output. 
#[derive(Clone, Debug, Eq, PartialEq)] pub enum OutputFormat { + /// Human-readable plain text, often formatted as a table. Text, + /// Machine-readable JSON format. Json, + /// Machine-readable Bincode format (binary serialization). Bincode, } @@ -63,22 +112,45 @@ impl std::fmt::Display for OutputFormat { } // Cache related types -/// File entry in the ownership cache + +/// Represents a file within the repository and its associated owners and tags, +/// as determined by the `CODEOWNERS` rules. This struct is primarily used within +/// the `CodeownersCache`. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct FileEntry { + /// The path to the file within the repository. pub path: PathBuf, + /// A list of `Owner`s determined for this file. pub owners: Vec, + /// A list of `Tag`s determined for this file. pub tags: Vec, } -/// Cache for storing parsed CODEOWNERS information +/// The main cache structure for storing processed `CODEOWNERS` data. +/// +/// This cache holds all parsed entries, a list of all files with their resolved +/// ownership, and pre-computed maps for quick lookups of files by owner or tag. +/// It also stores a repository hash to help determine if the cache is stale. +/// +/// Note: This struct has custom `Serialize` and `Deserialize` implementations +/// to handle the `HashMap` fields correctly, as `serde`'s default derive +/// might not be optimal or directly applicable for all map key types without +/// specific feature flags or wrapper types. #[derive(Debug)] pub struct CodeownersCache { + /// A hash representing the state of the repository when the cache was built. + /// Used to validate cache freshness. pub hash: [u8; 32], + /// A list of all original `CodeownersEntry` items parsed from all `CODEOWNERS` files. pub entries: Vec, + /// A list of `FileEntry` items, where each entry details a specific file's path, + /// its resolved owners, and its resolved tags. 
pub files: Vec, - // Derived data for lookups + /// A map where keys are `Owner`s and values are lists of `PathBuf`s + /// representing the files owned by that owner. This is derived data for quick lookups. pub owners_map: std::collections::HashMap>, + /// A map where keys are `Tag`s and values are lists of `PathBuf`s + /// representing the files associated with that tag. This is derived data for quick lookups. pub tags_map: std::collections::HashMap>, } @@ -144,8 +216,11 @@ impl<'de> Deserialize<'de> for CodeownersCache { } } +/// Specifies the encoding format for serializing and deserializing the `CodeownersCache`. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum CacheEncoding { + /// Bincode: A compact binary serialization format. Bincode, + /// Json: A human-readable JSON format. Json, } diff --git a/src/lib.rs b/src/lib.rs index 0284a01..97b826d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,9 @@ +//! # Library Crate Root +//! +//! This file (`lib.rs`) serves as the root of the library crate. +//! Its primary purpose is to declare the module structure of the application, +//! making various parts of the application accessible under a common crate namespace. + pub(crate) mod cli; pub(crate) mod core; pub(crate) mod utils; diff --git a/src/main.rs b/src/main.rs index 2399d59..294a012 100644 --- a/src/main.rs +++ b/src/main.rs @@ -4,6 +4,15 @@ use human_panic::setup_panic; #[cfg(debug_assertions)] extern crate better_panic; +// Module-level comments +// # Main Entry Point +// +// This file serves as the main entry point for the application. It is responsible for: +// - Setting up panic handlers for improved error reporting. +// - Initializing the logging system. +// - Loading and initializing application configuration. +// - Parsing and matching command-line arguments to execute corresponding commands. 
+ pub(crate) mod cli; pub(crate) mod core; pub(crate) mod utils; @@ -12,6 +21,14 @@ use crate::utils::app_config::AppConfig; use crate::utils::error::Result; /// The main entry point of the application. +/// +/// This function orchestrates the startup of the application. Its key responsibilities include: +/// - Setting up panic handlers: `human_panic` for release builds and `better_panic` for debug builds. +/// - Initializing the logging infrastructure using `utils::logger::setup_logging`. +/// - Loading the application's configuration from `resources/default_config.toml` +/// and initializing the `AppConfig`. +/// - Parsing command-line arguments and dispatching to the appropriate command handlers +/// via `cli::cli_match`. fn main() -> Result<()> { // Human Panic. Only enabled when *not* debugging. #[cfg(not(debug_assertions))]