From b082da65bf1991a87ca1d8eaf4e670c17ea004de Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Sat, 7 Dec 2024 00:41:08 +0100 Subject: [PATCH 1/8] stat: fix the quotes when dealing with %N and other formats should fix tests/stat/stat-fmt.sh --- src/uu/stat/src/stat.rs | 65 ++++++++++++++++++++++++++++++++++---- tests/by-util/test_stat.rs | 21 ++++++++++++ 2 files changed, 80 insertions(+), 6 deletions(-) diff --git a/src/uu/stat/src/stat.rs b/src/uu/stat/src/stat.rs index ee417834461..31c540b1301 100644 --- a/src/uu/stat/src/stat.rs +++ b/src/uu/stat/src/stat.rs @@ -19,10 +19,10 @@ use chrono::{DateTime, Local}; use clap::{crate_version, Arg, ArgAction, ArgMatches, Command}; use std::borrow::Cow; use std::ffi::{OsStr, OsString}; -use std::fs; use std::os::unix::fs::{FileTypeExt, MetadataExt}; use std::os::unix::prelude::OsStrExt; use std::path::Path; +use std::{env, fs}; const ABOUT: &str = help_about!("stat.md"); const USAGE: &str = help_usage!("stat.md"); @@ -293,6 +293,21 @@ fn print_str(s: &str, flags: &Flags, width: usize, precision: Option) { pad_and_print(s, flags.left, width, Padding::Space); } +fn quote_file_name(file_name: &str, quoting_style: &str) -> String { + match quoting_style { + "locale" | "shell" => { + // Escape single quotes and enclose in single quotes + let escaped = file_name.replace('\'', r"\'"); + format!("'{}'", escaped) + } + "default" => { + format!("\"{}\"", file_name) + } + "quote" => file_name.to_string(), + _ => file_name.quote().to_string(), + } +} + /// Prints an integer value based on the provided flags, width, and precision. /// /// # Arguments @@ -431,12 +446,10 @@ impl Stater { ' ' => flag.space = true, '+' => flag.sign = true, '\'' => flag.group = true, - 'I' => unimplemented!(), _ => break, } *i += 1; } - check_bound(format_str, bound, old, *i)?; let mut width = 0; let mut precision = None; @@ -465,9 +478,27 @@ impl Stater { } *i = j; + + // Check for multi-character specifiers (e.g., `%Hd`, `%Lr`) + if *i + 1 < bound { + if let Some(&next_char) = chars.get(*i + 1) { + if (chars[*i] == 'H' || chars[*i] == 'L') && (next_char == 'd' || next_char == 'r') + { + let specifier = format!("{}{}", chars[*i], next_char); + *i += 1; + return Ok(Token::Directive { + flag, + width, + precision, + format: specifier.chars().next().unwrap(), + }); + } + } + } + Ok(Token::Directive { - width, flag, + width, precision, format: chars[*i], }) @@ -778,6 +809,8 @@ impl Stater { 'n' => OutputType::Str(display_name.to_string()), // quoted file name with dereference if symbolic link 'N' => { + let quoting_style = env::var("QUOTING_STYLE") + .unwrap_or_else(|_| "default".to_string()); let file_name = if file_type.is_symlink() { let dst = match fs::read_link(&file) { Ok(path) => path, @@ -786,12 +819,24 @@ impl Stater { return 1; } }; - format!("{} -> {}", display_name.quote(), dst.quote()) + format!( + "{} -> {}", + quote_file_name(&display_name, "ing_style), + quote_file_name( + &dst.to_string_lossy(), + "ing_style + ) + ) } else { - display_name.to_string() + if self.from_user { + quote_file_name(&display_name, "ing_style) + } else { + quote_file_name(&display_name, "quote") + } }; OutputType::Str(file_name) } + // optimal I/O transfer size hint 'o' => OutputType::Unsigned(meta.blksize()), // total size, in bytes @@ -842,6 +887,14 @@ impl Stater { )), // time of last status change, seconds since Epoch 'Z' => OutputType::Integer(meta.ctime()), + 'R' => { + let major = meta.rdev() >> 8; + let minor = meta.rdev() & 0xff; + OutputType::Str(format!("{},{}", major, minor)) + } + 'r' => OutputType::Unsigned(meta.rdev()), + 'H' => OutputType::Unsigned(meta.rdev() >> 8), // Major in decimal + 'L' => OutputType::Unsigned(meta.rdev() & 0xff), // Minor in decimal _ => OutputType::Unknown, }; diff --git a/tests/by-util/test_stat.rs b/tests/by-util/test_stat.rs index 8cb4493f0eb..002f9ee2293 100644 --- a/tests/by-util/test_stat.rs +++ b/tests/by-util/test_stat.rs @@ -352,3 +352,24 @@ fn test_without_argument() { .fails() .stderr_contains("missing operand\nTry 'stat --help' for more information."); } + +#[test] +fn test_quoting_style_locale() { + let ts = TestScenario::new(util_name!()); + let at = &ts.fixtures; + at.touch("'"); + ts.ucmd() + .env("QUOTING_STYLE", "locale") + .args(&["-c", "%N", "'"]) + .run() + .no_stderr() + .stdout_is("'\\''\n") + .succeeded(); + + ts.ucmd() + .args(&["-c", "%N", "'"]) + .run() + .no_stderr() + .stdout_is("\"'\"\n") + .succeeded(); +} From 25bae640cdb08cf72abbb9d4422e5deb6ec4ccce Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Sat, 7 Dec 2024 00:56:40 +0100 Subject: [PATCH 2/8] stats: use an enum instead of a string --- src/uu/stat/src/stat.rs | 69 +++++++++++++++++++++++++++-------------- 1 file changed, 45 insertions(+), 24 deletions(-) diff --git a/src/uu/stat/src/stat.rs b/src/uu/stat/src/stat.rs index 31c540b1301..d5cd5f4b7b4 100644 --- a/src/uu/stat/src/stat.rs +++ b/src/uu/stat/src/stat.rs @@ -93,6 +93,26 @@ pub enum OutputType { Unknown, } +enum QuotingStyle { + Locale, + Shell, + Default, + Quote, +} + +impl std::str::FromStr for QuotingStyle { + type Err = String; + + fn from_str(s: &str) -> Result { + match s { + "locale" => Ok(QuotingStyle::Locale), + "shell" => Ok(QuotingStyle::Shell), + // The others aren't exposed to the user + _ => Err(format!("Invalid quoting style: {}", s)), + } + } +} + #[derive(Debug, PartialEq, Eq)] enum Token { Char(char), @@ -293,18 +313,14 @@ fn print_str(s: &str, flags: &Flags, width: usize, precision: Option) { pad_and_print(s, flags.left, width, Padding::Space); } -fn quote_file_name(file_name: &str, quoting_style: &str) -> String { +fn quote_file_name(file_name: &str, quoting_style: &QuotingStyle) -> String { match quoting_style { - "locale" | "shell" => { - // Escape single quotes and enclose in single quotes + QuotingStyle::Locale | QuotingStyle::Shell => { let escaped = file_name.replace('\'', r"\'"); format!("'{}'", escaped) } - "default" => { - format!("\"{}\"", file_name) - } - "quote" => file_name.to_string(), - _ => file_name.quote().to_string(), + QuotingStyle::Default => format!("\"{}\"", file_name), + QuotingStyle::Quote => file_name.to_string(), } } @@ -810,30 +826,35 @@ impl Stater { // quoted file name with dereference if symbolic link 'N' => { let quoting_style = env::var("QUOTING_STYLE") - .unwrap_or_else(|_| "default".to_string()); + .ok() + .and_then(|style| style.parse().ok()) + .unwrap_or(QuotingStyle::Default); + let file_name = if file_type.is_symlink() { - let dst = match fs::read_link(&file) { - Ok(path) => path, + let quoted_display_name = + quote_file_name(&display_name, "ing_style); + match fs::read_link(&file) { + Ok(dst) => { + let quoted_dst = quote_file_name( + &dst.to_string_lossy(), + "ing_style, + ); + format!("{quoted_display_name} -> {quoted_dst}") + } Err(e) => { println!("{e}"); return 1; } - }; - format!( - "{} -> {}", - quote_file_name(&display_name, "ing_style), - quote_file_name( - &dst.to_string_lossy(), - "ing_style - ) - ) + } } else { - if self.from_user { - quote_file_name(&display_name, "ing_style) + let style = if self.from_user { + quoting_style } else { - quote_file_name(&display_name, "quote") - } + QuotingStyle::Quote + }; + quote_file_name(&display_name, &style) }; + OutputType::Str(file_name) } From 5433c5abe404e477810d8936eb0ec7060f6afec4 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Sat, 7 Dec 2024 01:38:14 +0100 Subject: [PATCH 3/8] stats: split the functions into smaller functions --- src/uu/stat/src/stat.rs | 425 +++++++++++++++++++++------------------- 1 file changed, 226 insertions(+), 199 deletions(-) diff --git a/src/uu/stat/src/stat.rs b/src/uu/stat/src/stat.rs index d5cd5f4b7b4..4093691c0cd 100644 --- a/src/uu/stat/src/stat.rs +++ b/src/uu/stat/src/stat.rs @@ -9,7 +9,9 @@ use uucore::error::{UResult, USimpleError}; use clap::builder::ValueParser; use uucore::display::Quotable; use uucore::fs::display_permissions; -use uucore::fsext::{pretty_filetype, pretty_fstype, read_fs_list, statfs, BirthTime, FsMeta}; +use uucore::fsext::{ + pretty_filetype, pretty_fstype, read_fs_list, statfs, BirthTime, FsMeta, StatFs, +}; use uucore::libc::mode_t; use uucore::{ entries, format_usage, help_about, help_section, help_usage, show_error, show_warning, @@ -19,6 +21,7 @@ use chrono::{DateTime, Local}; use clap::{crate_version, Arg, ArgAction, ArgMatches, Command}; use std::borrow::Cow; use std::ffi::{OsStr, OsString}; +use std::fs::{FileType, Metadata}; use std::os::unix::fs::{FileTypeExt, MetadataExt}; use std::os::unix::prelude::OsStrExt; use std::path::Path; @@ -324,6 +327,81 @@ fn quote_file_name(file_name: &str, quoting_style: &QuotingStyle) -> String { } } +fn get_quoted_file_name( + display_name: &str, + file: &OsString, + file_type: &FileType, + from_user: bool, +) -> Result { + let quoting_style = env::var("QUOTING_STYLE") + .ok() + .and_then(|style| style.parse().ok()) + .unwrap_or(QuotingStyle::Default); + + if file_type.is_symlink() { + let quoted_display_name = quote_file_name(display_name, "ing_style); + match fs::read_link(file) { + Ok(dst) => { + let quoted_dst = quote_file_name(&dst.to_string_lossy(), "ing_style); + Ok(format!("{quoted_display_name} -> {quoted_dst}")) + } + Err(e) => { + show_error!("{e}"); + Err(1) + } + } + } else { + let style = if from_user { + quoting_style + } else { + QuotingStyle::Quote + }; + Ok(quote_file_name(display_name, &style)) + } +} + +fn process_token_fs(t: &Token, meta: StatFs, display_name: &str) { + match *t { + Token::Char(c) => print!("{c}"), + Token::Directive { + flag, + width, + precision, + format, + } => { + let output = match format { + // free blocks available to non-superuser + 'a' => OutputType::Unsigned(meta.avail_blocks()), + // total data blocks in file system + 'b' => OutputType::Unsigned(meta.total_blocks()), + // total file nodes in file system + 'c' => OutputType::Unsigned(meta.total_file_nodes()), + // free file nodes in file system + 'd' => OutputType::Unsigned(meta.free_file_nodes()), + // free blocks in file system + 'f' => OutputType::Unsigned(meta.free_blocks()), + // file system ID in hex + 'i' => OutputType::UnsignedHex(meta.fsid()), + // maximum length of filenames + 'l' => OutputType::Unsigned(meta.namelen()), + // file name + 'n' => OutputType::Str(display_name.to_string()), + // block size (for faster transfers) + 's' => OutputType::Unsigned(meta.io_size()), + // fundamental block size (for block counts) + 'S' => OutputType::Integer(meta.block_size()), + // file system type in hex + 't' => OutputType::UnsignedHex(meta.fs_type() as u64), + // file system type in human readable form + 'T' => OutputType::Str(pretty_fstype(meta.fs_type()).into()), + _ => OutputType::Unknown, + }; + + print_it(&output, flag, width, precision); + } + } +} + /// Prints an integer value based on the provided flags, width, and precision. /// /// # Arguments @@ -435,6 +513,21 @@ fn print_unsigned_hex( } impl Stater { + fn process_flags(chars: &[char], i: &mut usize, bound: usize, flag: &mut Flags) { + while *i < bound { + match chars[*i] { + '#' => flag.alter = true, + '0' => flag.zero = true, + '-' => flag.left = true, + ' ' => flag.space = true, + '+' => flag.sign = true, + '\'' => flag.group = true, + _ => break, + } + *i += 1; + } + } + fn handle_percent_case( chars: &[char], i: &mut usize, @@ -454,18 +547,7 @@ impl Stater { let mut flag = Flags::default(); - while *i < bound { - match chars[*i] { - '#' => flag.alter = true, - '0' => flag.zero = true, - '-' => flag.left = true, - ' ' => flag.space = true, - '+' => flag.sign = true, - '\'' => flag.group = true, - _ => break, - } - *i += 1; - } + Self::process_flags(chars, i, bound, &mut flag); let mut width = 0; let mut precision = None; @@ -681,7 +763,126 @@ impl Stater { ret } - #[allow(clippy::cognitive_complexity)] + fn process_token_files( + &self, + t: &Token, + meta: &Metadata, + display_name: &str, + file: &OsString, + file_type: &FileType, + from_user: bool, + ) -> Result<(), i32> { + match *t { + Token::Char(c) => print!("{c}"), + Token::Directive { + flag, + width, + precision, + format, + } => { + let output = match format { + // access rights in octal + 'a' => OutputType::UnsignedOct(0o7777 & meta.mode()), + // access rights in human readable form + 'A' => OutputType::Str(display_permissions(meta, true)), + // number of blocks allocated (see %B) + 'b' => OutputType::Unsigned(meta.blocks()), + + // the size in bytes of each block reported by %b + // FIXME: blocksize differs on various platform + // See coreutils/gnulib/lib/stat-size.h ST_NBLOCKSIZE // spell-checker:disable-line + 'B' => OutputType::Unsigned(512), + + // device number in decimal + 'd' => OutputType::Unsigned(meta.dev()), + // device number in hex + 'D' => OutputType::UnsignedHex(meta.dev()), + // raw mode in hex + 'f' => OutputType::UnsignedHex(meta.mode() as u64), + // file type + 'F' => OutputType::Str( + pretty_filetype(meta.mode() as mode_t, meta.len()).to_owned(), + ), + // group ID of owner + 'g' => OutputType::Unsigned(meta.gid() as u64), + // group name of owner + 'G' => { + let group_name = + entries::gid2grp(meta.gid()).unwrap_or_else(|_| "UNKNOWN".to_owned()); + OutputType::Str(group_name) + } + // number of hard links + 'h' => OutputType::Unsigned(meta.nlink()), + // inode number + 'i' => OutputType::Unsigned(meta.ino()), + // mount point + 'm' => OutputType::Str(self.find_mount_point(file).unwrap()), + // file name + 'n' => OutputType::Str(display_name.to_string()), + // quoted file name with dereference if symbolic link + 'N' => { + let file_name = + get_quoted_file_name(display_name, file, file_type, from_user)?; + OutputType::Str(file_name) + } + // optimal I/O transfer size hint + 'o' => OutputType::Unsigned(meta.blksize()), + // total size, in bytes + 's' => OutputType::Integer(meta.len() as i64), + // major device type in hex, for character/block device special + // files + 't' => OutputType::UnsignedHex(meta.rdev() >> 8), + // minor device type in hex, for character/block device special + // files + 'T' => OutputType::UnsignedHex(meta.rdev() & 0xff), + // user ID of owner + 'u' => OutputType::Unsigned(meta.uid() as u64), + // user name of owner + 'U' => { + let user_name = + entries::uid2usr(meta.uid()).unwrap_or_else(|_| "UNKNOWN".to_owned()); + OutputType::Str(user_name) + } + + // time of file birth, human-readable; - if unknown + 'w' => OutputType::Str( + meta.birth() + .map(|(sec, nsec)| pretty_time(sec as i64, nsec as i64)) + .unwrap_or(String::from("-")), + ), + + // time of file birth, seconds since Epoch; 0 if unknown + 'W' => OutputType::Unsigned(meta.birth().unwrap_or_default().0), + + // time of last access, human-readable + 'x' => OutputType::Str(pretty_time(meta.atime(), meta.atime_nsec())), + // time of last access, seconds since Epoch + 'X' => OutputType::Integer(meta.atime()), + // time of last data modification, human-readable + 'y' => OutputType::Str(pretty_time(meta.mtime(), meta.mtime_nsec())), + // time of last data modification, seconds since Epoch + 'Y' => OutputType::Integer(meta.mtime()), + // time of last status change, human-readable + 'z' => OutputType::Str(pretty_time(meta.ctime(), meta.ctime_nsec())), + // time of last status change, seconds since Epoch + 'Z' => OutputType::Integer(meta.ctime()), + 'R' => { + let major = meta.rdev() >> 8; + let minor = meta.rdev() & 0xff; + OutputType::Str(format!("{},{}", major, minor)) + } + 'r' => OutputType::Unsigned(meta.rdev()), + 'H' => OutputType::Unsigned(meta.rdev() >> 8), // Major in decimal + 'L' => OutputType::Unsigned(meta.rdev() & 0xff), // Minor in decimal + + _ => OutputType::Unknown, + }; + print_it(&output, flag, width, precision); + } + } + Ok(()) + } + fn do_stat(&self, file: &OsStr, stdin_is_fifo: bool) -> i32 { let display_name = file.to_string_lossy(); let file = if cfg!(unix) && display_name == "-" { @@ -706,46 +907,9 @@ impl Stater { Ok(meta) => { let tokens = &self.default_tokens; + // Usage for t in tokens { - match *t { - Token::Char(c) => print!("{c}"), - Token::Directive { - flag, - width, - precision, - format, - } => { - let output = match format { - // free blocks available to non-superuser - 'a' => OutputType::Unsigned(meta.avail_blocks()), - // total data blocks in file system - 'b' => OutputType::Unsigned(meta.total_blocks()), - // total file nodes in file system - 'c' => OutputType::Unsigned(meta.total_file_nodes()), - // free file nodes in file system - 'd' => OutputType::Unsigned(meta.free_file_nodes()), - // free blocks in file system - 'f' => OutputType::Unsigned(meta.free_blocks()), - // file system ID in hex - 'i' => OutputType::UnsignedHex(meta.fsid()), - // maximum length of filenames - 'l' => OutputType::Unsigned(meta.namelen()), - // file name - 'n' => OutputType::Str(display_name.to_string()), - // block size (for faster transfers) - 's' => OutputType::Unsigned(meta.io_size()), - // fundamental block size (for block counts) - 'S' => OutputType::Integer(meta.block_size()), - // file system type in hex - 't' => OutputType::UnsignedHex(meta.fs_type() as u64), - // file system type in human readable form - 'T' => OutputType::Str(pretty_fstype(meta.fs_type()).into()), - _ => OutputType::Unknown, - }; - - print_it(&output, flag, width, precision); - } - } + process_token_fs(t, meta, &display_name); } } Err(e) => { @@ -775,152 +939,15 @@ impl Stater { }; for t in tokens { - match *t { - Token::Char(c) => print!("{c}"), - Token::Directive { - flag, - width, - precision, - format, - } => { - let output = match format { - // access rights in octal - 'a' => OutputType::UnsignedOct(0o7777 & meta.mode()), - // access rights in human readable form - 'A' => OutputType::Str(display_permissions(&meta, true)), - // number of blocks allocated (see %B) - 'b' => OutputType::Unsigned(meta.blocks()), - - // the size in bytes of each block reported by %b - // FIXME: blocksize differs on various platform - // See coreutils/gnulib/lib/stat-size.h ST_NBLOCKSIZE // spell-checker:disable-line - 'B' => OutputType::Unsigned(512), - - // device number in decimal - 'd' => OutputType::Unsigned(meta.dev()), - // device number in hex - 'D' => OutputType::UnsignedHex(meta.dev()), - // raw mode in hex - 'f' => OutputType::UnsignedHex(meta.mode() as u64), - // file type - 'F' => OutputType::Str( - pretty_filetype(meta.mode() as mode_t, meta.len()) - .to_owned(), - ), - // group ID of owner - 'g' => OutputType::Unsigned(meta.gid() as u64), - // group name of owner - 'G' => { - let group_name = entries::gid2grp(meta.gid()) - .unwrap_or_else(|_| "UNKNOWN".to_owned()); - OutputType::Str(group_name) - } - // number of hard links - 'h' => OutputType::Unsigned(meta.nlink()), - // inode number - 'i' => OutputType::Unsigned(meta.ino()), - // mount point - 'm' => OutputType::Str(self.find_mount_point(&file).unwrap()), - // file name - 'n' => OutputType::Str(display_name.to_string()), - // quoted file name with dereference if symbolic link - 'N' => { - let quoting_style = env::var("QUOTING_STYLE") - .ok() - .and_then(|style| style.parse().ok()) - .unwrap_or(QuotingStyle::Default); - - let file_name = if file_type.is_symlink() { - let quoted_display_name = - quote_file_name(&display_name, "ing_style); - match fs::read_link(&file) { - Ok(dst) => { - let quoted_dst = quote_file_name( - &dst.to_string_lossy(), - "ing_style, - ); - format!("{quoted_display_name} -> {quoted_dst}") - } - Err(e) => { - println!("{e}"); - return 1; - } - } - } else { - let style = if self.from_user { - quoting_style - } else { - QuotingStyle::Quote - }; - quote_file_name(&display_name, &style) - }; - - OutputType::Str(file_name) - } - - // optimal I/O transfer size hint - 'o' => OutputType::Unsigned(meta.blksize()), - // total size, in bytes - 's' => OutputType::Integer(meta.len() as i64), - // major device type in hex, for character/block device special - // files - 't' => OutputType::UnsignedHex(meta.rdev() >> 8), - // minor device type in hex, for character/block device special - // files - 'T' => OutputType::UnsignedHex(meta.rdev() & 0xff), - // user ID of owner - 'u' => OutputType::Unsigned(meta.uid() as u64), - // user name of owner - 'U' => { - let user_name = entries::uid2usr(meta.uid()) - .unwrap_or_else(|_| "UNKNOWN".to_owned()); - OutputType::Str(user_name) - } - - // time of file birth, human-readable; - if unknown - 'w' => OutputType::Str( - meta.birth() - .map(|(sec, nsec)| pretty_time(sec as i64, nsec as i64)) - .unwrap_or(String::from("-")), - ), - - // time of file birth, seconds since Epoch; 0 if unknown - 'W' => OutputType::Unsigned(meta.birth().unwrap_or_default().0), - - // time of last access, human-readable - 'x' => OutputType::Str(pretty_time( - meta.atime(), - meta.atime_nsec(), - )), - // time of last access, seconds since Epoch - 'X' => OutputType::Integer(meta.atime()), - // time of last data modification, human-readable - 'y' => OutputType::Str(pretty_time( - meta.mtime(), - meta.mtime_nsec(), - )), - // time of last data modification, seconds since Epoch - 'Y' => OutputType::Integer(meta.mtime()), - // time of last status change, human-readable - 'z' => OutputType::Str(pretty_time( - meta.ctime(), - meta.ctime_nsec(), - )), - // time of last status change, seconds since Epoch - 'Z' => OutputType::Integer(meta.ctime()), - 'R' => { - let major = meta.rdev() >> 8; - let minor = meta.rdev() & 0xff; - OutputType::Str(format!("{},{}", major, minor)) - } - 'r' => OutputType::Unsigned(meta.rdev()), - 'H' => OutputType::Unsigned(meta.rdev() >> 8), // Major in decimal - 'L' => OutputType::Unsigned(meta.rdev() & 0xff), // Minor in decimal - - _ => OutputType::Unknown, - }; - print_it(&output, flag, width, precision); - } + if let Err(code) = self.process_token_files( + t, + &meta, + &display_name, + &file, + &file_type, + self.from_user, + ) { + return code; } } } From 853fc02999c18c0505623a2f619bfb64459c6267 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Sat, 7 Dec 2024 10:10:49 +0100 Subject: [PATCH 4/8] stat: handle byte as a format for better display --- src/uu/stat/src/stat.rs | 73 +++++++++++++++++++++++++------------- tests/by-util/test_stat.rs | 39 ++++++++++++++++++++ util/build-gnu.sh | 3 ++ 3 files changed, 91 insertions(+), 24 deletions(-) diff --git a/src/uu/stat/src/stat.rs b/src/uu/stat/src/stat.rs index 4093691c0cd..acfedbb8c7e 100644 --- a/src/uu/stat/src/stat.rs +++ b/src/uu/stat/src/stat.rs @@ -22,6 +22,7 @@ use clap::{crate_version, Arg, ArgAction, ArgMatches, Command}; use std::borrow::Cow; use std::ffi::{OsStr, OsString}; use std::fs::{FileType, Metadata}; +use std::io::Write; use std::os::unix::fs::{FileTypeExt, MetadataExt}; use std::os::unix::prelude::OsStrExt; use std::path::Path; @@ -119,6 +120,7 @@ impl std::str::FromStr for QuotingStyle { #[derive(Debug, PartialEq, Eq)] enum Token { Char(char), + Byte(u8), Directive { flag: Flags, width: usize, @@ -362,6 +364,7 @@ fn get_quoted_file_name( fn process_token_fs(t: &Token, meta: StatFs, display_name: &str) { match *t { + Token::Byte(byte) => write_raw_byte(byte), Token::Char(c) => print!("{c}"), Token::Directive { flag, @@ -512,6 +515,10 @@ fn print_unsigned_hex( pad_and_print(&s, flags.left, width, padding_char); } +fn write_raw_byte(byte: u8) { + std::io::stdout().write_all(&[byte]).unwrap(); +} + impl Stater { fn process_flags(chars: &[char], i: &mut usize, bound: usize, flag: &mut Flags) { while *i < bound { @@ -614,33 +621,49 @@ impl Stater { return Token::Char('\\'); } match chars[*i] { - 'x' if *i + 1 < bound => { - if let Some((c, offset)) = format_str[*i + 1..].scan_char(16) { - *i += offset; - Token::Char(c) - } else { - show_warning!("unrecognized escape '\\x'"); - Token::Char('x') + 'a' => Token::Byte(0x07), // BEL + 'b' => Token::Byte(0x08), // Backspace + 'f' => Token::Byte(0x0C), // Form feed + 'n' => Token::Byte(0x0A), // Line feed + 'r' => Token::Byte(0x0D), // Carriage return + 't' => Token::Byte(0x09), // Horizontal tab + '\\' => Token::Byte(b'\\'), // Backslash + '\'' => Token::Byte(b'\''), // Single quote + '"' => Token::Byte(b'"'), // Double quote + '0'..='7' => { + // Parse octal escape sequence (up to 3 digits) + let mut value = 0u8; + let mut count = 0; + while *i < bound && count < 3 { + if let Some(digit) = chars[*i].to_digit(8) { + value = value * 8 + digit as u8; + *i += 1; + count += 1; + } else { + break; + } } + *i -= 1; // Adjust index to account for the outer loop increment + Token::Byte(value) } - '0'..='7' => { - let (c, offset) = format_str[*i..].scan_char(8).unwrap(); - *i += offset - 1; - Token::Char(c) + 'x' => { + // Parse hexadecimal escape sequence + if *i + 1 < bound { + if let Some((c, offset)) = format_str[*i + 1..].scan_char(16) { + *i += offset; + Token::Byte(c as u8) + } else { + show_warning!("unrecognized escape '\\x'"); + Token::Byte(b'x') + } + } else { + show_warning!("incomplete hex escape '\\x'"); + Token::Byte(b'x') + } } - '"' => Token::Char('"'), - '\\' => Token::Char('\\'), - 'a' => Token::Char('\x07'), - 'b' => Token::Char('\x08'), - 'e' => Token::Char('\x1B'), - 'f' => Token::Char('\x0C'), - 'n' => Token::Char('\n'), - 'r' => Token::Char('\r'), - 't' => Token::Char('\t'), - 'v' => Token::Char('\x0B'), - c => { - show_warning!("unrecognized escape '\\{}'", c); - Token::Char(c) + other => { + show_warning!("unrecognized escape '\\{}'", other); + Token::Byte(other as u8) } } } @@ -773,7 +796,9 @@ impl Stater { from_user: bool, ) -> Result<(), i32> { match *t { + Token::Byte(byte) => write_raw_byte(byte), Token::Char(c) => print!("{c}"), + Token::Directive { flag, width, diff --git a/tests/by-util/test_stat.rs b/tests/by-util/test_stat.rs index 002f9ee2293..b4c37e07f1e 100644 --- a/tests/by-util/test_stat.rs +++ b/tests/by-util/test_stat.rs @@ -373,3 +373,42 @@ fn test_quoting_style_locale() { .stdout_is("\"'\"\n") .succeeded(); } + +#[test] +fn test_printf_octal_1() { + let ts = TestScenario::new(util_name!()); + let expected_stdout = vec![0x0A, 0xFF]; // Newline + byte 255 + ts.ucmd() + .args(&["--printf=\\012\\377", "."]) + .succeeds() + .stdout_is_bytes(expected_stdout); +} + +#[test] +fn test_printf_octal_2() { + let ts = TestScenario::new(util_name!()); + let expected_stdout = vec![b'.', 0x0A, b'a', 0xFF, b'b']; + ts.ucmd() + .args(&["--printf=.\\012a\\377b", "."]) + .succeeds() + .stdout_is_bytes(expected_stdout); +} + +#[test] +fn test_printf_hex_3() { + let ts = TestScenario::new(util_name!()); + ts.ucmd() + .args(&["--printf=\\x", "."]) + .run() + .stderr_contains("warning: incomplete hex escape"); +} + +#[test] +fn test_printf_bel_etc() { + let ts = TestScenario::new(util_name!()); + let expected_stdout = vec![0x07, 0x08, 0x0C, 0x0A, 0x0D, 0x09]; // BEL, BS, FF, LF, CR, TAB + ts.ucmd() + .args(&["--printf=\\a\\b\\f\\n\\r\\t", "."]) + .succeeds() + .stdout_is_bytes(expected_stdout); +} diff --git a/util/build-gnu.sh b/util/build-gnu.sh index 684187733de..e33e64429bf 100755 --- a/util/build-gnu.sh +++ b/util/build-gnu.sh @@ -204,6 +204,9 @@ sed -i "s|cp: target directory 'symlink': Permission denied|cp: 'symlink' is not # Our message is a bit better sed -i "s|cannot create regular file 'no-such/': Not a directory|'no-such/' is not a directory|" tests/mv/trailing-slash.sh +# Our message is better +sed -i "s|warning: unrecognized escape|warning: incomplete hex escape|" tests/stat/stat-printf.pl + sed -i 's|cp |/usr/bin/cp |' tests/mv/hard-2.sh sed -i 's|paste |/usr/bin/paste |' tests/od/od-endian.sh sed -i 's|timeout |'"${SYSTEM_TIMEOUT}"' |' tests/tail/follow-stdin.sh From 828598dae85fee7c72e54ba3356b8267d6a4a819 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Sat, 7 Dec 2024 10:54:25 +0100 Subject: [PATCH 5/8] stat: handle error better. should make tests/stat/stat-printf.pl pass --- src/uu/stat/src/stat.rs | 9 +++++++++ tests/by-util/test_stat.rs | 17 +++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/src/uu/stat/src/stat.rs b/src/uu/stat/src/stat.rs index acfedbb8c7e..b928bee85d7 100644 --- a/src/uu/stat/src/stat.rs +++ b/src/uu/stat/src/stat.rs @@ -563,6 +563,15 @@ impl Stater { if let Some((field_width, offset)) = format_str[j..].scan_num::() { width = field_width; j += offset; + + // Reject directives like `%` by checking if width has been parsed. + if j >= bound || chars[j] == '%' { + let invalid_directive: String = chars[old..=j.min(bound - 1)].iter().collect(); + return Err(USimpleError::new( + 1, + format!("{}: invalid directive", invalid_directive.quote()), + )); + } } check_bound(format_str, bound, old, j)?; diff --git a/tests/by-util/test_stat.rs b/tests/by-util/test_stat.rs index b4c37e07f1e..70efb1c7a8a 100644 --- a/tests/by-util/test_stat.rs +++ b/tests/by-util/test_stat.rs @@ -412,3 +412,20 @@ fn test_printf_bel_etc() { .succeeds() .stdout_is_bytes(expected_stdout); } + +#[test] +fn test_printf_invalid_directive() { + let ts = TestScenario::new(util_name!()); + + ts.ucmd() + .args(&["--printf=%9", "."]) + .fails() + .code_is(1) + .stderr_contains("'%9': invalid directive"); + + ts.ucmd() + .args(&["--printf=%9%", "."]) + .fails() + .code_is(1) + .stderr_contains("'%9%': invalid directive"); +} From 51b2211d36bb984f5c1283a0b9f189d98f833224 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Sat, 7 Dec 2024 21:48:30 +0100 Subject: [PATCH 6/8] stat: Some escape sequences are non-standard --- src/uu/stat/src/stat.rs | 30 +++++++++++++++--------------- tests/by-util/test_stat.rs | 2 +- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/uu/stat/src/stat.rs b/src/uu/stat/src/stat.rs index b928bee85d7..d68d63d5767 100644 --- a/src/uu/stat/src/stat.rs +++ b/src/uu/stat/src/stat.rs @@ -1173,7 +1173,7 @@ mod tests { #[test] fn printf_format() { - let s = r#"%-# 15a\t\r\"\\\a\b\e\f\v%+020.-23w\x12\167\132\112\n"#; + let s = r#"%-# 15a\t\r\"\\\a\b\x1B\f\x0B%+020.-23w\x12\167\132\112\n"#; let expected = vec![ Token::Directive { flag: Flags { @@ -1186,15 +1186,15 @@ mod tests { precision: None, format: 'a', }, - Token::Char('\t'), - Token::Char('\r'), - Token::Char('"'), - Token::Char('\\'), - Token::Char('\x07'), - Token::Char('\x08'), - Token::Char('\x1B'), - Token::Char('\x0C'), - Token::Char('\x0B'), + Token::Byte(b'\t'), + Token::Byte(b'\r'), + Token::Byte(b'"'), + Token::Byte(b'\\'), + Token::Byte(b'\x07'), + Token::Byte(b'\x08'), + Token::Byte(b'\x1B'), + Token::Byte(b'\x0C'), + Token::Byte(b'\x0B'), Token::Directive { flag: Flags { sign: true, @@ -1205,11 +1205,11 @@ mod tests { precision: None, format: 'w', }, - Token::Char('\x12'), - Token::Char('w'), - Token::Char('Z'), - Token::Char('J'), - Token::Char('\n'), + Token::Byte(b'\x12'), + Token::Byte(b'w'), + Token::Byte(b'Z'), + Token::Byte(b'J'), + Token::Byte(b'\n'), ]; assert_eq!(&expected, &Stater::generate_tokens(s, true).unwrap()); } diff --git a/tests/by-util/test_stat.rs b/tests/by-util/test_stat.rs index 70efb1c7a8a..7f6a5d446dc 100644 --- a/tests/by-util/test_stat.rs +++ b/tests/by-util/test_stat.rs @@ -242,7 +242,7 @@ fn test_multi_files() { #[test] fn test_printf() { let args = [ - "--printf=123%-# 15q\\r\\\"\\\\\\a\\b\\e\\f\\v%+020.23m\\x12\\167\\132\\112\\n", + "--printf=123%-# 15q\\r\\\"\\\\\\a\\b\\x1B\\f\\x0B%+020.23m\\x12\\167\\132\\112\\n", "/", ]; let ts = TestScenario::new(util_name!()); From 070bbe2624b4d68de3c1d175c0ed86b370464dfa Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Sun, 8 Dec 2024 17:32:27 +0100 Subject: [PATCH 7/8] Fix tests --- tests/by-util/test_stat.rs | 41 +++++++++++++++----------------------- 1 file changed, 16 insertions(+), 25 deletions(-) diff --git a/tests/by-util/test_stat.rs b/tests/by-util/test_stat.rs index 7f6a5d446dc..cbd36832f48 100644 --- a/tests/by-util/test_stat.rs +++ b/tests/by-util/test_stat.rs @@ -256,11 +256,10 @@ fn test_pipe_fifo() { let (at, mut ucmd) = at_and_ucmd!(); at.mkfifo("FIFO"); ucmd.arg("FIFO") - .run() + .succeeds() .no_stderr() .stdout_contains("fifo") - .stdout_contains("File: FIFO") - .succeeded(); + .stdout_contains("File: FIFO"); } #[test] @@ -275,19 +274,17 @@ fn test_stdin_pipe_fifo1() { new_ucmd!() .arg("-") .set_stdin(std::process::Stdio::piped()) - .run() + .succeeds() .no_stderr() .stdout_contains("fifo") - .stdout_contains("File: -") - .succeeded(); + .stdout_contains("File: -"); new_ucmd!() .args(&["-L", "-"]) .set_stdin(std::process::Stdio::piped()) - .run() + .succeeds() .no_stderr() .stdout_contains("fifo") - .stdout_contains("File: -") - .succeeded(); + .stdout_contains("File: -"); } #[test] @@ -299,11 +296,10 @@ fn test_stdin_pipe_fifo2() { new_ucmd!() .arg("-") .set_stdin(std::process::Stdio::null()) - .run() + .succeeds() .no_stderr() .stdout_contains("character special file") - .stdout_contains("File: -") - .succeeded(); + .stdout_contains("File: -"); } #[test] @@ -339,11 +335,10 @@ fn test_stdin_redirect() { ts.ucmd() .arg("-") .set_stdin(std::fs::File::open(at.plus("f")).unwrap()) - .run() + .succeeds() .no_stderr() .stdout_contains("regular empty file") - .stdout_contains("File: -") - .succeeded(); + .stdout_contains("File: -"); } #[test] @@ -361,17 +356,13 @@ fn test_quoting_style_locale() { ts.ucmd() .env("QUOTING_STYLE", "locale") .args(&["-c", "%N", "'"]) - .run() - .no_stderr() - .stdout_is("'\\''\n") - .succeeded(); + .succeeds() + .stdout_only("'\\''\n"); ts.ucmd() .args(&["-c", "%N", "'"]) - .run() - .no_stderr() - .stdout_is("\"'\"\n") - .succeeded(); + .succeeds() + .stdout_only("\"'\"\n"); } #[test] @@ -395,11 +386,11 @@ fn test_printf_octal_2() { } #[test] -fn test_printf_hex_3() { +fn test_printf_incomplete_hex() { let ts = TestScenario::new(util_name!()); ts.ucmd() .args(&["--printf=\\x", "."]) - .run() + .succeeds() .stderr_contains("warning: incomplete hex escape"); } From 574aec55e55b27b9a446cfd52f1515e2d23bcb7c Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Mon, 9 Dec 2024 21:35:52 +0100 Subject: [PATCH 8/8] Take comments into account --- src/uu/stat/src/stat.rs | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/uu/stat/src/stat.rs b/src/uu/stat/src/stat.rs index d68d63d5767..5e617e7a31a 100644 --- a/src/uu/stat/src/stat.rs +++ b/src/uu/stat/src/stat.rs @@ -97,10 +97,12 @@ pub enum OutputType { Unknown, } +#[derive(Default)] enum QuotingStyle { Locale, Shell, - Default, + #[default] + ShellEscapeAlways, Quote, } @@ -111,6 +113,7 @@ impl std::str::FromStr for QuotingStyle { match s { "locale" => Ok(QuotingStyle::Locale), "shell" => Ok(QuotingStyle::Shell), + "shell-escape-always" => Ok(QuotingStyle::ShellEscapeAlways), // The others aren't exposed to the user _ => Err(format!("Invalid quoting style: {}", s)), } @@ -324,7 +327,7 @@ fn quote_file_name(file_name: &str, quoting_style: &QuotingStyle) -> String { let escaped = file_name.replace('\'', r"\'"); format!("'{}'", escaped) } - QuotingStyle::Default => format!("\"{}\"", file_name), + QuotingStyle::ShellEscapeAlways => format!("\"{}\"", file_name), QuotingStyle::Quote => file_name.to_string(), } } @@ -338,7 +341,7 @@ fn get_quoted_file_name( let quoting_style = env::var("QUOTING_STYLE") .ok() .and_then(|style| style.parse().ok()) - .unwrap_or(QuotingStyle::Default); + .unwrap_or_default(); if file_type.is_symlink() { let quoted_display_name = quote_file_name(display_name, "ing_style); @@ -362,7 +365,7 @@ fn get_quoted_file_name( } } -fn process_token_fs(t: &Token, meta: StatFs, display_name: &str) { +fn process_token_filesystem(t: &Token, meta: StatFs, display_name: &str) { match *t { Token::Byte(byte) => write_raw_byte(byte), Token::Char(c) => print!("{c}"), @@ -943,7 +946,7 @@ impl Stater { // Usage for t in tokens { - process_token_fs(t, meta, &display_name); + process_token_filesystem(t, meta, &display_name); } } Err(e) => {