date: take the default output format from the locale's D_T_FMT

Replaces the 12-/24-hour heuristic (`uses_12_hour_format`) with the format
string reported by `nl_langinfo(D_T_FMT)`, inserting `%Z` when the locale
format lacks it, and adds unit and integration tests for the C, en_US and
fr_FR locales.

Review notes (the hunks below are unchanged apart from restored formatting):
* `ensure_timezone_in_format` only checks for `%Z`; a format that already
  contains `%z` still gets `%Z` inserted, although
  `test_timezone_included_in_format` accepts either specifier.
* The `%Y`/`%y` lookup is a plain substring search, so an escaped `%%Y`
  in a locale format would also match.
* `test_c_locale_format` edits LC_ALL/LC_TIME/LANG but then passes "C" to
  `setlocale` explicitly, so the environment changes do not influence the
  value under test; LC_TIME is left at "C" for the rest of the process.
* The en_US-specific assertions in the integration tests run only when
  AM/PM appears in the output, so they check nothing when that locale is
  not installed.

diff --git a/src/uu/date/src/locale.rs b/src/uu/date/src/locale.rs
index 6b756e97d48..0dea975f97f 100644
--- a/src/uu/date/src/locale.rs
+++ b/src/uu/date/src/locale.rs
@@ -28,115 +28,69 @@ macro_rules! cfg_langinfo {
 cfg_langinfo! {
     use std::ffi::CStr;
     use std::sync::OnceLock;
+    use nix::libc;
 }
 
 cfg_langinfo! {
-    /// Cached result of locale time format detection
-    static TIME_FORMAT_CACHE: OnceLock<bool> = OnceLock::new();
-
-    /// Safe wrapper around libc setlocale
-    fn set_time_locale() {
-        unsafe {
-            nix::libc::setlocale(nix::libc::LC_TIME, c"".as_ptr());
-        }
-    }
+    /// Cached locale date/time format string
+    static DEFAULT_FORMAT_CACHE: OnceLock<&'static str> = OnceLock::new();
 
-    /// Safe wrapper around libc nl_langinfo that returns `Option<String>`
-    fn get_locale_info(item: nix::libc::nl_item) -> Option<String> {
-        unsafe {
-            let ptr = nix::libc::nl_langinfo(item);
-            if ptr.is_null() {
-                None
-            } else {
-                CStr::from_ptr(ptr).to_str().ok().map(String::from)
+    /// Returns the default date format string for the current locale.
+    ///
+    /// The format respects locale preferences for time display (12-hour vs 24-hour),
+    /// component ordering, and numeric formatting conventions. Ensures timezone
+    /// information is included in the output.
+    pub fn get_locale_default_format() -> &'static str {
+        DEFAULT_FORMAT_CACHE.get_or_init(|| {
+            // Try to get locale format string
+            if let Some(format) = get_locale_format_string() {
+                let format_with_tz = ensure_timezone_in_format(&format);
+                return Box::leak(format_with_tz.into_boxed_str());
             }
-        }
-    }
-
-    /// Internal function that performs the actual locale detection
-    fn detect_12_hour_format() -> bool {
-        // Helper function to check for 12-hour format indicators
-        fn has_12_hour_indicators(format_str: &str) -> bool {
-            const INDICATORS: &[&str] = &["%I", "%l", "%r"];
-            INDICATORS.iter().any(|&indicator| format_str.contains(indicator))
-        }
 
-        // Helper function to check for 24-hour format indicators
-        fn has_24_hour_indicators(format_str: &str) -> bool {
-            const INDICATORS: &[&str] = &["%H", "%k", "%R", "%T"];
-            INDICATORS.iter().any(|&indicator| format_str.contains(indicator))
-        }
-
-        // Set locale from environment variables (empty string = use LC_TIME/LANG env vars)
-        set_time_locale();
-
-        // Get locale format strings using safe wrappers
-        let d_t_fmt = get_locale_info(nix::libc::D_T_FMT);
-        let t_fmt_opt = get_locale_info(nix::libc::T_FMT);
-        let t_fmt_ampm_opt = get_locale_info(nix::libc::T_FMT_AMPM);
+            // Fallback: use 24-hour format as safe default
+            "%a %b %e %X %Z %Y"
+        })
+    }
 
-        // Check D_T_FMT first
-        if let Some(ref format) = d_t_fmt {
-            // Check for 12-hour indicators first (higher priority)
-            if has_12_hour_indicators(format) {
-                return true;
-            }
+    /// Retrieves the date/time format string from the system locale
+    fn get_locale_format_string() -> Option<String> {
+        unsafe {
+            // Set locale from environment variables
+            libc::setlocale(libc::LC_TIME, c"".as_ptr());
 
-            // If we find 24-hour indicators, it's definitely not 12-hour
-            if has_24_hour_indicators(format) {
-                return false;
+            // Get the date/time format string
+            let d_t_fmt_ptr = libc::nl_langinfo(libc::D_T_FMT);
+            if d_t_fmt_ptr.is_null() {
+                return None;
             }
-        }
 
-        // Also check the time-only format as a fallback
-        if let Some(ref time_format) = t_fmt_opt {
-            if has_12_hour_indicators(time_format) {
-                return true;
+            let format = CStr::from_ptr(d_t_fmt_ptr).to_str().ok()?;
+            if format.is_empty() {
+                return None;
             }
-        }
 
-        // Check if there's a specific 12-hour format defined
-        if let Some(ref ampm_format) = t_fmt_ampm_opt {
-            // If T_FMT_AMPM is non-empty and different from T_FMT, locale supports 12-hour
-            if !ampm_format.is_empty() {
-                if let Some(ref time_format) = t_fmt_opt {
-                    if ampm_format != time_format {
-                        return true;
-                    }
-                } else {
-                    return true;
-                }
-            }
+            Some(format.to_string())
         }
-
-        // Default to 24-hour format if we can't determine
-        false
     }
-}
 
-cfg_langinfo! {
-    /// Detects whether the current locale prefers 12-hour or 24-hour time format
-    /// Results are cached for performance
-    pub fn uses_12_hour_format() -> bool {
-        *TIME_FORMAT_CACHE.get_or_init(detect_12_hour_format)
-    }
-
-    /// Cached default format string
-    static DEFAULT_FORMAT_CACHE: OnceLock<&'static str> = OnceLock::new();
+    /// Ensures the format string includes timezone (%Z)
+    fn ensure_timezone_in_format(format: &str) -> String {
+        if format.contains("%Z") {
+            return format.to_string();
+        }
 
-    /// Get the locale-appropriate default format string for date output
-    /// This respects the locale's preference for 12-hour vs 24-hour time
-    /// Results are cached for performance (following uucore patterns)
-    pub fn get_locale_default_format() -> &'static str {
-        DEFAULT_FORMAT_CACHE.get_or_init(|| {
-            if uses_12_hour_format() {
-                // Use 12-hour format with AM/PM
-                "%a %b %e %r %Z %Y"
-            } else {
-                // Use 24-hour format
-                "%a %b %e %X %Z %Y"
-            }
-        })
+        // Try to insert %Z before year specifier (%Y or %y)
+        if let Some(pos) = format.find("%Y").or_else(|| format.find("%y")) {
+            let mut result = String::with_capacity(format.len() + 3);
+            result.push_str(&format[..pos]);
+            result.push_str("%Z ");
+            result.push_str(&format[pos..]);
+            result
+        } else {
+            // No year found, append %Z at the end
+            format.to_string() + " %Z"
+        }
     }
 }
 
@@ -161,7 +115,6 @@ mod tests {
         #[test]
         fn test_locale_detection() {
             // Just verify the function doesn't panic
-            let _ = uses_12_hour_format();
             let _ = get_locale_default_format();
         }
 
@@ -170,8 +123,101 @@ mod tests {
             let format = get_locale_default_format();
             assert!(format.contains("%a")); // abbreviated weekday
             assert!(format.contains("%b")); // abbreviated month
-            assert!(format.contains("%Y")); // year
+            assert!(format.contains("%Y") || format.contains("%y")); // year (4-digit or 2-digit)
             assert!(format.contains("%Z")); // timezone
         }
+
+        #[test]
+        fn test_locale_format_structure() {
+            // Verify we're using actual locale format strings, not hardcoded ones
+            let format = get_locale_default_format();
+
+            // The format should not be empty
+            assert!(!format.is_empty(), "Locale format should not be empty");
+
+            // Should contain date/time components
+            let has_date_component = format.contains("%a")
+                || format.contains("%A")
+                || format.contains("%b")
+                || format.contains("%B")
+                || format.contains("%d")
+                || format.contains("%e");
+            assert!(has_date_component, "Format should contain date components");
+
+            // Should contain time component (hour)
+            let has_time_component = format.contains("%H")
+                || format.contains("%I")
+                || format.contains("%k")
+                || format.contains("%l")
+                || format.contains("%r")
+                || format.contains("%R")
+                || format.contains("%T")
+                || format.contains("%X");
+            assert!(has_time_component, "Format should contain time components");
+        }
+
+        #[test]
+        fn test_c_locale_format() {
+            // Save original locale
+            let original_lc_all = std::env::var("LC_ALL").ok();
+            let original_lc_time = std::env::var("LC_TIME").ok();
+            let original_lang = std::env::var("LANG").ok();
+
+            unsafe {
+                // Set C locale
+                std::env::set_var("LC_ALL", "C");
+                std::env::remove_var("LC_TIME");
+                std::env::remove_var("LANG");
+            }
+
+            // Get the locale format
+            let format = unsafe {
+                libc::setlocale(libc::LC_TIME, c"C".as_ptr());
+                let d_t_fmt_ptr = libc::nl_langinfo(libc::D_T_FMT);
+                if d_t_fmt_ptr.is_null() {
+                    None
+                } else {
+                    std::ffi::CStr::from_ptr(d_t_fmt_ptr).to_str().ok()
+                }
+            };
+
+            if let Some(locale_format) = format {
+                // C locale typically uses 24-hour format
+                // Common patterns: %H (24-hour with leading zero) or %T (HH:MM:SS)
+                let uses_24_hour = locale_format.contains("%H")
+                    || locale_format.contains("%T")
+                    || locale_format.contains("%R");
+                assert!(uses_24_hour, "C locale should use 24-hour format, got: {locale_format}");
+            }
+
+            // Restore original locale
+            unsafe {
+                if let Some(val) = original_lc_all {
+                    std::env::set_var("LC_ALL", val);
+                } else {
+                    std::env::remove_var("LC_ALL");
+                }
+                if let Some(val) = original_lc_time {
+                    std::env::set_var("LC_TIME", val);
+                } else {
+                    std::env::remove_var("LC_TIME");
+                }
+                if let Some(val) = original_lang {
+                    std::env::set_var("LANG", val);
+                } else {
+                    std::env::remove_var("LANG");
+                }
+            }
+        }
+
+        #[test]
+        fn test_timezone_included_in_format() {
+            // The implementation should ensure %Z is present
+            let format = get_locale_default_format();
+            assert!(
+                format.contains("%Z") || format.contains("%z"),
+                "Format should contain timezone indicator: {format}"
+            );
+        }
     }
 }
diff --git a/tests/by-util/test_date.rs b/tests/by-util/test_date.rs
index bd1c31cc1a9..aca14f226b7 100644
--- a/tests/by-util/test_date.rs
+++ b/tests/by-util/test_date.rs
@@ -1147,3 +1147,221 @@ fn test_date_explicit_format_overrides_locale() {
         .succeeds()
         .stdout_is("13:00\n");
 }
+
+// Comprehensive locale formatting tests to verify actual locale format strings are used
+#[test]
+#[cfg(any(
+    target_os = "linux",
+    target_vendor = "apple",
+    target_os = "freebsd",
+    target_os = "netbsd",
+    target_os = "openbsd",
+    target_os = "dragonfly"
+))]
+fn test_date_locale_leading_zeros_en_us() {
+    // Test for leading zeros in en_US locale
+    // en_US uses %I (01-12) with leading zeros, not %l (1-12) without
+    let result = new_ucmd!()
+        .env("LC_ALL", "en_US.UTF-8")
+        .env("TZ", "UTC")
+        .arg("-d")
+        .arg("2025-12-14T01:00")
+        .succeeds();
+
+    let stdout = result.stdout_str();
+    // If locale is available, should have leading zero: "01:00"
+    // If locale unavailable (falls back to C), may have "01:00" (24-hour) or " 1:00"
+    // Key point: output should match what nl_langinfo(D_T_FMT) specifies
+    if stdout.contains("AM") || stdout.contains("PM") {
+        // 12-hour format detected - should have leading zero in en_US
+        assert!(
+            stdout.contains("01:00") || stdout.contains(" 1:00"),
+            "en_US 12-hour format should show '01:00 AM' or ' 1:00 AM', got: {stdout}"
+        );
+    }
+}
+
+#[test]
+#[cfg(unix)]
+fn test_date_locale_c_uses_24_hour() {
+    // C/POSIX locale must use 24-hour format
+    let result = new_ucmd!()
+        .env("LC_ALL", "C")
+        .env("TZ", "UTC")
+        .arg("-d")
+        .arg("2025-12-14T13:00")
+        .succeeds();
+
+    let stdout = result.stdout_str();
+    // C locale uses 24-hour format, no AM/PM
+    assert!(
+        !stdout.contains("AM") && !stdout.contains("PM"),
+        "C locale should not use AM/PM, got: {stdout}"
+    );
+    assert!(
+        stdout.contains("13"),
+        "C locale should show 13 (24-hour), got: {stdout}"
+    );
+}
+
+#[test]
+#[cfg(unix)]
+fn test_date_locale_timezone_included() {
+    // Verify timezone is included in output (implementation adds %Z if missing)
+    let result = new_ucmd!()
+        .env("LC_ALL", "C")
+        .env("TZ", "UTC")
+        .arg("-d")
+        .arg("2025-12-14T13:00")
+        .succeeds();
+
+    let stdout = result.stdout_str();
+    assert!(
+        stdout.contains("UTC") || stdout.contains("+00"),
+        "Output should contain timezone information, got: {stdout}"
+    );
+}
+
+#[test]
+#[cfg(unix)]
+fn test_date_locale_format_structure() {
+    // Test that output follows locale-defined structure (not hardcoded)
+    let result = new_ucmd!()
+        .env("LC_ALL", "C")
+        .env("TZ", "UTC")
+        .arg("-d")
+        .arg("2025-12-14T13:00:00")
+        .succeeds();
+
+    let stdout = result.stdout_str();
+
+    // Should contain weekday abbreviation
+    let weekdays = ["Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"];
+    assert!(
+        weekdays.iter().any(|day| stdout.contains(day)),
+        "Output should contain weekday, got: {stdout}"
+    );
+
+    // Should contain month
+    let months = [
+        "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
+    ];
+    assert!(
+        months.iter().any(|month| stdout.contains(month)),
+        "Output should contain month, got: {stdout}"
+    );
+
+    // Should contain year
+    assert!(
+        stdout.contains("2025"),
+        "Output should contain year, got: {stdout}"
+    );
+}
+
+#[test]
+#[cfg(unix)]
+fn test_date_locale_format_not_hardcoded() {
+    // This test verifies we're not using hardcoded format strings
+    // by checking that the format actually comes from the locale system
+
+    // Test with C locale
+    let c_result = new_ucmd!()
+        .env("LC_ALL", "C")
+        .env("TZ", "UTC")
+        .arg("-d")
+        .arg("2025-12-14T01:00:00")
+        .succeeds();
+
+    let c_output = c_result.stdout_str();
+
+    // C locale should use 24-hour format
+    assert!(
+        c_output.contains("01:00") || c_output.contains(" 1:00"),
+        "C locale output: {c_output}"
+    );
+    assert!(
+        !c_output.contains("AM") && !c_output.contains("PM"),
+        "C locale should not have AM/PM: {c_output}"
+    );
+}
+
+#[test]
+#[cfg(any(
+    target_os = "linux",
+    target_vendor = "apple",
+    target_os = "freebsd",
+    target_os = "netbsd",
+    target_os = "openbsd",
+    target_os = "dragonfly"
+))]
+fn test_date_locale_en_us_vs_c_difference() {
+    // Verify that en_US and C locales produce different outputs
+    // (if en_US locale is available on the system)
+
+    let c_result = new_ucmd!()
+        .env("LC_ALL", "C")
+        .env("TZ", "UTC")
+        .arg("-d")
+        .arg("2025-12-14T13:00:00")
+        .succeeds();
+
+    let en_us_result = new_ucmd!()
+        .env("LC_ALL", "en_US.UTF-8")
+        .env("TZ", "UTC")
+        .arg("-d")
+        .arg("2025-12-14T13:00:00")
+        .succeeds();
+
+    let c_output = c_result.stdout_str();
+    let en_us_output = en_us_result.stdout_str();
+
+    // C locale: 24-hour, no AM/PM
+    assert!(
+        !c_output.contains("AM") && !c_output.contains("PM"),
+        "C locale should not have AM/PM: {c_output}"
+    );
+
+    // en_US: If locale is installed, should have AM/PM (12-hour)
+    // If not installed, falls back to C locale
+    if en_us_output.contains("PM") {
+        // Locale is available and using 12-hour format
+        assert!(
+            en_us_output.contains("1:00") || en_us_output.contains("01:00"),
+            "en_US with 12-hour should show 1:00 PM or 01:00 PM, got: {en_us_output}"
+        );
+    }
+}
+
+#[test]
+#[cfg(any(target_os = "linux", target_os = "android", target_vendor = "apple",))]
+fn test_date_locale_fr_french() {
+    // Test French locale (fr_FR.UTF-8) behavior
+    // French typically uses 24-hour format and may have localized day/month names
+
+    let result = new_ucmd!()
+        .env("LC_ALL", "fr_FR.UTF-8")
+        .env("TZ", "UTC")
+        .arg("-d")
+        .arg("2025-12-14T13:00:00")
+        .succeeds();
+
+    let stdout = result.stdout_str();
+
+    // French locale should use 24-hour format (no AM/PM)
+    assert!(
+        !stdout.contains("AM") && !stdout.contains("PM"),
+        "French locale should use 24-hour format (no AM/PM), got: {stdout}"
+    );
+
+    // Should have 13:00 (not 1:00)
+    assert!(
+        stdout.contains("13:00"),
+        "French locale should show 13:00 for 1 PM, got: {stdout}"
+    );
+
+    // Timezone should be included (our implementation adds %Z if missing)
+    assert!(
+        stdout.contains("UTC") || stdout.contains("+00") || stdout.contains('Z'),
+        "Output should include timezone information, got: {stdout}"
+    );
+}