diff --git a/src/rust/src/avc/nal.rs b/src/rust/src/avc/nal.rs index 4a2d2342b..767a9da0a 100644 --- a/src/rust/src/avc/nal.rs +++ b/src/rust/src/avc/nal.rs @@ -9,7 +9,7 @@ use crate::{ccx_options, current_fps, total_frames_count, MPEG_CLOCK_FREQ}; use lib_ccxr::common::{AvcNalType, BitStreamRust, BitstreamError, FRAMERATES_VALUES, SLICE_TYPES}; use lib_ccxr::util::log::DebugMessageFlag; use lib_ccxr::{debug, info}; -use std::os::raw::{c_char, c_long}; +use std::os::raw::c_char; /// Process sequence parameter set RBSP pub fn seq_parameter_set_rbsp( @@ -630,7 +630,7 @@ pub unsafe fn slice_header( msg_type = DebugMessageFlag::TIME; " sync_pts:{} ({:8})", std::ffi::CStr::from_ptr(ccxr_print_mstime_static( - ((*dec_ctx.timing).sync_pts / ((MPEG_CLOCK_FREQ as i64) / 1000i64)) as c_long, + (*dec_ctx.timing).sync_pts / ((MPEG_CLOCK_FREQ as i64) / 1000i64), buf.as_mut_ptr() )) .to_str() diff --git a/src/rust/src/decoder/output.rs b/src/rust/src/decoder/output.rs index 02e0356ba..7cc2b6a71 100644 --- a/src/rust/src/decoder/output.rs +++ b/src/rust/src/decoder/output.rs @@ -85,14 +85,41 @@ impl<'a> Writer<'a> { /// Write the symbol to the provided buffer /// -/// Always writes 2 bytes for consistent UTF-16BE encoding. -/// Previously, this function wrote 1 byte for ASCII characters and 2 bytes -/// for non-ASCII, creating an invalid mix that encoding conversion couldn't -/// handle properly. This caused garbled output with Japanese/Chinese characters -/// (issue #1451). -pub fn write_char(sym: &dtvcc_symbol, buf: &mut Vec) { - buf.push((sym.sym >> 8) as u8); - buf.push((sym.sym & 0xff) as u8); +/// The `use_utf16` parameter controls the output format: +/// - `true`: Always writes 2 bytes (UTF-16BE format). Use for UTF-16/UCS-2 charsets. +/// - `false`: Writes 1 byte for ASCII (high byte == 0), 2 bytes for extended chars. +/// Use for variable-width encodings like EUC-KR, CP949, Shift-JIS, etc. +/// +/// Issue #1451: Japanese/Chinese with UTF-16BE need 2 bytes for all characters. +/// Issue #1065: Korean with EUC-KR needs 1 byte for ASCII, 2 bytes for Korean. +pub fn write_char(sym: &dtvcc_symbol, buf: &mut Vec, use_utf16: bool) { + let high = (sym.sym >> 8) as u8; + let low = (sym.sym & 0xff) as u8; + + if use_utf16 { + // UTF-16BE: Always write 2 bytes + buf.push(high); + buf.push(low); + } else { + // Variable-width: Only write high byte if non-zero + if high != 0 { + buf.push(high); + } + buf.push(low); + } +} + +/// Check if a charset name indicates UTF-16 or UCS-2 encoding +/// +/// These are fixed-width 16-bit encodings where even ASCII needs 2 bytes. +pub fn is_utf16_charset(charset: &str) -> bool { + let upper = charset.to_uppercase(); + upper.contains("UTF-16") + || upper.contains("UTF16") + || upper.contains("UCS-2") + || upper.contains("UCS2") + || upper.contains("UTF_16") + || upper.contains("UCS_2") } /// Convert from CEA-708 color representation to hex code @@ -114,27 +141,71 @@ mod tests { use super::*; #[test] - fn test_write_char() { + fn test_write_char_utf16_mode() { let mut buf = Vec::new(); - // Write ASCII symbol - UTF-16BE always uses 2 bytes - // 'A' (0x41) becomes [0x00, 0x41] in UTF-16BE + // UTF-16 mode: ASCII symbol 'A' (0x41) becomes [0x00, 0x41] let sym = dtvcc_symbol { sym: 0x41, init: 0 }; - write_char(&sym, &mut buf); + write_char(&sym, &mut buf, true); assert_eq!(buf, vec![0x00, 0x41]); buf.clear(); - // Write non-ASCII symbol (e.g., Japanese character) - // Already 16-bit, writes as [high_byte, low_byte] + // UTF-16 mode: Non-ASCII symbol writes as [high_byte, low_byte] let sym = dtvcc_symbol { sym: 0x1234, init: 0, }; - write_char(&sym, &mut buf); + write_char(&sym, &mut buf, true); assert_eq!(buf, vec![0x12, 0x34]); } + #[test] + fn test_write_char_variable_width_mode() { + let mut buf = Vec::new(); + + // Variable-width mode: ASCII symbol 'A' (0x41) becomes [0x41] (1 byte) + let sym = dtvcc_symbol { sym: 0x41, init: 0 }; + write_char(&sym, &mut buf, false); + assert_eq!(buf, vec![0x41]); + + buf.clear(); + + // Variable-width mode: Korean EUC-KR char becomes [high, low] (2 bytes) + // Example: Korean '인' = 0xC0CE in EUC-KR + let sym = dtvcc_symbol { + sym: 0xC0CE, + init: 0, + }; + write_char(&sym, &mut buf, false); + assert_eq!(buf, vec![0xC0, 0xCE]); + + buf.clear(); + + // Variable-width mode: Space (0x20) becomes [0x20] (1 byte, no NUL) + let sym = dtvcc_symbol { sym: 0x20, init: 0 }; + write_char(&sym, &mut buf, false); + assert_eq!(buf, vec![0x20]); + } + + #[test] + fn test_is_utf16_charset() { + // Should return true for UTF-16 variants + assert!(is_utf16_charset("UTF-16BE")); + assert!(is_utf16_charset("UTF-16LE")); + assert!(is_utf16_charset("utf-16")); + assert!(is_utf16_charset("UTF16")); + assert!(is_utf16_charset("UCS-2")); + assert!(is_utf16_charset("UCS2")); + + // Should return false for variable-width encodings + assert!(!is_utf16_charset("EUC-KR")); + assert!(!is_utf16_charset("CP949")); + assert!(!is_utf16_charset("Shift-JIS")); + assert!(!is_utf16_charset("UTF-8")); + assert!(!is_utf16_charset("ISO-8859-1")); + } + #[test] fn test_color_to_hex() { assert_eq!(color_to_hex(0b00_00_00), (0, 0, 0)); // Black diff --git a/src/rust/src/decoder/tv_screen.rs b/src/rust/src/decoder/tv_screen.rs index aede8e608..7ee5ec532 100644 --- a/src/rust/src/decoder/tv_screen.rs +++ b/src/rust/src/decoder/tv_screen.rs @@ -13,7 +13,7 @@ use std::{ffi::CStr, fs::File}; #[cfg(windows)] use crate::bindings::_get_osfhandle; -use super::output::{color_to_hex, write_char, Writer}; +use super::output::{color_to_hex, is_utf16_charset, write_char, Writer}; use super::timing::{get_scc_time_str, get_time_str}; use super::{CCX_DTVCC_SCREENGRID_COLUMNS, CCX_DTVCC_SCREENGRID_ROWS}; use crate::{ @@ -177,6 +177,23 @@ impl dtvcc_tv_screen { let (first, last) = self.get_write_interval(row_index); debug!("First: {first}, Last: {last}"); + // Determine if we should use UTF-16 mode (2 bytes for all chars) or + // variable-width mode (1 byte for ASCII, 2 bytes for extended chars). + // UTF-16/UCS-2 encodings require 2 bytes even for ASCII. + // Variable-width encodings (EUC-KR, CP949, Shift-JIS, etc.) use 1 byte for ASCII. + let use_utf16 = if !writer.writer_ctx.charset.is_null() { + let charset = unsafe { + CStr::from_ptr(writer.writer_ctx.charset) + .to_str() + .unwrap_or("") + }; + is_utf16_charset(charset) + } else { + // No charset specified - default to variable-width for backward compatibility + // with raw byte output (no encoding conversion) + false + }; + for i in 0..last + 1 { if use_colors { self.change_pen_color( @@ -219,7 +236,7 @@ impl dtvcc_tv_screen { if i < first { buf.push(b' '); } else { - write_char(&self.chars[row_index][i], &mut buf) + write_char(&self.chars[row_index][i], &mut buf, use_utf16) } } // there can be unclosed tags or colors after the last symbol in a row diff --git a/src/rust/src/es/pic.rs b/src/rust/src/es/pic.rs index d9bfea66a..89c3344b9 100644 --- a/src/rust/src/es/pic.rs +++ b/src/rust/src/es/pic.rs @@ -277,12 +277,8 @@ pub unsafe fn read_pic_info( // set_fts() is not called for each picture when use_gop_as_pts == 1. if ccx_options.use_gop_as_pts == 1 { // Calculate current FTS based on GOP start time + frame offset - // Cast fts_at_gop_start to i64 for cross-platform compatibility (c_long is i32 on Windows) let frame_offset_ms = (dec_ctx.frames_since_last_gop as f64 * 1000.0 / current_fps) as i64; - #[allow(clippy::unnecessary_cast)] - { - (*dec_ctx.timing).fts_now = (fts_at_gop_start as i64) + frame_offset_ms; - } + (*dec_ctx.timing).fts_now = fts_at_gop_start + frame_offset_ms; // Update fts_max if needed if (*dec_ctx.timing).fts_now > (*dec_ctx.timing).fts_max { diff --git a/src/rust/src/lib.rs b/src/rust/src/lib.rs index 4e453f4e3..9f267c7b3 100644 --- a/src/rust/src/lib.rs +++ b/src/rust/src/lib.rs @@ -51,7 +51,7 @@ use std::os::raw::{c_uchar, c_void}; use std::{ ffi::CStr, io::Write, - os::raw::{c_char, c_double, c_int, c_long, c_uint}, + os::raw::{c_char, c_double, c_int, c_uint}, }; // Mock data for rust unit tests @@ -67,7 +67,7 @@ cfg_if! { static mut frames_since_ref_time: c_int = 0; static mut total_frames_count: c_uint = 0; - static mut fts_at_gop_start: c_long = 0; + static mut fts_at_gop_start: i64 = 0; static mut gop_rollover: c_int = 0; static mut pts_big_change: c_uint = 0; @@ -144,7 +144,7 @@ extern "C" { static mut total_frames_count: c_uint; static mut gop_time: gop_time_code; static mut first_gop_time: gop_time_code; - static mut fts_at_gop_start: c_long; + static mut fts_at_gop_start: i64; static mut gop_rollover: c_int; static mut ccx_common_timing_settings: ccx_common_timing_settings_t; static mut capitalization_list: word_list; diff --git a/src/rust/src/libccxr_exports/time.rs b/src/rust/src/libccxr_exports/time.rs index c0063916f..82df6838c 100644 --- a/src/rust/src/libccxr_exports/time.rs +++ b/src/rust/src/libccxr_exports/time.rs @@ -1,7 +1,7 @@ #![allow(clippy::useless_conversion)] use std::convert::TryInto; -use std::ffi::{c_char, c_int, c_long, CStr}; +use std::ffi::{c_char, c_int, CStr}; use crate::{ bindings::*, cb_708, cb_field1, cb_field2, ccx_common_timing_settings as timing_settings, @@ -330,7 +330,7 @@ unsafe fn write_back_from_timing_info() { .unwrap_or(0); gop_time = write_gop_time_code(timing_info.gop_time); first_gop_time = write_gop_time_code(timing_info.first_gop_time); - fts_at_gop_start = timing_info.fts_at_gop_start.millis() as c_long; + fts_at_gop_start = timing_info.fts_at_gop_start.millis(); gop_rollover = if timing_info.gop_rollover { 1 } else { 0 }; timing_settings.disable_sync_check = if timing_info.timing_settings.disable_sync_check { 1 @@ -412,7 +412,7 @@ pub unsafe fn write_gop_time_code(g: Option) -> gop_time_code { /// /// `ctx` must not be null. #[no_mangle] -pub unsafe extern "C" fn ccxr_add_current_pts(ctx: *mut ccx_common_timing_ctx, pts: c_long) { +pub unsafe extern "C" fn ccxr_add_current_pts(ctx: *mut ccx_common_timing_ctx, pts: i64) { apply_timing_info(); let mut context = generate_timing_context(ctx); @@ -428,7 +428,7 @@ pub unsafe extern "C" fn ccxr_add_current_pts(ctx: *mut ccx_common_timing_ctx, p /// /// `ctx` must not be null. #[no_mangle] -pub unsafe extern "C" fn ccxr_set_current_pts(ctx: *mut ccx_common_timing_ctx, pts: c_long) { +pub unsafe extern "C" fn ccxr_set_current_pts(ctx: *mut ccx_common_timing_ctx, pts: i64) { apply_timing_info(); let mut context = generate_timing_context(ctx); @@ -469,7 +469,7 @@ pub unsafe extern "C" fn ccxr_set_fts(ctx: *mut ccx_common_timing_ctx) -> c_int pub unsafe extern "C" fn ccxr_get_fts( ctx: *mut ccx_common_timing_ctx, current_field: c_int, -) -> c_long { +) -> i64 { apply_timing_info(); let mut context = generate_timing_context(ctx); @@ -485,7 +485,7 @@ pub unsafe extern "C" fn ccxr_get_fts( write_back_to_common_timing_ctx(ctx, &context); write_back_from_timing_info(); - ans.millis().try_into().unwrap_or(0) + ans.millis() } /// Rust equivalent for `get_visible_end` function in C. Uses C-native types as input and output. @@ -503,7 +503,7 @@ pub unsafe extern "C" fn ccxr_get_fts( pub unsafe extern "C" fn ccxr_get_visible_end( ctx: *mut ccx_common_timing_ctx, _current_field: c_int, -) -> c_long { +) -> i64 { apply_timing_info(); let mut context = generate_timing_context(ctx); @@ -518,7 +518,7 @@ pub unsafe extern "C" fn ccxr_get_visible_end( write_back_to_common_timing_ctx(ctx, &context); write_back_from_timing_info(); - fts as c_long + fts } /// Rust equivalent for `get_visible_start` function in C. Uses C-native types as input and output. @@ -537,7 +537,7 @@ pub unsafe extern "C" fn ccxr_get_visible_end( pub unsafe extern "C" fn ccxr_get_visible_start( ctx: *mut ccx_common_timing_ctx, _current_field: c_int, -) -> c_long { +) -> i64 { apply_timing_info(); let context = generate_timing_context(ctx); @@ -554,7 +554,7 @@ pub unsafe extern "C" fn ccxr_get_visible_start( write_back_to_common_timing_ctx(ctx, &context); write_back_from_timing_info(); - fts as c_long + fts } /// Rust equivalent for `get_fts_max` function in C. Uses C-native types as input and output. @@ -563,7 +563,7 @@ pub unsafe extern "C" fn ccxr_get_visible_start( /// /// `ctx` must not be null. #[no_mangle] -pub unsafe extern "C" fn ccxr_get_fts_max(ctx: *mut ccx_common_timing_ctx) -> c_long { +pub unsafe extern "C" fn ccxr_get_fts_max(ctx: *mut ccx_common_timing_ctx) -> i64 { apply_timing_info(); let mut context = generate_timing_context(ctx); @@ -572,7 +572,7 @@ pub unsafe extern "C" fn ccxr_get_fts_max(ctx: *mut ccx_common_timing_ctx) -> c_ write_back_to_common_timing_ctx(ctx, &context); write_back_from_timing_info(); - ans.millis().try_into().unwrap_or(0) + ans.millis() } /// Rust equivalent for `print_mstime_static` function in C. Uses C-native types as input and output. @@ -581,8 +581,8 @@ pub unsafe extern "C" fn ccxr_get_fts_max(ctx: *mut ccx_common_timing_ctx) -> c_ /// /// `buf` must not be null. It must have sufficient length to hold the time in string form. #[no_mangle] -pub unsafe extern "C" fn ccxr_print_mstime_static(mstime: c_long, buf: *mut c_char) -> *mut c_char { - let time = Timestamp::from_millis(mstime.into()); +pub unsafe extern "C" fn ccxr_print_mstime_static(mstime: i64, buf: *mut c_char) -> *mut c_char { + let time = Timestamp::from_millis(mstime); let ans = c::print_mstime_static(time, ':'); write_string_into_pointer(buf, &ans); buf