Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/rust/src/avc/nal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use crate::{ccx_options, current_fps, total_frames_count, MPEG_CLOCK_FREQ};
use lib_ccxr::common::{AvcNalType, BitStreamRust, BitstreamError, FRAMERATES_VALUES, SLICE_TYPES};
use lib_ccxr::util::log::DebugMessageFlag;
use lib_ccxr::{debug, info};
use std::os::raw::{c_char, c_long};
use std::os::raw::c_char;

/// Process sequence parameter set RBSP
pub fn seq_parameter_set_rbsp(
Expand Down Expand Up @@ -630,7 +630,7 @@ pub unsafe fn slice_header(
msg_type = DebugMessageFlag::TIME;
" sync_pts:{} ({:8})",
std::ffi::CStr::from_ptr(ccxr_print_mstime_static(
((*dec_ctx.timing).sync_pts / ((MPEG_CLOCK_FREQ as i64) / 1000i64)) as c_long,
(*dec_ctx.timing).sync_pts / ((MPEG_CLOCK_FREQ as i64) / 1000i64),
buf.as_mut_ptr()
))
.to_str()
Expand Down
101 changes: 86 additions & 15 deletions src/rust/src/decoder/output.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,14 +85,41 @@ impl<'a> Writer<'a> {

/// Write the symbol to the provided buffer
///
/// Always writes 2 bytes for consistent UTF-16BE encoding.
/// Previously, this function wrote 1 byte for ASCII characters and 2 bytes
/// for non-ASCII, creating an invalid mix that encoding conversion couldn't
/// handle properly. This caused garbled output with Japanese/Chinese characters
/// (issue #1451).
pub fn write_char(sym: &dtvcc_symbol, buf: &mut Vec<u8>) {
buf.push((sym.sym >> 8) as u8);
buf.push((sym.sym & 0xff) as u8);
/// `use_utf16` selects how many bytes are appended to `buf` for the symbol:
/// - `true`: both bytes of `sym.sym` are written, high byte first, even when
///   the high byte is zero. This is the fixed-width shape used for
///   UTF-16/UCS-2 charsets.
/// - `false`: a zero high byte is omitted, so symbols that fit in the low byte
///   take one byte and all others take two. This is the shape used for
///   variable-width encodings such as EUC-KR, CP949 or Shift-JIS.
///
/// Issue #1451: Japanese/Chinese output via UTF-16BE needs 2 bytes per symbol.
/// Issue #1065: Korean output via EUC-KR needs 1 byte for ASCII, 2 for Korean.
pub fn write_char(sym: &dtvcc_symbol, buf: &mut Vec<u8>, use_utf16: bool) {
    let lead = (sym.sym >> 8) as u8;
    let trail = (sym.sym & 0xff) as u8;

    match (use_utf16, lead) {
        // Variable-width mode with an empty high byte: the low byte alone.
        (false, 0) => buf.push(trail),
        // UTF-16 mode, or a non-zero high byte: both bytes, big-endian order.
        _ => buf.extend_from_slice(&[lead, trail]),
    }
}

/// Report whether `charset` names a UTF-16 or UCS-2 encoding.
///
/// The name is upper-cased and then searched for any of the known spellings
/// (with `-`, `_` or no separator), so the check is case-insensitive and also
/// accepts suffixed names such as `UTF-16BE`. These are fixed-width 16-bit
/// encodings where even ASCII needs 2 bytes.
pub fn is_utf16_charset(charset: &str) -> bool {
    const MARKERS: [&str; 6] = ["UTF-16", "UTF16", "UCS-2", "UCS2", "UTF_16", "UCS_2"];

    let normalized = charset.to_uppercase();
    MARKERS.iter().any(|marker| normalized.contains(*marker))
}

/// Convert from CEA-708 color representation to hex code
Expand All @@ -114,27 +141,71 @@ mod tests {
use super::*;

#[test]
fn test_write_char() {
fn test_write_char_utf16_mode() {
let mut buf = Vec::new();

// Write ASCII symbol - UTF-16BE always uses 2 bytes
// 'A' (0x41) becomes [0x00, 0x41] in UTF-16BE
// UTF-16 mode: ASCII symbol 'A' (0x41) becomes [0x00, 0x41]
let sym = dtvcc_symbol { sym: 0x41, init: 0 };
write_char(&sym, &mut buf);
write_char(&sym, &mut buf, true);
assert_eq!(buf, vec![0x00, 0x41]);

buf.clear();

// Write non-ASCII symbol (e.g., Japanese character)
// Already 16-bit, writes as [high_byte, low_byte]
// UTF-16 mode: Non-ASCII symbol writes as [high_byte, low_byte]
let sym = dtvcc_symbol {
sym: 0x1234,
init: 0,
};
write_char(&sym, &mut buf);
write_char(&sym, &mut buf, true);
assert_eq!(buf, vec![0x12, 0x34]);
}

#[test]
fn test_write_char_variable_width_mode() {
    // Encode a single symbol in variable-width mode into a fresh buffer.
    let encode = |code| {
        let mut out = Vec::new();
        write_char(&dtvcc_symbol { sym: code, init: 0 }, &mut out, false);
        out
    };

    // ASCII 'A' (0x41) is emitted as a single byte.
    assert_eq!(encode(0x41), vec![0x41]);

    // A two-byte symbol keeps both bytes, high byte first.
    // Example: Korean '인' = 0xC0CE in EUC-KR
    assert_eq!(encode(0xC0CE), vec![0xC0, 0xCE]);

    // Space (0x20) is a single byte as well, with no leading NUL.
    assert_eq!(encode(0x20), vec![0x20]);
}

#[test]
fn test_is_utf16_charset() {
    // UTF-16 / UCS-2 spellings that must be detected.
    let fixed_width = ["UTF-16BE", "UTF-16LE", "utf-16", "UTF16", "UCS-2", "UCS2"];
    for name in fixed_width.iter() {
        assert!(is_utf16_charset(name), "expected {name:?} to be detected as UTF-16/UCS-2");
    }

    // Variable-width or single-byte encodings that must not be detected.
    let other = ["EUC-KR", "CP949", "Shift-JIS", "UTF-8", "ISO-8859-1"];
    for name in other.iter() {
        assert!(!is_utf16_charset(name), "expected {name:?} not to be detected as UTF-16/UCS-2");
    }
}

#[test]
fn test_color_to_hex() {
assert_eq!(color_to_hex(0b00_00_00), (0, 0, 0)); // Black
Expand Down
21 changes: 19 additions & 2 deletions src/rust/src/decoder/tv_screen.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ use std::{ffi::CStr, fs::File};
#[cfg(windows)]
use crate::bindings::_get_osfhandle;

use super::output::{color_to_hex, write_char, Writer};
use super::output::{color_to_hex, is_utf16_charset, write_char, Writer};
use super::timing::{get_scc_time_str, get_time_str};
use super::{CCX_DTVCC_SCREENGRID_COLUMNS, CCX_DTVCC_SCREENGRID_ROWS};
use crate::{
Expand Down Expand Up @@ -177,6 +177,23 @@ impl dtvcc_tv_screen {
let (first, last) = self.get_write_interval(row_index);
debug!("First: {first}, Last: {last}");

// Determine if we should use UTF-16 mode (2 bytes for all chars) or
// variable-width mode (1 byte for ASCII, 2 bytes for extended chars).
// UTF-16/UCS-2 encodings require 2 bytes even for ASCII.
// Variable-width encodings (EUC-KR, CP949, Shift-JIS, etc.) use 1 byte for ASCII.
let use_utf16 = if !writer.writer_ctx.charset.is_null() {
let charset = unsafe {
CStr::from_ptr(writer.writer_ctx.charset)
.to_str()
.unwrap_or("")
};
is_utf16_charset(charset)
} else {
// No charset specified - default to variable-width for backward compatibility
// with raw byte output (no encoding conversion)
false
};

for i in 0..last + 1 {
if use_colors {
self.change_pen_color(
Expand Down Expand Up @@ -219,7 +236,7 @@ impl dtvcc_tv_screen {
if i < first {
buf.push(b' ');
} else {
write_char(&self.chars[row_index][i], &mut buf)
write_char(&self.chars[row_index][i], &mut buf, use_utf16)
}
}
// there can be unclosed tags or colors after the last symbol in a row
Expand Down
6 changes: 1 addition & 5 deletions src/rust/src/es/pic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -277,12 +277,8 @@ pub unsafe fn read_pic_info(
// set_fts() is not called for each picture when use_gop_as_pts == 1.
if ccx_options.use_gop_as_pts == 1 {
// Calculate current FTS based on GOP start time + frame offset
// Cast fts_at_gop_start to i64 for cross-platform compatibility (c_long is i32 on Windows)
let frame_offset_ms = (dec_ctx.frames_since_last_gop as f64 * 1000.0 / current_fps) as i64;
#[allow(clippy::unnecessary_cast)]
{
(*dec_ctx.timing).fts_now = (fts_at_gop_start as i64) + frame_offset_ms;
}
(*dec_ctx.timing).fts_now = fts_at_gop_start + frame_offset_ms;

// Update fts_max if needed
if (*dec_ctx.timing).fts_now > (*dec_ctx.timing).fts_max {
Expand Down
6 changes: 3 additions & 3 deletions src/rust/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ use std::os::raw::{c_uchar, c_void};
use std::{
ffi::CStr,
io::Write,
os::raw::{c_char, c_double, c_int, c_long, c_uint},
os::raw::{c_char, c_double, c_int, c_uint},
};

// Mock data for rust unit tests
Expand All @@ -67,7 +67,7 @@ cfg_if! {

static mut frames_since_ref_time: c_int = 0;
static mut total_frames_count: c_uint = 0;
static mut fts_at_gop_start: c_long = 0;
static mut fts_at_gop_start: i64 = 0;
static mut gop_rollover: c_int = 0;
static mut pts_big_change: c_uint = 0;

Expand Down Expand Up @@ -144,7 +144,7 @@ extern "C" {
static mut total_frames_count: c_uint;
static mut gop_time: gop_time_code;
static mut first_gop_time: gop_time_code;
static mut fts_at_gop_start: c_long;
static mut fts_at_gop_start: i64;
static mut gop_rollover: c_int;
static mut ccx_common_timing_settings: ccx_common_timing_settings_t;
static mut capitalization_list: word_list;
Expand Down
28 changes: 14 additions & 14 deletions src/rust/src/libccxr_exports/time.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#![allow(clippy::useless_conversion)]

use std::convert::TryInto;
use std::ffi::{c_char, c_int, c_long, CStr};
use std::ffi::{c_char, c_int, CStr};

use crate::{
bindings::*, cb_708, cb_field1, cb_field2, ccx_common_timing_settings as timing_settings,
Expand Down Expand Up @@ -330,7 +330,7 @@ unsafe fn write_back_from_timing_info() {
.unwrap_or(0);
gop_time = write_gop_time_code(timing_info.gop_time);
first_gop_time = write_gop_time_code(timing_info.first_gop_time);
fts_at_gop_start = timing_info.fts_at_gop_start.millis() as c_long;
fts_at_gop_start = timing_info.fts_at_gop_start.millis();
gop_rollover = if timing_info.gop_rollover { 1 } else { 0 };
timing_settings.disable_sync_check = if timing_info.timing_settings.disable_sync_check {
1
Expand Down Expand Up @@ -412,7 +412,7 @@ pub unsafe fn write_gop_time_code(g: Option<GopTimeCode>) -> gop_time_code {
///
/// `ctx` must not be null.
#[no_mangle]
pub unsafe extern "C" fn ccxr_add_current_pts(ctx: *mut ccx_common_timing_ctx, pts: c_long) {
pub unsafe extern "C" fn ccxr_add_current_pts(ctx: *mut ccx_common_timing_ctx, pts: i64) {
apply_timing_info();
let mut context = generate_timing_context(ctx);

Expand All @@ -428,7 +428,7 @@ pub unsafe extern "C" fn ccxr_add_current_pts(ctx: *mut ccx_common_timing_ctx, p
///
/// `ctx` must not be null.
#[no_mangle]
pub unsafe extern "C" fn ccxr_set_current_pts(ctx: *mut ccx_common_timing_ctx, pts: c_long) {
pub unsafe extern "C" fn ccxr_set_current_pts(ctx: *mut ccx_common_timing_ctx, pts: i64) {
apply_timing_info();
let mut context = generate_timing_context(ctx);

Expand Down Expand Up @@ -469,7 +469,7 @@ pub unsafe extern "C" fn ccxr_set_fts(ctx: *mut ccx_common_timing_ctx) -> c_int
pub unsafe extern "C" fn ccxr_get_fts(
ctx: *mut ccx_common_timing_ctx,
current_field: c_int,
) -> c_long {
) -> i64 {
apply_timing_info();
let mut context = generate_timing_context(ctx);

Expand All @@ -485,7 +485,7 @@ pub unsafe extern "C" fn ccxr_get_fts(
write_back_to_common_timing_ctx(ctx, &context);
write_back_from_timing_info();

ans.millis().try_into().unwrap_or(0)
ans.millis()
}

/// Rust equivalent for `get_visible_end` function in C. Uses C-native types as input and output.
Expand All @@ -503,7 +503,7 @@ pub unsafe extern "C" fn ccxr_get_fts(
pub unsafe extern "C" fn ccxr_get_visible_end(
ctx: *mut ccx_common_timing_ctx,
_current_field: c_int,
) -> c_long {
) -> i64 {
apply_timing_info();
let mut context = generate_timing_context(ctx);

Expand All @@ -518,7 +518,7 @@ pub unsafe extern "C" fn ccxr_get_visible_end(
write_back_to_common_timing_ctx(ctx, &context);
write_back_from_timing_info();

fts as c_long
fts
}

/// Rust equivalent for `get_visible_start` function in C. Uses C-native types as input and output.
Expand All @@ -537,7 +537,7 @@ pub unsafe extern "C" fn ccxr_get_visible_end(
pub unsafe extern "C" fn ccxr_get_visible_start(
ctx: *mut ccx_common_timing_ctx,
_current_field: c_int,
) -> c_long {
) -> i64 {
apply_timing_info();
let context = generate_timing_context(ctx);

Expand All @@ -554,7 +554,7 @@ pub unsafe extern "C" fn ccxr_get_visible_start(
write_back_to_common_timing_ctx(ctx, &context);
write_back_from_timing_info();

fts as c_long
fts
}

/// Rust equivalent for `get_fts_max` function in C. Uses C-native types as input and output.
Expand All @@ -563,7 +563,7 @@ pub unsafe extern "C" fn ccxr_get_visible_start(
///
/// `ctx` must not be null.
#[no_mangle]
pub unsafe extern "C" fn ccxr_get_fts_max(ctx: *mut ccx_common_timing_ctx) -> c_long {
pub unsafe extern "C" fn ccxr_get_fts_max(ctx: *mut ccx_common_timing_ctx) -> i64 {
apply_timing_info();
let mut context = generate_timing_context(ctx);

Expand All @@ -572,7 +572,7 @@ pub unsafe extern "C" fn ccxr_get_fts_max(ctx: *mut ccx_common_timing_ctx) -> c_
write_back_to_common_timing_ctx(ctx, &context);
write_back_from_timing_info();

ans.millis().try_into().unwrap_or(0)
ans.millis()
}

/// Rust equivalent for `print_mstime_static` function in C. Uses C-native types as input and output.
Expand All @@ -581,8 +581,8 @@ pub unsafe extern "C" fn ccxr_get_fts_max(ctx: *mut ccx_common_timing_ctx) -> c_
///
/// `buf` must not be null. It must have sufficient length to hold the time in string form.
#[no_mangle]
pub unsafe extern "C" fn ccxr_print_mstime_static(mstime: c_long, buf: *mut c_char) -> *mut c_char {
let time = Timestamp::from_millis(mstime.into());
pub unsafe extern "C" fn ccxr_print_mstime_static(mstime: i64, buf: *mut c_char) -> *mut c_char {
let time = Timestamp::from_millis(mstime);
let ans = c::print_mstime_static(time, ':');
write_string_into_pointer(buf, &ans);
buf
Expand Down
Loading