From 3365a715a6e16a7db46a0e5b463e621237d7c40d Mon Sep 17 00:00:00 2001
From: Carlos Fernandez
Date: Mon, 29 Dec 2025 21:10:11 +0100
Subject: [PATCH 1/3] fix: Properly handle ATSC CC in private MPEG-2 streams
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This commit fixes two issues:

1. ATSC CC data in private MPEG-2 streams (stream type 0x06) was not
   being processed. The code returned the CCX_PRIVATE_MPEG2_CC buffer
   type, which was never properly implemented - it just dumped debug
   output and returned placeholder bytes.

   Fix: Treat ATSC CC in private MPEG-2 streams the same as in
   user-private streams (0x80-0x8F) by returning the CCX_PES buffer
   type. Both contain the same CC data format and should use the same
   processing path.

2. Several dump() calls were using CCX_DMT_GENERIC_NOTICES, which is
   enabled by default, causing binary output to flood the terminal
   when processing certain files.

   Fix: Changed to appropriate debug-only masks (CCX_DMT_VERBOSE,
   CCX_DMT_PARSE) so binary dumps only appear when debug mode is
   explicitly enabled.

🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- src/lib_ccx/avc_functions.c | 9 ++++----- src/lib_ccx/general_loop.c | 6 +----- src/lib_ccx/ts_functions.c | 18 ++++-------------- 3 files changed, 9 insertions(+), 24 deletions(-) diff --git a/src/lib_ccx/avc_functions.c b/src/lib_ccx/avc_functions.c index aa58e991d..cc195edce 100644 --- a/src/lib_ccx/avc_functions.c +++ b/src/lib_ccx/avc_functions.c @@ -379,11 +379,10 @@ void sei_rbsp(struct avc_ctx *ctx, unsigned char *seibuf, unsigned char *seiend) } else { - // TODO: This really really looks bad - mprint("WARNING: Unexpected SEI unit length...trying to continue."); - temp_debug = 1; - mprint("\n Failed block (at sei_rbsp) was:\n"); - dump(CCX_DMT_GENERIC_NOTICES, (unsigned char *)seibuf, seiend - seibuf, 0, 0); + // Unexpected SEI length - common with malformed streams, don't spam output + dbg_print(CCX_DMT_VERBOSE, "WARNING: Unexpected SEI unit length (parsed to %p, expected %p)...trying to continue.\n", + (void *)tbuf, (void *)(seiend - 1)); + dump(CCX_DMT_VERBOSE, (unsigned char *)seibuf, seiend - seibuf, 0, 0); ctx->num_unexpected_sei_length++; } diff --git a/src/lib_ccx/general_loop.c b/src/lib_ccx/general_loop.c index d82a03314..a99cfe144 100644 --- a/src/lib_ccx/general_loop.c +++ b/src/lib_ccx/general_loop.c @@ -75,7 +75,7 @@ int ps_get_more_data(struct lib_ccx_ctx *ctx, struct demuxer_data **ppdata) if (!ctx->demux_ctx->strangeheader) { mprint("\nNot a recognized header. Searching for next header.\n"); - dump(CCX_DMT_GENERIC_NOTICES, nextheader, 6, 0, 0); + dump(CCX_DMT_PARSE, nextheader, 6, 0, 0); // Only print the message once per loop / unrecognized header ctx->demux_ctx->strangeheader = 1; } @@ -809,10 +809,6 @@ int process_data(struct encoder_ctx *enc_ctx, struct lib_cc_decode *dec_ctx, str got = data_node->len; } } - else if (data_node->bufferdatatype == CCX_PRIVATE_MPEG2_CC) - { - got = data_node->len; // Do nothing. 
Still don't know how to process it - } else if (data_node->bufferdatatype == CCX_RAW) // Raw two byte 608 data from DVR-MS/ASF { // The asf_get_more_data() loop sets current_pts when possible diff --git a/src/lib_ccx/ts_functions.c b/src/lib_ccx/ts_functions.c index 44a199260..12702dc4c 100644 --- a/src/lib_ccx/ts_functions.c +++ b/src/lib_ccx/ts_functions.c @@ -153,12 +153,11 @@ enum ccx_bufferdata_type get_buffer_type(struct cap_info *cinfo) { return CCX_TELETEXT; } - else if (cinfo->stream == CCX_STREAM_TYPE_PRIVATE_MPEG2 && cinfo->codec == CCX_CODEC_ATSC_CC) - { - return CCX_PRIVATE_MPEG2_CC; - } - else if (cinfo->stream == CCX_STREAM_TYPE_PRIVATE_USER_MPEG2 && cinfo->codec == CCX_CODEC_ATSC_CC) + else if ((cinfo->stream == CCX_STREAM_TYPE_PRIVATE_MPEG2 || + cinfo->stream == CCX_STREAM_TYPE_PRIVATE_USER_MPEG2) && + cinfo->codec == CCX_CODEC_ATSC_CC) { + // ATSC CC can be in either private stream type - process both as PES return CCX_PES; } else @@ -567,15 +566,6 @@ int copy_capbuf_demux_data(struct ccx_demuxer *ctx, struct demuxer_data **data, if (!cinfo->capbuf || !cinfo->capbuflen) return -1; - if (ptr->bufferdatatype == CCX_PRIVATE_MPEG2_CC) - { - dump(CCX_DMT_GENERIC_NOTICES, cinfo->capbuf, cinfo->capbuflen, 0, 1); - // Bogus data, so we return something - ptr->buffer[ptr->len++] = 0xFA; - ptr->buffer[ptr->len++] = 0x80; - ptr->buffer[ptr->len++] = 0x80; - return CCX_OK; - } if (cinfo->codec == CCX_CODEC_TELETEXT) { memcpy(ptr->buffer + ptr->len, cinfo->capbuf, cinfo->capbuflen); From 25162fe40a0d8e097cebe236a8b7231db86557dd Mon Sep 17 00:00:00 2001 From: Carlos Fernandez Date: Mon, 29 Dec 2025 21:11:51 +0100 Subject: [PATCH 2/3] chore: Add build directories to .gitignore MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add build_*/ pattern and linux/build_scan/ to ignore various build output directories (build_ocr/, build_ocr_asan/, etc.) 
🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index bdb5246d4..a8b4db72d 100644 --- a/.gitignore +++ b/.gitignore @@ -17,6 +17,7 @@ CVS mac/ccextractor linux/ccextractor linux/depend +linux/build_scan/ windows/x86_64-pc-windows-msvc/** windows/Debug/** windows/Debug-OCR/** @@ -28,6 +29,7 @@ windows/Debug-Full/** windows/x64/** windows/ccextractor.VC.db build/ +build_*/ #### # Python From a71687e19fb45f6a1bce9db34c5646f86be42e75 Mon Sep 17 00:00:00 2001 From: Carlos Fernandez Date: Mon, 29 Dec 2025 21:26:56 +0100 Subject: [PATCH 3/3] fix(dvb): Enable OCR for all DVB subtitle streams, not just first MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, the `initialized_ocr` flag was stored at the program level and shared across all DVB subtitle streams within a program. This caused OCR to only initialize for the first DVB stream, leaving subsequent streams without an OCR context and unable to extract subtitles. The fix removes the `initialized_ocr` flag entirely. Each DVB subtitle decoder now gets its own OCR context, matching the behavior of DVD and VOBSUB decoders which already worked correctly with multiple streams. 
Test results with multi-language DVB sample: - Before: Second stream (0xCE0) → "No captions were found" - After: Second stream (0xCE0) → 5 subtitles extracted correctly Fixes #1067 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- src/lib_ccx/ccx_demuxer.c | 1 - src/lib_ccx/ccx_demuxer.h | 1 - src/lib_ccx/dvb_subtitle_decoder.c | 5 ++--- src/lib_ccx/dvb_subtitle_decoder.h | 2 +- src/lib_ccx/matroska.c | 2 +- src/lib_ccx/ts_info.c | 2 +- src/lib_ccx/ts_tables.c | 4 +--- src/rust/src/common.rs | 1 - src/rust/src/ctorust.rs | 1 - src/rust/src/demuxer/common_types.rs | 2 -- 10 files changed, 6 insertions(+), 15 deletions(-) diff --git a/src/lib_ccx/ccx_demuxer.c b/src/lib_ccx/ccx_demuxer.c index 9a0993760..6d104eec7 100644 --- a/src/lib_ccx/ccx_demuxer.c +++ b/src/lib_ccx/ccx_demuxer.c @@ -348,7 +348,6 @@ struct ccx_demuxer *init_demuxer(void *parent, struct demuxer_cfg *cfg) { ctx->pinfo[i].got_important_streams_min_pts[j] = UINT64_MAX; } - ctx->pinfo[i].initialized_ocr = 0; ctx->pinfo[i].version = 0xFF; // Not real in a real stream since it's 5 bits. 
FF => Not initialized } diff --git a/src/lib_ccx/ccx_demuxer.h b/src/lib_ccx/ccx_demuxer.h index 2e6eae7b1..16cd6c10c 100644 --- a/src/lib_ccx/ccx_demuxer.h +++ b/src/lib_ccx/ccx_demuxer.h @@ -35,7 +35,6 @@ struct program_info { int pid; int program_number; - int initialized_ocr; // Avoid initializing the OCR more than once uint8_t analysed_PMT_once : 1; uint8_t version; uint8_t saved_section[1021]; diff --git a/src/lib_ccx/dvb_subtitle_decoder.c b/src/lib_ccx/dvb_subtitle_decoder.c index 91edbc0cb..6299cbcfe 100644 --- a/src/lib_ccx/dvb_subtitle_decoder.c +++ b/src/lib_ccx/dvb_subtitle_decoder.c @@ -418,7 +418,7 @@ static void delete_regions(DVBSubContext *ctx) * @return DVB context kept as void* for abstraction * */ -void *dvbsub_init_decoder(struct dvb_config *cfg, int initialized_ocr) +void *dvbsub_init_decoder(struct dvb_config *cfg) { int i, r, g, b, a = 0; DVBSubContext *ctx = (DVBSubContext *)malloc(sizeof(DVBSubContext)); @@ -442,8 +442,7 @@ void *dvbsub_init_decoder(struct dvb_config *cfg, int initialized_ocr) } #ifdef ENABLE_OCR - if (!initialized_ocr) - ctx->ocr_ctx = init_ocr(ctx->lang_index); + ctx->ocr_ctx = init_ocr(ctx->lang_index); #endif ctx->version = -1; diff --git a/src/lib_ccx/dvb_subtitle_decoder.h b/src/lib_ccx/dvb_subtitle_decoder.h index 879a8a62c..abb01ef5d 100644 --- a/src/lib_ccx/dvb_subtitle_decoder.h +++ b/src/lib_ccx/dvb_subtitle_decoder.h @@ -42,7 +42,7 @@ extern "C" * @return DVB context kept as void* for abstraction * */ - void *dvbsub_init_decoder(struct dvb_config *cfg, int initialized_ocr); + void *dvbsub_init_decoder(struct dvb_config *cfg); int dvbsub_close_decoder(void **dvb_ctx); diff --git a/src/lib_ccx/matroska.c b/src/lib_ccx/matroska.c index 59f0390f3..9030ec522 100644 --- a/src/lib_ccx/matroska.c +++ b/src/lib_ccx/matroska.c @@ -1173,7 +1173,7 @@ void parse_private_codec_data(struct matroska_ctx *mkv_ctx, char *codec_id_strin memset((void *)&cnf, 0, sizeof(struct dvb_config)); parse_dvb_description(&cnf, codec_data, 
8); - dec_ctx->private_data = dvbsub_init_decoder(&cnf, 0); + dec_ctx->private_data = dvbsub_init_decoder(&cnf); free(codec_data); } diff --git a/src/lib_ccx/ts_info.c b/src/lib_ccx/ts_info.c index 5f894e09a..9abbfcd18 100644 --- a/src/lib_ccx/ts_info.c +++ b/src/lib_ccx/ts_info.c @@ -173,7 +173,7 @@ static void *init_private_data(enum ccx_code_type codec) case CCX_CODEC_TELETEXT: return telxcc_init(); case CCX_CODEC_DVB: - return dvbsub_init_decoder(NULL, 0); + return dvbsub_init_decoder(NULL); default: return NULL; } diff --git a/src/lib_ccx/ts_tables.c b/src/lib_ccx/ts_tables.c index 2fd8fd2f9..87630b5e4 100644 --- a/src/lib_ccx/ts_tables.c +++ b/src/lib_ccx/ts_tables.c @@ -399,9 +399,7 @@ int parse_PMT(struct ccx_demuxer *ctx, unsigned char *buf, int len, struct progr ret = parse_dvb_description(&cnf, es_info, desc_len); if (ret < 0) break; - ptr = dvbsub_init_decoder(&cnf, pinfo->initialized_ocr); - if (!pinfo->initialized_ocr) - pinfo->initialized_ocr = 1; + ptr = dvbsub_init_decoder(&cnf); if (ptr == NULL) break; update_capinfo(ctx, elementary_PID, stream_type, CCX_CODEC_DVB, program_number, ptr); diff --git a/src/rust/src/common.rs b/src/rust/src/common.rs index 7f47da23d..89825217d 100755 --- a/src/rust/src/common.rs +++ b/src/rust/src/common.rs @@ -1070,7 +1070,6 @@ impl CType for ProgramInfo { program_info { pid: self.pid, program_number: self.program_number, - initialized_ocr: self.initialized_ocr as c_int, _bitfield_align_1: [], _bitfield_1: bf1, version: self.version, diff --git a/src/rust/src/ctorust.rs b/src/rust/src/ctorust.rs index 6854a109c..5ab729fe3 100755 --- a/src/rust/src/ctorust.rs +++ b/src/rust/src/ctorust.rs @@ -540,7 +540,6 @@ impl FromCType for ProgramInfo { Some(ProgramInfo { pid: info.pid, program_number: info.program_number, - initialized_ocr: info.initialized_ocr != 0, analysed_pmt_once: info._bitfield_1.get_bit(0) as u8, version: info.version, saved_section: info.saved_section, diff --git a/src/rust/src/demuxer/common_types.rs 
b/src/rust/src/demuxer/common_types.rs index 9fa0d2683..b7dc2a21c 100644 --- a/src/rust/src/demuxer/common_types.rs +++ b/src/rust/src/demuxer/common_types.rs @@ -50,7 +50,6 @@ pub struct FileReport { pub struct ProgramInfo { pub pid: i32, pub program_number: i32, - pub initialized_ocr: bool, // Avoid initializing the OCR more than once pub analysed_pmt_once: u8, // 1-bit field pub version: u8, pub saved_section: [u8; SAVED_SECTIONS_PROGRAMINFO], @@ -301,7 +300,6 @@ impl Default for ProgramInfo { ProgramInfo { pid: -1, program_number: 0, - initialized_ocr: false, analysed_pmt_once: 0, version: 0, saved_section: [0; SAVED_SECTIONS_PROGRAMINFO],