From fd063931ea408f3b0cf308a89895b2929c25bbd9 Mon Sep 17 00:00:00 2001 From: Carlos Fernandez Date: Tue, 23 Dec 2025 14:28:15 +0100 Subject: [PATCH 1/4] feat(teletext): Add multi-page extraction with separate output files (#665) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement support for extracting multiple teletext pages simultaneously, with each page output to a separate file. Changes: - Support multiple --tpage arguments (e.g., --tpage 397 --tpage 398) - Create separate output files per page with _pNNN suffix (e.g., output_p397.srt, output_p398.srt) - Maintain backward compatibility for single-page extraction (no suffix) - Add per-page SRT counters for correct subtitle numbering - Fix BCD to decimal page number conversion in telxcc.c - Add --tpages-all mode support for auto-detecting all pages Tested with 21 teletext samples from the sample platform, all passing. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- src/lib_ccx/ccx_common_structs.h | 3 + src/lib_ccx/ccx_encoders_common.c | 177 ++++++++++++++++++++++++++++++ src/lib_ccx/ccx_encoders_common.h | 16 +++ src/lib_ccx/ccx_encoders_srt.c | 42 +++++-- src/lib_ccx/lib_ccx.h | 10 +- src/lib_ccx/teletext.h | 33 +++++- src/lib_ccx/telxcc.c | 71 +++++++++++- src/rust/lib_ccxr/src/teletext.rs | 13 ++- src/rust/src/args.rs | 13 ++- src/rust/src/common.rs | 9 ++ src/rust/src/parser.rs | 35 +++++- 11 files changed, 397 insertions(+), 25 deletions(-) diff --git a/src/lib_ccx/ccx_common_structs.h b/src/lib_ccx/ccx_common_structs.h index 2dd51011e..b531b547b 100644 --- a/src/lib_ccx/ccx_common_structs.h +++ b/src/lib_ccx/ccx_common_structs.h @@ -84,6 +84,9 @@ struct cc_subtitle /** Raw PTS value when this subtitle started (for DVB timing) */ LLONG start_pts; + /** Teletext page number (for multi-page extraction, issue #665) */ + uint16_t teletext_page; + struct cc_subtitle *next; struct cc_subtitle *prev; }; diff --git a/src/lib_ccx/ccx_encoders_common.c b/src/lib_ccx/ccx_encoders_common.c index 964f18cd7..306256b1a 100644 --- a/src/lib_ccx/ccx_encoders_common.c +++ b/src/lib_ccx/ccx_encoders_common.c @@ -719,6 +719,9 @@ void dinit_encoder(struct encoder_ctx **arg, LLONG current_fts) write_subtitle_file_footer(ctx, ctx->out + i); } + // Clean up teletext multi-page output files (issue #665) + dinit_teletext_outputs(ctx); + free_encoder_context(ctx->prev); dinit_output_ctx(ctx); freep(&ctx->subline); @@ -838,6 +841,15 @@ struct encoder_ctx *init_encoder(struct encoder_cfg *opt) ctx->segment_last_key_frame = 0; ctx->nospupngocr = opt->nospupngocr; + // Initialize teletext multi-page output arrays (issue #665) + ctx->tlt_out_count = 0; + for (int i = 0; i < MAX_TLT_PAGES_EXTRACT; i++) + { + ctx->tlt_out[i] = NULL; + ctx->tlt_out_pages[i] = 0; + ctx->tlt_srt_counter[i] = 0; + } + ctx->prev = NULL; return ctx; } @@ -1298,3 +1310,168 @@ void switch_output_file(struct lib_ccx_ctx *ctx, struct encoder_ctx *enc_ctx, in enc_ctx->cea_708_counter = 0; enc_ctx->srt_counter = 0; } + +/** + * Get or create the output file for a specific teletext page (issue #665) + * Creates output files on-demand with suffix _pNNN (e.g., output_p891.srt) + * Returns NULL if we're in stdout mode or if too many pages are being extracted + */ +struct ccx_s_write *get_teletext_output(struct encoder_ctx *ctx, uint16_t teletext_page) +{ + // If teletext_page is 0, use the default output + if (teletext_page == 0 || ctx->out == NULL) + return ctx->out; + + // Check if we're sending to stdout - can't do multi-page in that case + if (ctx->out[0].fh == STDOUT_FILENO) + return ctx->out; + + // Check if we already have an output file for this page + for (int i = 0; i < ctx->tlt_out_count; i++) + { + if (ctx->tlt_out_pages[i] == teletext_page) + return ctx->tlt_out[i]; + } + + // If we only have one teletext page requested, use the default output + // (no suffix needed for backward compatibility) + extern struct ccx_s_teletext_config tlt_config; + if (tlt_config.num_user_pages <= 1 && !tlt_config.extract_all_pages) + return ctx->out; + + // Need to create a new output file for this page + if (ctx->tlt_out_count >= MAX_TLT_PAGES_EXTRACT) + { + mprint("Warning: Too many teletext pages to extract (max %d), using default output for page %03d\n", + MAX_TLT_PAGES_EXTRACT, teletext_page); + return ctx->out; + } + + // Allocate the new write structure + struct ccx_s_write *new_out = (struct ccx_s_write *)malloc(sizeof(struct ccx_s_write)); + if (!new_out) + { + mprint("Error: Memory allocation failed for teletext output\n"); + return ctx->out; + } + memset(new_out, 0, sizeof(struct ccx_s_write)); + + // Create the filename with page suffix + const char *ext = get_file_extension(ctx->write_format); + char suffix[16]; + snprintf(suffix, sizeof(suffix), "_p%03d", teletext_page); + + char *basefilename = NULL; + if (ctx->out[0].filename != NULL) + { + basefilename = get_basename(ctx->out[0].filename); + } + else if (ctx->first_input_file != NULL) + { + basefilename = get_basename(ctx->first_input_file); + } + else + { + basefilename = strdup("untitled"); + } + + if (basefilename == NULL) + { + free(new_out); + return ctx->out; + } + + char *filename = create_outfilename(basefilename, suffix, ext); + free(basefilename); + + if (filename == NULL) + { + free(new_out); + return ctx->out; + } + + // Open the file + new_out->filename = filename; + new_out->fh = open(filename, O_RDWR | O_CREAT | O_TRUNC | O_BINARY, S_IREAD | S_IWRITE); + if (new_out->fh == -1) + { + mprint("Error: Failed to open output file %s: %s\n", filename, strerror(errno)); + free(filename); + free(new_out); + return ctx->out; + } + + mprint("Creating teletext output file: %s\n", filename); + + // Store in our array + int idx = ctx->tlt_out_count; + ctx->tlt_out[idx] = new_out; + ctx->tlt_out_pages[idx] = teletext_page; + ctx->tlt_srt_counter[idx] = 0; + ctx->tlt_out_count++; + + // Write the subtitle file header + write_subtitle_file_header(ctx, new_out); + + return new_out; +} + +/** + * Get the SRT counter for a specific teletext page (issue #665) + * Returns pointer to the counter, or NULL if page not found + */ +unsigned int *get_teletext_srt_counter(struct encoder_ctx *ctx, uint16_t teletext_page) +{ + // If teletext_page is 0, use the default counter + if (teletext_page == 0) + return &ctx->srt_counter; + + // Check if we're using multi-page mode + extern struct ccx_s_teletext_config tlt_config; + if (tlt_config.num_user_pages <= 1 && !tlt_config.extract_all_pages) + return &ctx->srt_counter; + + // Find the counter for this page + for (int i = 0; i < ctx->tlt_out_count; i++) + { + if (ctx->tlt_out_pages[i] == teletext_page) + return &ctx->tlt_srt_counter[i]; + } + + // Not found, use default counter + return &ctx->srt_counter; +} + +/** + * Clean up all teletext output files (issue #665) + */ +void dinit_teletext_outputs(struct encoder_ctx *ctx) +{ + if (!ctx) + return; + + for (int i = 0; i < ctx->tlt_out_count; i++) + { + if (ctx->tlt_out[i] != NULL) + { + // Write footer + write_subtitle_file_footer(ctx, ctx->tlt_out[i]); + + // Close file + if (ctx->tlt_out[i]->fh != -1) + { + close(ctx->tlt_out[i]->fh); + } + + // Free filename + if (ctx->tlt_out[i]->filename != NULL) + { + free(ctx->tlt_out[i]->filename); + } + + free(ctx->tlt_out[i]); + ctx->tlt_out[i] = NULL; + } + } + ctx->tlt_out_count = 0; +} diff --git a/src/lib_ccx/ccx_encoders_common.h b/src/lib_ccx/ccx_encoders_common.h index 4540de681..3a445232e 100644 --- a/src/lib_ccx/ccx_encoders_common.h +++ b/src/lib_ccx/ccx_encoders_common.h @@ -16,6 +16,11 @@ #include "ccx_encoders_structs.h" #include "ccx_common_option.h" +// Maximum number of teletext pages to extract simultaneously (issue #665) +#ifndef MAX_TLT_PAGES_EXTRACT +#define MAX_TLT_PAGES_EXTRACT 8 +#endif + #define REQUEST_BUFFER_CAPACITY(ctx, length) \ if (length > ctx->capacity) \ { \ @@ -169,6 +174,12 @@ struct encoder_ctx // OCR in SPUPNG int nospupngocr; + + // Teletext multi-page output (issue #665) + struct ccx_s_write *tlt_out[MAX_TLT_PAGES_EXTRACT]; // Output files per teletext page + uint16_t tlt_out_pages[MAX_TLT_PAGES_EXTRACT]; // Page numbers for each output slot + unsigned int tlt_srt_counter[MAX_TLT_PAGES_EXTRACT]; // SRT counter per page + int tlt_out_count; // Number of teletext output files }; #define INITIAL_ENC_BUFFER_CAPACITY 2048 @@ -263,4 +274,9 @@ unsigned int get_font_encoded(struct encoder_ctx *ctx, unsigned char *buffer, in struct lib_ccx_ctx; void switch_output_file(struct lib_ccx_ctx *ctx, struct encoder_ctx *enc_ctx, int track_id); + +// Teletext multi-page output (issue #665) +struct ccx_s_write *get_teletext_output(struct encoder_ctx *ctx, uint16_t teletext_page); +unsigned int *get_teletext_srt_counter(struct encoder_ctx *ctx, uint16_t teletext_page); +void dinit_teletext_outputs(struct encoder_ctx *ctx); #endif diff --git a/src/lib_ccx/ccx_encoders_srt.c b/src/lib_ccx/ccx_encoders_srt.c index 1f76e3203..865c6e229 100644 --- a/src/lib_ccx/ccx_encoders_srt.c +++ b/src/lib_ccx/ccx_encoders_srt.c @@ -6,9 +6,10 @@ #include "ocr.h" #include "ccextractor.h" -/* The timing here is not PTS based, but output based, i.e. user delay must be accounted for - if there is any */ -int write_stringz_as_srt(char *string, struct encoder_ctx *context, LLONG ms_start, LLONG ms_end) +/* Helper function to write SRT to a specific output file (issue #665 - teletext multi-page) + Takes output file descriptor and counter pointer as parameters */ +static int write_stringz_as_srt_to_output(char *string, struct encoder_ctx *context, LLONG ms_start, LLONG ms_end, + int out_fh, unsigned int *srt_counter) { int used; unsigned h1, m1, s1, ms1; @@ -20,17 +21,17 @@ int write_stringz_as_srt(char *string, struct encoder_ctx *context, LLONG ms_sta millis_to_time(ms_start, &h1, &m1, &s1, &ms1); millis_to_time(ms_end - 1, &h2, &m2, &s2, &ms2); // -1 To prevent overlapping with next line. - context->srt_counter++; - snprintf(timeline, sizeof(timeline), "%u%s", context->srt_counter, context->encoded_crlf); + (*srt_counter)++; + snprintf(timeline, sizeof(timeline), "%u%s", *srt_counter, context->encoded_crlf); used = encode_line(context, context->buffer, (unsigned char *)timeline); - write_wrapped(context->out->fh, context->buffer, used); + write_wrapped(out_fh, context->buffer, used); snprintf(timeline, sizeof(timeline), "%02u:%02u:%02u,%03u --> %02u:%02u:%02u,%03u%s", h1, m1, s1, ms1, h2, m2, s2, ms2, context->encoded_crlf); used = encode_line(context, context->buffer, (unsigned char *)timeline); dbg_print(CCX_DMT_DECODER_608, "\n- - - SRT caption - - -\n"); dbg_print(CCX_DMT_DECODER_608, "%s", timeline); - write_wrapped(context->out->fh, context->buffer, used); + write_wrapped(out_fh, context->buffer, used); int len = strlen(string); unsigned char *unescaped = (unsigned char *)malloc(len + 1); if (!unescaped) @@ -69,20 +70,28 @@ int write_stringz_as_srt(char *string, struct encoder_ctx *context, LLONG ms_sta dbg_print(CCX_DMT_DECODER_608, "\r"); dbg_print(CCX_DMT_DECODER_608, "%s\n", context->subline); } - write_wrapped(context->out->fh, el, u); - write_wrapped(context->out->fh, context->encoded_crlf, context->encoded_crlf_length); + write_wrapped(out_fh, el, u); + write_wrapped(out_fh, context->encoded_crlf, context->encoded_crlf_length); begin += strlen((const char *)begin) + 1; } dbg_print(CCX_DMT_DECODER_608, "- - - - - - - - - - - -\r\n"); - write_wrapped(context->out->fh, context->encoded_crlf, context->encoded_crlf_length); + write_wrapped(out_fh, context->encoded_crlf, context->encoded_crlf_length); free(el); free(unescaped); return 0; } +/* The timing here is not PTS based, but output based, i.e. user delay must be accounted for + if there is any */ +int write_stringz_as_srt(char *string, struct encoder_ctx *context, LLONG ms_start, LLONG ms_end) +{ + return write_stringz_as_srt_to_output(string, context, ms_start, ms_end, + context->out->fh, &context->srt_counter); +} + int write_cc_bitmap_as_srt(struct cc_subtitle *sub, struct encoder_ctx *context) { int ret = 0; @@ -155,7 +164,18 @@ int write_cc_subtitle_as_srt(struct cc_subtitle *sub, struct encoder_ctx *contex { if (sub->type == CC_TEXT) { - ret = write_stringz_as_srt(sub->data, context, sub->start_time, sub->end_time); + // For teletext multi-page extraction (issue #665), use page-specific output + struct ccx_s_write *out = get_teletext_output(context, sub->teletext_page); + unsigned int *counter = get_teletext_srt_counter(context, sub->teletext_page); + if (out && counter) + { + ret = write_stringz_as_srt_to_output(sub->data, context, sub->start_time, sub->end_time, + out->fh, counter); + } + else + { + ret = write_stringz_as_srt(sub->data, context, sub->start_time, sub->end_time); + } freep(&sub->data); sub->nb_data = 0; ret = 1; diff --git a/src/lib_ccx/lib_ccx.h b/src/lib_ccx/lib_ccx.h index 426b7c306..b38519ad3 100644 --- a/src/lib_ccx/lib_ccx.h +++ b/src/lib_ccx/lib_ccx.h @@ -43,17 +43,23 @@ struct file_report }; // Stuff for telxcc.c +#define MAX_TLT_PAGES_EXTRACT 8 // Maximum number of teletext pages to extract simultaneously + struct ccx_s_teletext_config { uint8_t verbose : 1; // should telxcc be verbose? - uint16_t page; // teletext page containing cc we want to filter + uint16_t page; // teletext page containing cc we want to filter (legacy, first page) uint16_t tid; // 13-bit packet ID for teletext stream double offset; // time offset in seconds uint8_t bom : 1; // print UTF-8 BOM characters at the beginning of output uint8_t nonempty : 1; // produce at least one (dummy) frame // uint8_t se_mode : 1; // search engine compatible mode => Uses CCExtractor's write_format // uint64_t utc_refvalue; // UTC referential value => Moved to ccx_decoders_common, so can be used for other decoders (608/xds) too - uint16_t user_page; // Page selected by user, which MIGHT be different to 'page' depending on autodetection stuff + uint16_t user_page; // Page selected by user (legacy, first page) + // Multi-page teletext extraction (issue #665) + uint16_t user_pages[MAX_TLT_PAGES_EXTRACT]; // Pages selected by user for extraction + int num_user_pages; // Number of pages to extract (0 = auto-detect single page) + int extract_all_pages; // If 1, extract all detected subtitle pages int dolevdist; // 0=Don't attempt to correct errors int levdistmincnt, levdistmaxpct; // Means 2 fails or less is "the same", 10% or less is also "the same" struct ccx_boundary_time extraction_start, extraction_end; // Segment we actually process diff --git a/src/lib_ccx/teletext.h b/src/lib_ccx/teletext.h index 41f850692..79dcbed5b 100644 --- a/src/lib_ccx/teletext.h +++ b/src/lib_ccx/teletext.h @@ -8,6 +8,7 @@ // #include #define MAX_TLT_PAGES 1000 +#define MAX_TLT_PAGES_EXTRACT 8 // Maximum pages to extract simultaneously (must match lib_ccx.h) typedef struct { @@ -18,6 +19,28 @@ typedef struct uint8_t tainted; // 1 = text variable contains any data } teletext_page_t; +// Per-page state for multi-page extraction (issue #665) +typedef struct +{ + uint16_t page_number; // BCD-encoded page number (0 = unused slot) + teletext_page_t page_buffer; // Current page content being received + char *page_buffer_prev; // Previous formatted output + char *page_buffer_cur; // Current formatted output + unsigned page_buffer_cur_size; + unsigned page_buffer_cur_used; + unsigned page_buffer_prev_size; + unsigned page_buffer_prev_used; + uint64_t *ucs2_buffer_prev; // Previous comparison string + uint64_t *ucs2_buffer_cur; // Current comparison string + unsigned ucs2_buffer_cur_size; + unsigned ucs2_buffer_cur_used; + unsigned ucs2_buffer_prev_size; + unsigned ucs2_buffer_prev_used; + uint64_t prev_hide_timestamp; + uint64_t prev_show_timestamp; + uint8_t receiving_data; // Currently receiving data for this page +} teletext_page_state_t; + // application states -- flags for notices that should be printed only once struct s_states { @@ -62,8 +85,14 @@ struct TeletextCtx char millis_separator; uint32_t global_timestamp; - // Current and previous page buffers. This is the output written to file when - // the time comes. + // Multi-page extraction state (issue #665) + teletext_page_state_t page_states[MAX_TLT_PAGES_EXTRACT]; // Per-page state + int num_active_pages; // Number of pages being extracted + int current_page_idx; // Index of page currently receiving data (-1 = none) + int multi_page_mode; // 1 = multi-page mode active + + // Current and previous page buffers (legacy single-page mode) + // These are still used when multi_page_mode == 0 for backward compatibility teletext_page_t page_buffer; char *page_buffer_prev; char *page_buffer_cur; diff --git a/src/lib_ccx/telxcc.c b/src/lib_ccx/telxcc.c index e28091684..74560ecac 100644 --- a/src/lib_ccx/telxcc.c +++ b/src/lib_ccx/telxcc.c @@ -538,6 +538,13 @@ void telxcc_dump_prev_page(struct TeletextCtx *ctx, struct cc_subtitle *sub) add_cc_sub_text(sub, ctx->page_buffer_prev, ctx->prev_show_timestamp, ctx->prev_hide_timestamp, info, "TLT", CCX_ENC_UTF_8); + // Set teletext page number for multi-page extraction (issue #665) + // Find the last subtitle node and set its teletext_page (in decimal format) + struct cc_subtitle *last_sub = sub; + while (last_sub->next) + last_sub = last_sub->next; + last_sub->teletext_page = bcd_page_to_int(tlt_config.page); + if (ctx->page_buffer_prev) free(ctx->page_buffer_prev); if (ctx->ucs2_buffer_prev) @@ -875,6 +882,13 @@ void process_page(struct TeletextCtx *ctx, teletext_page_t *page, struct cc_subt default: add_cc_sub_text(sub, ctx->page_buffer_cur, page->show_timestamp, page->hide_timestamp + 1, NULL, "TLT", CCX_ENC_UTF_8); + // Set teletext page number for multi-page extraction (issue #665) + { + struct cc_subtitle *last_sub = sub; + while (last_sub->next) + last_sub = last_sub->next; + last_sub->teletext_page = bcd_page_to_int(tlt_config.page); + } } // Also update GUI... @@ -886,6 +900,40 @@ void process_page(struct TeletextCtx *ctx, teletext_page_t *page, struct cc_subt fflush(stderr); } +// Helper function to check if a page should be accepted for extraction (issue #665) +// Returns 1 if the page should be accepted, 0 otherwise +static int should_accept_page(uint16_t page_number, int is_subtitle_page) +{ + // If extract_all_pages is set, accept all subtitle pages + if (tlt_config.extract_all_pages && is_subtitle_page) + return 1; + + // If multiple pages are specified, check against the list + if (tlt_config.num_user_pages > 0) + { + // Convert BCD page_number to decimal for comparison + int page_dec = bcd_page_to_int(page_number); + for (int i = 0; i < tlt_config.num_user_pages && i < MAX_TLT_PAGES_EXTRACT; i++) + { + if (tlt_config.user_pages[i] == page_dec) + return 1; + } + return 0; + } + + // Legacy single-page mode: check against tlt_config.page + if (tlt_config.page == 0) // Auto-detect mode + return is_subtitle_page; + + return (page_number == tlt_config.page); +} + +// Check if we're in multi-page extraction mode +static int is_multi_page_mode(void) +{ + return (tlt_config.extract_all_pages || tlt_config.num_user_pages > 1); +} + void process_telx_packet(struct TeletextCtx *ctx, data_unit_t data_unit_id, teletext_packet_payload_t *packet, uint64_t timestamp, struct cc_subtitle *sub) { // variable names conform to ETS 300 706, chapter 7.1.2 @@ -923,7 +971,8 @@ void process_telx_packet(struct TeletextCtx *ctx, data_unit_t data_unit_id, tele } } } - if ((tlt_config.page == 0) && (flag_subtitle == YES) && (i < 0xff)) + // Auto-detect page if none specified (and not in extract_all mode) + if ((tlt_config.page == 0) && !tlt_config.extract_all_pages && (tlt_config.num_user_pages == 0) && (flag_subtitle == YES) && (i < 0xff)) { tlt_config.page = (m << 8) | (unham_8_4(packet->data[1]) << 4) | unham_8_4(packet->data[0]); mprint("- No teletext page specified, first received suitable page is %03x, not guaranteed\n", tlt_config.page); @@ -949,18 +998,34 @@ void process_telx_packet(struct TeletextCtx *ctx, data_unit_t data_unit_id, tele if ((ctx->transmission_mode == TRANSMISSION_MODE_PARALLEL) && (data_unit_id != DATA_UNIT_EBU_TELETEXT_SUBTITLE) && !(de_ctr && flag_subtitle && ctx->receiving_data == YES)) return; + // Check if this page should be accepted for extraction (issue #665) + int accept_this_page = should_accept_page(page_number, flag_subtitle); + + // Handle page transition - if we were receiving a different page, stop if ((ctx->receiving_data == YES) && (((ctx->transmission_mode == TRANSMISSION_MODE_SERIAL) && (PAGE(page_number) != PAGE(tlt_config.page))) || ((ctx->transmission_mode == TRANSMISSION_MODE_PARALLEL) && (PAGE(page_number) != PAGE(tlt_config.page)) && (m == MAGAZINE(tlt_config.page))))) { ctx->receiving_data = NO; if (!(de_ctr && flag_subtitle)) - return; + { + // In multi-page mode, check if this new page should be accepted + if (!accept_this_page) + return; + } } // Page transmission is terminated, however now we are waiting for our new page - if (page_number != tlt_config.page && !(de_ctr && flag_subtitle && ctx->receiving_data == YES)) + // Modified for multi-page support (issue #665) + if (!accept_this_page && !(de_ctr && flag_subtitle && ctx->receiving_data == YES)) return; + // Update tlt_config.page to track the current page being received + // This is needed so process_page knows which page to tag the output with + if (accept_this_page && page_number != tlt_config.page) + { + tlt_config.page = page_number; + } + // Now we have the begining of page transmission; if there is page_buffer pending, process it if (ctx->page_buffer.tainted == YES) { diff --git a/src/rust/lib_ccxr/src/teletext.rs b/src/rust/lib_ccxr/src/teletext.rs index 03d08c79d..cb8314426 100644 --- a/src/rust/lib_ccxr/src/teletext.rs +++ b/src/rust/lib_ccxr/src/teletext.rs @@ -721,15 +721,22 @@ pub struct TeletextPage { tainted: bool, // true = text variable contains any data } +/// Maximum number of teletext pages to extract simultaneously (must match C MAX_TLT_PAGES_EXTRACT) +pub const MAX_TLT_PAGES_EXTRACT: usize = 8; + /// Settings required to contruct a [`TeletextContext`]. #[derive(Debug)] pub struct TeletextConfig { /// should telxcc logging be verbose? pub verbose: bool, - /// teletext page containing cc we want to filter + /// teletext page containing cc we want to filter (legacy single page) pub page: Cell, - /// Page selected by user, which MIGHT be different to `page` depending on autodetection stuff + /// Page selected by user (legacy single page) pub user_page: u16, + /// Pages selected by user for multi-page extraction (issue #665) + pub user_pages: Vec, + /// Extract all detected subtitle pages + pub extract_all_pages: bool, /// false = Don't attempt to correct errors pub dolevdist: bool, /// Means 2 fails or less is "the same" @@ -757,6 +764,8 @@ impl Default for TeletextConfig { verbose: true, page: TeletextPageNumber(0).into(), user_page: 0, + user_pages: Vec::new(), + extract_all_pages: false, dolevdist: false, levdistmincnt: 0, levdistmaxpct: 0, diff --git a/src/rust/src/args.rs b/src/rust/src/args.rs index 8194d9e00..f802281df 100644 --- a/src/rust/src/args.rs +++ b/src/rust/src/args.rs @@ -821,8 +821,17 @@ pub struct Args { /// Use this page for subtitles (if this parameter /// is not used, try to autodetect). In Spain the /// page is always 888, may vary in other countries. - #[arg(long, verbatim_doc_comment, value_name="page", help_heading=TELETEXT_OPTIONS)] - pub tpage: Option, + /// You can specify multiple pages by using --tpage + /// multiple times (e.g., --tpage 891 --tpage 892). + /// Each page will be output to a separate file with + /// suffix _pNNN (e.g., output_p891.srt, output_p892.srt). + #[arg(long, verbatim_doc_comment, value_name="page", action = clap::ArgAction::Append, help_heading=TELETEXT_OPTIONS)] + pub tpage: Option>, + /// Extract all teletext subtitle pages found in the stream. + /// Each page will be output to a separate file with + /// suffix _pNNN (e.g., output_p891.srt, output_p892.srt). + #[arg(long, verbatim_doc_comment, help_heading=TELETEXT_OPTIONS)] + pub tpages_all: bool, /// Enable verbose mode in the teletext decoder. #[arg(long, verbatim_doc_comment, help_heading=TELETEXT_OPTIONS)] pub tverbose: bool, diff --git a/src/rust/src/common.rs b/src/rust/src/common.rs index e582fea89..e5af90d4d 100755 --- a/src/rust/src/common.rs +++ b/src/rust/src/common.rs @@ -554,6 +554,12 @@ unsafe fn c_char_to_string(c_str: *const ::std::os::raw::c_char) -> String { } impl CType2 for TeletextConfig { unsafe fn to_ctype(&self, value: &Options) -> ccx_s_teletext_config { + // Initialize user_pages array (issue #665) + let mut user_pages_arr = [0u16; 8]; // MAX_TLT_PAGES_EXTRACT = 8 + for (i, &page) in self.user_pages.iter().take(8).enumerate() { + user_pages_arr[i] = page; + } + let mut config = ccx_s_teletext_config { _bitfield_1: Default::default(), _bitfield_2: Default::default(), @@ -563,6 +569,9 @@ impl CType2 for TeletextConfig { tid: 0, offset: 0.0, user_page: self.user_page, + user_pages: user_pages_arr, + num_user_pages: self.user_pages.len().min(8) as i32, + extract_all_pages: self.extract_all_pages.into(), dolevdist: self.dolevdist.into(), levdistmincnt: self.levdistmincnt.into(), levdistmaxpct: self.levdistmaxpct.into(), diff --git a/src/rust/src/parser.rs b/src/rust/src/parser.rs index bfa97c9e7..524ab8a7f 100644 --- a/src/rust/src/parser.rs +++ b/src/rust/src/parser.rs @@ -1201,9 +1201,29 @@ impl OptionsExt for Options { } } - if let Some(ref tpage) = args.tpage { - tlt_config.user_page = get_atoi_hex::(tpage.as_str()) as _; - tlt_config.page = Cell::new(TeletextPageNumber::from(tlt_config.user_page)); + if let Some(ref tpages) = args.tpage { + // Support multiple --tpage arguments (issue #665) + if tpages.len() == 1 { + // Single page - legacy mode + tlt_config.user_page = tpages[0]; + tlt_config.page = Cell::new(TeletextPageNumber::from(tlt_config.user_page)); + } else { + // Multiple pages - each gets a separate output file + for &page_num in tpages { + if page_num >= 100 && page_num <= 899 { + tlt_config.user_pages.push(page_num); + } + } + // Set first page as legacy value for backward compatibility + if !tlt_config.user_pages.is_empty() { + tlt_config.user_page = tlt_config.user_pages[0]; + tlt_config.page = Cell::new(TeletextPageNumber::from(tlt_config.user_page)); + } + } + } + + if args.tpages_all { + tlt_config.extract_all_pages = true; } // Red Hen/ UCLA Specific stuff @@ -1484,6 +1504,15 @@ impl OptionsExt for Options { "Teletext page number out of range (100-899)" ); } + // Validate multiple pages if specified (issue #665) + for page in &tlt_config.user_pages { + if *page < 100 || *page > 899 { + fatal!( + cause = ExitCause::NotClassified; + "Teletext page number {} out of range (100-899)", page + ); + } + } if self.is_inputfile_empty() && self.input_source == DataSource::File { fatal!( From cbb5f0b0a8fede21a62685eee4fd20b30b38a1cd Mon Sep 17 00:00:00 2001 From: Carlos Fernandez Date: Tue, 23 Dec 2025 14:41:18 +0100 Subject: [PATCH 2/4] fix(clippy): Use RangeInclusive::contains() instead of manual range check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- src/rust/src/parser.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rust/src/parser.rs b/src/rust/src/parser.rs index 524ab8a7f..2a28e32d7 100644 --- a/src/rust/src/parser.rs +++ b/src/rust/src/parser.rs @@ -1210,7 +1210,7 @@ impl OptionsExt for Options { } else { // Multiple pages - each gets a separate output file for &page_num in tpages { - if page_num >= 100 && page_num <= 899 { + if (100..=899).contains(&page_num) { tlt_config.user_pages.push(page_num); } } From 1d9f32239ec16d9f69384f8e58d8dc13db2226bd Mon Sep 17 00:00:00 2001 From: Carlos Fernandez Date: Tue, 23 Dec 2025 15:43:54 +0100 Subject: [PATCH 3/4] docs: Add doxygen comments to should_accept_page function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- src/lib_ccx/telxcc.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/lib_ccx/telxcc.c b/src/lib_ccx/telxcc.c index 74560ecac..becb0c1ab 100644 --- a/src/lib_ccx/telxcc.c +++ b/src/lib_ccx/telxcc.c @@ -900,8 +900,12 @@ void process_page(struct TeletextCtx *ctx, teletext_page_t *page, struct cc_subt fflush(stderr); } -// Helper function to check if a page should be accepted for extraction (issue #665) -// Returns 1 if the page should be accepted, 0 otherwise +/** + * Helper function to check if a page should be accepted for extraction (issue #665) + * @param page_number The teletext page number in BCD format + * @param is_subtitle_page Whether this page is marked as a subtitle page + * @return 1 if the page should be accepted, 0 otherwise + */ static int should_accept_page(uint16_t page_number, int is_subtitle_page) { // If extract_all_pages is set, accept all subtitle pages From be239a5c466a1b7a88ccd80eb769c32cb1f3d18b Mon Sep 17 00:00:00 2001 From: Carlos Fernandez Date: Tue, 23 Dec 2025 16:36:02 +0100 Subject: [PATCH 4/4] fix: Restore teletext auto-detect mode for single-page extraction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The page update logic at line 1029-1035 was incorrectly updating tlt_config.page for all accepted pages, even in single-page auto-detect mode. This caused the auto-detect logic at line 979 to be bypassed because the first packet (even with an invalid page number like 0xFF) would set tlt_config.page, preventing proper auto-detection. The fix restricts the page update to multi-page mode only. In single-page mode, tlt_config.page is set exclusively by: 1. User specification (--tpage option) 2. Auto-detect logic (first valid subtitle page found) This fixes regression in SP Test 76 which uses sample 8c1615c1a84d4b9b34134bde8085214bb93305407e935edcdfd4c2fc522c215f.mpg with --autoprogram --out=ttxt --latin1. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- src/lib_ccx/telxcc.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/lib_ccx/telxcc.c b/src/lib_ccx/telxcc.c index becb0c1ab..fb32d9dda 100644 --- a/src/lib_ccx/telxcc.c +++ b/src/lib_ccx/telxcc.c @@ -1023,9 +1023,10 @@ void process_telx_packet(struct TeletextCtx *ctx, data_unit_t data_unit_id, tele if (!accept_this_page && !(de_ctr && flag_subtitle && ctx->receiving_data == YES)) return; - // Update tlt_config.page to track the current page being received - // This is needed so process_page knows which page to tag the output with - if (accept_this_page && page_number != tlt_config.page) + // Update tlt_config.page to track the current page being received (multi-page mode only) + // In single-page mode, tlt_config.page is set by auto-detect logic or user specification + // This prevents overwriting auto-detect selection with an arbitrary page number + if (is_multi_page_mode() && accept_this_page && page_number != tlt_config.page) { tlt_config.page = page_number; }