diff --git a/linux/Makefile.am b/linux/Makefile.am
index 3019c7e03..eaf728dca 100644
--- a/linux/Makefile.am
+++ b/linux/Makefile.am
@@ -151,6 +151,8 @@ ccextractor_SOURCES = \
 	../src/lib_ccx/list.h \
 	../src/lib_ccx/matroska.c \
 	../src/lib_ccx/matroska.h \
+	../src/lib_ccx/vobsub_decoder.c \
+	../src/lib_ccx/vobsub_decoder.h \
 	../src/lib_ccx/mp4.c \
 	../src/lib_ccx/myth.c \
 	../src/lib_ccx/networking.c \
diff --git a/mac/Makefile.am b/mac/Makefile.am
index 3b98d4490..2874b97f6 100644
--- a/mac/Makefile.am
+++ b/mac/Makefile.am
@@ -123,6 +123,8 @@ ccextractor_SOURCES = \
 	../src/lib_ccx/list.h \
 	../src/lib_ccx/matroska.c \
 	../src/lib_ccx/matroska.h \
+	../src/lib_ccx/vobsub_decoder.c \
+	../src/lib_ccx/vobsub_decoder.h \
 	../src/lib_ccx/mp4.c \
 	../src/lib_ccx/myth.c \
 	../src/lib_ccx/networking.c \
diff --git a/src/lib_ccx/matroska.c b/src/lib_ccx/matroska.c
index 1b5cfe912..59f0390f3 100644
--- a/src/lib_ccx/matroska.c
+++ b/src/lib_ccx/matroska.c
@@ -6,6 +6,7 @@
 #include
 #include
 #include "dvb_subtitle_decoder.h"
+#include "vobsub_decoder.h"
 
 void skip_bytes(FILE *file, ULLONG n)
 {
@@ -1426,6 +1427,114 @@ static void generate_vobsub_timestamp(char *buf, size_t bufsize, ULLONG millisec
 		 hours, minutes, seconds, ms);
 }
 
+/* Check if output format is text-based (requires OCR for bitmap subtitles) */
+static int is_text_output_format(enum ccx_output_format format)
+{
+	return (format == CCX_OF_SRT || format == CCX_OF_SSA ||
+		format == CCX_OF_WEBVTT || format == CCX_OF_TRANSCRIPT ||
+		format == CCX_OF_SAMI || format == CCX_OF_SMPTETT);
+}
+
+/* VOBSUB support: Process VOBSUB track with OCR and output text format */
+static void process_vobsub_track_ocr(struct matroska_ctx *mkv_ctx, struct matroska_sub_track *track)
+{
+	if (track->sentence_count == 0)
+	{
+		mprint("\nNo VOBSUB subtitles to process");
+		return;
+	}
+
+	/* Check if OCR is available */
+	if (!vobsub_ocr_available())
+	{
+		fatal(EXIT_NOT_CLASSIFIED,
+		      "VOBSUB to text conversion requires OCR support.\n"
+		      "Please rebuild CCExtractor with -DWITH_OCR=ON or use raw output (--out=idx)");
+	}
+
+	/* Initialize VOBSUB decoder */
+	struct vobsub_ctx *vob_ctx = init_vobsub_decoder();
+	if (!vob_ctx)
+	{
+		fatal(EXIT_NOT_CLASSIFIED,
+		      "VOBSUB to text conversion requires OCR, but initialization failed.\n"
+		      "Please ensure Tesseract is installed with language data.");
+	}
+
+	/* Parse palette from track header (CodecPrivate) */
+	if (track->header)
+	{
+		vobsub_parse_palette(vob_ctx, track->header);
+	}
+
+	mprint("\nProcessing VOBSUB track with OCR (%d subtitles)", track->sentence_count);
+
+	/* Get encoder context for output */
+	struct encoder_ctx *enc_ctx = update_encoder_list(mkv_ctx->ctx);
+
+	/* Process each subtitle */
+	for (int i = 0; i < track->sentence_count; i++)
+	{
+		struct matroska_sub_sentence *sentence = track->sentences[i];
+		mkv_ctx->sentence_count++;
+
+		/* Calculate end time (use next subtitle start if not specified) */
+		ULLONG end_time = sentence->time_end;
+		if (end_time == 0 && i + 1 < track->sentence_count)
+		{
+			end_time = track->sentences[i + 1]->time_start - 1;
+		}
+		else if (end_time == 0)
+		{
+			end_time = sentence->time_start + 5000; /* Default 5 second duration */
+		}
+
+		/* Decode SPU and run OCR */
+		struct cc_subtitle sub;
+		memset(&sub, 0, sizeof(sub));
+
+		int ret = vobsub_decode_spu(vob_ctx,
+					    (unsigned char *)sentence->text,
+					    sentence->text_size,
+					    sentence->time_start,
+					    end_time,
+					    &sub);
+
+		if (ret == 0 && sub.got_output)
+		{
+			/* Encode the subtitle to output format */
+			encode_sub(enc_ctx, &sub);
+
+			/* Free subtitle data */
+			if (sub.data)
+			{
+				struct cc_bitmap *rect = (struct cc_bitmap *)sub.data;
+				for (int j = 0; j < sub.nb_data; j++)
+				{
+					if (rect[j].data0)
+						free(rect[j].data0);
+					if (rect[j].data1)
+						free(rect[j].data1);
+#ifdef ENABLE_OCR
+					if (rect[j].ocr_text)
+						free(rect[j].ocr_text);
+#endif
+				}
+				free(sub.data);
+			}
+		}
+
+		/* Progress indicator */
+		if ((i + 1) % 50 == 0 || i + 1 == track->sentence_count)
+		{
+			mprint("\rProcessing VOBSUB: %d/%d subtitles", i + 1, track->sentence_count);
+		}
+	}
+
+	delete_vobsub_decoder(&vob_ctx);
+	mprint("\nVOBSUB OCR processing complete");
+}
+
 /* VOBSUB support: Save VOBSUB track to .idx and .sub files */
 #define VOBSUB_BLOCK_SIZE 2048
 static void save_vobsub_track(struct matroska_ctx *mkv_ctx, struct matroska_sub_track *track)
@@ -1564,10 +1673,21 @@ void save_sub_track(struct matroska_ctx *mkv_ctx, struct matroska_sub_track *tra
 	char *filename;
 	int desc;
 
-	// VOBSUB tracks need special handling - separate .idx and .sub files
+	// VOBSUB tracks need special handling
 	if (track->codec_id == MATROSKA_TRACK_SUBTITLE_CODEC_ID_VOBSUB)
 	{
-		save_vobsub_track(mkv_ctx, track);
+		// Check if user wants text output (SRT, SSA, WebVTT, etc.)
+		if (ccx_options.write_format_rewritten &&
+		    is_text_output_format(ccx_options.enc_cfg.write_format))
+		{
+			// Use OCR to convert VOBSUB to text
+			process_vobsub_track_ocr(mkv_ctx, track);
+		}
+		else
+		{
+			// Output raw idx/sub files
+			save_vobsub_track(mkv_ctx, track);
+		}
 		return;
 	}
 
@@ -1846,8 +1966,13 @@ int matroska_loop(struct lib_ccx_ctx *ctx)
 {
 	if (ccx_options.write_format_rewritten)
 	{
-		mprint(MATROSKA_WARNING "You are using --out=, but Matroska parser extract subtitles in a recorded format\n");
-		mprint("--out= will be ignored\n");
+		/* Note: For VOBSUB tracks, text output formats (SRT, SSA, etc.) are
+		 * supported via OCR. For other subtitle types, the native format is used. */
+		if (!is_text_output_format(ccx_options.enc_cfg.write_format))
+		{
+			mprint(MATROSKA_WARNING "You are using --out=, but Matroska parser extracts subtitles in their recorded format\n");
+			mprint("--out= will be ignored for non-VOBSUB tracks\n");
+		}
 	}
 
 	// Don't need generated input file
diff --git a/src/lib_ccx/mp4.c b/src/lib_ccx/mp4.c
index aad0c460f..6f8392a4b 100644
--- a/src/lib_ccx/mp4.c
+++ b/src/lib_ccx/mp4.c
@@ -12,6 +12,7 @@
 #include "ccx_mp4.h"
 #include "activity.h"
 #include "ccx_dtvcc.h"
+#include "vobsub_decoder.h"
 
 #define MEDIA_TYPE(type, subtype) (((u64)(type) << 32) + (subtype))
 
@@ -25,6 +26,11 @@
 #define GF_ISOM_SUBTYPE_HVC1 GF_4CC('h', 'v', 'c', '1')
 #endif
 
+// VOBSUB subtype (mp4s or MPEG)
+#ifndef GF_ISOM_SUBTYPE_MPEG4
+#define GF_ISOM_SUBTYPE_MPEG4 GF_4CC('M', 'P', 'E', 'G')
+#endif
+
 static short bswap16(short v)
 {
 	return ((v >> 8) & 0x00FF) | ((v << 8) & 0xFF00);
@@ -410,6 +416,148 @@ static int process_hevc_track(struct lib_ccx_ctx *ctx, const char *basename, GF_
 	return status;
 }
 
+static int process_vobsub_track(struct lib_ccx_ctx *ctx, GF_ISOFile *f, u32 track, struct cc_subtitle *sub)
+{
+	u32 timescale, i, sample_count;
+	int status = 0;
+	struct lib_cc_decode *dec_ctx = NULL;
+	struct encoder_ctx *enc_ctx = NULL;
+	struct vobsub_ctx *vob_ctx = NULL;
+
+	dec_ctx = update_decoder_list(ctx);
+	enc_ctx = update_encoder_list(ctx);
+
+	if ((sample_count = gf_isom_get_sample_count(f, track)) < 1)
+	{
+		return 0;
+	}
+
+	timescale = gf_isom_get_media_timescale(f, track);
+
+	/* Check if OCR is available */
+	if (!vobsub_ocr_available())
+	{
+		fatal(EXIT_NOT_CLASSIFIED,
+		      "VOBSUB to text conversion requires OCR support.\n"
+		      "Please rebuild CCExtractor with -DWITH_OCR=ON");
+	}
+
+	/* Initialize VOBSUB decoder */
+	vob_ctx = init_vobsub_decoder();
+	if (!vob_ctx)
+	{
+		fatal(EXIT_NOT_CLASSIFIED,
+		      "VOBSUB decoder initialization failed.\n"
+		      "Please ensure Tesseract is installed with language data.");
+	}
+
+	/* Try to get decoder config for palette info */
+	GF_GenericSampleDescription *gdesc = gf_isom_get_generic_sample_description(f, track, 1);
+	if (gdesc && gdesc->extension_buf && gdesc->extension_buf_size > 0)
+	{
+		/* The extension buffer may contain an idx-like header with palette */
+		char *header = malloc(gdesc->extension_buf_size + 1);
+		if (header)
+		{
+			memcpy(header, gdesc->extension_buf, gdesc->extension_buf_size);
+			header[gdesc->extension_buf_size] = '\0';
+			vobsub_parse_palette(vob_ctx, header);
+			free(header);
+		}
+	}
+	if (gdesc)
+	{
+		/* extension_buf is its own allocation; freeing only the struct leaks it.
+		 * NOTE(review): GPAC allocates both - confirm free() vs gf_free(). */
+		free(gdesc->extension_buf);
+		free(gdesc);
+	}
+
+	mprint("Processing VOBSUB track (%u samples)\n", sample_count);
+
+	for (i = 0; i < sample_count; i++)
+	{
+		u32 sdi;
+		GF_ISOSample *s = gf_isom_get_sample(f, track, i + 1, &sdi);
+
+		if (s != NULL)
+		{
+			s32 signed_cts = (s32)s->CTS_Offset;
+			LLONG start_time_ms = (LLONG)((s->DTS + signed_cts) * 1000) / timescale;
+
+			/* Calculate end time from next sample if available */
+			LLONG end_time_ms = 0;
+			if (i + 1 < sample_count)
+			{
+				u32 next_sdi;
+				GF_ISOSample *next_s = gf_isom_get_sample(f, track, i + 2, &next_sdi);
+				if (next_s)
+				{
+					s32 next_signed_cts = (s32)next_s->CTS_Offset;
+					end_time_ms = (LLONG)((next_s->DTS + next_signed_cts) * 1000) / timescale;
+					gf_isom_sample_del(&next_s);
+				}
+			}
+			if (end_time_ms == 0)
+				end_time_ms = start_time_ms + 5000; /* Default 5 second duration */
+
+			set_current_pts(dec_ctx->timing, (s->DTS + signed_cts) * MPEG_CLOCK_FREQ / timescale);
+			set_fts(dec_ctx->timing);
+
+			/* Decode SPU and run OCR */
+			struct cc_subtitle vob_sub;
+			memset(&vob_sub, 0, sizeof(vob_sub));
+
+			int ret = vobsub_decode_spu(vob_ctx,
+						    (unsigned char *)s->data, s->dataLength,
+						    start_time_ms, end_time_ms,
+						    &vob_sub);
+
+			if (ret == 0 && vob_sub.got_output)
+			{
+				/* Encode the subtitle to output format */
+				encode_sub(enc_ctx, &vob_sub);
+				sub->got_output = 1;
+
+				/* Free subtitle data */
+				if (vob_sub.data)
+				{
+					struct cc_bitmap *rect = (struct cc_bitmap *)vob_sub.data;
+					for (int j = 0; j < vob_sub.nb_data; j++)
+					{
+						if (rect[j].data0)
+							free(rect[j].data0);
+						if (rect[j].data1)
+							free(rect[j].data1);
+#ifdef ENABLE_OCR
+						if (rect[j].ocr_text)
+							free(rect[j].ocr_text);
+#endif
+					}
+					free(vob_sub.data);
+				}
+			}
+
+			gf_isom_sample_del(&s);
+		}
+
+		int progress = (int)((i * 100) / sample_count);
+		if (ctx->last_reported_progress != progress)
+		{
+			int cur_sec = (int)(get_fts(dec_ctx->timing, dec_ctx->current_field) / 1000);
+			activity_progress(progress, cur_sec / 60, cur_sec % 60);
+			ctx->last_reported_progress = progress;
+		}
+	}
+
+	int cur_sec = (int)(get_fts(dec_ctx->timing, dec_ctx->current_field) / 1000);
+	activity_progress(100, cur_sec / 60, cur_sec % 60);
+
+	delete_vobsub_decoder(&vob_ctx);
+	mprint("VOBSUB processing complete\n");
+
+	return status;
+}
+
 static char *format_duration(u64 dur, u32 timescale, char *szDur, size_t szDur_size)
 {
 	u32 h, m, s, ms;
@@ -764,6 +912,7 @@ int processmp4(struct lib_ccx_ctx *ctx, struct ccx_s_mp4Cfg *cfg, char *file)
 	avc_track_count = 0;
 	hevc_track_count = 0;
 	cc_track_count = 0;
+	u32 vobsub_track_count = 0;
 
 	for (i = 0; i < track_count; i++)
 	{
@@ -779,9 +928,11 @@ int processmp4(struct lib_ccx_ctx *ctx, struct ccx_s_mp4Cfg *cfg, char *file)
 			avc_track_count++;
 		if (type == GF_ISOM_MEDIA_VISUAL && (subtype == GF_ISOM_SUBTYPE_HEV1 || subtype == GF_ISOM_SUBTYPE_HVC1))
 			hevc_track_count++;
+		if (type == GF_ISOM_MEDIA_SUBPIC && subtype == GF_ISOM_SUBTYPE_MPEG4)
+			vobsub_track_count++;
 	}
 
-	mprint("MP4: found %u tracks: %u avc, %u hevc and %u cc\n", track_count, avc_track_count, hevc_track_count, cc_track_count);
+	mprint("MP4: found %u tracks: %u avc, %u hevc, %u cc, %u vobsub\n", track_count, avc_track_count, hevc_track_count, cc_track_count, vobsub_track_count);
 
 	for (i = 0; i < track_count; i++)
 	{
@@ -899,6 +1050,24 @@ int processmp4(struct lib_ccx_ctx *ctx, struct ccx_s_mp4Cfg *cfg, char *file)
 				}
 				break;
 
+			case MEDIA_TYPE(GF_ISOM_MEDIA_SUBPIC, GF_ISOM_SUBTYPE_MPEG4): // subp:MPEG (VOBSUB)
+				// If there are multiple VOBSUB tracks, change fd for different tracks
+				if (vobsub_track_count > 1)
+				{
+					switch_output_file(ctx, enc_ctx, i);
+				}
+				if (process_vobsub_track(ctx, f, i + 1, &dec_sub) != 0)
+				{
+					mprint("Error on process_vobsub_track()\n");
+					free(dec_ctx->xds_ctx);
+					return -3;
+				}
+				if (dec_sub.got_output)
+				{
+					mp4_ret = 1;
+				}
+				break;
+
 			default:
 				if (type != GF_ISOM_MEDIA_CLOSED_CAPTION && type != GF_ISOM_MEDIA_SUBT && type != GF_ISOM_MEDIA_TEXT)
 					break; // ignore non cc track
@@ -1038,9 +1207,14 @@ int processmp4(struct lib_ccx_ctx *ctx, struct ccx_s_mp4Cfg *cfg, char *file)
 		mprint("Found no HEVC track(s). ");
 
 	if (cc_track_count)
-		mprint("Found %d CC track(s).\n", cc_track_count);
+		mprint("Found %d CC track(s). ", cc_track_count);
+	else
+		mprint("Found no dedicated CC track(s). ");
+
+	if (vobsub_track_count)
+		mprint("Found %d VOBSUB track(s).\n", vobsub_track_count);
 	else
-		mprint("Found no dedicated CC track(s).\n");
+		mprint("\n");
 
 	ctx->freport.mp4_cc_track_cnt = cc_track_count;
 
diff --git a/src/lib_ccx/vobsub_decoder.c b/src/lib_ccx/vobsub_decoder.c
new file mode 100644
index 000000000..9bf79314b
--- /dev/null
+++ b/src/lib_ccx/vobsub_decoder.c
@@ -0,0 +1,522 @@
+/**
+ * VOBSUB decoder with OCR support
+ *
+ * Decodes VOBSUB (DVD bitmap) subtitles from MKV, MP4, or standalone idx/sub files
+ * and optionally performs OCR to convert to text.
+ *
+ * SPU (SubPicture Unit) format:
+ * - 2 bytes: total SPU size
+ * - 2 bytes: offset to control sequence
+ * - RLE-encoded pixel data (interlaced)
+ * - Control sequence with timing, colors, coordinates
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+
+#include "lib_ccx.h"
+#include "vobsub_decoder.h"
+#include "ccx_common_common.h"
+#include "ccx_decoders_structs.h"
+#include "ccx_common_constants.h"
+
+#ifdef ENABLE_OCR
+#include "ocr.h"
+#endif
+
+#define RGBA(r, g, b, a) (((unsigned)(a) << 24) | ((r) << 16) | ((g) << 8) | (b))
+
+/* Control sequence structure */
+struct vobsub_ctrl_seq
+{
+	uint8_t color[4];      /* Color indices */
+	uint8_t alpha[4];      /* Alpha values */
+	uint16_t coord[4];     /* x1, x2, y1, y2 */
+	uint16_t pixoffset[2]; /* Offset to 1st and 2nd graphic line */
+	uint32_t start_time;   /* (date << 10) / 90 does not fit in 16 bits */
+	uint32_t stop_time;    /* (date << 10) / 90 does not fit in 16 bits */
+};
+
+struct vobsub_ctx
+{
+	uint32_t palette[16]; /* RGBA palette from idx header */
+	int palette_parsed;   /* 1 if palette has been parsed */
+	struct vobsub_ctrl_seq ctrl;
+	unsigned char *bitmap; /* Decoded bitmap */
+#ifdef ENABLE_OCR
+	void *ocr_ctx; /* OCR context */
+#endif
+};
+
+/* Get 4 bits from buffer for RLE decoding */
+static int vobsub_get_bits(unsigned char *buffer, uint8_t *nextbyte, int *pos, int *m)
+{
+	int ret;
+	ret = (*nextbyte & 0xf0) >> 4;
+	if (*m == 0)
+		*pos += 1;
+	*nextbyte = (*nextbyte << 4) | ((*m) ? (buffer[*pos] & 0x0f) : ((buffer[*pos] & 0xf0) >> 4));
+	*m = (*m + 1) % 2;
+	return ret;
+}
+
+/* RLE decode to get run length and color */
+static int vobsub_rle_decode(unsigned char *buffer, int *color, uint8_t *nextbyte, int *pos, int *m)
+{
+	int val = 4;
+	uint16_t rlen = vobsub_get_bits(buffer, nextbyte, pos, m);
+	while (rlen < val && val <= 0x40)
+	{
+		rlen = (rlen << 4) | vobsub_get_bits(buffer, nextbyte, pos, m);
+		val = val << 2;
+	}
+	*color = rlen & 0x3;
+	rlen = rlen >> 2;
+	return rlen;
+}
+
+/* Decode bitmap from RLE-encoded SPU data */
+static void vobsub_get_bitmap(struct vobsub_ctx *ctx, unsigned char *buffer, size_t buf_size)
+{
+	int w, h, x, lineno;
+	int pos, color, m;
+	int len;
+	uint8_t nextbyte;
+	unsigned char *buffp;
+
+	w = (ctx->ctrl.coord[1] - ctx->ctrl.coord[0]) + 1;
+	h = (ctx->ctrl.coord[3] - ctx->ctrl.coord[2]) + 1;
+
+	if (w <= 0 || h <= 0 || w > 4096 || h > 4096)
+	{
+		dbg_print(CCX_DMT_VERBOSE, "VOBSUB: Invalid dimensions w=%d h=%d\n", w, h);
+		return;
+	}
+
+	pos = ctx->ctrl.pixoffset[0];
+	if (pos >= (int)buf_size)
+	{
+		dbg_print(CCX_DMT_VERBOSE, "VOBSUB: Pixel offset out of bounds\n");
+		return;
+	}
+
+	m = 0;
+	nextbyte = buffer[pos];
+
+	ctx->bitmap = malloc(w * h);
+	if (!ctx->bitmap)
+		return;
+	memset(ctx->bitmap, 0, w * h);
+
+	buffp = ctx->bitmap;
+	x = 0;
+	lineno = 0;
+
+	/* Decode first field (odd lines in interlaced) */
+	/* One iteration may read up to buffer[pos + 2] (nibble look-ahead in
+	 * vobsub_get_bits), so that index must still be inside the buffer. */
+	while (lineno < (h + 1) / 2 && pos + 2 < (int)buf_size)
+	{
+		len = vobsub_rle_decode(buffer, &color, &nextbyte, &pos, &m);
+		if (len > (w - x) || len == 0)
+			len = w - x;
+
+		memset(buffp + x, color, len);
+		x += len;
+		if (x >= w)
+		{
+			x = 0;
+			++lineno;
+			buffp += (2 * w); /* Skip 1 line due to interlacing */
+			if ((m == 1))
+			{
+				vobsub_get_bits(buffer, &nextbyte, &pos, &m);
+			}
+		}
+	}
+
+	/* Decode second field (even lines) */
+	if (pos > ctx->ctrl.pixoffset[1])
+	{
+		dbg_print(CCX_DMT_VERBOSE, "VOBSUB: Error creating bitmap - overlapping fields\n");
+		return;
+	}
+
+	pos = ctx->ctrl.pixoffset[1];
+	if (pos >= (int)buf_size)
+	{
+		dbg_print(CCX_DMT_VERBOSE, "VOBSUB: Second field offset out of bounds\n");
+		return;
+	}
+
+	buffp = ctx->bitmap + w;
+	x = 0;
+	lineno = 0;
+	m = 0;
+	nextbyte = buffer[pos];
+
+	/* Same look-ahead bound as for the first field */
+	while (lineno < h / 2 && pos + 2 < (int)buf_size)
+	{
+		len = vobsub_rle_decode(buffer, &color, &nextbyte, &pos, &m);
+		if (len > (w - x) || len == 0)
+			len = w - x;
+
+		memset(buffp + x, color, len);
+		x += len;
+		if (x >= w)
+		{
+			x = 0;
+			++lineno;
+			buffp += (2 * w);
+			if ((m == 1))
+			{
+				vobsub_get_bits(buffer, &nextbyte, &pos, &m);
+			}
+		}
+	}
+}
+
+/* Parse control sequence from SPU data */
+static void vobsub_decode_control(struct vobsub_ctx *ctx, unsigned char *buffer, size_t buf_size, uint16_t ctrl_offset)
+{
+	int pos = ctrl_offset;
+	int pack_end = 0;
+	uint16_t date, next_ctrl;
+
+	memset(&ctx->ctrl, 0, sizeof(ctx->ctrl));
+
+	while (pos + 4 <= (int)buf_size && pack_end == 0)
+	{
+		date = (buffer[pos] << 8) | buffer[pos + 1];
+		next_ctrl = (buffer[pos + 2] << 8) | buffer[pos + 3];
+		if (next_ctrl == pos)
+			pack_end = 1;
+		pos += 4;
+
+		int seq_end = 0;
+		while (seq_end == 0 && pos < (int)buf_size)
+		{
+			int command = buffer[pos++];
+			switch (command)
+			{
+				case 0x01: /* Start display */
+					ctx->ctrl.start_time = (date << 10) / 90;
+					break;
+				case 0x02: /* Stop display */
+					ctx->ctrl.stop_time = (date << 10) / 90;
+					break;
+				case 0x03: /* SET_COLOR */
+					if (pos + 2 > (int)buf_size)
+						break;
+					ctx->ctrl.color[3] = (buffer[pos] & 0xf0) >> 4;
+					ctx->ctrl.color[2] = buffer[pos] & 0x0f;
+					ctx->ctrl.color[1] = (buffer[pos + 1] & 0xf0) >> 4;
+					ctx->ctrl.color[0] = buffer[pos + 1] & 0x0f;
+					pos += 2;
+					break;
+				case 0x04: /* SET_CONTR (alpha) */
+					if (pos + 2 > (int)buf_size)
+						break;
+					ctx->ctrl.alpha[3] = (buffer[pos] & 0xf0) >> 4;
+					ctx->ctrl.alpha[2] = buffer[pos] & 0x0f;
+					ctx->ctrl.alpha[1] = (buffer[pos + 1] & 0xf0) >> 4;
+					ctx->ctrl.alpha[0] = buffer[pos + 1] & 0x0f;
+					pos += 2;
+					break;
+				case 0x05: /* SET_DAREA (coordinates) */
+					if (pos + 6 > (int)buf_size)
+						break;
+					ctx->ctrl.coord[0] = ((buffer[pos] << 8) | (buffer[pos + 1] & 0xf0)) >> 4;
+					ctx->ctrl.coord[1] = ((buffer[pos + 1] & 0x0f) << 8) | buffer[pos + 2];
+					ctx->ctrl.coord[2] = ((buffer[pos + 3] << 8) | (buffer[pos + 4] & 0xf0)) >> 4;
+					ctx->ctrl.coord[3] = ((buffer[pos + 4] & 0x0f) << 8) | buffer[pos + 5];
+					pos += 6;
+					break;
+				case 0x06: /* SET_DSPXA (pixel offset) */
+					if (pos + 4 > (int)buf_size)
+						break;
+					ctx->ctrl.pixoffset[0] = (buffer[pos] << 8) | buffer[pos + 1];
+					ctx->ctrl.pixoffset[1] = (buffer[pos + 2] << 8) | buffer[pos + 3];
+					pos += 4;
+					break;
+				case 0x07: /* Extended command */
+					if (pos + 2 > (int)buf_size)
+						break;
+					{
+						uint16_t skip = (buffer[pos] << 8) | buffer[pos + 1];
+						pos += skip;
+					}
+					break;
+				case 0xff: /* End of control sequence */
+					seq_end = 1;
+					break;
+				default:
+					dbg_print(CCX_DMT_VERBOSE, "VOBSUB: Unknown control command 0x%02x\n", command);
+					break;
+			}
+		}
+	}
+}
+
+/* Generate RGBA palette from color/alpha indices using parsed palette */
+static void vobsub_generate_rgba_palette(struct vobsub_ctx *ctx, uint32_t *rgba_palette)
+{
+	for (int i = 0; i < 4; i++)
+	{
+		if (ctx->ctrl.alpha[i] == 0)
+		{
+			rgba_palette[i] = 0; /* Fully transparent */
+		}
+		else if (ctx->palette_parsed)
+		{
+			/* Use parsed palette from idx header */
+			uint32_t color = ctx->palette[ctx->ctrl.color[i] & 0x0f];
+			uint8_t r = (color >> 16) & 0xff;
+			uint8_t g = (color >> 8) & 0xff;
+			uint8_t b = color & 0xff;
+			uint8_t a = ctx->ctrl.alpha[i] * 17; /* Scale 0-15 to 0-255 */
+			rgba_palette[i] = RGBA(r, g, b, a);
+		}
+		else
+		{
+			/* Fallback: guess palette (grayscale levels) */
+			static const uint8_t level_map[4][4] = {
+			    {0xff},
+			    {0x00, 0xff},
+			    {0x00, 0x80, 0xff},
+			    {0x00, 0x55, 0xaa, 0xff},
+			};
+
+			/* Count opaque colors */
+			int nb_opaque = 0;
+			for (int j = 0; j < 4; j++)
+				if (ctx->ctrl.alpha[j] != 0)
+					nb_opaque++;
+
+			if (nb_opaque == 0)
+				nb_opaque = 1;
+			if (nb_opaque > 4)
+				nb_opaque = 4;
+
+			int level = level_map[nb_opaque - 1][i < nb_opaque ? i : nb_opaque - 1];
+			uint8_t a = ctx->ctrl.alpha[i] * 17;
+			rgba_palette[i] = RGBA(level, level, level, a);
+		}
+	}
+}
+
+struct vobsub_ctx *init_vobsub_decoder(void)
+{
+	struct vobsub_ctx *ctx = malloc(sizeof(struct vobsub_ctx));
+	if (!ctx)
+		return NULL;
+
+	memset(ctx, 0, sizeof(struct vobsub_ctx));
+
+#ifdef ENABLE_OCR
+	ctx->ocr_ctx = init_ocr(1); /* 1 = default language index (English) */
+	if (!ctx->ocr_ctx)
+	{
+		mprint("VOBSUB: Warning - OCR initialization failed\n");
+		/* Continue anyway - OCR will just not work */
+	}
+#endif
+
+	return ctx;
+}
+
+int vobsub_parse_palette(struct vobsub_ctx *ctx, const char *idx_header)
+{
+	if (!ctx || !idx_header)
+		return -1;
+
+	/* Find "palette:" line */
+	const char *palette_line = strstr(idx_header, "palette:");
+	if (!palette_line)
+	{
+		dbg_print(CCX_DMT_VERBOSE, "VOBSUB: No palette line found in idx header\n");
+		return -1;
+	}
+
+	palette_line += 8; /* Skip "palette:" */
+
+	/* Skip whitespace */
+	while (*palette_line == ' ' || *palette_line == '\t')
+		palette_line++;
+
+	/* Parse 16 hex RGB colors */
+	for (int i = 0; i < 16; i++)
+	{
+		unsigned int color;
+		if (sscanf(palette_line, "%x", &color) != 1)
+		{
+			dbg_print(CCX_DMT_VERBOSE, "VOBSUB: Failed to parse palette color %d\n", i);
+			/* Incomplete palette: leave palette_parsed unset so the
+			 * grayscale fallback is used instead of zeroed colors. */
+			return -1;
+		}
+		ctx->palette[i] = color;
+
+		/* Skip to next color (past comma and whitespace) */
+		while (*palette_line && *palette_line != ',' && *palette_line != '\n')
+			palette_line++;
+		if (*palette_line == ',')
+			palette_line++;
+		while (*palette_line == ' ' || *palette_line == '\t')
+			palette_line++;
+	}
+
+	ctx->palette_parsed = 1;
+	dbg_print(CCX_DMT_VERBOSE, "VOBSUB: Parsed palette from idx header\n");
+	return 0;
+}
+
+int vobsub_decode_spu(struct vobsub_ctx *ctx,
+		      unsigned char *spu_data, size_t spu_size,
+		      long long start_time, long long end_time,
+		      struct cc_subtitle *sub)
+{
+	if (!ctx || !spu_data || spu_size < 4 || !sub)
+		return -1;
+
+	/* Parse SPU header */
+	uint16_t size_spu = (spu_data[0] << 8) | spu_data[1];
+	uint16_t ctrl_offset = (spu_data[2] << 8) | spu_data[3];
+
+	if (ctrl_offset > spu_size || size_spu > spu_size)
+	{
+		dbg_print(CCX_DMT_VERBOSE, "VOBSUB: Invalid SPU header (size=%u, ctrl=%u, buf=%zu)\n",
+			  size_spu, ctrl_offset, spu_size);
+		return -1;
+	}
+
+	/* Parse control sequence */
+	vobsub_decode_control(ctx, spu_data, spu_size, ctrl_offset);
+
+	/* Free any previous bitmap */
+	if (ctx->bitmap)
+	{
+		free(ctx->bitmap);
+		ctx->bitmap = NULL;
+	}
+
+	/* Decode bitmap */
+	vobsub_get_bitmap(ctx, spu_data, spu_size);
+	if (!ctx->bitmap)
+	{
+		dbg_print(CCX_DMT_VERBOSE, "VOBSUB: Failed to decode bitmap\n");
+		return -1;
+	}
+
+	/* Build cc_subtitle structure */
+	int w = (ctx->ctrl.coord[1] - ctx->ctrl.coord[0]) + 1;
+	int h = (ctx->ctrl.coord[3] - ctx->ctrl.coord[2]) + 1;
+
+	if (w <= 0 || h <= 0)
+	{
+		dbg_print(CCX_DMT_VERBOSE, "VOBSUB: Invalid bitmap dimensions\n");
+		free(ctx->bitmap);
+		ctx->bitmap = NULL;
+		return -1;
+	}
+
+	sub->type = CC_BITMAP;
+	sub->nb_data = 1;
+	sub->got_output = 1;
+
+	struct cc_bitmap *rect = malloc(sizeof(struct cc_bitmap));
+	if (!rect)
+	{
+		free(ctx->bitmap);
+		ctx->bitmap = NULL;
+		return -1;
+	}
+	memset(rect, 0, sizeof(struct cc_bitmap));
+
+	sub->data = rect;
+	sub->datatype = CC_DATATYPE_GENERIC;
+	sub->start_time = start_time;
+	sub->end_time = end_time > 0 ? end_time : start_time + ctx->ctrl.stop_time;
+
+	/* Copy bitmap data */
+	rect->data0 = malloc(w * h);
+	if (!rect->data0)
+	{
+		free(rect);
+		sub->data = NULL;
+		free(ctx->bitmap);
+		ctx->bitmap = NULL;
+		return -1;
+	}
+	memcpy(rect->data0, ctx->bitmap, w * h);
+
+	/* Generate RGBA palette */
+	rect->data1 = malloc(1024); /* Space for 256 colors */
+	if (!rect->data1)
+	{
+		free(rect->data0);
+		free(rect);
+		sub->data = NULL;
+		free(ctx->bitmap);
+		ctx->bitmap = NULL;
+		return -1;
+	}
+	memset(rect->data1, 0, 1024);
+	vobsub_generate_rgba_palette(ctx, (uint32_t *)rect->data1);
+
+	rect->nb_colors = 4;
+	rect->x = ctx->ctrl.coord[0];
+	rect->y = ctx->ctrl.coord[2];
+	rect->w = w;
+	rect->h = h;
+	rect->linesize0 = w;
+
+#ifdef ENABLE_OCR
+	/* Run OCR if available */
+	if (ctx->ocr_ctx)
+	{
+		char *ocr_str = NULL;
+		int ret = ocr_rect(ctx->ocr_ctx, rect, &ocr_str, 0, 1); /* quantmode=1 */
+		if (ret >= 0 && ocr_str)
+		{
+			rect->ocr_text = ocr_str;
+		}
+	}
+#endif
+
+	free(ctx->bitmap);
+	ctx->bitmap = NULL;
+
+	return 0;
+}
+
+int vobsub_ocr_available(void)
+{
+#ifdef ENABLE_OCR
+	return 1;
+#else
+	return 0;
+#endif
+}
+
+void delete_vobsub_decoder(struct vobsub_ctx **ctx)
+{
+	if (!ctx || !*ctx)
+		return;
+
+	struct vobsub_ctx *c = *ctx;
+
+#ifdef ENABLE_OCR
+	if (c->ocr_ctx)
+		delete_ocr(&c->ocr_ctx);
+#endif
+
+	if (c->bitmap)
+		free(c->bitmap);
+
+	free(c);
+	*ctx = NULL;
+}
diff --git a/src/lib_ccx/vobsub_decoder.h b/src/lib_ccx/vobsub_decoder.h
new file mode 100644
index 000000000..e668b5dac
--- /dev/null
+++ b/src/lib_ccx/vobsub_decoder.h
@@ -0,0 +1,53 @@
+#ifndef VOBSUB_DECODER_H
+#define VOBSUB_DECODER_H
+
+#include "ccx_decoders_structs.h"
+
+/**
+ * VOBSUB decoder context - opaque structure
+ */
+struct vobsub_ctx;
+
+/**
+ * Initialize VOBSUB decoder context
+ * @return Pointer to context, or NULL on failure
+ */
+struct vobsub_ctx *init_vobsub_decoder(void);
+
+/**
+ * Parse palette from idx header string (e.g., from MKV CodecPrivate)
+ * Looks for "palette:" line and parses 16 hex RGB colors
+ * @param ctx VOBSUB decoder context
+ * @param idx_header The idx header string containing palette info
+ * @return 0 on success, -1 on failure
+ */
+int vobsub_parse_palette(struct vobsub_ctx *ctx, const char *idx_header);
+
+/**
+ * Decode single SPU packet and optionally perform OCR
+ * @param ctx VOBSUB decoder context
+ * @param spu_data Raw SPU data (starting with 2-byte size)
+ * @param spu_size Size of SPU data
+ * @param start_time Start time in milliseconds
+ * @param end_time End time in milliseconds (0 if unknown)
+ * @param sub Output subtitle structure
+ * @return 0 on success, -1 on error
+ */
+int vobsub_decode_spu(struct vobsub_ctx *ctx,
+		      unsigned char *spu_data, size_t spu_size,
+		      long long start_time, long long end_time,
+		      struct cc_subtitle *sub);
+
+/**
+ * Check if VOBSUB OCR is available (compiled with OCR support)
+ * @return 1 if OCR available, 0 otherwise
+ */
+int vobsub_ocr_available(void);
+
+/**
+ * Free VOBSUB decoder context and resources
+ * @param ctx Pointer to context pointer (will be set to NULL)
+ */
+void delete_vobsub_decoder(struct vobsub_ctx **ctx);
+
+#endif /* VOBSUB_DECODER_H */
diff --git a/windows/ccextractor.vcxproj b/windows/ccextractor.vcxproj
index 450adcffb..58d88f969 100644
--- a/windows/ccextractor.vcxproj
+++ b/windows/ccextractor.vcxproj
@@ -38,6 +38,7 @@
+
@@ -130,6 +131,7 @@
+
diff --git a/windows/ccextractor.vcxproj.filters b/windows/ccextractor.vcxproj.filters
index f189d67f0..5a31459a2 100644
--- a/windows/ccextractor.vcxproj.filters
+++ b/windows/ccextractor.vcxproj.filters
@@ -153,6 +153,9 @@
 Header Files
+
+ Header Files
+
 Header Files
@@ -455,6 +458,9 @@
 Source Files
+
+ Source Files
+
 Source Files