diff --git a/.github/workflows/build_linux.yml b/.github/workflows/build_linux.yml index 311fe3719..da8819fca 100644 --- a/.github/workflows/build_linux.yml +++ b/.github/workflows/build_linux.yml @@ -27,6 +27,10 @@ jobs: steps: - name: Install dependencies run: sudo apt update && sudo apt-get install libgpac-dev libtesseract-dev libavcodec-dev libavdevice-dev libx11-dev libxcb1-dev libxcb-shm0-dev + - name: Install Rust + run: | + curl --proto "=https" --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + - uses: actions/checkout@v6 - name: build run: ./build -hardsubx @@ -47,6 +51,10 @@ jobs: steps: - name: Install dependencies run: sudo apt update && sudo apt-get install libgpac-dev + - name: Install Rust + run: | + curl --proto "=https" --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + - uses: actions/checkout@v6 - name: run autogen run: ./autogen.sh @@ -65,6 +73,10 @@ jobs: steps: - name: Install dependencies run: sudo apt update && sudo apt-get install libgpac-dev + - name: Install Rust + run: | + curl --proto "=https" --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + - uses: actions/checkout@v6 - name: cmake run: mkdir build && cd build && cmake ../src @@ -79,8 +91,13 @@ jobs: - uses: actions/checkout@v6 - name: Install dependencies run: sudo apt update && sudo apt install libgpac-dev libtesseract-dev libavformat-dev libavdevice-dev libswscale-dev yasm + - name: Install Rust + run: | + curl --proto "=https" --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + - name: cmake run: | + mkdir build && cd build cmake -DWITH_OCR=ON -DWITH_HARDSUBX=ON ../src - name: build @@ -94,6 +111,10 @@ jobs: steps: - name: Install dependencies run: sudo apt update && sudo apt-get install libgpac-dev + - name: Install Rust + run: | + curl --proto "=https" --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + - uses: actions/checkout@v6 - name: cache uses: actions/cache@v5 diff --git a/.github/workflows/build_mac.yml b/.github/workflows/build_mac.yml index 8e01ec1cd..74bafe45e 100644 --- a/.github/workflows/build_mac.yml +++ b/.github/workflows/build_mac.yml @@ -64,6 +64,10 @@ jobs: runs-on: macos-latest steps: - uses: actions/checkout@v6 + - name: Install Rust + run: | + curl --proto "=https" --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + echo "$HOME/.cargo/bin" >> $GITHUB_PATH - name: dependencies run: brew install gpac - uses: actions/checkout@v6 @@ -78,6 +82,10 @@ jobs: runs-on: macos-latest steps: - uses: actions/checkout@v6 + - name: Install Rust + run: | + curl --proto "=https" --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + echo "$HOME/.cargo/bin" >> $GITHUB_PATH - name: Install dependencies run: brew install pkg-config autoconf automake libtool tesseract leptonica gpac ffmpeg - name: cmake diff --git a/.gitignore b/.gitignore index bdb5246d4..c8df9be92 100644 --- a/.gitignore +++ b/.gitignore @@ -156,6 +156,21 @@ windows/*/CACHEDIR.TAG windows/.rustc_info.json linux/configure~ +# local junk +build-system/ +*.backup +*.save +*.patch +*.a +README.srt +no_subs.ts + +fix_hardsubx.cmake +fix_libraries.cmake +mac/configure~ +*.current + + # Plans and temporary files plans/ tess.log diff --git a/mac/build.command b/mac/build.command index e0aab469a..f69d9daf1 100755 --- a/mac/build.command +++ b/mac/build.command @@ -106,7 +106,7 @@ if [[ "$ENABLE_OCR" == "true" ]]; then BLD_INCLUDE="$BLD_INCLUDE `pkg-config --cflags --silence-errors tesseract`" fi -SRC_CCX="$(find ../src/lib_ccx -name '*.c')" +SRC_CCX="$(find ../src/lib_ccx -name '*.c' | grep -v ccx_encoders_smptett.c)" SRC_LIB_HASH="$(find ../src/thirdparty/lib_hash -name '*.c')" SRC_LIBPNG="$(find ../src/thirdparty/libpng -name '*.c')" SRC_UTF8="../src/thirdparty/utf8proc/utf8proc.c" diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 1896b3492..fa9b91225 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -246,11 +246,25 @@ add_executable (ccextractor ${SOURCEFILE} ${FREETYPE_SOURCE} ${UTF8PROC_SOURCE}) # Build with Rust library ######################################################## -if (PKG_CONFIG_FOUND) - add_subdirectory (rust) - set (EXTRA_LIBS ${EXTRA_LIBS} ccx_rust) -endif (PKG_CONFIG_FOUND) +# Rust is REQUIRED for DTVCC and SMPTE-TT functionality +find_program(CARGO cargo PATHS $ENV{HOME}/.cargo/bin REQUIRED) +message(STATUS "Found cargo: ${CARGO}") + +# Build Rust library using Corrosion (in rust/CMakeLists.txt) +add_subdirectory(rust) + +# Platform-specific system libraries needed for Rust +if(APPLE) + set(RUST_EXTRA_LIBS pthread dl m "-framework Security" "-framework CoreFoundation") +elseif(WIN32) + set(RUST_EXTRA_LIBS ws2_32 userenv bcrypt ntdll) +else() + set(RUST_EXTRA_LIBS pthread dl m) +endif() +set(EXTRA_LIBS ${EXTRA_LIBS} ccx_rust ${RUST_EXTRA_LIBS}) +set(RUST_AVAILABLE TRUE) +message(STATUS "Rust available, using Rust implementations") target_link_libraries (ccextractor ${EXTRA_LIBS}) target_include_directories (ccextractor PUBLIC ${EXTRA_INCLUDES}) diff --git a/src/lib_ccx/CMakeLists.txt b/src/lib_ccx/CMakeLists.txt index befb4f18b..c643a1ab9 100644 --- a/src/lib_ccx/CMakeLists.txt +++ b/src/lib_ccx/CMakeLists.txt @@ -59,6 +59,9 @@ endif (WITH_OCR) aux_source_directory ("${PROJECT_SOURCE_DIR}/lib_ccx/" SOURCEFILE) +# Rust is available, using Rust SMPTE-TT implementation +list(REMOVE_ITEM SOURCEFILE "${PROJECT_SOURCE_DIR}/lib_ccx/ccx_encoders_smptett.c") + add_library (ccx ${SOURCEFILE} ccx_dtvcc.h ccx_dtvcc.c ccx_encoders_mcc.c ccx_encoders_mcc.h) target_link_libraries (ccx ${EXTRA_LIBS}) target_include_directories (ccx PUBLIC ${EXTRA_INCLUDES}) diff --git a/src/lib_ccx/ccx_encoders_smptett.c.bak b/src/lib_ccx/ccx_encoders_smptett.c.bak new file mode 100644 index 000000000..7c5f90ddf --- /dev/null +++ b/src/lib_ccx/ccx_encoders_smptett.c.bak @@ -0,0 +1,411 @@ +/* + Produces minimally-compliant SMPTE Timed Text (W3C TTML) + format-compatible output + + See http://www.w3.org/TR/ttaf1-dfxp/ and + https://www.smpte.org/sites/default/files/st2052-1-2010.pdf + + Copyright (C) 2012 John Kemp + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +*/ +#include "lib_ccx.h" +#include "ccx_common_option.h" +#include "ccx_encoders_common.h" +#include +#include "ocr.h" +#include "utility.h" +#include "ccx_encoders_helpers.h" + +void write_stringz_as_smptett(char *string, struct encoder_ctx *context, LLONG ms_start, LLONG ms_end) +{ + int used; + unsigned h1, m1, s1, ms1; + unsigned h2, m2, s2, ms2; + int len = strlen(string); + unsigned char *unescaped = (unsigned char *)malloc(len + 1); + unsigned char *el = (unsigned char *)malloc(len * 3 + 1); // Be generous + int pos_r = 0; + int pos_w = 0; + char str[1024]; + + if (el == NULL || unescaped == NULL) + fatal(EXIT_NOT_ENOUGH_MEMORY, "In write_stringz_as_smptett() - not enough memory.\n"); + + millis_to_time(ms_start, &h1, &m1, &s1, &ms1); + millis_to_time(ms_end - 1, &h2, &m2, &s2, &ms2); + + sprintf((char *)str, "

\r\n", h1, m1, s1, ms1, h2, m2, s2, ms2); + if (context->encoding != CCX_ENC_UNICODE) + { + dbg_print(CCX_DMT_DECODER_608, "\r%s\n", str); + } + used = encode_line(context, context->buffer, (unsigned char *)str); + write_wrapped(context->out->fh, context->buffer, used); + // Scan for \n in the string and replace it with a 0 + while (pos_r < len) + { + if (string[pos_r] == '\\' && string[pos_r + 1] == 'n') + { + unescaped[pos_w] = 0; + pos_r += 2; + } + else + { + unescaped[pos_w] = string[pos_r]; + pos_r++; + } + pos_w++; + } + unescaped[pos_w] = 0; + // Now read the unescaped string (now several string'z and write them) + unsigned char *begin = unescaped; + while (begin < unescaped + len) + { + unsigned int u = encode_line(context, el, begin); + if (context->encoding != CCX_ENC_UNICODE) + { + dbg_print(CCX_DMT_DECODER_608, "\r"); + dbg_print(CCX_DMT_DECODER_608, "%s\n", context->subline); + } + write_wrapped(context->out->fh, el, u); + // write (wb->fh, encoded_br, encoded_br_length); + + write_wrapped(context->out->fh, context->encoded_crlf, context->encoded_crlf_length); + begin += strlen((const char *)begin) + 1; + } + + sprintf((char *)str, "

\n"); + if (context->encoding != CCX_ENC_UNICODE) + { + dbg_print(CCX_DMT_DECODER_608, "\r%s\n", str); + } + used = encode_line(context, context->buffer, (unsigned char *)str); + write_wrapped(context->out->fh, context->buffer, used); + + free(el); + free(unescaped); +} + +int write_cc_bitmap_as_smptett(struct cc_subtitle *sub, struct encoder_ctx *context) +{ + int ret = 0; +#ifdef ENABLE_OCR + struct cc_bitmap *rect; + // char timeline[128]; + int i, len = 0; + + if (sub->nb_data == 0) + return 0; + + rect = sub->data; + + if (sub->flags & SUB_EOD_MARKER) + context->prev_start = sub->start_time; + + for (i = sub->nb_data - 1; i >= 0; i--) + { + if (rect[i].ocr_text && *(rect[i].ocr_text)) + { + if (context->prev_start != -1 || !(sub->flags & SUB_EOD_MARKER)) + { + char *buf = (char *)context->buffer; + unsigned h1, m1, s1, ms1; + unsigned h2, m2, s2, ms2; + millis_to_time(sub->start_time, &h1, &m1, &s1, &ms1); + millis_to_time(sub->end_time - 1, &h2, &m2, &s2, &ms2); // -1 To prevent overlapping with next line. + sprintf((char *)context->buffer, "

\n", h1, m1, s1, ms1, h2, m2, s2, ms2); + write_wrapped(context->out->fh, buf, strlen(buf)); + len = strlen(rect[i].ocr_text); + write_wrapped(context->out->fh, rect[i].ocr_text, len); + write_wrapped(context->out->fh, context->encoded_crlf, context->encoded_crlf_length); + sprintf(buf, "

\n"); + write_wrapped(context->out->fh, buf, strlen(buf)); + } + } + } + for (i = 0, rect = sub->data; i < sub->nb_data; i++, rect++) + { + freep(&rect->data0); + freep(&rect->data1); + } +#endif + + sub->nb_data = 0; + freep(&sub->data); + return ret; +} + +int write_cc_subtitle_as_smptett(struct cc_subtitle *sub, struct encoder_ctx *context) +{ + int ret = 0; + struct cc_subtitle *osub = sub; + struct cc_subtitle *lsub = sub; + while (sub) + { + if (sub->type == CC_TEXT) + { + write_stringz_as_smptett(sub->data, context, sub->start_time, sub->end_time); + freep(&sub->data); + sub->nb_data = 0; + } + lsub = sub; + sub = sub->next; + } + while (lsub != osub) + { + sub = lsub->prev; + freep(&lsub); + lsub = sub; + } + + return ret; +} + +int write_cc_buffer_as_smptett(struct eia608_screen *data, struct encoder_ctx *context) +{ + int used; + unsigned h1, m1, s1, ms1; + unsigned h2, m2, s2, ms2; + int wrote_something = 0; + char str[1024]; + + millis_to_time(data->start_time, &h1, &m1, &s1, &ms1); + millis_to_time(data->end_time - 1, &h2, &m2, &s2, &ms2); + + for (int row = 0; row < 15; row++) + { + if (data->row_used[row]) + { + float row1 = 0; + float col1 = 0; + int firstcol = -1; + + // ROWS is actually 90% of the screen size + // Add +10% because row 0 is at position 10% + row1 = ((100 * row) / (ROWS / 0.8)) + 10; + + for (int column = 0; column < COLUMNS; column++) + { + int unicode = 0; + get_char_in_unicode((unsigned char *)&unicode, data->characters[row][column]); + // if (COL_TRANSPARENT != data->colors[row][column]) + if (unicode != 0x20) + { + if (firstcol < 0) + { + firstcol = column; + } + } + } + // COLUMNS is actually 90% of the screen size + // Add +10% because column 0 is at position 10% + col1 = ((100 * firstcol) / (COLUMNS / 0.8)) + 10; + + if (firstcol >= 0) + { + wrote_something = 1; + + sprintf(str, "

\n ", h1, m1, s1, ms1, h2, m2, s2, ms2, col1, row1); + if (context->encoding != CCX_ENC_UNICODE) + { + dbg_print(CCX_DMT_DECODER_608, "\r%s\n", str); + } + used = encode_line(context, context->buffer, (unsigned char *)str); + write_wrapped(context->out->fh, context->buffer, used); + // Trimming subs because the position is defined by "tts:origin" + int old_trim_subs = context->trim_subs; + context->trim_subs = 1; + if (context->encoding != CCX_ENC_UNICODE) + { + dbg_print(CCX_DMT_DECODER_608, "\r"); + dbg_print(CCX_DMT_DECODER_608, "%s\n", context->subline); + } + + get_decoder_line_encoded(context, context->subline, row, data); + + char *final = malloc(strlen((const char *)(context->subline)) + 1000); // Being overly generous? :P + char *temp = malloc(strlen((const char *)(context->subline)) + 1000); + *final = 0; + *temp = 0; + /* + final : stores formatted HTML sentence. This will be written in subtitle file. + temp : stored temporary sentences required while formatting + + +1000 because huge and