From 43a30c9dce1acaa5a5a8050bf67615d0c964698e Mon Sep 17 00:00:00 2001 From: BurntRanch Date: Fri, 13 Mar 2026 21:40:32 +0300 Subject: [PATCH] ocr: offload more guesswork to tesseract this makes it cover more cases way better since tesseract devs are probably smarter --- src/text_extraction.cpp | 27 ++++++++------------------- 1 file changed, 8 insertions(+), 19 deletions(-) diff --git a/src/text_extraction.cpp b/src/text_extraction.cpp index 8b3e935..a96bad5 100644 --- a/src/text_extraction.cpp +++ b/src/text_extraction.cpp @@ -1,5 +1,6 @@ #include "text_extraction.hpp" +#include #include #include @@ -13,6 +14,8 @@ #include "screen_capture.hpp" #include "util.hpp" +using namespace spdlog; + // OCR static tesseract::PageSegMode choose_psm(int w, int h) { @@ -22,32 +25,18 @@ static tesseract::PageSegMode choose_psm(int w, int h) const size_t area = static_cast(w) * h; const float aspect = (h > 0) ? static_cast(w) / static_cast(h) : 1.0f; + debug("Choosing PSM, area: {}*{}={}, aspect: {}", w, h, area, aspect); + // Single character: tiny and roughly square if (area < 2'500 && aspect > 0.3f && aspect < 3.0f) return tesseract::PSM_SINGLE_CHAR; // Single word: small area, not excessively wide - if (area < 15'000 && aspect < 5.0f) + if (area < 15'000 && aspect > 0.3f && aspect < 5.0f) return tesseract::PSM_SINGLE_WORD; - // Single line: wide-and-short, or very small height regardless of width. - // Guard with absolute height: a wide multi-line block also has high aspect, - // so aspect alone is not enough. A single text line is never taller than ~80px - // at normal DPI; after the 2x upscale in preprocess_pix that becomes ~160px. - if (aspect > 4.0f && h < w / 4 && h < 160) - return tesseract::PSM_SINGLE_LINE; - - // Vertical text: tall and narrow (e.g. rotated sidebar labels) - if (aspect < 0.25f) - return tesseract::PSM_SINGLE_BLOCK_VERT_TEXT; - - // Large/sparse region (full screen, desktop, busy UI panel) - // Sparse text avoids Tesseract choking on whitespace-heavy layouts - if (area > 500'000) - return tesseract::PSM_SPARSE_TEXT; - - // Mid-size blocks: paragraphs, dialog boxes, panels - return tesseract::PSM_SINGLE_BLOCK; + // AUTO_OSD is good enough to take care of the rest. + return tesseract::PSM_AUTO_OSD; } static PIX* preprocess_pix(PIX* src)