Add a word_confidence_threshold option to whisper() in client_v2.py.

* whisper() accepts word_confidence_threshold (float, default 0.3), documents
  it in the docstring, and forwards it under the "word_confidence_threshold"
  key next to "lang", "tag" and "filename".
* The docstring entry for mode now also lists "table".
* tests/integration: test_webhook first tries to delete webhook_name and
  ignores LLMWhispererClientException, so registration starts clean.
* tests/unit: two tests check that the prepared request's query string
  carries word_confidence_threshold as "0.3" by default and "0.75" when set.

NOTE(review): this patch was recovered from a copy whose line breaks and
indentation had been collapsed. Leading whitespace and the wrap points of
multi-line docstring context were reconstructed from the hunk line counts
and Python syntax; confirm against the target tree before applying.

diff --git a/src/unstract/llmwhisperer/client_v2.py b/src/unstract/llmwhisperer/client_v2.py
index 00afc50..4f47005 100644
--- a/src/unstract/llmwhisperer/client_v2.py
+++ b/src/unstract/llmwhisperer/client_v2.py
@@ -385,6 +385,7 @@ def whisper(
         line_spitter_strategy: str = "left-priority",
         add_line_nos: bool = False,
         include_line_confidence: bool = False,
+        word_confidence_threshold: float = 0.3,
         lang: str = "eng",
         tag: str = "default",
         filename: str = "",
@@ -401,8 +402,8 @@ def whisper(
             file_path (str, optional): The path to the file to be processed. Defaults to "".
             stream (IO[bytes], optional): A stream of bytes to be processed. Defaults to None.
             url (str, optional): The URL of the file to be processed. Defaults to "".
-            mode (str, optional): The processing mode. Can be "high_quality", "form", "low_cost" or "native_text".
-                Defaults to "high_quality".
+            mode (str, optional): The processing mode. Can be "high_quality", "form", "low_cost", "native_text"
+                or "table". Defaults to "high_quality".
             output_mode (str, optional): The output mode. Can be "layout_preserving" or "text".
                 Defaults to "layout_preserving".
             page_seperator (str, optional): The page separator. Defaults to "<<<".
@@ -418,6 +419,11 @@ def whisper(
                 which can be queried later using the highlights API.
             include_line_confidence (bool, optional): Adds line confidence to the line metadata returned
                 by the highlights API. Requires add_line_nos to be enabled. Defaults to False.
+            word_confidence_threshold (float, optional): The minimum OCR confidence score a word must have to be
+                included in the extracted text. Accepts a value in the range [0.0, 1.0], where higher values are
+                stricter. Any word whose confidence value falls below the configured threshold is ignored and
+                excluded from the final output. This parameter works only with "form", "high_quality" and "table"
+                modes. Defaults to 0.3.
             lang (str, optional): The language of the document. Defaults to "eng".
             tag (str, optional): The tag for the document. Defaults to "default".
             filename (str, optional): The name of the file to store in reports. Defaults to "".
@@ -454,6 +460,7 @@ def whisper(
             "line_spitter_strategy": line_spitter_strategy,
             "add_line_nos": add_line_nos,
             "include_line_confidence": include_line_confidence,
+            "word_confidence_threshold": word_confidence_threshold,
             "lang": lang,
             "tag": tag,
             "filename": filename,
diff --git a/tests/integration/client_v2_test.py b/tests/integration/client_v2_test.py
index dd60297..7ec0a5a 100644
--- a/tests/integration/client_v2_test.py
+++ b/tests/integration/client_v2_test.py
@@ -206,6 +206,13 @@ def test_webhook(client_v2: LLMWhispererClientV2, url: str, token: str, webhook_
     Returns:
         None
     """
+    # Clean up any webhook left over from a previous (possibly failed) run so
+    # registration starts from a clean slate.
+    try:
+        client_v2.delete_webhook(webhook_name)
+    except LLMWhispererClientException:
+        pass
+
     result = client_v2.register_webhook(url, token, webhook_name)
     assert isinstance(result, dict)
     assert result["message"] == "Webhook created successfully"
diff --git a/tests/unit/client_v2_test.py b/tests/unit/client_v2_test.py
index 8b1535f..eb5921f 100644
--- a/tests/unit/client_v2_test.py
+++ b/tests/unit/client_v2_test.py
@@ -1,5 +1,6 @@
 import time
 from unittest.mock import MagicMock
+from urllib.parse import parse_qs, urlparse
 
 import pytest
 import requests
@@ -151,6 +152,34 @@ def test_whisper_invalid_json_response_202(mocker: MockerFixture, client_v2: LLM
     assert response["extraction"] == {}
 
 
+def test_whisper_default_word_confidence_threshold(mocker: MockerFixture, client_v2: LLMWhispererClientV2) -> None:
+    """whisper() sends the default word_confidence_threshold when not specified."""
+    mock_send = mocker.patch("requests.Session.send")
+    mock_send.return_value = _mock_response(200, '{"status_code": 200, "extraction": {"text": "ok"}}')
+
+    client_v2.whisper(url="https://example.com/test.pdf", wait_for_completion=False)
+
+    prepared_request = mock_send.call_args[0][0]
+    query = parse_qs(urlparse(prepared_request.url).query)
+    assert query["word_confidence_threshold"] == ["0.3"]
+
+
+def test_whisper_custom_word_confidence_threshold(mocker: MockerFixture, client_v2: LLMWhispererClientV2) -> None:
+    """whisper() forwards a custom word_confidence_threshold as a request param."""
+    mock_send = mocker.patch("requests.Session.send")
+    mock_send.return_value = _mock_response(200, '{"status_code": 200, "extraction": {"text": "ok"}}')
+
+    client_v2.whisper(
+        url="https://example.com/test.pdf",
+        word_confidence_threshold=0.75,
+        wait_for_completion=False,
+    )
+
+    prepared_request = mock_send.call_args[0][0]
+    query = parse_qs(urlparse(prepared_request.url).query)
+    assert query["word_confidence_threshold"] == ["0.75"]
+
+
 # --- Retry behavior tests ---
 
 