import os
import uuid
from io import BytesIO
from typing import Dict, List, Optional

import requests
from dotenv import load_dotenv
from PIL import Image

# Runs at import time, before any environment lookups in this module.
load_dotenv()

# Accepted MIME types, each mapped to the image format names allowed for it.
MIME_TO_FORMAT: Dict[str, List[str]] = {
    'image/jpeg': ['JPEG', 'JPG'],
    'image/png': ['PNG'],
    'image/gif': ['GIF'],
    'image/bmp': ['BMP'],
    'image/webp': ['WEBP'],
    'image/tiff': ['TIFF', 'TIF'],
    'image/x-icon': ['ICO'],
}

# Image format names mapped to the (lower-case, dot-prefixed) file extensions
# considered valid for that format.
FORMAT_TO_EXTENSION: Dict[str, List[str]] = {
    'JPEG': ['.jpg', '.jpeg', '.jpe'],
    'PNG': ['.png'],
    'GIF': ['.gif'],
    'BMP': ['.bmp'],
    'WEBP': ['.webp'],
    'TIFF': ['.tiff', '.tif'],
    'ICO': ['.ico'],
}


class ImageUploadError(Exception):
    """Base error for any failure while uploading an image"""


class InvalidMimeTypeError(ImageUploadError):
    """Raised when a MIME type is unsupported or inconsistent with the image"""


class MissingEnvironmentVariableError(ImageUploadError):
    """Raised when a required environment variable is not set"""


def generate_file_name(img: Image.Image) -> str:
    """Build a unique filename for an image

    The name is a freshly generated UUID4 followed by the image's format in
    lower case as the extension; 'png' is used when the image reports no
    format.

    Args:
        img: PIL Image object

    Returns:
        Filename of the form '<uuid4>.<extension>'
    """
    stem = uuid.uuid4()
    suffix: str = (img.format or 'png').lower()
    return f"{stem}.{suffix}"
def validate_mime_type(mime_type: str, img: Image.Image, filename: str) -> bool:
    """Validate MIME type against image format and filename

    Checks run in this order, and the first failure raises:

    1. ``mime_type`` must be a key of ``MIME_TO_FORMAT``.
    2. If the image reports a format, that format (upper-cased) must be one
       of the formats listed for ``mime_type``.
    3. If that format is a key of ``FORMAT_TO_EXTENSION``, the filename's
       extension (compared in lower case) must be listed for it.

    Checks 2 and 3 are skipped when ``img.format`` is empty or ``None``.

    Args:
        mime_type: MIME type string to validate
        img: PIL Image object
        filename: Name of the file

    Returns:
        True if validation passes

    Raises:
        InvalidMimeTypeError: If MIME type is invalid or doesn't match image
    """
    if mime_type not in MIME_TO_FORMAT:
        raise InvalidMimeTypeError(
            f"Invalid MIME type '{mime_type}'. "
            f"Supported types: {', '.join(MIME_TO_FORMAT.keys())}"
        )

    img_format: Optional[str] = img.format.upper() if img.format else None
    if img_format is None:
        # Nothing to compare the MIME type or extension against.
        return True

    allowed_formats: List[str] = MIME_TO_FORMAT[mime_type]
    if img_format not in allowed_formats:
        raise InvalidMimeTypeError(
            f"MIME type '{mime_type}' does not match image format '{img_format}'. "
            f"Expected formats for {mime_type}: {', '.join(allowed_formats)}"
        )

    if img_format in FORMAT_TO_EXTENSION:
        # rpartition returns an empty separator when the name has no dot, so
        # the extension is '' in that case. Slicing from rfind('.') would
        # start at -1 and report the last character as the extension.
        _, dot, suffix = filename.rpartition('.')
        file_ext: str = f".{suffix}".lower() if dot else ''

        valid_extensions: List[str] = FORMAT_TO_EXTENSION[img_format]
        if file_ext not in valid_extensions:
            raise InvalidMimeTypeError(
                f"File extension '{file_ext}' does not match format '{img_format}'. "
                f"Expected extensions: {', '.join(valid_extensions)}"
            )

    return True


def get_s3_bucket_uri() -> str:
    """Get S3 bucket URI from environment variable

    Returns:
        Value of the S3_BUCKET_URI environment variable

    Raises:
        MissingEnvironmentVariableError: If S3_BUCKET_URI is unset or empty
    """
    s3_uri: Optional[str] = os.getenv('S3_BUCKET_URI')

    if not s3_uri:
        raise MissingEnvironmentVariableError(
            "S3_BUCKET_URI environment variable is not set"
        )

    return s3_uri


def upload_image(img: Image.Image, mime_type: str) -> str:
    """Upload PIL image with comprehensive MIME type validation

    The image is given a generated filename, validated against ``mime_type``,
    encoded into an in-memory buffer and POSTed as the multipart field
    'file' to the URL returned by ``get_s3_bucket_uri``.

    Args:
        img: PIL Image object to upload
        mime_type: MIME type for the upload

    Returns:
        The value of the 'url' field in the server's JSON response

    Raises:
        InvalidMimeTypeError: If MIME type validation fails
        MissingEnvironmentVariableError: If S3_BUCKET_URI is not set
        ImageUploadError: If the server answers with a status other than 200,
            if the request itself fails, or on any other unexpected error
    """
    try:
        # Get URL from environment variable
        url: str = get_s3_bucket_uri()

        filename: str = generate_file_name(img)

        validate_mime_type(mime_type, img, filename)

        # NOTE(review): when img.format is empty the image is encoded as PNG
        # below, but validate_mime_type skips its format checks in that case,
        # so a non-PNG mime_type is not rejected - confirm this is intended.
        img_format: str = img.format if img.format else 'PNG'
        buffer: BytesIO = BytesIO()
        img.save(buffer, format=img_format)
        buffer.seek(0)  # rewind so the upload reads from the first byte

        files: Dict[str, tuple] = {'file': (filename, buffer, mime_type)}
        response: requests.Response = requests.post(url, files=files, timeout=30)

        if response.status_code != 200:
            raise ImageUploadError(
                f"Upload failed with status code {response.status_code}: {response.text}"
            )

        return response.json()['url']

    except ImageUploadError:
        # Covers the validation/configuration subclasses and the status-code
        # error above; re-raise untouched so the generic handler below does
        # not re-label them as "Unexpected error".
        raise
    except requests.exceptions.RequestException as e:
        raise ImageUploadError(f"Network error: {str(e)}") from e
    except Exception as e:
        raise ImageUploadError(f"Unexpected error: {str(e)}") from e
= "0.9.9" +description = "Deprecated package" +optional = false +python-versions = "*" +groups = ["main"] +files = [ + {file = "dotenv-0.9.9-py2.py3-none-any.whl", hash = "sha256:29cf74a087b31dafdb5a446b6d7e11cbce8ed2741540e2339c69fbef92c94ce9"}, +] + +[package.dependencies] +python-dotenv = "*" + [[package]] name = "dulwich" version = "0.24.1" @@ -1328,6 +1342,115 @@ all = ["pbs-installer[download,install]"] download = ["httpx (>=0.27.0,<1)"] install = ["zstandard (>=0.21.0)"] +[[package]] +name = "pillow" +version = "12.1.0" +description = "Python Imaging Library (fork)" +optional = false +python-versions = ">=3.10" +groups = ["main"] +files = [ + {file = "pillow-12.1.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:fb125d860738a09d363a88daa0f59c4533529a90e564785e20fe875b200b6dbd"}, + {file = "pillow-12.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:cad302dc10fac357d3467a74a9561c90609768a6f73a1923b0fd851b6486f8b0"}, + {file = "pillow-12.1.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:a40905599d8079e09f25027423aed94f2823adaf2868940de991e53a449e14a8"}, + {file = "pillow-12.1.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:92a7fe4225365c5e3a8e598982269c6d6698d3e783b3b1ae979e7819f9cd55c1"}, + {file = "pillow-12.1.0-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f10c98f49227ed8383d28174ee95155a675c4ed7f85e2e573b04414f7e371bda"}, + {file = "pillow-12.1.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8637e29d13f478bc4f153d8daa9ffb16455f0a6cb287da1b432fdad2bfbd66c7"}, + {file = "pillow-12.1.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:21e686a21078b0f9cb8c8a961d99e6a4ddb88e0fc5ea6e130172ddddc2e5221a"}, + {file = "pillow-12.1.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:2415373395a831f53933c23ce051021e79c8cd7979822d8cc478547a3f4da8ef"}, + {file = "pillow-12.1.0-cp310-cp310-win32.whl", hash = 
"sha256:e75d3dba8fc1ddfec0cd752108f93b83b4f8d6ab40e524a95d35f016b9683b09"}, + {file = "pillow-12.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:64efdf00c09e31efd754448a383ea241f55a994fd079866b92d2bbff598aad91"}, + {file = "pillow-12.1.0-cp310-cp310-win_arm64.whl", hash = "sha256:f188028b5af6b8fb2e9a76ac0f841a575bd1bd396e46ef0840d9b88a48fdbcea"}, + {file = "pillow-12.1.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:a83e0850cb8f5ac975291ebfc4170ba481f41a28065277f7f735c202cd8e0af3"}, + {file = "pillow-12.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b6e53e82ec2db0717eabb276aa56cf4e500c9a7cec2c2e189b55c24f65a3e8c0"}, + {file = "pillow-12.1.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:40a8e3b9e8773876d6e30daed22f016509e3987bab61b3b7fe309d7019a87451"}, + {file = "pillow-12.1.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:800429ac32c9b72909c671aaf17ecd13110f823ddb7db4dfef412a5587c2c24e"}, + {file = "pillow-12.1.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0b022eaaf709541b391ee069f0022ee5b36c709df71986e3f7be312e46f42c84"}, + {file = "pillow-12.1.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1f345e7bc9d7f368887c712aa5054558bad44d2a301ddf9248599f4161abc7c0"}, + {file = "pillow-12.1.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d70347c8a5b7ccd803ec0c85c8709f036e6348f1e6a5bf048ecd9c64d3550b8b"}, + {file = "pillow-12.1.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:1fcc52d86ce7a34fd17cb04e87cfdb164648a3662a6f20565910a99653d66c18"}, + {file = "pillow-12.1.0-cp311-cp311-win32.whl", hash = "sha256:3ffaa2f0659e2f740473bcf03c702c39a8d4b2b7ffc629052028764324842c64"}, + {file = "pillow-12.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:806f3987ffe10e867bab0ddad45df1148a2b98221798457fa097ad85d6e8bc75"}, + {file = "pillow-12.1.0-cp311-cp311-win_arm64.whl", hash = 
"sha256:9f5fefaca968e700ad1a4a9de98bf0869a94e397fe3524c4c9450c1445252304"}, + {file = "pillow-12.1.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:a332ac4ccb84b6dde65dbace8431f3af08874bf9770719d32a635c4ef411b18b"}, + {file = "pillow-12.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:907bfa8a9cb790748a9aa4513e37c88c59660da3bcfffbd24a7d9e6abf224551"}, + {file = "pillow-12.1.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:efdc140e7b63b8f739d09a99033aa430accce485ff78e6d311973a67b6bf3208"}, + {file = "pillow-12.1.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bef9768cab184e7ae6e559c032e95ba8d07b3023c289f79a2bd36e8bf85605a5"}, + {file = "pillow-12.1.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:742aea052cf5ab5034a53c3846165bc3ce88d7c38e954120db0ab867ca242661"}, + {file = "pillow-12.1.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a6dfc2af5b082b635af6e08e0d1f9f1c4e04d17d4e2ca0ef96131e85eda6eb17"}, + {file = "pillow-12.1.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:609e89d9f90b581c8d16358c9087df76024cf058fa693dd3e1e1620823f39670"}, + {file = "pillow-12.1.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:43b4899cfd091a9693a1278c4982f3e50f7fb7cff5153b05174b4afc9593b616"}, + {file = "pillow-12.1.0-cp312-cp312-win32.whl", hash = "sha256:aa0c9cc0b82b14766a99fbe6084409972266e82f459821cd26997a488a7261a7"}, + {file = "pillow-12.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:d70534cea9e7966169ad29a903b99fc507e932069a881d0965a1a84bb57f6c6d"}, + {file = "pillow-12.1.0-cp312-cp312-win_arm64.whl", hash = "sha256:65b80c1ee7e14a87d6a068dd3b0aea268ffcabfe0498d38661b00c5b4b22e74c"}, + {file = "pillow-12.1.0-cp313-cp313-ios_13_0_arm64_iphoneos.whl", hash = "sha256:7b5dd7cbae20285cdb597b10eb5a2c13aa9de6cde9bb64a3c1317427b1db1ae1"}, + {file = "pillow-12.1.0-cp313-cp313-ios_13_0_arm64_iphonesimulator.whl", hash = 
"sha256:29a4cef9cb672363926f0470afc516dbf7305a14d8c54f7abbb5c199cd8f8179"}, + {file = "pillow-12.1.0-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:681088909d7e8fa9e31b9799aaa59ba5234c58e5e4f1951b4c4d1082a2e980e0"}, + {file = "pillow-12.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:983976c2ab753166dc66d36af6e8ec15bb511e4a25856e2227e5f7e00a160587"}, + {file = "pillow-12.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:db44d5c160a90df2d24a24760bbd37607d53da0b34fb546c4c232af7192298ac"}, + {file = "pillow-12.1.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:6b7a9d1db5dad90e2991645874f708e87d9a3c370c243c2d7684d28f7e133e6b"}, + {file = "pillow-12.1.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6258f3260986990ba2fa8a874f8b6e808cf5abb51a94015ca3dc3c68aa4f30ea"}, + {file = "pillow-12.1.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e115c15e3bc727b1ca3e641a909f77f8ca72a64fff150f666fcc85e57701c26c"}, + {file = "pillow-12.1.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6741e6f3074a35e47c77b23a4e4f2d90db3ed905cb1c5e6e0d49bff2045632bc"}, + {file = "pillow-12.1.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:935b9d1aed48fcfb3f838caac506f38e29621b44ccc4f8a64d575cb1b2a88644"}, + {file = "pillow-12.1.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5fee4c04aad8932da9f8f710af2c1a15a83582cfb884152a9caa79d4efcdbf9c"}, + {file = "pillow-12.1.0-cp313-cp313-win32.whl", hash = "sha256:a786bf667724d84aa29b5db1c61b7bfdde380202aaca12c3461afd6b71743171"}, + {file = "pillow-12.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:461f9dfdafa394c59cd6d818bdfdbab4028b83b02caadaff0ffd433faf4c9a7a"}, + {file = "pillow-12.1.0-cp313-cp313-win_arm64.whl", hash = "sha256:9212d6b86917a2300669511ed094a9406888362e085f2431a7da985a6b124f45"}, + {file = "pillow-12.1.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = 
"sha256:00162e9ca6d22b7c3ee8e61faa3c3253cd19b6a37f126cad04f2f88b306f557d"}, + {file = "pillow-12.1.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:7d6daa89a00b58c37cb1747ec9fb7ac3bc5ffd5949f5888657dfddde6d1312e0"}, + {file = "pillow-12.1.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:e2479c7f02f9d505682dc47df8c0ea1fc5e264c4d1629a5d63fe3e2334b89554"}, + {file = "pillow-12.1.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f188d580bd870cda1e15183790d1cc2fa78f666e76077d103edf048eed9c356e"}, + {file = "pillow-12.1.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0fde7ec5538ab5095cc02df38ee99b0443ff0e1c847a045554cf5f9af1f4aa82"}, + {file = "pillow-12.1.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0ed07dca4a8464bada6139ab38f5382f83e5f111698caf3191cb8dbf27d908b4"}, + {file = "pillow-12.1.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:f45bd71d1fa5e5749587613037b172e0b3b23159d1c00ef2fc920da6f470e6f0"}, + {file = "pillow-12.1.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:277518bf4fe74aa91489e1b20577473b19ee70fb97c374aa50830b279f25841b"}, + {file = "pillow-12.1.0-cp313-cp313t-win32.whl", hash = "sha256:7315f9137087c4e0ee73a761b163fc9aa3b19f5f606a7fc08d83fd3e4379af65"}, + {file = "pillow-12.1.0-cp313-cp313t-win_amd64.whl", hash = "sha256:0ddedfaa8b5f0b4ffbc2fa87b556dc59f6bb4ecb14a53b33f9189713ae8053c0"}, + {file = "pillow-12.1.0-cp313-cp313t-win_arm64.whl", hash = "sha256:80941e6d573197a0c28f394753de529bb436b1ca990ed6e765cf42426abc39f8"}, + {file = "pillow-12.1.0-cp314-cp314-ios_13_0_arm64_iphoneos.whl", hash = "sha256:5cb7bc1966d031aec37ddb9dcf15c2da5b2e9f7cc3ca7c54473a20a927e1eb91"}, + {file = "pillow-12.1.0-cp314-cp314-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:97e9993d5ed946aba26baf9c1e8cf18adbab584b99f452ee72f7ee8acb882796"}, + {file = "pillow-12.1.0-cp314-cp314-ios_13_0_x86_64_iphonesimulator.whl", hash = 
"sha256:414b9a78e14ffeb98128863314e62c3f24b8a86081066625700b7985b3f529bd"}, + {file = "pillow-12.1.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:e6bdb408f7c9dd2a5ff2b14a3b0bb6d4deb29fb9961e6eb3ae2031ae9a5cec13"}, + {file = "pillow-12.1.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:3413c2ae377550f5487991d444428f1a8ae92784aac79caa8b1e3b89b175f77e"}, + {file = "pillow-12.1.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:e5dcbe95016e88437ecf33544ba5db21ef1b8dd6e1b434a2cb2a3d605299e643"}, + {file = "pillow-12.1.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d0a7735df32ccbcc98b98a1ac785cc4b19b580be1bdf0aeb5c03223220ea09d5"}, + {file = "pillow-12.1.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0c27407a2d1b96774cbc4a7594129cc027339fd800cd081e44497722ea1179de"}, + {file = "pillow-12.1.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:15c794d74303828eaa957ff8070846d0efe8c630901a1c753fdc63850e19ecd9"}, + {file = "pillow-12.1.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c990547452ee2800d8506c4150280757f88532f3de2a58e3022e9b179107862a"}, + {file = "pillow-12.1.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:b63e13dd27da389ed9475b3d28510f0f954bca0041e8e551b2a4eb1eab56a39a"}, + {file = "pillow-12.1.0-cp314-cp314-win32.whl", hash = "sha256:1a949604f73eb07a8adab38c4fe50791f9919344398bdc8ac6b307f755fc7030"}, + {file = "pillow-12.1.0-cp314-cp314-win_amd64.whl", hash = "sha256:4f9f6a650743f0ddee5593ac9e954ba1bdbc5e150bc066586d4f26127853ab94"}, + {file = "pillow-12.1.0-cp314-cp314-win_arm64.whl", hash = "sha256:808b99604f7873c800c4840f55ff389936ef1948e4e87645eaf3fccbc8477ac4"}, + {file = "pillow-12.1.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:bc11908616c8a283cf7d664f77411a5ed2a02009b0097ff8abbba5e79128ccf2"}, + {file = "pillow-12.1.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = 
"sha256:896866d2d436563fa2a43a9d72f417874f16b5545955c54a64941e87c1376c61"}, + {file = "pillow-12.1.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8e178e3e99d3c0ea8fc64b88447f7cac8ccf058af422a6cedc690d0eadd98c51"}, + {file = "pillow-12.1.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:079af2fb0c599c2ec144ba2c02766d1b55498e373b3ac64687e43849fbbef5bc"}, + {file = "pillow-12.1.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bdec5e43377761c5dbca620efb69a77f6855c5a379e32ac5b158f54c84212b14"}, + {file = "pillow-12.1.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:565c986f4b45c020f5421a4cea13ef294dde9509a8577f29b2fc5edc7587fff8"}, + {file = "pillow-12.1.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:43aca0a55ce1eefc0aefa6253661cb54571857b1a7b2964bd8a1e3ef4b729924"}, + {file = "pillow-12.1.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:0deedf2ea233722476b3a81e8cdfbad786f7adbed5d848469fa59fe52396e4ef"}, + {file = "pillow-12.1.0-cp314-cp314t-win32.whl", hash = "sha256:b17fbdbe01c196e7e159aacb889e091f28e61020a8abeac07b68079b6e626988"}, + {file = "pillow-12.1.0-cp314-cp314t-win_amd64.whl", hash = "sha256:27b9baecb428899db6c0de572d6d305cfaf38ca1596b5c0542a5182e3e74e8c6"}, + {file = "pillow-12.1.0-cp314-cp314t-win_arm64.whl", hash = "sha256:f61333d817698bdcdd0f9d7793e365ac3d2a21c1f1eb02b32ad6aefb8d8ea831"}, + {file = "pillow-12.1.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:ca94b6aac0d7af2a10ba08c0f888b3d5114439b6b3ef39968378723622fed377"}, + {file = "pillow-12.1.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:351889afef0f485b84078ea40fe33727a0492b9af3904661b0abbafee0355b72"}, + {file = "pillow-12.1.0-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:bb0984b30e973f7e2884362b7d23d0a348c7143ee559f38ef3eaab640144204c"}, + {file = 
"pillow-12.1.0-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:84cabc7095dd535ca934d57e9ce2a72ffd216e435a84acb06b2277b1de2689bd"}, + {file = "pillow-12.1.0-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:53d8b764726d3af1a138dd353116f774e3862ec7e3794e0c8781e30db0f35dfc"}, + {file = "pillow-12.1.0-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5da841d81b1a05ef940a8567da92decaa15bc4d7dedb540a8c219ad83d91808a"}, + {file = "pillow-12.1.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:75af0b4c229ac519b155028fa1be632d812a519abba9b46b20e50c6caa184f19"}, + {file = "pillow-12.1.0.tar.gz", hash = "sha256:5c5ae0a06e9ea030ab786b0251b32c7e4ce10e58d983c0d5c56029455180b5b9"}, +] + +[package.extras] +docs = ["furo", "olefile", "sphinx (>=8.2)", "sphinx-autobuild", "sphinx-copybutton", "sphinx-inline-tabs", "sphinxext-opengraph"] +fpx = ["olefile"] +mic = ["olefile"] +test-arrow = ["arro3-compute", "arro3-core", "nanoarrow", "pyarrow"] +tests = ["check-manifest", "coverage (>=7.4.2)", "defusedxml", "markdown2", "olefile", "packaging", "pyroma (>=5)", "pytest", "pytest-cov", "pytest-timeout", "pytest-xdist", "trove-classifiers (>=2024.10.12)"] +xmp = ["defusedxml"] + [[package]] name = "pkginfo" version = "1.12.1.2" @@ -1703,6 +1826,21 @@ pytest = ">=4.6" [package.extras] testing = ["fields", "hunter", "process-tests", "pytest-xdist", "virtualenv"] +[[package]] +name = "python-dotenv" +version = "1.2.1" +description = "Read key-value pairs from a .env file and set them as environment variables" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "python_dotenv-1.2.1-py3-none-any.whl", hash = "sha256:b81ee9561e9ca4004139c6cbba3a238c32b03e4894671e181b671e8cb8425d61"}, + {file = "python_dotenv-1.2.1.tar.gz", hash = "sha256:42667e897e16ab0d66954af0e60a9caa94f0fd4ecf3aaf6d2d260eec1aa36ad6"}, +] + +[package.extras] +cli = ["click (>=5.0)"] + 
[[package]] name = "pywin32" version = "306" @@ -2613,4 +2751,4 @@ parsing = ["antlr4-python3-runtime", "lark", "latex2sympy"] [metadata] lock-version = "2.1" python-versions = "^3.11" -content-hash = "10f9e90114dd9d66fe62d35aabc2fee0eb962ff7b99840216a17fb1282a641f4" +content-hash = "7fe52b482228044b36e97dde750e982e13c7837c6c7f9d6fd45433aeae8c18ea" diff --git a/pyproject.toml b/pyproject.toml index 2ae3f68..e7f311a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,6 +43,9 @@ pywin32 = { version = "^306", platform = "win32", optional = true } ########################## poetry-plugin-export = "^1.9.0" pytest-asyncio = "^1.2.0" +pillow = "^12.1.0" +requests = "^2.32.5" +dotenv = "^0.9.9" [tool.poetry.group.dev.dependencies] black = "24.8.0" diff --git a/tests/evaluation/image_upload_test.py b/tests/evaluation/image_upload_test.py new file mode 100644 index 0000000..06400f2 --- /dev/null +++ b/tests/evaluation/image_upload_test.py @@ -0,0 +1,482 @@ +import pytest +import uuid +from io import BytesIO +from unittest.mock import Mock, patch, MagicMock +from PIL import Image +import requests + +# Import the module to test +from lf_toolkit.evaluation.image_upload import ( + generate_file_name, + validate_mime_type, + get_s3_bucket_uri, + upload_image, + ImageUploadError, + InvalidMimeTypeError, + MissingEnvironmentVariableError, + MIME_TO_FORMAT, + FORMAT_TO_EXTENSION +) + + +class TestGenerateFileName: + """Test suite for generate_file_name function""" + + def test_generate_file_name_with_jpeg_format(self): + """Test filename generation for JPEG image""" + img = Mock(spec=Image.Image) + img.format = 'JPEG' + + with patch('lf_toolkit.evaluation.image_upload.uuid.uuid4') as mock_uuid: + mock_uuid.return_value = uuid.UUID('12345678-1234-5678-1234-567812345678') + filename = generate_file_name(img) + + assert filename == '12345678-1234-5678-1234-567812345678.jpeg' + + def test_generate_file_name_with_png_format(self): + """Test filename generation for PNG image""" + img 
= Mock(spec=Image.Image) + img.format = 'PNG' + + with patch('lf_toolkit.evaluation.image_upload.uuid.uuid4') as mock_uuid: + mock_uuid.return_value = uuid.UUID('abcdef12-3456-7890-abcd-ef1234567890') + filename = generate_file_name(img) + + assert filename == 'abcdef12-3456-7890-abcd-ef1234567890.png' + + def test_generate_file_name_with_no_format(self): + """Test filename generation when image has no format (defaults to png)""" + img = Mock(spec=Image.Image) + img.format = None + + with patch('lf_toolkit.evaluation.image_upload.uuid.uuid4') as mock_uuid: + mock_uuid.return_value = uuid.UUID('00000000-0000-0000-0000-000000000000') + filename = generate_file_name(img) + + assert filename == '00000000-0000-0000-0000-000000000000.png' + + def test_generate_file_name_unique(self): + """Test that generated filenames are unique""" + img = Mock(spec=Image.Image) + img.format = 'PNG' + + filename1 = generate_file_name(img) + filename2 = generate_file_name(img) + + assert filename1 != filename2 + + +class TestValidateMimeType: + """Test suite for validate_mime_type function""" + + def test_valid_jpeg_mime_type(self): + """Test validation with valid JPEG MIME type""" + img = Mock(spec=Image.Image) + img.format = 'JPEG' + + result = validate_mime_type('image/jpeg', img, 'test.jpg') + assert result is True + + def test_valid_png_mime_type(self): + """Test validation with valid PNG MIME type""" + img = Mock(spec=Image.Image) + img.format = 'PNG' + + result = validate_mime_type('image/png', img, 'test.png') + assert result is True + + def test_invalid_mime_type(self): + """Test validation with unsupported MIME type""" + img = Mock(spec=Image.Image) + img.format = 'PNG' + + with pytest.raises(InvalidMimeTypeError) as exc_info: + validate_mime_type('image/invalid', img, 'test.png') + + assert "Invalid MIME type 'image/invalid'" in str(exc_info.value) + + def test_mime_type_format_mismatch(self): + """Test validation when MIME type doesn't match image format""" + img = 
Mock(spec=Image.Image) + img.format = 'PNG' + + with pytest.raises(InvalidMimeTypeError) as exc_info: + validate_mime_type('image/jpeg', img, 'test.png') + + assert "does not match image format 'PNG'" in str(exc_info.value) + + def test_extension_format_mismatch(self): + """Test validation when file extension doesn't match format""" + img = Mock(spec=Image.Image) + img.format = 'JPEG' + + with pytest.raises(InvalidMimeTypeError) as exc_info: + validate_mime_type('image/jpeg', img, 'test.png') + + assert "File extension '.png' does not match format 'JPEG'" in str(exc_info.value) + + def test_valid_with_no_image_format(self): + """Test validation when image has no format attribute""" + img = Mock(spec=Image.Image) + img.format = None + + # Should not raise when format is None + result = validate_mime_type('image/png', img, 'test.png') + assert result is True + + def test_valid_webp_mime_type(self): + """Test validation with valid WEBP MIME type""" + img = Mock(spec=Image.Image) + img.format = 'WEBP' + + result = validate_mime_type('image/webp', img, 'test.webp') + assert result is True + + def test_jpeg_with_jpg_extension(self): + """Test JPEG image with .jpg extension""" + img = Mock(spec=Image.Image) + img.format = 'JPEG' + + result = validate_mime_type('image/jpeg', img, 'photo.jpg') + assert result is True + + def test_jpeg_with_jpeg_extension(self): + """Test JPEG image with .jpeg extension""" + img = Mock(spec=Image.Image) + img.format = 'JPEG' + + result = validate_mime_type('image/jpeg', img, 'photo.jpeg') + assert result is True + + +class TestGetS3BucketUri: + """Test suite for get_s3_bucket_uri function""" + + def test_get_s3_bucket_uri_success(self): + """Test successful retrieval of S3 bucket URI""" + with patch('lf_toolkit.evaluation.image_upload.os.getenv') as mock_getenv: + mock_getenv.return_value = 'https://s3.amazonaws.com/my-bucket' + + uri = get_s3_bucket_uri() + + assert uri == 'https://s3.amazonaws.com/my-bucket' + 
mock_getenv.assert_called_once_with('S3_BUCKET_URI') + + def test_get_s3_bucket_uri_missing(self): + """Test error when S3_BUCKET_URI is not set""" + with patch('lf_toolkit.evaluation.image_upload.os.getenv') as mock_getenv: + mock_getenv.return_value = None + + with pytest.raises(MissingEnvironmentVariableError) as exc_info: + get_s3_bucket_uri() + + assert "S3_BUCKET_URI environment variable is not set" in str(exc_info.value) + + def test_get_s3_bucket_uri_empty_string(self): + """Test error when S3_BUCKET_URI is empty string""" + with patch('lf_toolkit.evaluation.image_upload.os.getenv') as mock_getenv: + mock_getenv.return_value = '' + + with pytest.raises(MissingEnvironmentVariableError): + get_s3_bucket_uri() + + +class TestUploadImage: + """Test suite for upload_image function""" + + @patch('lf_toolkit.evaluation.image_upload.requests.post') + @patch('lf_toolkit.evaluation.image_upload.os.getenv') + @patch('lf_toolkit.evaluation.image_upload.uuid.uuid4') + def test_successful_upload(self, mock_uuid, mock_getenv, mock_post): + """Test successful image upload with UUID-based filename""" + # Setup mocks + mock_uuid.return_value = uuid.UUID('12345678-1234-5678-1234-567812345678') + mock_getenv.return_value = 'https://s3.amazonaws.com/my-bucket' + + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = {'url': f'https://s3.amazonaws.com/uploaded-image.jpg'} + mock_post.return_value = mock_response + + # Create a real PIL image for testing + img = Image.new('RGB', (100, 100), color='red') + img.format = 'JPEG' + + # Execute + result = upload_image(img, 'image/jpeg') + + # Verify response + assert result == 'https://s3.amazonaws.com/uploaded-image.jpg' + assert mock_post.called + assert mock_post.call_args[1]['timeout'] == 30 + + # Verify UUID-based filename is used + call_args = mock_post.call_args + filename, file_obj, mime_type = call_args[1]['files']['file'] + assert filename == '12345678-1234-5678-1234-567812345678.jpeg' 
+ assert mime_type == 'image/jpeg' + + @patch('lf_toolkit.evaluation.image_upload.requests.post') + @patch('lf_toolkit.evaluation.image_upload.os.getenv') + @patch('lf_toolkit.evaluation.image_upload.uuid.uuid4') + def test_upload_with_png_image(self, mock_uuid, mock_getenv, mock_post): + """Test uploading PNG image with UUID-based filename""" + mock_uuid.return_value = uuid.UUID('aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee') + mock_getenv.return_value = 'https://storage.example.com' + + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = {'url': 'https://storage.example.com/image.png'} + mock_post.return_value = mock_response + + img = Image.new('RGBA', (50, 50), color=(0, 255, 0, 128)) + img.format = 'PNG' + + result = upload_image(img, 'image/png') + + assert result == 'https://storage.example.com/image.png' + + # Verify UUID-based filename is used + call_args = mock_post.call_args + filename, file_obj, mime_type = call_args[1]['files']['file'] + assert filename == 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee.png' + assert mime_type == 'image/png' + + @patch('lf_toolkit.evaluation.image_upload.os.getenv') + def test_upload_missing_s3_uri(self, mock_getenv): + """Test upload fails when S3_BUCKET_URI is missing""" + mock_getenv.return_value = None + + img = Image.new('RGB', (100, 100)) + img.format = 'JPEG' + + with pytest.raises(MissingEnvironmentVariableError): + upload_image(img, 'image/jpeg') + + @patch('lf_toolkit.evaluation.image_upload.os.getenv') + def test_upload_invalid_mime_type(self, mock_getenv): + """Test upload fails with invalid MIME type""" + mock_getenv.return_value = 'https://s3.amazonaws.com/bucket' + + img = Image.new('RGB', (100, 100)) + img.format = 'JPEG' + + with pytest.raises(InvalidMimeTypeError): + upload_image(img, 'image/invalid') + + @patch('lf_toolkit.evaluation.image_upload.requests.post') + @patch('lf_toolkit.evaluation.image_upload.os.getenv') + @patch('lf_toolkit.evaluation.image_upload.uuid.uuid4') + 
def test_upload_server_error(self, mock_uuid, mock_getenv, mock_post): + """Test upload fails when server returns error""" + mock_uuid.return_value = uuid.UUID('12345678-1234-5678-1234-567812345678') + mock_getenv.return_value = 'https://s3.amazonaws.com/bucket' + + mock_response = Mock() + mock_response.status_code = 500 + mock_response.text = 'Internal Server Error' + mock_post.return_value = mock_response + + img = Image.new('RGB', (100, 100)) + img.format = 'JPEG' + + with pytest.raises(ImageUploadError) as exc_info: + upload_image(img, 'image/jpeg') + + assert "Upload failed with status code 500" in str(exc_info.value) + + @patch('lf_toolkit.evaluation.image_upload.requests.post') + @patch('lf_toolkit.evaluation.image_upload.os.getenv') + @patch('lf_toolkit.evaluation.image_upload.uuid.uuid4') + def test_upload_network_error(self, mock_uuid, mock_getenv, mock_post): + """Test upload fails on network error""" + mock_uuid.return_value = uuid.UUID('12345678-1234-5678-1234-567812345678') + mock_getenv.return_value = 'https://s3.amazonaws.com/bucket' + + mock_post.side_effect = requests.exceptions.ConnectionError('Connection failed') + + img = Image.new('RGB', (100, 100)) + img.format = 'JPEG' + + with pytest.raises(ImageUploadError) as exc_info: + upload_image(img, 'image/jpeg') + + assert "Network error" in str(exc_info.value) + + @patch('lf_toolkit.evaluation.image_upload.requests.post') + @patch('lf_toolkit.evaluation.image_upload.os.getenv') + @patch('lf_toolkit.evaluation.image_upload.uuid.uuid4') + def test_upload_timeout_error(self, mock_uuid, mock_getenv, mock_post): + """Test upload fails on timeout""" + mock_uuid.return_value = uuid.UUID('12345678-1234-5678-1234-567812345678') + mock_getenv.return_value = 'https://s3.amazonaws.com/bucket' + + mock_post.side_effect = requests.exceptions.Timeout('Request timed out') + + img = Image.new('RGB', (100, 100)) + img.format = 'JPEG' + + with pytest.raises(ImageUploadError) as exc_info: + upload_image(img, 
'image/jpeg') + + assert "Network error" in str(exc_info.value) + + @patch('lf_toolkit.evaluation.image_upload.requests.post') + @patch('lf_toolkit.evaluation.image_upload.os.getenv') + @patch('lf_toolkit.evaluation.image_upload.uuid.uuid4') + def test_upload_mime_type_mismatch(self, mock_uuid, mock_getenv, mock_post): + """Test upload fails when MIME type doesn't match image format""" + mock_uuid.return_value = uuid.UUID('12345678-1234-5678-1234-567812345678') + mock_getenv.return_value = 'https://s3.amazonaws.com/bucket' + + img = Image.new('RGB', (100, 100)) + img.format = 'PNG' + + with pytest.raises(InvalidMimeTypeError): + upload_image(img, 'image/jpeg') + + @patch('lf_toolkit.evaluation.image_upload.requests.post') + @patch('lf_toolkit.evaluation.image_upload.os.getenv') + @patch('lf_toolkit.evaluation.image_upload.uuid.uuid4') + def test_upload_image_no_format(self, mock_uuid, mock_getenv, mock_post): + """Test upload with image that has no format (defaults to PNG) uses UUID filename""" + mock_uuid.return_value = uuid.UUID('12345678-1234-5678-1234-567812345678') + mock_getenv.return_value = 'https://s3.amazonaws.com/bucket' + + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = {'url': 'https://s3.amazonaws.com/image.png'} + mock_post.return_value = mock_response + + img = Image.new('RGB', (100, 100)) + img.format = None + + result = upload_image(img, 'image/png') + + assert result == 'https://s3.amazonaws.com/image.png' + + # Verify UUID-based filename with default .png extension + call_args = mock_post.call_args + filename, file_obj, mime_type = call_args[1]['files']['file'] + assert filename == '12345678-1234-5678-1234-567812345678.png' + assert mime_type == 'image/png' + + @patch('lf_toolkit.evaluation.image_upload.requests.post') + @patch('lf_toolkit.evaluation.image_upload.os.getenv') + @patch('lf_toolkit.evaluation.image_upload.uuid.uuid4') + def test_upload_uses_different_uuid_each_time(self, mock_uuid, 
mock_getenv, mock_post): + """Test that each upload generates a unique UUID-based filename""" + mock_getenv.return_value = 'https://s3.amazonaws.com/bucket' + + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = {'url': 'https://s3.amazonaws.com/uploaded.jpg'} + mock_post.return_value = mock_response + + # First upload with first UUID + uuid1 = uuid.UUID('11111111-1111-1111-1111-111111111111') + mock_uuid.return_value = uuid1 + + img1 = Image.new('RGB', (100, 100)) + img1.format = 'JPEG' + upload_image(img1, 'image/jpeg') + + filename1 = mock_post.call_args[1]['files']['file'][0] + + # Second upload with different UUID + uuid2 = uuid.UUID('22222222-2222-2222-2222-222222222222') + mock_uuid.return_value = uuid2 + + img2 = Image.new('RGB', (100, 100)) + img2.format = 'JPEG' + upload_image(img2, 'image/jpeg') + + filename2 = mock_post.call_args[1]['files']['file'][0] + + # Verify different UUIDs result in different filenames + assert filename1 == '11111111-1111-1111-1111-111111111111.jpeg' + assert filename2 == '22222222-2222-2222-2222-222222222222.jpeg' + assert filename1 != filename2 + + @patch('lf_toolkit.evaluation.image_upload.requests.post') + @patch('lf_toolkit.evaluation.image_upload.os.getenv') + @patch('lf_toolkit.evaluation.image_upload.uuid.uuid4') + def test_upload_verifies_correct_file_uploaded(self, mock_uuid, mock_getenv, mock_post): + """Test that the correct file data is sent in upload request""" + mock_uuid.return_value = uuid.UUID('12345678-1234-5678-1234-567812345678') + mock_getenv.return_value = 'https://s3.amazonaws.com/bucket' + + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = {'url': 'https://s3.amazonaws.com/image.jpg'} + mock_post.return_value = mock_response + + img = Image.new('RGB', (100, 100), color='blue') + img.format = 'JPEG' + + upload_image(img, 'image/jpeg') + + # Verify the post was called with correct arguments + call_args = 
mock_post.call_args + assert call_args[0][0] == 'https://s3.amazonaws.com/bucket' + assert 'files' in call_args[1] + assert 'file' in call_args[1]['files'] + + filename, file_obj, mime_type = call_args[1]['files']['file'] + assert filename == '12345678-1234-5678-1234-567812345678.jpeg' + assert mime_type == 'image/jpeg' + + +class TestExceptionHierarchy: + """Test suite for custom exception classes""" + + def test_image_upload_error_is_exception(self): + """Test that ImageUploadError inherits from Exception""" + assert issubclass(ImageUploadError, Exception) + + def test_invalid_mime_type_error_is_image_upload_error(self): + """Test that InvalidMimeTypeError inherits from ImageUploadError""" + assert issubclass(InvalidMimeTypeError, ImageUploadError) + assert issubclass(InvalidMimeTypeError, Exception) + + def test_missing_environment_variable_error_is_image_upload_error(self): + """Test that MissingEnvironmentVariableError inherits from ImageUploadError""" + assert issubclass(MissingEnvironmentVariableError, ImageUploadError) + assert issubclass(MissingEnvironmentVariableError, Exception) + + def test_can_raise_and_catch_image_upload_error(self): + """Test that custom exceptions can be raised and caught""" + with pytest.raises(ImageUploadError): + raise ImageUploadError("Test error") + + def test_invalid_mime_type_error_caught_as_image_upload_error(self): + """Test that InvalidMimeTypeError can be caught as ImageUploadError""" + with pytest.raises(ImageUploadError): + raise InvalidMimeTypeError("Invalid MIME") + + +class TestConstants: + """Test suite for module constants""" + + def test_mime_to_format_has_expected_types(self): + """Test that MIME_TO_FORMAT contains expected image types""" + assert 'image/jpeg' in MIME_TO_FORMAT + assert 'image/png' in MIME_TO_FORMAT + assert 'image/gif' in MIME_TO_FORMAT + assert 'image/webp' in MIME_TO_FORMAT + + def test_format_to_extension_has_expected_formats(self): + """Test that FORMAT_TO_EXTENSION contains expected 
formats""" + assert 'JPEG' in FORMAT_TO_EXTENSION + assert 'PNG' in FORMAT_TO_EXTENSION + assert 'GIF' in FORMAT_TO_EXTENSION + assert 'WEBP' in FORMAT_TO_EXTENSION + + def test_jpeg_has_multiple_extensions(self): + """Test that JPEG format has multiple valid extensions""" + assert '.jpg' in FORMAT_TO_EXTENSION['JPEG'] + assert '.jpeg' in FORMAT_TO_EXTENSION['JPEG'] + + +if __name__ == '__main__': + pytest.main([__file__, '-v']) From 36e95aace323dc0d83e9c5d5ee443f3cc412ff22 Mon Sep 17 00:00:00 2001 From: Marcus Messer Date: Tue, 27 Jan 2026 13:09:20 +0000 Subject: [PATCH 02/22] Switched to put --- lf_toolkit/evaluation/image_upload.py | 8 ++- tests/evaluation/image_upload_test.py | 70 +++++++++++++-------------- 2 files changed, 42 insertions(+), 36 deletions(-) diff --git a/lf_toolkit/evaluation/image_upload.py b/lf_toolkit/evaluation/image_upload.py index 70abd17..19e049d 100644 --- a/lf_toolkit/evaluation/image_upload.py +++ b/lf_toolkit/evaluation/image_upload.py @@ -142,7 +142,7 @@ def upload_image(img: Image.Image, mime_type: str) -> Dict: buffer.seek(0) files: Dict[str, tuple] = {'file': (filename, buffer, mime_type)} - response: requests.Response = requests.post(url, files=files, timeout=30) + response: requests.Response = requests.put(url, files=files, timeout=30) if response.status_code != 200: raise ImageUploadError( @@ -158,3 +158,9 @@ def upload_image(img: Image.Image, mime_type: str) -> Dict: except Exception as e: raise ImageUploadError(f"Unexpected error: {str(e)}") +if __name__ == "__main__": + img = Image.new('RGB', (100, 100), color='red') + img.format = 'JPEG' + + # Execute + result = upload_image(img, 'image/jpeg') diff --git a/tests/evaluation/image_upload_test.py b/tests/evaluation/image_upload_test.py index 06400f2..a40b88e 100644 --- a/tests/evaluation/image_upload_test.py +++ b/tests/evaluation/image_upload_test.py @@ -184,10 +184,10 @@ def test_get_s3_bucket_uri_empty_string(self): class TestUploadImage: """Test suite for upload_image 
function""" - @patch('lf_toolkit.evaluation.image_upload.requests.post') + @patch('lf_toolkit.evaluation.image_upload.requests.put') @patch('lf_toolkit.evaluation.image_upload.os.getenv') @patch('lf_toolkit.evaluation.image_upload.uuid.uuid4') - def test_successful_upload(self, mock_uuid, mock_getenv, mock_post): + def test_successful_upload(self, mock_uuid, mock_getenv, mock_put): """Test successful image upload with UUID-based filename""" # Setup mocks mock_uuid.return_value = uuid.UUID('12345678-1234-5678-1234-567812345678') @@ -196,7 +196,7 @@ def test_successful_upload(self, mock_uuid, mock_getenv, mock_post): mock_response = Mock() mock_response.status_code = 200 mock_response.json.return_value = {'url': f'https://s3.amazonaws.com/uploaded-image.jpg'} - mock_post.return_value = mock_response + mock_put.return_value = mock_response # Create a real PIL image for testing img = Image.new('RGB', (100, 100), color='red') @@ -207,19 +207,19 @@ def test_successful_upload(self, mock_uuid, mock_getenv, mock_post): # Verify response assert result == 'https://s3.amazonaws.com/uploaded-image.jpg' - assert mock_post.called - assert mock_post.call_args[1]['timeout'] == 30 + assert mock_put.called + assert mock_put.call_args[1]['timeout'] == 30 # Verify UUID-based filename is used - call_args = mock_post.call_args + call_args = mock_put.call_args filename, file_obj, mime_type = call_args[1]['files']['file'] assert filename == '12345678-1234-5678-1234-567812345678.jpeg' assert mime_type == 'image/jpeg' - @patch('lf_toolkit.evaluation.image_upload.requests.post') + @patch('lf_toolkit.evaluation.image_upload.requests.put') @patch('lf_toolkit.evaluation.image_upload.os.getenv') @patch('lf_toolkit.evaluation.image_upload.uuid.uuid4') - def test_upload_with_png_image(self, mock_uuid, mock_getenv, mock_post): + def test_upload_with_png_image(self, mock_uuid, mock_getenv, mock_put): """Test uploading PNG image with UUID-based filename""" mock_uuid.return_value = 
uuid.UUID('aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee') mock_getenv.return_value = 'https://storage.example.com' @@ -227,7 +227,7 @@ def test_upload_with_png_image(self, mock_uuid, mock_getenv, mock_post): mock_response = Mock() mock_response.status_code = 200 mock_response.json.return_value = {'url': 'https://storage.example.com/image.png'} - mock_post.return_value = mock_response + mock_put.return_value = mock_response img = Image.new('RGBA', (50, 50), color=(0, 255, 0, 128)) img.format = 'PNG' @@ -237,7 +237,7 @@ def test_upload_with_png_image(self, mock_uuid, mock_getenv, mock_post): assert result == 'https://storage.example.com/image.png' # Verify UUID-based filename is used - call_args = mock_post.call_args + call_args = mock_put.call_args filename, file_obj, mime_type = call_args[1]['files']['file'] assert filename == 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee.png' assert mime_type == 'image/png' @@ -264,10 +264,10 @@ def test_upload_invalid_mime_type(self, mock_getenv): with pytest.raises(InvalidMimeTypeError): upload_image(img, 'image/invalid') - @patch('lf_toolkit.evaluation.image_upload.requests.post') + @patch('lf_toolkit.evaluation.image_upload.requests.put') @patch('lf_toolkit.evaluation.image_upload.os.getenv') @patch('lf_toolkit.evaluation.image_upload.uuid.uuid4') - def test_upload_server_error(self, mock_uuid, mock_getenv, mock_post): + def test_upload_server_error(self, mock_uuid, mock_getenv, mock_put): """Test upload fails when server returns error""" mock_uuid.return_value = uuid.UUID('12345678-1234-5678-1234-567812345678') mock_getenv.return_value = 'https://s3.amazonaws.com/bucket' @@ -275,7 +275,7 @@ def test_upload_server_error(self, mock_uuid, mock_getenv, mock_post): mock_response = Mock() mock_response.status_code = 500 mock_response.text = 'Internal Server Error' - mock_post.return_value = mock_response + mock_put.return_value = mock_response img = Image.new('RGB', (100, 100)) img.format = 'JPEG' @@ -285,15 +285,15 @@ def 
test_upload_server_error(self, mock_uuid, mock_getenv, mock_post): assert "Upload failed with status code 500" in str(exc_info.value) - @patch('lf_toolkit.evaluation.image_upload.requests.post') + @patch('lf_toolkit.evaluation.image_upload.requests.put') @patch('lf_toolkit.evaluation.image_upload.os.getenv') @patch('lf_toolkit.evaluation.image_upload.uuid.uuid4') - def test_upload_network_error(self, mock_uuid, mock_getenv, mock_post): + def test_upload_network_error(self, mock_uuid, mock_getenv, mock_put): """Test upload fails on network error""" mock_uuid.return_value = uuid.UUID('12345678-1234-5678-1234-567812345678') mock_getenv.return_value = 'https://s3.amazonaws.com/bucket' - mock_post.side_effect = requests.exceptions.ConnectionError('Connection failed') + mock_put.side_effect = requests.exceptions.ConnectionError('Connection failed') img = Image.new('RGB', (100, 100)) img.format = 'JPEG' @@ -303,15 +303,15 @@ def test_upload_network_error(self, mock_uuid, mock_getenv, mock_post): assert "Network error" in str(exc_info.value) - @patch('lf_toolkit.evaluation.image_upload.requests.post') + @patch('lf_toolkit.evaluation.image_upload.requests.put') @patch('lf_toolkit.evaluation.image_upload.os.getenv') @patch('lf_toolkit.evaluation.image_upload.uuid.uuid4') - def test_upload_timeout_error(self, mock_uuid, mock_getenv, mock_post): + def test_upload_timeout_error(self, mock_uuid, mock_getenv, mock_put): """Test upload fails on timeout""" mock_uuid.return_value = uuid.UUID('12345678-1234-5678-1234-567812345678') mock_getenv.return_value = 'https://s3.amazonaws.com/bucket' - mock_post.side_effect = requests.exceptions.Timeout('Request timed out') + mock_put.side_effect = requests.exceptions.Timeout('Request timed out') img = Image.new('RGB', (100, 100)) img.format = 'JPEG' @@ -321,10 +321,10 @@ def test_upload_timeout_error(self, mock_uuid, mock_getenv, mock_post): assert "Network error" in str(exc_info.value) - 
@patch('lf_toolkit.evaluation.image_upload.requests.post') + @patch('lf_toolkit.evaluation.image_upload.requests.put') @patch('lf_toolkit.evaluation.image_upload.os.getenv') @patch('lf_toolkit.evaluation.image_upload.uuid.uuid4') - def test_upload_mime_type_mismatch(self, mock_uuid, mock_getenv, mock_post): + def test_upload_mime_type_mismatch(self, mock_uuid, mock_getenv, mock_put): """Test upload fails when MIME type doesn't match image format""" mock_uuid.return_value = uuid.UUID('12345678-1234-5678-1234-567812345678') mock_getenv.return_value = 'https://s3.amazonaws.com/bucket' @@ -335,10 +335,10 @@ def test_upload_mime_type_mismatch(self, mock_uuid, mock_getenv, mock_post): with pytest.raises(InvalidMimeTypeError): upload_image(img, 'image/jpeg') - @patch('lf_toolkit.evaluation.image_upload.requests.post') + @patch('lf_toolkit.evaluation.image_upload.requests.put') @patch('lf_toolkit.evaluation.image_upload.os.getenv') @patch('lf_toolkit.evaluation.image_upload.uuid.uuid4') - def test_upload_image_no_format(self, mock_uuid, mock_getenv, mock_post): + def test_upload_image_no_format(self, mock_uuid, mock_getenv, mock_put): """Test upload with image that has no format (defaults to PNG) uses UUID filename""" mock_uuid.return_value = uuid.UUID('12345678-1234-5678-1234-567812345678') mock_getenv.return_value = 'https://s3.amazonaws.com/bucket' @@ -346,7 +346,7 @@ def test_upload_image_no_format(self, mock_uuid, mock_getenv, mock_post): mock_response = Mock() mock_response.status_code = 200 mock_response.json.return_value = {'url': 'https://s3.amazonaws.com/image.png'} - mock_post.return_value = mock_response + mock_put.return_value = mock_response img = Image.new('RGB', (100, 100)) img.format = None @@ -356,22 +356,22 @@ def test_upload_image_no_format(self, mock_uuid, mock_getenv, mock_post): assert result == 'https://s3.amazonaws.com/image.png' # Verify UUID-based filename with default .png extension - call_args = mock_post.call_args + call_args = 
mock_put.call_args filename, file_obj, mime_type = call_args[1]['files']['file'] assert filename == '12345678-1234-5678-1234-567812345678.png' assert mime_type == 'image/png' - @patch('lf_toolkit.evaluation.image_upload.requests.post') + @patch('lf_toolkit.evaluation.image_upload.requests.put') @patch('lf_toolkit.evaluation.image_upload.os.getenv') @patch('lf_toolkit.evaluation.image_upload.uuid.uuid4') - def test_upload_uses_different_uuid_each_time(self, mock_uuid, mock_getenv, mock_post): + def test_upload_uses_different_uuid_each_time(self, mock_uuid, mock_getenv, mock_put): """Test that each upload generates a unique UUID-based filename""" mock_getenv.return_value = 'https://s3.amazonaws.com/bucket' mock_response = Mock() mock_response.status_code = 200 mock_response.json.return_value = {'url': 'https://s3.amazonaws.com/uploaded.jpg'} - mock_post.return_value = mock_response + mock_put.return_value = mock_response # First upload with first UUID uuid1 = uuid.UUID('11111111-1111-1111-1111-111111111111') @@ -381,7 +381,7 @@ def test_upload_uses_different_uuid_each_time(self, mock_uuid, mock_getenv, mock img1.format = 'JPEG' upload_image(img1, 'image/jpeg') - filename1 = mock_post.call_args[1]['files']['file'][0] + filename1 = mock_put.call_args[1]['files']['file'][0] # Second upload with different UUID uuid2 = uuid.UUID('22222222-2222-2222-2222-222222222222') @@ -391,17 +391,17 @@ def test_upload_uses_different_uuid_each_time(self, mock_uuid, mock_getenv, mock img2.format = 'JPEG' upload_image(img2, 'image/jpeg') - filename2 = mock_post.call_args[1]['files']['file'][0] + filename2 = mock_put.call_args[1]['files']['file'][0] # Verify different UUIDs result in different filenames assert filename1 == '11111111-1111-1111-1111-111111111111.jpeg' assert filename2 == '22222222-2222-2222-2222-222222222222.jpeg' assert filename1 != filename2 - @patch('lf_toolkit.evaluation.image_upload.requests.post') + @patch('lf_toolkit.evaluation.image_upload.requests.put') 
@patch('lf_toolkit.evaluation.image_upload.os.getenv') @patch('lf_toolkit.evaluation.image_upload.uuid.uuid4') - def test_upload_verifies_correct_file_uploaded(self, mock_uuid, mock_getenv, mock_post): + def test_upload_verifies_correct_file_uploaded(self, mock_uuid, mock_getenv, mock_put): """Test that the correct file data is sent in upload request""" mock_uuid.return_value = uuid.UUID('12345678-1234-5678-1234-567812345678') mock_getenv.return_value = 'https://s3.amazonaws.com/bucket' @@ -409,15 +409,15 @@ def test_upload_verifies_correct_file_uploaded(self, mock_uuid, mock_getenv, moc mock_response = Mock() mock_response.status_code = 200 mock_response.json.return_value = {'url': 'https://s3.amazonaws.com/image.jpg'} - mock_post.return_value = mock_response + mock_put.return_value = mock_response img = Image.new('RGB', (100, 100), color='blue') img.format = 'JPEG' upload_image(img, 'image/jpeg') - # Verify the post was called with correct arguments - call_args = mock_post.call_args + # Verify the put was called with correct arguments + call_args = mock_put.call_args assert call_args[0][0] == 'https://s3.amazonaws.com/bucket' assert 'files' in call_args[1] assert 'file' in call_args[1]['files'] From 97373a4696260ccf343afcde572760c298847c5f Mon Sep 17 00:00:00 2001 From: Marcus Messer Date: Tue, 27 Jan 2026 13:45:20 +0000 Subject: [PATCH 03/22] Fixed issue with request not sending file name and updated tests --- lf_toolkit/evaluation/image_upload.py | 17 ++++-- tests/evaluation/image_upload_test.py | 86 +++------------------------ 2 files changed, 20 insertions(+), 83 deletions(-) diff --git a/lf_toolkit/evaluation/image_upload.py b/lf_toolkit/evaluation/image_upload.py index 19e049d..9a97973 100644 --- a/lf_toolkit/evaluation/image_upload.py +++ b/lf_toolkit/evaluation/image_upload.py @@ -113,7 +113,7 @@ def get_s3_bucket_uri() -> str: return s3_uri -def upload_image(img: Image.Image, mime_type: str) -> Dict: +def upload_image(img: Image.Image, mime_type: str) -> 
str: """Upload PIL image with comprehensive MIME type validation Args: @@ -130,26 +130,32 @@ def upload_image(img: Image.Image, mime_type: str) -> Dict: """ try: # Get URL from environment variable - url: str = get_s3_bucket_uri() + base_url: str = get_s3_bucket_uri() filename: str = generate_file_name(img) validate_mime_type(mime_type, img, filename) + full_url = base_url + filename + buffer: BytesIO = BytesIO() img_format: str = img.format if img.format else 'PNG' img.save(buffer, format=img_format) buffer.seek(0) - files: Dict[str, tuple] = {'file': (filename, buffer, mime_type)} - response: requests.Response = requests.put(url, files=files, timeout=30) + response: requests.Response = requests.put( + full_url, + data=buffer, + headers={'Content-Type': mime_type}, + timeout=30 + ) if response.status_code != 200: raise ImageUploadError( f"Upload failed with status code {response.status_code}: {response.text}" ) - return response.json()['url'] + return full_url except (InvalidMimeTypeError, MissingEnvironmentVariableError): raise @@ -164,3 +170,4 @@ def upload_image(img: Image.Image, mime_type: str) -> Dict: # Execute result = upload_image(img, 'image/jpeg') + print(result) diff --git a/tests/evaluation/image_upload_test.py b/tests/evaluation/image_upload_test.py index a40b88e..866d3b3 100644 --- a/tests/evaluation/image_upload_test.py +++ b/tests/evaluation/image_upload_test.py @@ -1,7 +1,6 @@ import pytest import uuid -from io import BytesIO -from unittest.mock import Mock, patch, MagicMock +from unittest.mock import Mock, patch from PIL import Image import requests @@ -191,11 +190,10 @@ def test_successful_upload(self, mock_uuid, mock_getenv, mock_put): """Test successful image upload with UUID-based filename""" # Setup mocks mock_uuid.return_value = uuid.UUID('12345678-1234-5678-1234-567812345678') - mock_getenv.return_value = 'https://s3.amazonaws.com/my-bucket' + mock_getenv.return_value = 'https://s3.amazonaws.com/my-bucket/' mock_response = Mock() 
mock_response.status_code = 200 - mock_response.json.return_value = {'url': f'https://s3.amazonaws.com/uploaded-image.jpg'} mock_put.return_value = mock_response # Create a real PIL image for testing @@ -206,27 +204,20 @@ def test_successful_upload(self, mock_uuid, mock_getenv, mock_put): result = upload_image(img, 'image/jpeg') # Verify response - assert result == 'https://s3.amazonaws.com/uploaded-image.jpg' + assert result == 'https://s3.amazonaws.com/my-bucket/12345678-1234-5678-1234-567812345678.jpeg' assert mock_put.called assert mock_put.call_args[1]['timeout'] == 30 - # Verify UUID-based filename is used - call_args = mock_put.call_args - filename, file_obj, mime_type = call_args[1]['files']['file'] - assert filename == '12345678-1234-5678-1234-567812345678.jpeg' - assert mime_type == 'image/jpeg' - @patch('lf_toolkit.evaluation.image_upload.requests.put') @patch('lf_toolkit.evaluation.image_upload.os.getenv') @patch('lf_toolkit.evaluation.image_upload.uuid.uuid4') def test_upload_with_png_image(self, mock_uuid, mock_getenv, mock_put): """Test uploading PNG image with UUID-based filename""" mock_uuid.return_value = uuid.UUID('aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee') - mock_getenv.return_value = 'https://storage.example.com' + mock_getenv.return_value = 'https://storage.example.com/' mock_response = Mock() mock_response.status_code = 200 - mock_response.json.return_value = {'url': 'https://storage.example.com/image.png'} mock_put.return_value = mock_response img = Image.new('RGBA', (50, 50), color=(0, 255, 0, 128)) @@ -234,13 +225,7 @@ def test_upload_with_png_image(self, mock_uuid, mock_getenv, mock_put): result = upload_image(img, 'image/png') - assert result == 'https://storage.example.com/image.png' - - # Verify UUID-based filename is used - call_args = mock_put.call_args - filename, file_obj, mime_type = call_args[1]['files']['file'] - assert filename == 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee.png' - assert mime_type == 'image/png' + assert result == 
'https://storage.example.com/aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee.png' @patch('lf_toolkit.evaluation.image_upload.os.getenv') def test_upload_missing_s3_uri(self, mock_getenv): @@ -341,11 +326,10 @@ def test_upload_mime_type_mismatch(self, mock_uuid, mock_getenv, mock_put): def test_upload_image_no_format(self, mock_uuid, mock_getenv, mock_put): """Test upload with image that has no format (defaults to PNG) uses UUID filename""" mock_uuid.return_value = uuid.UUID('12345678-1234-5678-1234-567812345678') - mock_getenv.return_value = 'https://s3.amazonaws.com/bucket' + mock_getenv.return_value = 'https://s3.amazonaws.com/bucket/' mock_response = Mock() mock_response.status_code = 200 - mock_response.json.return_value = {'url': 'https://s3.amazonaws.com/image.png'} mock_put.return_value = mock_response img = Image.new('RGB', (100, 100)) @@ -353,50 +337,7 @@ def test_upload_image_no_format(self, mock_uuid, mock_getenv, mock_put): result = upload_image(img, 'image/png') - assert result == 'https://s3.amazonaws.com/image.png' - - # Verify UUID-based filename with default .png extension - call_args = mock_put.call_args - filename, file_obj, mime_type = call_args[1]['files']['file'] - assert filename == '12345678-1234-5678-1234-567812345678.png' - assert mime_type == 'image/png' - - @patch('lf_toolkit.evaluation.image_upload.requests.put') - @patch('lf_toolkit.evaluation.image_upload.os.getenv') - @patch('lf_toolkit.evaluation.image_upload.uuid.uuid4') - def test_upload_uses_different_uuid_each_time(self, mock_uuid, mock_getenv, mock_put): - """Test that each upload generates a unique UUID-based filename""" - mock_getenv.return_value = 'https://s3.amazonaws.com/bucket' - - mock_response = Mock() - mock_response.status_code = 200 - mock_response.json.return_value = {'url': 'https://s3.amazonaws.com/uploaded.jpg'} - mock_put.return_value = mock_response - - # First upload with first UUID - uuid1 = uuid.UUID('11111111-1111-1111-1111-111111111111') - mock_uuid.return_value = 
uuid1 - - img1 = Image.new('RGB', (100, 100)) - img1.format = 'JPEG' - upload_image(img1, 'image/jpeg') - - filename1 = mock_put.call_args[1]['files']['file'][0] - - # Second upload with different UUID - uuid2 = uuid.UUID('22222222-2222-2222-2222-222222222222') - mock_uuid.return_value = uuid2 - - img2 = Image.new('RGB', (100, 100)) - img2.format = 'JPEG' - upload_image(img2, 'image/jpeg') - - filename2 = mock_put.call_args[1]['files']['file'][0] - - # Verify different UUIDs result in different filenames - assert filename1 == '11111111-1111-1111-1111-111111111111.jpeg' - assert filename2 == '22222222-2222-2222-2222-222222222222.jpeg' - assert filename1 != filename2 + assert result == 'https://s3.amazonaws.com/bucket/12345678-1234-5678-1234-567812345678.png' @patch('lf_toolkit.evaluation.image_upload.requests.put') @patch('lf_toolkit.evaluation.image_upload.os.getenv') @@ -404,11 +345,10 @@ def test_upload_uses_different_uuid_each_time(self, mock_uuid, mock_getenv, mock def test_upload_verifies_correct_file_uploaded(self, mock_uuid, mock_getenv, mock_put): """Test that the correct file data is sent in upload request""" mock_uuid.return_value = uuid.UUID('12345678-1234-5678-1234-567812345678') - mock_getenv.return_value = 'https://s3.amazonaws.com/bucket' + mock_getenv.return_value = 'https://s3.amazonaws.com/bucket/' mock_response = Mock() mock_response.status_code = 200 - mock_response.json.return_value = {'url': 'https://s3.amazonaws.com/image.jpg'} mock_put.return_value = mock_response img = Image.new('RGB', (100, 100), color='blue') @@ -416,16 +356,6 @@ def test_upload_verifies_correct_file_uploaded(self, mock_uuid, mock_getenv, moc upload_image(img, 'image/jpeg') - # Verify the put was called with correct arguments - call_args = mock_put.call_args - assert call_args[0][0] == 'https://s3.amazonaws.com/bucket' - assert 'files' in call_args[1] - assert 'file' in call_args[1]['files'] - - filename, file_obj, mime_type = call_args[1]['files']['file'] - assert 
filename == '12345678-1234-5678-1234-567812345678.jpeg' - assert mime_type == 'image/jpeg' - class TestExceptionHierarchy: """Test suite for custom exception classes""" From cdf120cc8687e7e5ef5f08fd073ba46d792e43fa Mon Sep 17 00:00:00 2001 From: Marcus Messer Date: Wed, 28 Jan 2026 13:37:59 +0000 Subject: [PATCH 04/22] Switched to auto parsing of mime_type --- lf_toolkit/evaluation/image_upload.py | 74 +++---------- tests/evaluation/image_upload_test.py | 149 ++------------------------ 2 files changed, 21 insertions(+), 202 deletions(-) diff --git a/lf_toolkit/evaluation/image_upload.py b/lf_toolkit/evaluation/image_upload.py index 9a97973..0642a19 100644 --- a/lf_toolkit/evaluation/image_upload.py +++ b/lf_toolkit/evaluation/image_upload.py @@ -13,22 +13,16 @@ 'image/png': ['PNG'], 'image/gif': ['GIF'], 'image/bmp': ['BMP'], - 'image/webp': ['WEBP'], - 'image/tiff': ['TIFF', 'TIF'], - 'image/x-icon': ['ICO'], } -FORMAT_TO_EXTENSION: Dict[str, List[str]] = { - 'JPEG': ['.jpg', '.jpeg', '.jpe'], - 'PNG': ['.png'], - 'GIF': ['.gif'], - 'BMP': ['.bmp'], - 'WEBP': ['.webp'], - 'TIFF': ['.tiff', '.tif'], - 'ICO': ['.ico'], +FORMAT_TO_MIME: Dict[str, str] = { + 'JPEG': 'image/jpeg', + 'JPG': 'image/jpeg', + 'PNG': 'image/png', + 'GIF': 'image/gif', + "bmp": 'image/bmp' } - class ImageUploadError(Exception): """Custom exception for image upload failures""" pass @@ -57,50 +51,6 @@ def generate_file_name(img: Image.Image) -> str: format_ext: str = img.format.lower() if img.format else 'png' return f"{unique_id}.{format_ext}" - -def validate_mime_type(mime_type: str, img: Image.Image, filename: str) -> bool: - """Validate MIME type against image format and filename - - Args: - mime_type: MIME type string to validate - img: PIL Image object - filename: Name of the file - - Returns: - True if validation passes - - Raises: - InvalidMimeTypeError: If MIME type is invalid or doesn't match image - """ - if mime_type not in MIME_TO_FORMAT: - raise InvalidMimeTypeError( - f"Invalid 
MIME type '{mime_type}'. " - f"Supported types: {', '.join(MIME_TO_FORMAT.keys())}" - ) - - img_format: Optional[str] = img.format.upper() if img.format else None - - if img_format: - allowed_formats: List[str] = MIME_TO_FORMAT[mime_type] - if img_format not in allowed_formats: - raise InvalidMimeTypeError( - f"MIME type '{mime_type}' does not match image format '{img_format}'. " - f"Expected formats for {mime_type}: {', '.join(allowed_formats)}" - ) - - file_ext: str = filename[filename.rfind('.'):].lower() - - if img_format and img_format in FORMAT_TO_EXTENSION: - valid_extensions: List[str] = FORMAT_TO_EXTENSION[img_format] - if file_ext not in valid_extensions: - raise InvalidMimeTypeError( - f"File extension '{file_ext}' does not match format '{img_format}'. " - f"Expected extensions: {', '.join(valid_extensions)}" - ) - - return True - - def get_s3_bucket_uri() -> str: """Get S3 bucket URI from environment variable""" s3_uri: Optional[str] = os.getenv('S3_BUCKET_URI') @@ -113,12 +63,11 @@ def get_s3_bucket_uri() -> str: return s3_uri -def upload_image(img: Image.Image, mime_type: str) -> str: +def upload_image(img: Image.Image) -> str: """Upload PIL image with comprehensive MIME type validation Args: img: PIL Image object to upload - mime_type: MIME type for the upload Returns: JSON response from the server as a dictionary @@ -134,10 +83,13 @@ def upload_image(img: Image.Image, mime_type: str) -> str: filename: str = generate_file_name(img) - validate_mime_type(mime_type, img, filename) - full_url = base_url + filename + if img.format is None: + img.format = 'PNG' + + mime_type = FORMAT_TO_MIME[img.format.upper()] + buffer: BytesIO = BytesIO() img_format: str = img.format if img.format else 'PNG' img.save(buffer, format=img_format) @@ -169,5 +121,5 @@ def upload_image(img: Image.Image, mime_type: str) -> str: img.format = 'JPEG' # Execute - result = upload_image(img, 'image/jpeg') + result = upload_image(img) print(result) diff --git 
a/tests/evaluation/image_upload_test.py b/tests/evaluation/image_upload_test.py index 866d3b3..eefe471 100644 --- a/tests/evaluation/image_upload_test.py +++ b/tests/evaluation/image_upload_test.py @@ -7,14 +7,12 @@ # Import the module to test from lf_toolkit.evaluation.image_upload import ( generate_file_name, - validate_mime_type, get_s3_bucket_uri, upload_image, ImageUploadError, InvalidMimeTypeError, MissingEnvironmentVariableError, MIME_TO_FORMAT, - FORMAT_TO_EXTENSION ) @@ -65,89 +63,6 @@ def test_generate_file_name_unique(self): assert filename1 != filename2 -class TestValidateMimeType: - """Test suite for validate_mime_type function""" - - def test_valid_jpeg_mime_type(self): - """Test validation with valid JPEG MIME type""" - img = Mock(spec=Image.Image) - img.format = 'JPEG' - - result = validate_mime_type('image/jpeg', img, 'test.jpg') - assert result is True - - def test_valid_png_mime_type(self): - """Test validation with valid PNG MIME type""" - img = Mock(spec=Image.Image) - img.format = 'PNG' - - result = validate_mime_type('image/png', img, 'test.png') - assert result is True - - def test_invalid_mime_type(self): - """Test validation with unsupported MIME type""" - img = Mock(spec=Image.Image) - img.format = 'PNG' - - with pytest.raises(InvalidMimeTypeError) as exc_info: - validate_mime_type('image/invalid', img, 'test.png') - - assert "Invalid MIME type 'image/invalid'" in str(exc_info.value) - - def test_mime_type_format_mismatch(self): - """Test validation when MIME type doesn't match image format""" - img = Mock(spec=Image.Image) - img.format = 'PNG' - - with pytest.raises(InvalidMimeTypeError) as exc_info: - validate_mime_type('image/jpeg', img, 'test.png') - - assert "does not match image format 'PNG'" in str(exc_info.value) - - def test_extension_format_mismatch(self): - """Test validation when file extension doesn't match format""" - img = Mock(spec=Image.Image) - img.format = 'JPEG' - - with pytest.raises(InvalidMimeTypeError) as exc_info: 
- validate_mime_type('image/jpeg', img, 'test.png') - - assert "File extension '.png' does not match format 'JPEG'" in str(exc_info.value) - - def test_valid_with_no_image_format(self): - """Test validation when image has no format attribute""" - img = Mock(spec=Image.Image) - img.format = None - - # Should not raise when format is None - result = validate_mime_type('image/png', img, 'test.png') - assert result is True - - def test_valid_webp_mime_type(self): - """Test validation with valid WEBP MIME type""" - img = Mock(spec=Image.Image) - img.format = 'WEBP' - - result = validate_mime_type('image/webp', img, 'test.webp') - assert result is True - - def test_jpeg_with_jpg_extension(self): - """Test JPEG image with .jpg extension""" - img = Mock(spec=Image.Image) - img.format = 'JPEG' - - result = validate_mime_type('image/jpeg', img, 'photo.jpg') - assert result is True - - def test_jpeg_with_jpeg_extension(self): - """Test JPEG image with .jpeg extension""" - img = Mock(spec=Image.Image) - img.format = 'JPEG' - - result = validate_mime_type('image/jpeg', img, 'photo.jpeg') - assert result is True - - class TestGetS3BucketUri: """Test suite for get_s3_bucket_uri function""" @@ -201,7 +116,7 @@ def test_successful_upload(self, mock_uuid, mock_getenv, mock_put): img.format = 'JPEG' # Execute - result = upload_image(img, 'image/jpeg') + result = upload_image(img) # Verify response assert result == 'https://s3.amazonaws.com/my-bucket/12345678-1234-5678-1234-567812345678.jpeg' @@ -223,7 +138,7 @@ def test_upload_with_png_image(self, mock_uuid, mock_getenv, mock_put): img = Image.new('RGBA', (50, 50), color=(0, 255, 0, 128)) img.format = 'PNG' - result = upload_image(img, 'image/png') + result = upload_image(img) assert result == 'https://storage.example.com/aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee.png' @@ -236,18 +151,7 @@ def test_upload_missing_s3_uri(self, mock_getenv): img.format = 'JPEG' with pytest.raises(MissingEnvironmentVariableError): - upload_image(img, 
'image/jpeg') - - @patch('lf_toolkit.evaluation.image_upload.os.getenv') - def test_upload_invalid_mime_type(self, mock_getenv): - """Test upload fails with invalid MIME type""" - mock_getenv.return_value = 'https://s3.amazonaws.com/bucket' - - img = Image.new('RGB', (100, 100)) - img.format = 'JPEG' - - with pytest.raises(InvalidMimeTypeError): - upload_image(img, 'image/invalid') + upload_image(img) @patch('lf_toolkit.evaluation.image_upload.requests.put') @patch('lf_toolkit.evaluation.image_upload.os.getenv') @@ -266,7 +170,7 @@ def test_upload_server_error(self, mock_uuid, mock_getenv, mock_put): img.format = 'JPEG' with pytest.raises(ImageUploadError) as exc_info: - upload_image(img, 'image/jpeg') + upload_image(img) assert "Upload failed with status code 500" in str(exc_info.value) @@ -284,7 +188,7 @@ def test_upload_network_error(self, mock_uuid, mock_getenv, mock_put): img.format = 'JPEG' with pytest.raises(ImageUploadError) as exc_info: - upload_image(img, 'image/jpeg') + upload_image(img) assert "Network error" in str(exc_info.value) @@ -302,24 +206,10 @@ def test_upload_timeout_error(self, mock_uuid, mock_getenv, mock_put): img.format = 'JPEG' with pytest.raises(ImageUploadError) as exc_info: - upload_image(img, 'image/jpeg') + upload_image(img) assert "Network error" in str(exc_info.value) - @patch('lf_toolkit.evaluation.image_upload.requests.put') - @patch('lf_toolkit.evaluation.image_upload.os.getenv') - @patch('lf_toolkit.evaluation.image_upload.uuid.uuid4') - def test_upload_mime_type_mismatch(self, mock_uuid, mock_getenv, mock_put): - """Test upload fails when MIME type doesn't match image format""" - mock_uuid.return_value = uuid.UUID('12345678-1234-5678-1234-567812345678') - mock_getenv.return_value = 'https://s3.amazonaws.com/bucket' - - img = Image.new('RGB', (100, 100)) - img.format = 'PNG' - - with pytest.raises(InvalidMimeTypeError): - upload_image(img, 'image/jpeg') - @patch('lf_toolkit.evaluation.image_upload.requests.put') 
@patch('lf_toolkit.evaluation.image_upload.os.getenv') @patch('lf_toolkit.evaluation.image_upload.uuid.uuid4') @@ -335,7 +225,7 @@ def test_upload_image_no_format(self, mock_uuid, mock_getenv, mock_put): img = Image.new('RGB', (100, 100)) img.format = None - result = upload_image(img, 'image/png') + result = upload_image(img) assert result == 'https://s3.amazonaws.com/bucket/12345678-1234-5678-1234-567812345678.png' @@ -354,7 +244,7 @@ def test_upload_verifies_correct_file_uploaded(self, mock_uuid, mock_getenv, moc img = Image.new('RGB', (100, 100), color='blue') img.format = 'JPEG' - upload_image(img, 'image/jpeg') + upload_image(img) class TestExceptionHierarchy: @@ -385,28 +275,5 @@ def test_invalid_mime_type_error_caught_as_image_upload_error(self): raise InvalidMimeTypeError("Invalid MIME") -class TestConstants: - """Test suite for module constants""" - - def test_mime_to_format_has_expected_types(self): - """Test that MIME_TO_FORMAT contains expected image types""" - assert 'image/jpeg' in MIME_TO_FORMAT - assert 'image/png' in MIME_TO_FORMAT - assert 'image/gif' in MIME_TO_FORMAT - assert 'image/webp' in MIME_TO_FORMAT - - def test_format_to_extension_has_expected_formats(self): - """Test that FORMAT_TO_EXTENSION contains expected formats""" - assert 'JPEG' in FORMAT_TO_EXTENSION - assert 'PNG' in FORMAT_TO_EXTENSION - assert 'GIF' in FORMAT_TO_EXTENSION - assert 'WEBP' in FORMAT_TO_EXTENSION - - def test_jpeg_has_multiple_extensions(self): - """Test that JPEG format has multiple valid extensions""" - assert '.jpg' in FORMAT_TO_EXTENSION['JPEG'] - assert '.jpeg' in FORMAT_TO_EXTENSION['JPEG'] - - if __name__ == '__main__': pytest.main([__file__, '-v']) From fe5df2d0aa710b0fffa658585fc1cf08aefa1b67 Mon Sep 17 00:00:00 2001 From: Marcus Messer Date: Wed, 28 Jan 2026 17:49:59 +0000 Subject: [PATCH 05/22] Implemented auth for uploading to S3 --- lf_toolkit/evaluation/image_upload.py | 60 ++++++++++++++-- poetry.lock | 99 ++++++++++++++++++++++++++- 
pyproject.toml | 1 + 3 files changed, 155 insertions(+), 5 deletions(-) diff --git a/lf_toolkit/evaluation/image_upload.py b/lf_toolkit/evaluation/image_upload.py index 0642a19..9f86c53 100644 --- a/lf_toolkit/evaluation/image_upload.py +++ b/lf_toolkit/evaluation/image_upload.py @@ -1,3 +1,5 @@ +import hashlib + import requests import uuid import os @@ -6,6 +8,10 @@ from PIL import Image from dotenv import load_dotenv +from botocore.auth import SigV4Auth +from botocore.awsrequest import AWSRequest +from botocore.credentials import Credentials + load_dotenv() MIME_TO_FORMAT: Dict[str, List[str]] = { @@ -63,6 +69,49 @@ def get_s3_bucket_uri() -> str: return s3_uri +def get_aws_signed_request(full_url, buffer, mime_type): + credentials = Credentials( + access_key=os.environ['AWS_ACCESS_KEY_ID'], + secret_key=os.environ['AWS_SECRET_ACCESS_KEY'], + ) + + if hasattr(buffer, 'read'): + # It's a file-like object (BytesIO, etc.) + current_pos = buffer.tell() # Save current position + buffer.seek(0) # Go to start + data = buffer.read() # Read all data + buffer.seek(current_pos) # Restore position + else: + # It's already bytes + data = buffer + + # Calculate content hash and length + content_hash = hashlib.sha256(data).hexdigest() + content_length = len(data) + + # Create the request for signing with required headers + headers = { + 'Content-Type': mime_type, + 'Content-Length': str(content_length), + 'x-amz-content-sha256': content_hash + } + + # Create the request for signing + aws_request = AWSRequest( + method='PUT', + url=full_url, + data=buffer, + headers=headers + ) + + region = os.environ.get('AWS_REGION', 'eu-west-2') + + # Sign the request + SigV4Auth(credentials, 's3', region).add_auth(aws_request) + + return aws_request + + def upload_image(img: Image.Image) -> str: """Upload PIL image with comprehensive MIME type validation @@ -95,10 +144,13 @@ def upload_image(img: Image.Image) -> str: img.save(buffer, format=img_format) buffer.seek(0) - response: 
requests.Response = requests.put( - full_url, - data=buffer, - headers={'Content-Type': mime_type}, + aws_request = get_aws_signed_request(full_url, buffer, mime_type).prepare() + + response: requests.Response = requests.request( + method=aws_request.method, + url=aws_request.url, + data=aws_request.body, + headers=aws_request.headers, timeout=30 ) diff --git a/poetry.lock b/poetry.lock index fa953b9..58fff1f 100644 --- a/poetry.lock +++ b/poetry.lock @@ -129,6 +129,46 @@ d = ["aiohttp (>=3.7.4) ; sys_platform != \"win32\" or implementation_name != \" jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] uvloop = ["uvloop (>=0.15.2)"] +[[package]] +name = "boto3" +version = "1.42.36" +description = "The AWS SDK for Python" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "boto3-1.42.36-py3-none-any.whl", hash = "sha256:e0ff6f2747bfdec63405b35ea185a7aea35239c3f4fe99e4d29368a6de9c4a84"}, + {file = "boto3-1.42.36.tar.gz", hash = "sha256:a4eb51105c8c5d7b2bc2a9e2316e69baf69a55611275b9f189c0cf59f1aae171"}, +] + +[package.dependencies] +botocore = ">=1.42.36,<1.43.0" +jmespath = ">=0.7.1,<2.0.0" +s3transfer = ">=0.16.0,<0.17.0" + +[package.extras] +crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] + +[[package]] +name = "botocore" +version = "1.42.36" +description = "Low-level, data-driven core of boto 3." 
+optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "botocore-1.42.36-py3-none-any.whl", hash = "sha256:2cfae4c482e5e87bd835ab4289b711490c161ba57e852c06b65a03e7c25e08eb"}, + {file = "botocore-1.42.36.tar.gz", hash = "sha256:2ebd89cc75927944e2cee51b7adce749f38e0cb269a758a6464a27f8bcca65fb"}, +] + +[package.dependencies] +jmespath = ">=0.7.1,<2.0.0" +python-dateutil = ">=2.1,<3.0.0" +urllib3 = {version = ">=1.25.4,<2.2.0 || >2.2.0,<3", markers = "python_version >= \"3.10\""} + +[package.extras] +crt = ["awscrt (==0.29.2)"] + [[package]] name = "build" version = "1.3.0" @@ -1023,6 +1063,18 @@ files = [ test = ["async-timeout ; python_version < \"3.11\"", "pytest", "pytest-asyncio (>=0.17)", "pytest-trio", "testpath", "trio"] trio = ["trio"] +[[package]] +name = "jmespath" +version = "1.1.0" +description = "JSON Matching Expressions" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "jmespath-1.1.0-py3-none-any.whl", hash = "sha256:a5663118de4908c91729bea0acadca56526eb2698e83de10cd116ae0f4e97c64"}, + {file = "jmespath-1.1.0.tar.gz", hash = "sha256:472c87d80f36026ae83c6ddd0f1d05d4e510134ed462851fd5f754c8c3cbb88d"}, +] + [[package]] name = "jsonrpcserver" version = "5.0.9" @@ -1826,6 +1878,21 @@ pytest = ">=4.6" [package.extras] testing = ["fields", "hunter", "process-tests", "pytest-xdist", "virtualenv"] +[[package]] +name = "python-dateutil" +version = "2.9.0.post0" +description = "Extensions to the standard Python datetime module" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +groups = ["main"] +files = [ + {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"}, + {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"}, +] + +[package.dependencies] +six = ">=1.5" + [[package]] name = "python-dotenv" version = "1.2.1" @@ 
-2266,6 +2333,24 @@ files = [ {file = "rpds_py-0.27.1.tar.gz", hash = "sha256:26a1c73171d10b7acccbded82bf6a586ab8203601e565badc74bbbf8bc5a10f8"}, ] +[[package]] +name = "s3transfer" +version = "0.16.0" +description = "An Amazon S3 Transfer Manager" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "s3transfer-0.16.0-py3-none-any.whl", hash = "sha256:18e25d66fed509e3868dc1572b3f427ff947dd2c56f844a5bf09481ad3f3b2fe"}, + {file = "s3transfer-0.16.0.tar.gz", hash = "sha256:8e990f13268025792229cd52fa10cb7163744bf56e719e0b9cb925ab79abf920"}, +] + +[package.dependencies] +botocore = ">=1.37.4,<2.0a.0" + +[package.extras] +crt = ["botocore[crt] (>=1.37.4,<2.0a.0)"] + [[package]] name = "secretstorage" version = "3.4.0" @@ -2295,6 +2380,18 @@ files = [ {file = "shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de"}, ] +[[package]] +name = "six" +version = "1.17.0" +description = "Python 2 and 3 compatibility utilities" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +groups = ["main"] +files = [ + {file = "six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274"}, + {file = "six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81"}, +] + [[package]] name = "sniffio" version = "1.3.1" @@ -2751,4 +2848,4 @@ parsing = ["antlr4-python3-runtime", "lark", "latex2sympy"] [metadata] lock-version = "2.1" python-versions = "^3.11" -content-hash = "7fe52b482228044b36e97dde750e982e13c7837c6c7f9d6fd45433aeae8c18ea" +content-hash = "9dc3f7e12199191cf41834205dbb2705b1e1e4b2dd851b1bb57e312d3c4e8a8b" diff --git a/pyproject.toml b/pyproject.toml index e7f311a..e06cf72 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,6 +46,7 @@ pytest-asyncio = "^1.2.0" pillow = "^12.1.0" requests = "^2.32.5" dotenv = "^0.9.9" +boto3 = "^1.42.36" [tool.poetry.group.dev.dependencies] black = 
"24.8.0" From e01140997e94e2fa9bf7f9fa82fd38bbe806046d Mon Sep 17 00:00:00 2001 From: Marcus Messer Date: Wed, 28 Jan 2026 18:10:44 +0000 Subject: [PATCH 06/22] Added session token --- lf_toolkit/evaluation/image_upload.py | 1 + 1 file changed, 1 insertion(+) diff --git a/lf_toolkit/evaluation/image_upload.py b/lf_toolkit/evaluation/image_upload.py index 9f86c53..14e3975 100644 --- a/lf_toolkit/evaluation/image_upload.py +++ b/lf_toolkit/evaluation/image_upload.py @@ -73,6 +73,7 @@ def get_aws_signed_request(full_url, buffer, mime_type): credentials = Credentials( access_key=os.environ['AWS_ACCESS_KEY_ID'], secret_key=os.environ['AWS_SECRET_ACCESS_KEY'], + token=os.environ.get('AWS_SESSION_TOKEN', None) ) if hasattr(buffer, 'read'): From b375d16f0d55dc092fc30b036415a247e2459eea Mon Sep 17 00:00:00 2001 From: Marcus Messer Date: Thu, 29 Jan 2026 09:48:26 +0000 Subject: [PATCH 07/22] Added passing of folder name --- lf_toolkit/evaluation/image_upload.py | 7 +- tests/evaluation/image_upload_test.py | 157 +++++++++++++++++--------- 2 files changed, 110 insertions(+), 54 deletions(-) diff --git a/lf_toolkit/evaluation/image_upload.py b/lf_toolkit/evaluation/image_upload.py index 14e3975..802bab2 100644 --- a/lf_toolkit/evaluation/image_upload.py +++ b/lf_toolkit/evaluation/image_upload.py @@ -113,10 +113,11 @@ def get_aws_signed_request(full_url, buffer, mime_type): return aws_request -def upload_image(img: Image.Image) -> str: +def upload_image(img: Image.Image, folder_name: str) -> str: """Upload PIL image with comprehensive MIME type validation Args: + folder_name: name of folder to save image img: PIL Image object to upload Returns: @@ -133,7 +134,7 @@ def upload_image(img: Image.Image) -> str: filename: str = generate_file_name(img) - full_url = base_url + filename + full_url = os.path.join(base_url, folder_name, filename) if img.format is None: img.format = 'PNG' @@ -174,5 +175,5 @@ def upload_image(img: Image.Image) -> str: img.format = 'JPEG' # Execute - result 
= upload_image(img) + result = upload_image(img, "eduvision") print(result) diff --git a/tests/evaluation/image_upload_test.py b/tests/evaluation/image_upload_test.py index eefe471..b4a0125 100644 --- a/tests/evaluation/image_upload_test.py +++ b/tests/evaluation/image_upload_test.py @@ -12,7 +12,6 @@ ImageUploadError, InvalidMimeTypeError, MissingEnvironmentVariableError, - MIME_TO_FORMAT, ) @@ -98,49 +97,74 @@ def test_get_s3_bucket_uri_empty_string(self): class TestUploadImage: """Test suite for upload_image function""" - @patch('lf_toolkit.evaluation.image_upload.requests.put') + @patch('lf_toolkit.evaluation.image_upload.requests.request') + @patch('lf_toolkit.evaluation.image_upload.get_aws_signed_request') @patch('lf_toolkit.evaluation.image_upload.os.getenv') @patch('lf_toolkit.evaluation.image_upload.uuid.uuid4') - def test_successful_upload(self, mock_uuid, mock_getenv, mock_put): + def test_successful_upload(self, mock_uuid, mock_getenv, mock_get_aws_signed_request, mock_request): """Test successful image upload with UUID-based filename""" # Setup mocks mock_uuid.return_value = uuid.UUID('12345678-1234-5678-1234-567812345678') - mock_getenv.return_value = 'https://s3.amazonaws.com/my-bucket/' + mock_getenv.return_value = 'https://s3.amazonaws.com/eduvision' + + # Mock the AWS signed request + mock_prepared_request = Mock() + mock_prepared_request.method = 'PUT' + mock_prepared_request.url = 'https://s3.amazonaws.com/eduvision/eduvision/12345678-1234-5678-1234-567812345678.jpeg' + mock_prepared_request.body = b'mock_body' + mock_prepared_request.headers = {'Content-Type': 'image/jpeg'} + + mock_aws_request = Mock() + mock_aws_request.prepare.return_value = mock_prepared_request + mock_get_aws_signed_request.return_value = mock_aws_request mock_response = Mock() mock_response.status_code = 200 - mock_put.return_value = mock_response + mock_request.return_value = mock_response # Create a real PIL image for testing img = Image.new('RGB', (100, 100), 
color='red') img.format = 'JPEG' # Execute - result = upload_image(img) + result = upload_image(img, "eduvision") # Verify response - assert result == 'https://s3.amazonaws.com/my-bucket/12345678-1234-5678-1234-567812345678.jpeg' - assert mock_put.called - assert mock_put.call_args[1]['timeout'] == 30 + assert result == 'https://s3.amazonaws.com/eduvision/eduvision/12345678-1234-5678-1234-567812345678.jpeg' + assert mock_request.called + assert mock_request.call_args[1]['timeout'] == 30 - @patch('lf_toolkit.evaluation.image_upload.requests.put') + @patch('lf_toolkit.evaluation.image_upload.requests.request') + @patch('lf_toolkit.evaluation.image_upload.get_aws_signed_request') @patch('lf_toolkit.evaluation.image_upload.os.getenv') @patch('lf_toolkit.evaluation.image_upload.uuid.uuid4') - def test_upload_with_png_image(self, mock_uuid, mock_getenv, mock_put): + def test_upload_with_png(self, mock_uuid, mock_getenv, mock_get_aws_signed_request, mock_request): """Test uploading PNG image with UUID-based filename""" - mock_uuid.return_value = uuid.UUID('aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee') - mock_getenv.return_value = 'https://storage.example.com/' + # Setup mocks + mock_uuid.return_value = uuid.UUID('12345678-1234-5678-1234-567812345678') + mock_getenv.return_value = 'https://s3.amazonaws.com/eduvision' + + # Mock the AWS signed request + mock_prepared_request = Mock() + mock_prepared_request.method = 'PUT' + mock_prepared_request.url = 'https://s3.amazonaws.com/eduvision/eduvision/12345678-1234-5678-1234-567812345678.png' + mock_prepared_request.body = b'mock_body' + mock_prepared_request.headers = {'Content-Type': 'image/jpeg'} + + mock_aws_request = Mock() + mock_aws_request.prepare.return_value = mock_prepared_request + mock_get_aws_signed_request.return_value = mock_aws_request mock_response = Mock() mock_response.status_code = 200 - mock_put.return_value = mock_response + mock_request.return_value = mock_response img = Image.new('RGBA', (50, 50), color=(0, 
255, 0, 128)) img.format = 'PNG' - result = upload_image(img) + result = upload_image(img, "eduvision") - assert result == 'https://storage.example.com/aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee.png' + assert result == 'https://s3.amazonaws.com/eduvision/eduvision/12345678-1234-5678-1234-567812345678.png' @patch('lf_toolkit.evaluation.image_upload.os.getenv') def test_upload_missing_s3_uri(self, mock_getenv): @@ -151,100 +175,131 @@ def test_upload_missing_s3_uri(self, mock_getenv): img.format = 'JPEG' with pytest.raises(MissingEnvironmentVariableError): - upload_image(img) + upload_image(img, "eduvision") - @patch('lf_toolkit.evaluation.image_upload.requests.put') + @patch('lf_toolkit.evaluation.image_upload.requests.request') + @patch('lf_toolkit.evaluation.image_upload.get_aws_signed_request') @patch('lf_toolkit.evaluation.image_upload.os.getenv') @patch('lf_toolkit.evaluation.image_upload.uuid.uuid4') - def test_upload_server_error(self, mock_uuid, mock_getenv, mock_put): + def test_upload_server_error(self, mock_uuid, mock_getenv, mock_get_aws_signed_request, mock_request): """Test upload fails when server returns error""" mock_uuid.return_value = uuid.UUID('12345678-1234-5678-1234-567812345678') mock_getenv.return_value = 'https://s3.amazonaws.com/bucket' + # Mock the AWS signed request + mock_prepared_request = Mock() + mock_prepared_request.method = 'PUT' + mock_prepared_request.url = 'https://s3.amazonaws.com/bucket/eduvision/12345678-1234-5678-1234-567812345678.jpeg' + mock_prepared_request.body = b'mock_body' + mock_prepared_request.headers = {'Content-Type': 'image/jpeg'} + + mock_aws_request = Mock() + mock_aws_request.prepare.return_value = mock_prepared_request + mock_get_aws_signed_request.return_value = mock_aws_request + mock_response = Mock() mock_response.status_code = 500 mock_response.text = 'Internal Server Error' - mock_put.return_value = mock_response + mock_request.return_value = mock_response img = Image.new('RGB', (100, 100)) img.format = 
'JPEG' with pytest.raises(ImageUploadError) as exc_info: - upload_image(img) + upload_image(img, "eduvision") assert "Upload failed with status code 500" in str(exc_info.value) - @patch('lf_toolkit.evaluation.image_upload.requests.put') + @patch('lf_toolkit.evaluation.image_upload.requests.request') + @patch('lf_toolkit.evaluation.image_upload.get_aws_signed_request') @patch('lf_toolkit.evaluation.image_upload.os.getenv') @patch('lf_toolkit.evaluation.image_upload.uuid.uuid4') - def test_upload_network_error(self, mock_uuid, mock_getenv, mock_put): + def test_upload_network_error(self, mock_uuid, mock_getenv, mock_get_aws_signed_request, mock_request): """Test upload fails on network error""" mock_uuid.return_value = uuid.UUID('12345678-1234-5678-1234-567812345678') mock_getenv.return_value = 'https://s3.amazonaws.com/bucket' - mock_put.side_effect = requests.exceptions.ConnectionError('Connection failed') + # Mock the AWS signed request + mock_prepared_request = Mock() + mock_prepared_request.method = 'PUT' + mock_prepared_request.url = 'https://s3.amazonaws.com/bucket/eduvision/12345678-1234-5678-1234-567812345678.jpeg' + mock_prepared_request.body = b'mock_body' + mock_prepared_request.headers = {'Content-Type': 'image/jpeg'} + + mock_aws_request = Mock() + mock_aws_request.prepare.return_value = mock_prepared_request + mock_get_aws_signed_request.return_value = mock_aws_request + + mock_request.side_effect = requests.exceptions.ConnectionError('Connection failed') img = Image.new('RGB', (100, 100)) img.format = 'JPEG' with pytest.raises(ImageUploadError) as exc_info: - upload_image(img) + upload_image(img, "eduvision") assert "Network error" in str(exc_info.value) - @patch('lf_toolkit.evaluation.image_upload.requests.put') + @patch('lf_toolkit.evaluation.image_upload.requests.request') + @patch('lf_toolkit.evaluation.image_upload.get_aws_signed_request') @patch('lf_toolkit.evaluation.image_upload.os.getenv') 
@patch('lf_toolkit.evaluation.image_upload.uuid.uuid4') - def test_upload_timeout_error(self, mock_uuid, mock_getenv, mock_put): + def test_upload_timeout_error(self, mock_uuid, mock_getenv, mock_get_aws_signed_request, mock_request): """Test upload fails on timeout""" mock_uuid.return_value = uuid.UUID('12345678-1234-5678-1234-567812345678') mock_getenv.return_value = 'https://s3.amazonaws.com/bucket' - mock_put.side_effect = requests.exceptions.Timeout('Request timed out') + # Mock the AWS signed request + mock_prepared_request = Mock() + mock_prepared_request.method = 'PUT' + mock_prepared_request.url = 'https://s3.amazonaws.com/bucket/eduvision/12345678-1234-5678-1234-567812345678.jpeg' + mock_prepared_request.body = b'mock_body' + mock_prepared_request.headers = {'Content-Type': 'image/jpeg'} + + mock_aws_request = Mock() + mock_aws_request.prepare.return_value = mock_prepared_request + mock_get_aws_signed_request.return_value = mock_aws_request + + mock_request.side_effect = requests.exceptions.Timeout('Request timed out') img = Image.new('RGB', (100, 100)) img.format = 'JPEG' with pytest.raises(ImageUploadError) as exc_info: - upload_image(img) + upload_image(img, "eduvision") assert "Network error" in str(exc_info.value) - @patch('lf_toolkit.evaluation.image_upload.requests.put') + @patch('lf_toolkit.evaluation.image_upload.requests.request') + @patch('lf_toolkit.evaluation.image_upload.get_aws_signed_request') @patch('lf_toolkit.evaluation.image_upload.os.getenv') @patch('lf_toolkit.evaluation.image_upload.uuid.uuid4') - def test_upload_image_no_format(self, mock_uuid, mock_getenv, mock_put): + def test_upload_image_no_format(self, mock_uuid, mock_getenv, mock_get_aws_signed_request, mock_request): """Test upload with image that has no format (defaults to PNG) uses UUID filename""" mock_uuid.return_value = uuid.UUID('12345678-1234-5678-1234-567812345678') mock_getenv.return_value = 'https://s3.amazonaws.com/bucket/' + # Mock the AWS signed request + 
mock_prepared_request = Mock() + mock_prepared_request.method = 'PUT' + mock_prepared_request.url = 'https://s3.amazonaws.com/bucket/eduvision/12345678-1234-5678-1234-567812345678.png' + mock_prepared_request.body = b'mock_body' + mock_prepared_request.headers = {'Content-Type': 'image/png'} + + mock_aws_request = Mock() + mock_aws_request.prepare.return_value = mock_prepared_request + mock_get_aws_signed_request.return_value = mock_aws_request + mock_response = Mock() mock_response.status_code = 200 - mock_put.return_value = mock_response + mock_request.return_value = mock_response img = Image.new('RGB', (100, 100)) img.format = None - result = upload_image(img) - - assert result == 'https://s3.amazonaws.com/bucket/12345678-1234-5678-1234-567812345678.png' - - @patch('lf_toolkit.evaluation.image_upload.requests.put') - @patch('lf_toolkit.evaluation.image_upload.os.getenv') - @patch('lf_toolkit.evaluation.image_upload.uuid.uuid4') - def test_upload_verifies_correct_file_uploaded(self, mock_uuid, mock_getenv, mock_put): - """Test that the correct file data is sent in upload request""" - mock_uuid.return_value = uuid.UUID('12345678-1234-5678-1234-567812345678') - mock_getenv.return_value = 'https://s3.amazonaws.com/bucket/' - - mock_response = Mock() - mock_response.status_code = 200 - mock_put.return_value = mock_response - - img = Image.new('RGB', (100, 100), color='blue') - img.format = 'JPEG' + result = upload_image(img, "eduvision") - upload_image(img) + assert result == 'https://s3.amazonaws.com/bucket/eduvision/12345678-1234-5678-1234-567812345678.png' class TestExceptionHierarchy: From fe90a965910c95ece76891a05b436509daef8d97 Mon Sep 17 00:00:00 2001 From: Marcus Messer Date: Wed, 11 Mar 2026 16:22:36 +0000 Subject: [PATCH 08/22] Added test suite for stream_io --- poetry.lock | 24 +++- pyproject.toml | 1 + tests/io/stream_io_test.py | 238 +++++++++++++++++++++++++++++++++++++ 3 files changed, 259 insertions(+), 4 deletions(-) create mode 100644 
tests/io/stream_io_test.py diff --git a/poetry.lock b/poetry.lock index 58fff1f..3fcf25e 100644 --- a/poetry.lock +++ b/poetry.lock @@ -32,7 +32,7 @@ version = "4.6.0" description = "High level compatibility layer for multiple asynchronous event loop implementations" optional = false python-versions = ">=3.9" -groups = ["main"] +groups = ["main", "dev"] files = [ {file = "anyio-4.6.0-py3-none-any.whl", hash = "sha256:c7d2e9d63e31599eeb636c8c5c03a7e108d73b345f064f1c19fdc87b79036a9a"}, {file = "anyio-4.6.0.tar.gz", hash = "sha256:137b4559cbb034c477165047febb6ff83f390fc3b20bf181c1fc0a728cb8beeb"}, @@ -912,7 +912,7 @@ version = "3.10" description = "Internationalized Domain Names in Applications (IDNA)" optional = false python-versions = ">=3.6" -groups = ["main"] +groups = ["main", "dev"] files = [ {file = "idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3"}, {file = "idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9"}, @@ -1839,6 +1839,22 @@ pluggy = ">=1.5,<2" [package.extras] dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] +[[package]] +name = "pytest-anyio" +version = "0.0.0" +description = "The pytest anyio plugin is built into anyio. You don't need this package." 
+optional = false +python-versions = "*" +groups = ["dev"] +files = [ + {file = "pytest-anyio-0.0.0.tar.gz", hash = "sha256:b41234e9e9ad7ea1dbfefcc1d6891b23d5ef7c9f07ccf804c13a9cc338571fd3"}, + {file = "pytest_anyio-0.0.0-py2.py3-none-any.whl", hash = "sha256:dc8b5c4741cb16ff90be37fddd585ca943ed12bbeb563de7ace6cd94441d8746"}, +] + +[package.dependencies] +anyio = "*" +pytest = "*" + [[package]] name = "pytest-asyncio" version = "1.2.0" @@ -2398,7 +2414,7 @@ version = "1.3.1" description = "Sniff out which async library your code is running under" optional = false python-versions = ">=3.7" -groups = ["main"] +groups = ["main", "dev"] files = [ {file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"}, {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"}, @@ -2848,4 +2864,4 @@ parsing = ["antlr4-python3-runtime", "lark", "latex2sympy"] [metadata] lock-version = "2.1" python-versions = "^3.11" -content-hash = "9dc3f7e12199191cf41834205dbb2705b1e1e4b2dd851b1bb57e312d3c4e8a8b" +content-hash = "828a10ad95eed705e623f10d27ef6d21568caf98e05636c91cca9246c34b7b58" diff --git a/pyproject.toml b/pyproject.toml index e06cf72..f1c6066 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -59,6 +59,7 @@ pytest-cov = "5.0.0" ########################## # extras ########################## +pytest-anyio = "^0.0.0" [tool.poetry.extras] parsing = ["antlr4-python3-runtime", "lark", "latex2sympy"] diff --git a/tests/io/stream_io_test.py b/tests/io/stream_io_test.py new file mode 100644 index 0000000..d31e8e7 --- /dev/null +++ b/tests/io/stream_io_test.py @@ -0,0 +1,238 @@ +import pytest +import anyio + +from lf_toolkit.io.stream_io import StreamIO, PrefixStreamIO, StreamServer + + +@pytest.fixture +def anyio_backend(): + return "asyncio" + + + +def make_framed_message(payload: str) -> bytes: + """Wrap a JSON string in Content-Length framing.""" + body = 
payload.encode("utf-8") + header = f"Content-Length: {len(body)}\r\n\r\n".encode("utf-8") + return header + body + + +class FakeStreamIO(StreamIO): + """ + Simulates a bidirectional byte stream. + Feed messages via feed(), read responses via responses. + """ + + def __init__(self): + self._buffer = b"" + self.responses = [] + self.close_count = 0 + + def feed(self, data: bytes): + self._buffer += data + + async def read(self, size: int) -> bytes: + if not self._buffer: + raise anyio.EndOfStream() + chunk = self._buffer[:size] + self._buffer = self._buffer[size:] + return chunk + + async def write(self, data: bytes): + self.responses.append(data) + + async def close(self): + self.close_count += 1 + + +class EchoServer(StreamServer): + """ + Concrete StreamServer for testing. + - run() is required by BaseServer (abstract) but not used in tests + since we call _handle_client directly. + - dispatch() is overridden to echo the raw request back, bypassing + the real JsonRpcHandler so tests stay self-contained. + """ + + async def run(self): + pass + + async def dispatch(self, data: str) -> str: + return data + + +class BuggyStreamServer(StreamServer): + """ + Reproduces the original bug by overriding _handle_client with + close() inside the finally block. 
+ """ + + async def run(self): + pass + + async def dispatch(self, data: str) -> str: + return data + + async def _handle_client(self, client: StreamIO): + io = self.wrap_io(client) + while True: + try: + data = await io.read(4096) + if not data: + break + response = await self.dispatch(data.decode("utf-8")) + await io.write(response.encode("utf-8")) + except anyio.EndOfStream: + break + except anyio.ClosedResourceError: + break + except Exception as e: + print(f"Exception: {e}") + finally: + await client.close() # BUG: closes after every message + + +# --------------------------------------------------------------------------- +# Tests +# --------------------------------------------------------------------------- + +class TestStreamServer: + + @pytest.fixture + def stream(self): + return FakeStreamIO() + + @pytest.fixture + def server(self): + return EchoServer() + + @pytest.fixture + def buggy_server(self): + return BuggyStreamServer() + + @pytest.mark.anyio + async def test_handles_multiple_messages(self, stream, server): + """ + Core fix test: the server must process multiple messages in a single + session without closing the connection between them. + """ + stream.feed(make_framed_message('{"command": "eval", "id": 1}')) + stream.feed(make_framed_message('{"command": "eval", "id": 2}')) + stream.feed(make_framed_message('{"command": "eval", "id": 3}')) + + await server._handle_client(stream) + + assert len(stream.responses) == 3, ( + f"Expected 3 responses but got {len(stream.responses)}. " + "Server likely closed the connection after the first message." + ) + + @pytest.mark.anyio + async def test_closes_only_once(self, stream, server): + """ + The client connection should be closed exactly once — after the loop + exits — not once per message. 
+ """ + stream.feed(make_framed_message('{"id": 1}')) + stream.feed(make_framed_message('{"id": 2}')) + + await server._handle_client(stream) + + assert stream.close_count == 1, ( + f"Expected close() to be called once, but it was called " + f"{stream.close_count} times. This is the original bug." + ) + + @pytest.mark.anyio + async def test_buggy_server_closes_after_each_message(self, stream, buggy_server): + """ + Demonstrates the original bug: close() in the finally block causes + the stream to be closed after every message, not just at the end. + """ + stream.feed(make_framed_message('{"id": 1}')) + stream.feed(make_framed_message('{"id": 2}')) + + await buggy_server._handle_client(stream) + + assert stream.close_count > 1, ( + "Expected buggy server to call close() more than once, " + "confirming the bug exists in the original code." + ) + + @pytest.mark.anyio + async def test_single_message(self, stream, server): + """A single message round-trip should work correctly.""" + payload = '{"command": "eval", "response": "test"}' + stream.feed(make_framed_message(payload)) + + await server._handle_client(stream) + + assert len(stream.responses) == 1 + assert payload.encode() in stream.responses[0] + + @pytest.mark.anyio + async def test_closes_on_empty_stream(self, stream, server): + """Server should exit cleanly when the stream ends with no data.""" + await server._handle_client(stream) + + assert stream.close_count == 1 + + @pytest.mark.anyio + async def test_response_content(self, stream, server): + """Verify the actual response content is correct across messages.""" + messages = [ + '{"id": 1, "command": "eval"}', + '{"id": 2, "command": "preview"}', + ] + + for msg in messages: + stream.feed(make_framed_message(msg)) + + await server._handle_client(stream) + + assert len(stream.responses) == 2 + for i, msg in enumerate(messages): + assert msg.encode() in stream.responses[i] + + +class TestPrefixStreamIO: + + @pytest.fixture + def stream(self): + return 
FakeStreamIO() + + @pytest.mark.anyio + async def test_framing_round_trip(self, stream): + """PrefixStreamIO correctly encodes and decodes Content-Length framing.""" + prefix_io = PrefixStreamIO(stream) + + payload = b'{"command": "eval"}' + header = f"Content-Length: {len(payload)}\r\n\r\n".encode() + stream.feed(header + payload) + + result = await prefix_io.read(4096) + assert result == payload + + @pytest.mark.anyio + async def test_write_includes_content_length_header(self, stream): + """PrefixStreamIO write includes correct Content-Length header.""" + prefix_io = PrefixStreamIO(stream) + + payload = b'{"result": "ok"}' + await prefix_io.write(payload) + + assert len(stream.responses) == 1 + written = stream.responses[0] + assert b"Content-Length:" in written + assert f"{len(payload)}".encode() in written + assert payload in written + + @pytest.mark.anyio + async def test_raises_on_missing_content_length(self, stream): + """PrefixStreamIO should raise if Content-Length header is absent.""" + prefix_io = PrefixStreamIO(stream) + + stream.feed(b"X-Custom-Header: something\r\n\r\n") + + with pytest.raises(ValueError, match="Content-Length"): + await prefix_io.read(4096) From d0a14446fa301f2b8929fbe0c5621a18a36b032b Mon Sep 17 00:00:00 2001 From: Marcus Messer Date: Wed, 11 Mar 2026 16:31:45 +0000 Subject: [PATCH 09/22] Added test suite for stream_io --- tests/io/stream_io_test.py | 103 ++++++++----------------------------- 1 file changed, 21 insertions(+), 82 deletions(-) diff --git a/tests/io/stream_io_test.py b/tests/io/stream_io_test.py index d31e8e7..667c779 100644 --- a/tests/io/stream_io_test.py +++ b/tests/io/stream_io_test.py @@ -2,6 +2,7 @@ import anyio from lf_toolkit.io.stream_io import StreamIO, PrefixStreamIO, StreamServer +from lf_toolkit.io.stdio_server import StdioServer @pytest.fixture @@ -9,6 +10,9 @@ def anyio_backend(): return "asyncio" +# --------------------------------------------------------------------------- +# Helpers +# 
--------------------------------------------------------------------------- def make_framed_message(payload: str) -> bytes: """Wrap a JSON string in Content-Length framing.""" @@ -45,58 +49,11 @@ async def close(self): self.close_count += 1 -class EchoServer(StreamServer): - """ - Concrete StreamServer for testing. - - run() is required by BaseServer (abstract) but not used in tests - since we call _handle_client directly. - - dispatch() is overridden to echo the raw request back, bypassing - the real JsonRpcHandler so tests stay self-contained. - """ - - async def run(self): - pass - - async def dispatch(self, data: str) -> str: - return data - - -class BuggyStreamServer(StreamServer): - """ - Reproduces the original bug by overriding _handle_client with - close() inside the finally block. - """ - - async def run(self): - pass - - async def dispatch(self, data: str) -> str: - return data - - async def _handle_client(self, client: StreamIO): - io = self.wrap_io(client) - while True: - try: - data = await io.read(4096) - if not data: - break - response = await self.dispatch(data.decode("utf-8")) - await io.write(response.encode("utf-8")) - except anyio.EndOfStream: - break - except anyio.ClosedResourceError: - break - except Exception as e: - print(f"Exception: {e}") - finally: - await client.close() # BUG: closes after every message - - # --------------------------------------------------------------------------- # Tests # --------------------------------------------------------------------------- -class TestStreamServer: +class TestStdioServer: @pytest.fixture def stream(self): @@ -104,11 +61,7 @@ def stream(self): @pytest.fixture def server(self): - return EchoServer() - - @pytest.fixture - def buggy_server(self): - return BuggyStreamServer() + return StdioServer() @pytest.mark.anyio async def test_handles_multiple_messages(self, stream, server): @@ -116,9 +69,9 @@ async def test_handles_multiple_messages(self, stream, server): Core fix test: the server must 
process multiple messages in a single session without closing the connection between them. """ - stream.feed(make_framed_message('{"command": "eval", "id": 1}')) - stream.feed(make_framed_message('{"command": "eval", "id": 2}')) - stream.feed(make_framed_message('{"command": "eval", "id": 3}')) + stream.feed(make_framed_message('{"jsonrpc":"2.0","method":"eval","params":{},"id":1}')) + stream.feed(make_framed_message('{"jsonrpc":"2.0","method":"eval","params":{},"id":2}')) + stream.feed(make_framed_message('{"jsonrpc":"2.0","method":"eval","params":{},"id":3}')) await server._handle_client(stream) @@ -133,8 +86,8 @@ async def test_closes_only_once(self, stream, server): The client connection should be closed exactly once — after the loop exits — not once per message. """ - stream.feed(make_framed_message('{"id": 1}')) - stream.feed(make_framed_message('{"id": 2}')) + stream.feed(make_framed_message('{"jsonrpc":"2.0","method":"eval","params":{},"id":1}')) + stream.feed(make_framed_message('{"jsonrpc":"2.0","method":"eval","params":{},"id":2}')) await server._handle_client(stream) @@ -143,32 +96,17 @@ async def test_closes_only_once(self, stream, server): f"{stream.close_count} times. This is the original bug." ) - @pytest.mark.anyio - async def test_buggy_server_closes_after_each_message(self, stream, buggy_server): - """ - Demonstrates the original bug: close() in the finally block causes - the stream to be closed after every message, not just at the end. - """ - stream.feed(make_framed_message('{"id": 1}')) - stream.feed(make_framed_message('{"id": 2}')) - - await buggy_server._handle_client(stream) - - assert stream.close_count > 1, ( - "Expected buggy server to call close() more than once, " - "confirming the bug exists in the original code." 
- ) - @pytest.mark.anyio async def test_single_message(self, stream, server): """A single message round-trip should work correctly.""" - payload = '{"command": "eval", "response": "test"}' - stream.feed(make_framed_message(payload)) + stream.feed(make_framed_message('{"jsonrpc":"2.0","method":"eval","params":{},"id":1}')) await server._handle_client(stream) assert len(stream.responses) == 1 - assert payload.encode() in stream.responses[0] + # Response is a framed JSON-RPC envelope + assert b"Content-Length:" in stream.responses[0] + assert b"jsonrpc" in stream.responses[0] @pytest.mark.anyio async def test_closes_on_empty_stream(self, stream, server): @@ -179,10 +117,10 @@ async def test_closes_on_empty_stream(self, stream, server): @pytest.mark.anyio async def test_response_content(self, stream, server): - """Verify the actual response content is correct across messages.""" + """Verify a response is returned for each message sent.""" messages = [ - '{"id": 1, "command": "eval"}', - '{"id": 2, "command": "preview"}', + '{"jsonrpc":"2.0","method":"eval","params":{},"id":1}', + '{"jsonrpc":"2.0","method":"preview","params":{},"id":2}', ] for msg in messages: @@ -191,8 +129,9 @@ async def test_response_content(self, stream, server): await server._handle_client(stream) assert len(stream.responses) == 2 - for i, msg in enumerate(messages): - assert msg.encode() in stream.responses[i] + for response in stream.responses: + assert b"Content-Length:" in response + assert b"jsonrpc" in response class TestPrefixStreamIO: From 30809680464bf4091a10488804a24fe0cf0c37a8 Mon Sep 17 00:00:00 2001 From: Marcus Messer Date: Wed, 11 Mar 2026 16:32:46 +0000 Subject: [PATCH 10/22] Changed how Stdio closes --- lf_toolkit/io/stream_io.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lf_toolkit/io/stream_io.py b/lf_toolkit/io/stream_io.py index 34c835f..265ee9a 100644 --- a/lf_toolkit/io/stream_io.py +++ b/lf_toolkit/io/stream_io.py @@ -91,6 +91,7 @@ async def 
_handle_client(self, client: StreamIO): break response = await self.dispatch(data.decode("utf-8")) + print(f"Responding: {response}") # and this await io.write(response.encode("utf-8")) except anyio.EndOfStream: @@ -101,5 +102,4 @@ async def _handle_client(self, client: StreamIO): break except Exception as e: print(f"Exception: {e}") - finally: - await client.close() + await client.close() From 286ec2b52110224f8ab8c64737ad34810b31d1ed Mon Sep 17 00:00:00 2001 From: Marcus Messer Date: Wed, 11 Mar 2026 16:50:58 +0000 Subject: [PATCH 11/22] Changed exception to break instead of printing --- lf_toolkit/io/stream_io.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lf_toolkit/io/stream_io.py b/lf_toolkit/io/stream_io.py index 265ee9a..fffe91d 100644 --- a/lf_toolkit/io/stream_io.py +++ b/lf_toolkit/io/stream_io.py @@ -101,5 +101,6 @@ async def _handle_client(self, client: StreamIO): # print("Client disconnected") break except Exception as e: - print(f"Exception: {e}") + # print(f"Exception: {e}") + break await client.close() From 313a56d3989bf97d51b6f88c16270a93d008e10c Mon Sep 17 00:00:00 2001 From: Marcus Messer Date: Wed, 11 Mar 2026 16:50:58 +0000 Subject: [PATCH 12/22] Added debub statements --- lf_toolkit/io/stream_io.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lf_toolkit/io/stream_io.py b/lf_toolkit/io/stream_io.py index 265ee9a..fffe91d 100644 --- a/lf_toolkit/io/stream_io.py +++ b/lf_toolkit/io/stream_io.py @@ -101,5 +101,6 @@ async def _handle_client(self, client: StreamIO): # print("Client disconnected") break except Exception as e: - print(f"Exception: {e}") + # print(f"Exception: {e}") + break await client.close() From 3056704b9575a4191d40a98d72aa5792613e2b34 Mon Sep 17 00:00:00 2001 From: Marcus Messer Date: Wed, 11 Mar 2026 17:12:40 +0000 Subject: [PATCH 13/22] Added debug statements --- lf_toolkit/io/stream_io.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git 
a/lf_toolkit/io/stream_io.py b/lf_toolkit/io/stream_io.py index fffe91d..b2ddd3e 100644 --- a/lf_toolkit/io/stream_io.py +++ b/lf_toolkit/io/stream_io.py @@ -84,23 +84,27 @@ async def _handle_client(self, client: StreamIO): while True: try: + import sys + print("waiting for data...", file=sys.stderr, flush=True) data = await io.read(4096) + print(f"got data: {data[:80]}", file=sys.stderr, flush=True) if not data: - # print("Received empty data") break + print("dispatching...", file=sys.stderr, flush=True) response = await self.dispatch(data.decode("utf-8")) - print(f"Responding: {response}") # and this + print(f"got response: {str(response)[:80]}", file=sys.stderr, flush=True) await io.write(response.encode("utf-8")) + print("wrote response", file=sys.stderr, flush=True) except anyio.EndOfStream: - # print("Client disconnected") break except anyio.ClosedResourceError: - # print("Client disconnected") break except Exception as e: - # print(f"Exception: {e}") + import traceback + traceback.print_exc(file=sys.stderr) break + await client.close() From abdb46b94bf2ccbfdb8041d310bd7b0d30c8705f Mon Sep 17 00:00:00 2001 From: Marcus Messer Date: Wed, 11 Mar 2026 17:20:50 +0000 Subject: [PATCH 14/22] Moved Stdioclient instationtion to run --- lf_toolkit/io/stdio_server.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lf_toolkit/io/stdio_server.py b/lf_toolkit/io/stdio_server.py index cbffea2..b47ca77 100644 --- a/lf_toolkit/io/stdio_server.py +++ b/lf_toolkit/io/stdio_server.py @@ -37,10 +37,11 @@ class StdioServer(StreamServer): def __init__(self, handler: Optional[Handler] = None): super().__init__(handler) - self._client = StdioClient() + def wrap_io(self, client: StreamIO) -> StreamIO: return PrefixStreamIO(client) async def run(self): + self._client = StdioClient() await self._handle_client(self._client) From a8ec7290a02df1d7ec0501d2f7b20a82f3519ee2 Mon Sep 17 00:00:00 2001 From: Marcus Messer Date: Wed, 11 Mar 2026 17:57:49 +0000 Subject: [PATCH 
15/22] Improved `PrefixStreamIO.read()` to handle partial chunk reads and added tests for large payloads and partial chunks. --- lf_toolkit/io/stream_io.py | 9 +++++---- tests/io/stream_io_test.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/lf_toolkit/io/stream_io.py b/lf_toolkit/io/stream_io.py index b2ddd3e..223465b 100644 --- a/lf_toolkit/io/stream_io.py +++ b/lf_toolkit/io/stream_io.py @@ -63,10 +63,11 @@ async def read(self, size: int) -> bytes: if content_length == 0: raise ValueError("Content-Length header not found or is zero") - if content_length > size: - raise ValueError("Content-Length is larger than the read size") - - return await self.base.read(content_length) + data = b"" + while len(data) < content_length: + chunk = await self.base.read(content_length - len(data)) + data += chunk + return data async def write(self, data: bytes): response_headers_str = f"Content-Length: {len(data)}\r\n\r\n" diff --git a/tests/io/stream_io_test.py b/tests/io/stream_io_test.py index 667c779..e5be56a 100644 --- a/tests/io/stream_io_test.py +++ b/tests/io/stream_io_test.py @@ -175,3 +175,31 @@ async def test_raises_on_missing_content_length(self, stream): with pytest.raises(ValueError, match="Content-Length"): await prefix_io.read(4096) + + @pytest.mark.anyio + async def test_large_payload_does_not_raise(self, stream): + """Payloads larger than 4096 bytes must be read without raising.""" + prefix_io = PrefixStreamIO(stream) + + payload = b"x" * 8192 + header = f"Content-Length: {len(payload)}\r\n\r\n".encode() + stream.feed(header + payload) + + result = await prefix_io.read(4096) + assert result == payload + + @pytest.mark.anyio + async def test_exact_read_of_partial_chunks(self, stream): + """All bytes are read even when the underlying stream delivers chunks smaller than content_length.""" + prefix_io = PrefixStreamIO(stream) + + payload = b"a" * 100 + header = f"Content-Length: {len(payload)}\r\n\r\n".encode() + # 
Feed header and payload as separate tiny chunks (10 bytes each) + full = header + payload + for i in range(0, len(full), 10): + stream.feed(full[i:i + 10]) + + result = await prefix_io.read(4096) + assert result == payload + assert len(result) == 100 From 7f76f1ba8fb32df78f6acd8a3c81dbde7e47f402 Mon Sep 17 00:00:00 2001 From: Marcus Messer Date: Thu, 12 Mar 2026 12:52:42 +0000 Subject: [PATCH 16/22] Updated `AWSRequest` to use `data` instead of `buffer` in image upload logic --- lf_toolkit/evaluation/image_upload.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lf_toolkit/evaluation/image_upload.py b/lf_toolkit/evaluation/image_upload.py index dff9233..d0d69f4 100644 --- a/lf_toolkit/evaluation/image_upload.py +++ b/lf_toolkit/evaluation/image_upload.py @@ -101,7 +101,7 @@ def get_aws_signed_request(full_url, buffer, mime_type): aws_request = AWSRequest( method='PUT', url=full_url, - data=buffer, + data=data, headers=headers ) From 7181b62225aca760b41c42b820a7b09fc8ceba9d Mon Sep 17 00:00:00 2001 From: Marcus Messer Date: Thu, 12 Mar 2026 13:30:55 +0000 Subject: [PATCH 17/22] Refactored `jsonrpc_handler` to use `Command` type and updated parameter handling --- lf_toolkit/io/rpc_handler.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lf_toolkit/io/rpc_handler.py b/lf_toolkit/io/rpc_handler.py index fe3fadb..354ae95 100644 --- a/lf_toolkit/io/rpc_handler.py +++ b/lf_toolkit/io/rpc_handler.py @@ -4,6 +4,7 @@ from jsonrpcserver import Success from jsonrpcserver import async_dispatch +from ..shared import Command from .handler import Handler @@ -23,10 +24,10 @@ async def dispatch(self, req: str) -> str: ) -def jsonrpc_handler(handler: Handler, name: str): +def jsonrpc_handler(handler: Handler, name: Command): async def wrapped(req: dict): try: - result = await handler.handle(name, req) + result = await handler.handle(name, {"params": req}) return Success(result) except Exception as e: return Error(0, str(e), e) From 
47badae3312b54857f2f30a7170955ee3de61437 Mon Sep 17 00:00:00 2001 From: Marcus Messer Date: Thu, 12 Mar 2026 15:57:49 +0000 Subject: [PATCH 18/22] Updated `StdioServer` to use binary streams for stdin/stdout and added subprocess-based test for framed JSON-RPC communication --- lf_toolkit/io/stdio_server.py | 5 +++-- tests/io/stream_io_test.py | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/lf_toolkit/io/stdio_server.py b/lf_toolkit/io/stdio_server.py index b47ca77..c6d481d 100644 --- a/lf_toolkit/io/stdio_server.py +++ b/lf_toolkit/io/stdio_server.py @@ -16,8 +16,8 @@ class StdioClient(StreamIO): def __init__(self): self.stream = StapledByteStream( - FileWriteStream(sys.stdout), - FileReadStream(sys.stdin), + FileWriteStream(sys.stdout.buffer), + FileReadStream(sys.stdin.buffer), ) async def read(self, size: int) -> bytes: @@ -43,5 +43,6 @@ def wrap_io(self, client: StreamIO) -> StreamIO: return PrefixStreamIO(client) async def run(self): + print("StdioServer started", file=sys.stderr, flush=True) self._client = StdioClient() await self._handle_client(self._client) diff --git a/tests/io/stream_io_test.py b/tests/io/stream_io_test.py index e5be56a..09bb277 100644 --- a/tests/io/stream_io_test.py +++ b/tests/io/stream_io_test.py @@ -1,3 +1,6 @@ +import subprocess +import sys + import pytest import anyio @@ -203,3 +206,32 @@ async def test_exact_read_of_partial_chunks(self, stream): result = await prefix_io.read(4096) assert result == payload assert len(result) == 100 + + +class TestStdioServerSubprocess: + + def test_binary_pipe_roundtrip(self): + """ + Spawn the StdioServer as a subprocess and pipe a framed JSON-RPC + request to its stdin (as raw bytes). Confirms sys.stdin.buffer / + sys.stdout.buffer is used — text-mode streams would break this. 
+ """ + msg = b'{"jsonrpc":"2.0","id":1,"method":"eval","params":{}}' + frame = f"Content-Length: {len(msg)}\r\n\r\n".encode() + msg + + proc = subprocess.Popen( + [sys.executable, "-c", + "import anyio; from lf_toolkit.io.stdio_server import StdioServer; " + "anyio.run(StdioServer().run)"], + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + + stdout, stderr = proc.communicate(input=frame, timeout=5) + + # Must receive a framed response + assert b"Content-Length:" in stdout, ( + f"No framed response received.\nstderr: {stderr.decode()}" + ) + assert b"jsonrpc" in stdout From e240d70bb1131cedb9b5ee51e2a1244faadf667e Mon Sep 17 00:00:00 2001 From: Marcus Messer Date: Fri, 13 Mar 2026 08:23:06 +0000 Subject: [PATCH 19/22] Fixed `StdioServer.write` to flush stdout buffer in a separate thread using `anyio.to_thread.run_sync`. --- lf_toolkit/io/stdio_server.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lf_toolkit/io/stdio_server.py b/lf_toolkit/io/stdio_server.py index c6d481d..d35f232 100644 --- a/lf_toolkit/io/stdio_server.py +++ b/lf_toolkit/io/stdio_server.py @@ -2,6 +2,7 @@ from typing import Optional +import anyio from anyio.streams.file import FileReadStream from anyio.streams.file import FileWriteStream from anyio.streams.stapled import StapledByteStream @@ -25,7 +26,7 @@ async def read(self, size: int) -> bytes: async def write(self, data: bytes): await self.stream.send(data) - await self.stream.flush() + await anyio.to_thread.run_sync(sys.stdout.buffer.flush) async def close(self): await self.stream.aclose() From f90147c1621c432547aaa3c5fce1616d0bbfe61e Mon Sep 17 00:00:00 2001 From: Marcus Messer Date: Fri, 13 Mar 2026 10:14:18 +0000 Subject: [PATCH 20/22] Refactored `StdioServer` and `StdioClient` to pass and use captured `stdout` buffer, ensuring isolated binary protocol handling on `stdout`. 
--- lf_toolkit/io/stdio_server.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/lf_toolkit/io/stdio_server.py b/lf_toolkit/io/stdio_server.py index d35f232..7400c17 100644 --- a/lf_toolkit/io/stdio_server.py +++ b/lf_toolkit/io/stdio_server.py @@ -1,5 +1,6 @@ import sys +from typing import BinaryIO from typing import Optional import anyio @@ -15,9 +16,10 @@ class StdioClient(StreamIO): - def __init__(self): + def __init__(self, stdout_buffer: BinaryIO): + self._stdout_buffer = stdout_buffer self.stream = StapledByteStream( - FileWriteStream(sys.stdout.buffer), + FileWriteStream(stdout_buffer), FileReadStream(sys.stdin.buffer), ) @@ -26,7 +28,7 @@ async def read(self, size: int) -> bytes: async def write(self, data: bytes): await self.stream.send(data) - await anyio.to_thread.run_sync(sys.stdout.buffer.flush) + await anyio.to_thread.run_sync(self._stdout_buffer.flush) async def close(self): await self.stream.aclose() @@ -35,15 +37,20 @@ async def close(self): class StdioServer(StreamServer): _client: StdioClient + _stdout_buffer: BinaryIO def __init__(self, handler: Optional[Handler] = None): super().__init__(handler) - + # Capture the real stdout buffer before redirecting sys.stdout. + # Any print() in user code after this point goes to stderr, + # keeping the binary Content-Length-framed protocol on fd 1 clean. 
+ self._stdout_buffer = sys.stdout.buffer + sys.stdout = sys.stderr def wrap_io(self, client: StreamIO) -> StreamIO: return PrefixStreamIO(client) async def run(self): print("StdioServer started", file=sys.stderr, flush=True) - self._client = StdioClient() - await self._handle_client(self._client) + self._client = StdioClient(self._stdout_buffer) + await self._handle_client(self._client) \ No newline at end of file From 1c71198bbe61b9c108cdadc560847eda820de05d Mon Sep 17 00:00:00 2001 From: Marcus Messer Date: Fri, 13 Mar 2026 10:14:25 +0000 Subject: [PATCH 21/22] Renamed `file_server.py` to `file_server_test.py` for clarity and consistency in test file naming. --- tests/io/{file_server.py => file_server_test.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/io/{file_server.py => file_server_test.py} (100%) diff --git a/tests/io/file_server.py b/tests/io/file_server_test.py similarity index 100% rename from tests/io/file_server.py rename to tests/io/file_server_test.py From 140cf493bd3ab32aa379953fd4ab940d45cb1ecd Mon Sep 17 00:00:00 2001 From: Marcus Messer Date: Fri, 13 Mar 2026 13:12:53 +0000 Subject: [PATCH 22/22] Removed explicit `stdout_buffer` handling in `StdioServer` and `StdioClient`, directly using `sys.stdout.buffer` for simplicity and consistency. 
--- lf_toolkit/io/stdio_server.py | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/lf_toolkit/io/stdio_server.py b/lf_toolkit/io/stdio_server.py index 7400c17..3d6d567 100644 --- a/lf_toolkit/io/stdio_server.py +++ b/lf_toolkit/io/stdio_server.py @@ -1,6 +1,5 @@ import sys -from typing import BinaryIO from typing import Optional import anyio @@ -16,10 +15,10 @@ class StdioClient(StreamIO): - def __init__(self, stdout_buffer: BinaryIO): - self._stdout_buffer = stdout_buffer + def __init__(self): + self._stdout_buffer = sys.stdout.buffer self.stream = StapledByteStream( - FileWriteStream(stdout_buffer), + FileWriteStream(self._stdout_buffer), FileReadStream(sys.stdin.buffer), ) @@ -37,20 +36,14 @@ async def close(self): class StdioServer(StreamServer): _client: StdioClient - _stdout_buffer: BinaryIO def __init__(self, handler: Optional[Handler] = None): super().__init__(handler) - # Capture the real stdout buffer before redirecting sys.stdout. - # Any print() in user code after this point goes to stderr, - # keeping the binary Content-Length-framed protocol on fd 1 clean. - self._stdout_buffer = sys.stdout.buffer - sys.stdout = sys.stderr def wrap_io(self, client: StreamIO) -> StreamIO: return PrefixStreamIO(client) async def run(self): print("StdioServer started", file=sys.stderr, flush=True) - self._client = StdioClient(self._stdout_buffer) - await self._handle_client(self._client) \ No newline at end of file + self._client = StdioClient() + await self._handle_client(self._client)