From b76b20e6f898335f3ead6edeee44c3bcf893ad65 Mon Sep 17 00:00:00 2001
From: Marcus Messer <marcus@marcusm.co.uk>
Date: Tue, 27 Jan 2026 13:03:06 +0000
Subject: [PATCH 01/22] Started on image upload support

---
 lf_toolkit/evaluation/image_upload.py | 160 +++++++++
 poetry.lock                           | 140 +++++++-
 pyproject.toml                        |   3 +
 tests/evaluation/image_upload_test.py | 482 ++++++++++++++++++++++++++
 4 files changed, 784 insertions(+), 1 deletion(-)
 create mode 100644 lf_toolkit/evaluation/image_upload.py
 create mode 100644 tests/evaluation/image_upload_test.py

diff --git a/lf_toolkit/evaluation/image_upload.py b/lf_toolkit/evaluation/image_upload.py
new file mode 100644
index 0000000..70abd17
--- /dev/null
+++ b/lf_toolkit/evaluation/image_upload.py
@@ -0,0 +1,160 @@
+import requests
+import uuid
+import os
+from io import BytesIO
+from typing import Dict, List, Optional
+from PIL import Image
+from dotenv import load_dotenv
+
+load_dotenv()  # import-time side effect: load variables from a .env file, if one is found
+
+MIME_TO_FORMAT: Dict[str, List[str]] = {  # supported MIME type -> image format names accepted for it
+    'image/jpeg': ['JPEG', 'JPG'],
+    'image/png': ['PNG'],
+    'image/gif': ['GIF'],
+    'image/bmp': ['BMP'],
+    'image/webp': ['WEBP'],
+    'image/tiff': ['TIFF', 'TIF'],
+    'image/x-icon': ['ICO'],
+}
+
+FORMAT_TO_EXTENSION: Dict[str, List[str]] = {  # image format name -> filename extensions accepted for it
+    'JPEG': ['.jpg', '.jpeg', '.jpe'],
+    'PNG': ['.png'],
+    'GIF': ['.gif'],
+    'BMP': ['.bmp'],
+    'WEBP': ['.webp'],
+    'TIFF': ['.tiff', '.tif'],
+    'ICO': ['.ico'],
+}
+
+
+class ImageUploadError(Exception):
+    """Base class for image upload failures; the more specific errors in
+    this module derive from it."""
+
+
+class InvalidMimeTypeError(ImageUploadError):
+    """Raised when a MIME type is unsupported or contradicts the image
+    format or file extension."""
+
+
+class MissingEnvironmentVariableError(ImageUploadError):
+    """Raised when a required environment variable (S3_BUCKET_URI) is
+    unset or empty."""
+
+
+def generate_file_name(img: Image.Image) -> str:
+    """Build a unique filename for an image from a random UUID.
+
+    Args:
+        img: PIL Image object whose ``format`` selects the extension
+
+    Returns:
+        ``<uuid4>.<format in lower case>``; ``png`` when the format is unset
+    """
+    extension: str = (img.format or 'png').lower()
+    stem: uuid.UUID = uuid.uuid4()
+    return f"{stem}.{extension}"
+
+
+def validate_mime_type(mime_type: str, img: Image.Image, filename: str) -> bool:
+    """Check that a MIME type, an image's format and a filename agree.
+
+    Args:
+        mime_type: MIME type string; must be a key of MIME_TO_FORMAT
+        img: PIL Image object; an image without a format skips the cross-checks
+        filename: Name of the file; its extension is the text from the last '.'
+
+    Returns:
+        True if validation passes
+
+    Raises:
+        InvalidMimeTypeError: If the MIME type is unsupported or the type, format and extension disagree
+    """
+    if mime_type not in MIME_TO_FORMAT:
+        raise InvalidMimeTypeError(
+            f"Invalid MIME type '{mime_type}'. "
+            f"Supported types: {', '.join(MIME_TO_FORMAT.keys())}"
+        )
+
+    img_format: Optional[str] = img.format.upper() if img.format else None
+    if not img_format:
+        return True  # nothing to cross-check the MIME type or extension against
+
+    allowed_formats: List[str] = MIME_TO_FORMAT[mime_type]
+    if img_format not in allowed_formats:
+        raise InvalidMimeTypeError(
+            f"MIME type '{mime_type}' does not match image format '{img_format}'. "
+            f"Expected formats for {mime_type}: {', '.join(allowed_formats)}"
+        )
+
+    # Slicing from rfind() == -1 used to report the last character of a dot-less name.
+    _, dot, suffix = filename.rpartition('.')
+    file_ext: str = f".{suffix}".lower() if dot else ''
+    valid_extensions: Optional[List[str]] = FORMAT_TO_EXTENSION.get(img_format)
+    if valid_extensions is not None and file_ext not in valid_extensions:
+        raise InvalidMimeTypeError(
+            f"File extension '{file_ext}' does not match format '{img_format}'. "
+            f"Expected extensions: {', '.join(valid_extensions)}"
+        )
+    return True
+
+
+def get_s3_bucket_uri() -> str:
+    """Return the upload endpoint configured in ``S3_BUCKET_URI``.
+
+    Raises:
+        MissingEnvironmentVariableError: If the variable is unset or empty
+    """
+    if endpoint := os.getenv('S3_BUCKET_URI'):
+        return endpoint
+    message = "S3_BUCKET_URI environment variable is not set"
+    raise MissingEnvironmentVariableError(message)
+
+
+def upload_image(img: Image.Image, mime_type: str) -> str:
+    """Upload a PIL image and return the URL reported by the server.
+
+    Args:
+        img: PIL Image object to upload
+        mime_type: MIME type for the upload
+
+    Returns:
+        The 'url' entry of the server's JSON response
+
+    Raises:
+        InvalidMimeTypeError: If MIME type validation fails
+        MissingEnvironmentVariableError: If S3_BUCKET_URI is not set
+        ImageUploadError: If upload fails for any reason
+    """
+    try:
+        # Get URL from environment variable
+        url: str = get_s3_bucket_uri()
+        filename: str = generate_file_name(img)
+        validate_mime_type(mime_type, img, filename)
+
+        buffer: BytesIO = BytesIO()
+        img_format: str = img.format if img.format else 'PNG'
+        img.save(buffer, format=img_format)
+        buffer.seek(0)  # rewind so the upload reads the encoded image from the start
+
+        files: Dict[str, tuple] = {'file': (filename, buffer, mime_type)}
+        response: requests.Response = requests.post(url, files=files, timeout=30)
+
+        if response.status_code != 200:
+            raise ImageUploadError(
+                f"Upload failed with status code {response.status_code}: {response.text}"
+            )
+
+        return response.json()['url']
+
+    except ImageUploadError:
+        # Covers both subclasses and the status-code error raised above, so
+        # none of them is re-wrapped as "Unexpected error" by the last clause.
+        raise
+    except requests.exceptions.RequestException as e:
+        raise ImageUploadError(f"Network error: {str(e)}") from e
+    except Exception as e:
+        raise ImageUploadError(f"Unexpected error: {str(e)}") from e
+
diff --git a/poetry.lock b/poetry.lock
index 84e4f85..fa953b9 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -646,6 +646,20 @@ files = [
     {file = "distlib-0.4.0.tar.gz", hash = "sha256:feec40075be03a04501a973d81f633735b4b69f98b05450592310c0f401a4e0d"},
 ]
 
+[[package]]
+name = "dotenv"
+version = "0.9.9"
+description = "Deprecated package"
+optional = false
+python-versions = "*"
+groups = ["main"]
+files = [
+    {file = "dotenv-0.9.9-py2.py3-none-any.whl", hash = "sha256:29cf74a087b31dafdb5a446b6d7e11cbce8ed2741540e2339c69fbef92c94ce9"},
+]
+
+[package.dependencies]
+python-dotenv = "*"
+
 [[package]]
 name = "dulwich"
 version = "0.24.1"
@@ -1328,6 +1342,115 @@ all = ["pbs-installer[download,install]"]
 download = ["httpx (>=0.27.0,<1)"]
 install = ["zstandard (>=0.21.0)"]
 
+[[package]]
+name = "pillow"
+version = "12.1.0"
+description = "Python Imaging Library (fork)"
+optional = false
+python-versions = ">=3.10"
+groups = ["main"]
+files = [
+    {file = "pillow-12.1.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:fb125d860738a09d363a88daa0f59c4533529a90e564785e20fe875b200b6dbd"},
+    {file = "pillow-12.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:cad302dc10fac357d3467a74a9561c90609768a6f73a1923b0fd851b6486f8b0"},
+    {file = "pillow-12.1.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:a40905599d8079e09f25027423aed94f2823adaf2868940de991e53a449e14a8"},
+    {file = "pillow-12.1.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:92a7fe4225365c5e3a8e598982269c6d6698d3e783b3b1ae979e7819f9cd55c1"},
+    {file = "pillow-12.1.0-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f10c98f49227ed8383d28174ee95155a675c4ed7f85e2e573b04414f7e371bda"},
+    {file = "pillow-12.1.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8637e29d13f478bc4f153d8daa9ffb16455f0a6cb287da1b432fdad2bfbd66c7"},
+    {file = "pillow-12.1.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:21e686a21078b0f9cb8c8a961d99e6a4ddb88e0fc5ea6e130172ddddc2e5221a"},
+    {file = "pillow-12.1.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:2415373395a831f53933c23ce051021e79c8cd7979822d8cc478547a3f4da8ef"},
+    {file = "pillow-12.1.0-cp310-cp310-win32.whl", hash = "sha256:e75d3dba8fc1ddfec0cd752108f93b83b4f8d6ab40e524a95d35f016b9683b09"},
+    {file = "pillow-12.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:64efdf00c09e31efd754448a383ea241f55a994fd079866b92d2bbff598aad91"},
+    {file = "pillow-12.1.0-cp310-cp310-win_arm64.whl", hash = "sha256:f188028b5af6b8fb2e9a76ac0f841a575bd1bd396e46ef0840d9b88a48fdbcea"},
+    {file = "pillow-12.1.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:a83e0850cb8f5ac975291ebfc4170ba481f41a28065277f7f735c202cd8e0af3"},
+    {file = "pillow-12.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b6e53e82ec2db0717eabb276aa56cf4e500c9a7cec2c2e189b55c24f65a3e8c0"},
+    {file = "pillow-12.1.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:40a8e3b9e8773876d6e30daed22f016509e3987bab61b3b7fe309d7019a87451"},
+    {file = "pillow-12.1.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:800429ac32c9b72909c671aaf17ecd13110f823ddb7db4dfef412a5587c2c24e"},
+    {file = "pillow-12.1.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0b022eaaf709541b391ee069f0022ee5b36c709df71986e3f7be312e46f42c84"},
+    {file = "pillow-12.1.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1f345e7bc9d7f368887c712aa5054558bad44d2a301ddf9248599f4161abc7c0"},
+    {file = "pillow-12.1.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d70347c8a5b7ccd803ec0c85c8709f036e6348f1e6a5bf048ecd9c64d3550b8b"},
+    {file = "pillow-12.1.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:1fcc52d86ce7a34fd17cb04e87cfdb164648a3662a6f20565910a99653d66c18"},
+    {file = "pillow-12.1.0-cp311-cp311-win32.whl", hash = "sha256:3ffaa2f0659e2f740473bcf03c702c39a8d4b2b7ffc629052028764324842c64"},
+    {file = "pillow-12.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:806f3987ffe10e867bab0ddad45df1148a2b98221798457fa097ad85d6e8bc75"},
+    {file = "pillow-12.1.0-cp311-cp311-win_arm64.whl", hash = "sha256:9f5fefaca968e700ad1a4a9de98bf0869a94e397fe3524c4c9450c1445252304"},
+    {file = "pillow-12.1.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:a332ac4ccb84b6dde65dbace8431f3af08874bf9770719d32a635c4ef411b18b"},
+    {file = "pillow-12.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:907bfa8a9cb790748a9aa4513e37c88c59660da3bcfffbd24a7d9e6abf224551"},
+    {file = "pillow-12.1.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:efdc140e7b63b8f739d09a99033aa430accce485ff78e6d311973a67b6bf3208"},
+    {file = "pillow-12.1.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bef9768cab184e7ae6e559c032e95ba8d07b3023c289f79a2bd36e8bf85605a5"},
+    {file = "pillow-12.1.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:742aea052cf5ab5034a53c3846165bc3ce88d7c38e954120db0ab867ca242661"},
+    {file = "pillow-12.1.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a6dfc2af5b082b635af6e08e0d1f9f1c4e04d17d4e2ca0ef96131e85eda6eb17"},
+    {file = "pillow-12.1.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:609e89d9f90b581c8d16358c9087df76024cf058fa693dd3e1e1620823f39670"},
+    {file = "pillow-12.1.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:43b4899cfd091a9693a1278c4982f3e50f7fb7cff5153b05174b4afc9593b616"},
+    {file = "pillow-12.1.0-cp312-cp312-win32.whl", hash = "sha256:aa0c9cc0b82b14766a99fbe6084409972266e82f459821cd26997a488a7261a7"},
+    {file = "pillow-12.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:d70534cea9e7966169ad29a903b99fc507e932069a881d0965a1a84bb57f6c6d"},
+    {file = "pillow-12.1.0-cp312-cp312-win_arm64.whl", hash = "sha256:65b80c1ee7e14a87d6a068dd3b0aea268ffcabfe0498d38661b00c5b4b22e74c"},
+    {file = "pillow-12.1.0-cp313-cp313-ios_13_0_arm64_iphoneos.whl", hash = "sha256:7b5dd7cbae20285cdb597b10eb5a2c13aa9de6cde9bb64a3c1317427b1db1ae1"},
+    {file = "pillow-12.1.0-cp313-cp313-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:29a4cef9cb672363926f0470afc516dbf7305a14d8c54f7abbb5c199cd8f8179"},
+    {file = "pillow-12.1.0-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:681088909d7e8fa9e31b9799aaa59ba5234c58e5e4f1951b4c4d1082a2e980e0"},
+    {file = "pillow-12.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:983976c2ab753166dc66d36af6e8ec15bb511e4a25856e2227e5f7e00a160587"},
+    {file = "pillow-12.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:db44d5c160a90df2d24a24760bbd37607d53da0b34fb546c4c232af7192298ac"},
+    {file = "pillow-12.1.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:6b7a9d1db5dad90e2991645874f708e87d9a3c370c243c2d7684d28f7e133e6b"},
+    {file = "pillow-12.1.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6258f3260986990ba2fa8a874f8b6e808cf5abb51a94015ca3dc3c68aa4f30ea"},
+    {file = "pillow-12.1.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e115c15e3bc727b1ca3e641a909f77f8ca72a64fff150f666fcc85e57701c26c"},
+    {file = "pillow-12.1.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6741e6f3074a35e47c77b23a4e4f2d90db3ed905cb1c5e6e0d49bff2045632bc"},
+    {file = "pillow-12.1.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:935b9d1aed48fcfb3f838caac506f38e29621b44ccc4f8a64d575cb1b2a88644"},
+    {file = "pillow-12.1.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5fee4c04aad8932da9f8f710af2c1a15a83582cfb884152a9caa79d4efcdbf9c"},
+    {file = "pillow-12.1.0-cp313-cp313-win32.whl", hash = "sha256:a786bf667724d84aa29b5db1c61b7bfdde380202aaca12c3461afd6b71743171"},
+    {file = "pillow-12.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:461f9dfdafa394c59cd6d818bdfdbab4028b83b02caadaff0ffd433faf4c9a7a"},
+    {file = "pillow-12.1.0-cp313-cp313-win_arm64.whl", hash = "sha256:9212d6b86917a2300669511ed094a9406888362e085f2431a7da985a6b124f45"},
+    {file = "pillow-12.1.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:00162e9ca6d22b7c3ee8e61faa3c3253cd19b6a37f126cad04f2f88b306f557d"},
+    {file = "pillow-12.1.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:7d6daa89a00b58c37cb1747ec9fb7ac3bc5ffd5949f5888657dfddde6d1312e0"},
+    {file = "pillow-12.1.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:e2479c7f02f9d505682dc47df8c0ea1fc5e264c4d1629a5d63fe3e2334b89554"},
+    {file = "pillow-12.1.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f188d580bd870cda1e15183790d1cc2fa78f666e76077d103edf048eed9c356e"},
+    {file = "pillow-12.1.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0fde7ec5538ab5095cc02df38ee99b0443ff0e1c847a045554cf5f9af1f4aa82"},
+    {file = "pillow-12.1.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0ed07dca4a8464bada6139ab38f5382f83e5f111698caf3191cb8dbf27d908b4"},
+    {file = "pillow-12.1.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:f45bd71d1fa5e5749587613037b172e0b3b23159d1c00ef2fc920da6f470e6f0"},
+    {file = "pillow-12.1.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:277518bf4fe74aa91489e1b20577473b19ee70fb97c374aa50830b279f25841b"},
+    {file = "pillow-12.1.0-cp313-cp313t-win32.whl", hash = "sha256:7315f9137087c4e0ee73a761b163fc9aa3b19f5f606a7fc08d83fd3e4379af65"},
+    {file = "pillow-12.1.0-cp313-cp313t-win_amd64.whl", hash = "sha256:0ddedfaa8b5f0b4ffbc2fa87b556dc59f6bb4ecb14a53b33f9189713ae8053c0"},
+    {file = "pillow-12.1.0-cp313-cp313t-win_arm64.whl", hash = "sha256:80941e6d573197a0c28f394753de529bb436b1ca990ed6e765cf42426abc39f8"},
+    {file = "pillow-12.1.0-cp314-cp314-ios_13_0_arm64_iphoneos.whl", hash = "sha256:5cb7bc1966d031aec37ddb9dcf15c2da5b2e9f7cc3ca7c54473a20a927e1eb91"},
+    {file = "pillow-12.1.0-cp314-cp314-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:97e9993d5ed946aba26baf9c1e8cf18adbab584b99f452ee72f7ee8acb882796"},
+    {file = "pillow-12.1.0-cp314-cp314-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:414b9a78e14ffeb98128863314e62c3f24b8a86081066625700b7985b3f529bd"},
+    {file = "pillow-12.1.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:e6bdb408f7c9dd2a5ff2b14a3b0bb6d4deb29fb9961e6eb3ae2031ae9a5cec13"},
+    {file = "pillow-12.1.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:3413c2ae377550f5487991d444428f1a8ae92784aac79caa8b1e3b89b175f77e"},
+    {file = "pillow-12.1.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:e5dcbe95016e88437ecf33544ba5db21ef1b8dd6e1b434a2cb2a3d605299e643"},
+    {file = "pillow-12.1.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d0a7735df32ccbcc98b98a1ac785cc4b19b580be1bdf0aeb5c03223220ea09d5"},
+    {file = "pillow-12.1.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0c27407a2d1b96774cbc4a7594129cc027339fd800cd081e44497722ea1179de"},
+    {file = "pillow-12.1.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:15c794d74303828eaa957ff8070846d0efe8c630901a1c753fdc63850e19ecd9"},
+    {file = "pillow-12.1.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c990547452ee2800d8506c4150280757f88532f3de2a58e3022e9b179107862a"},
+    {file = "pillow-12.1.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:b63e13dd27da389ed9475b3d28510f0f954bca0041e8e551b2a4eb1eab56a39a"},
+    {file = "pillow-12.1.0-cp314-cp314-win32.whl", hash = "sha256:1a949604f73eb07a8adab38c4fe50791f9919344398bdc8ac6b307f755fc7030"},
+    {file = "pillow-12.1.0-cp314-cp314-win_amd64.whl", hash = "sha256:4f9f6a650743f0ddee5593ac9e954ba1bdbc5e150bc066586d4f26127853ab94"},
+    {file = "pillow-12.1.0-cp314-cp314-win_arm64.whl", hash = "sha256:808b99604f7873c800c4840f55ff389936ef1948e4e87645eaf3fccbc8477ac4"},
+    {file = "pillow-12.1.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:bc11908616c8a283cf7d664f77411a5ed2a02009b0097ff8abbba5e79128ccf2"},
+    {file = "pillow-12.1.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:896866d2d436563fa2a43a9d72f417874f16b5545955c54a64941e87c1376c61"},
+    {file = "pillow-12.1.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8e178e3e99d3c0ea8fc64b88447f7cac8ccf058af422a6cedc690d0eadd98c51"},
+    {file = "pillow-12.1.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:079af2fb0c599c2ec144ba2c02766d1b55498e373b3ac64687e43849fbbef5bc"},
+    {file = "pillow-12.1.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bdec5e43377761c5dbca620efb69a77f6855c5a379e32ac5b158f54c84212b14"},
+    {file = "pillow-12.1.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:565c986f4b45c020f5421a4cea13ef294dde9509a8577f29b2fc5edc7587fff8"},
+    {file = "pillow-12.1.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:43aca0a55ce1eefc0aefa6253661cb54571857b1a7b2964bd8a1e3ef4b729924"},
+    {file = "pillow-12.1.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:0deedf2ea233722476b3a81e8cdfbad786f7adbed5d848469fa59fe52396e4ef"},
+    {file = "pillow-12.1.0-cp314-cp314t-win32.whl", hash = "sha256:b17fbdbe01c196e7e159aacb889e091f28e61020a8abeac07b68079b6e626988"},
+    {file = "pillow-12.1.0-cp314-cp314t-win_amd64.whl", hash = "sha256:27b9baecb428899db6c0de572d6d305cfaf38ca1596b5c0542a5182e3e74e8c6"},
+    {file = "pillow-12.1.0-cp314-cp314t-win_arm64.whl", hash = "sha256:f61333d817698bdcdd0f9d7793e365ac3d2a21c1f1eb02b32ad6aefb8d8ea831"},
+    {file = "pillow-12.1.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:ca94b6aac0d7af2a10ba08c0f888b3d5114439b6b3ef39968378723622fed377"},
+    {file = "pillow-12.1.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:351889afef0f485b84078ea40fe33727a0492b9af3904661b0abbafee0355b72"},
+    {file = "pillow-12.1.0-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:bb0984b30e973f7e2884362b7d23d0a348c7143ee559f38ef3eaab640144204c"},
+    {file = "pillow-12.1.0-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:84cabc7095dd535ca934d57e9ce2a72ffd216e435a84acb06b2277b1de2689bd"},
+    {file = "pillow-12.1.0-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:53d8b764726d3af1a138dd353116f774e3862ec7e3794e0c8781e30db0f35dfc"},
+    {file = "pillow-12.1.0-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5da841d81b1a05ef940a8567da92decaa15bc4d7dedb540a8c219ad83d91808a"},
+    {file = "pillow-12.1.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:75af0b4c229ac519b155028fa1be632d812a519abba9b46b20e50c6caa184f19"},
+    {file = "pillow-12.1.0.tar.gz", hash = "sha256:5c5ae0a06e9ea030ab786b0251b32c7e4ce10e58d983c0d5c56029455180b5b9"},
+]
+
+[package.extras]
+docs = ["furo", "olefile", "sphinx (>=8.2)", "sphinx-autobuild", "sphinx-copybutton", "sphinx-inline-tabs", "sphinxext-opengraph"]
+fpx = ["olefile"]
+mic = ["olefile"]
+test-arrow = ["arro3-compute", "arro3-core", "nanoarrow", "pyarrow"]
+tests = ["check-manifest", "coverage (>=7.4.2)", "defusedxml", "markdown2", "olefile", "packaging", "pyroma (>=5)", "pytest", "pytest-cov", "pytest-timeout", "pytest-xdist", "trove-classifiers (>=2024.10.12)"]
+xmp = ["defusedxml"]
+
 [[package]]
 name = "pkginfo"
 version = "1.12.1.2"
@@ -1703,6 +1826,21 @@ pytest = ">=4.6"
 [package.extras]
 testing = ["fields", "hunter", "process-tests", "pytest-xdist", "virtualenv"]
 
+[[package]]
+name = "python-dotenv"
+version = "1.2.1"
+description = "Read key-value pairs from a .env file and set them as environment variables"
+optional = false
+python-versions = ">=3.9"
+groups = ["main"]
+files = [
+    {file = "python_dotenv-1.2.1-py3-none-any.whl", hash = "sha256:b81ee9561e9ca4004139c6cbba3a238c32b03e4894671e181b671e8cb8425d61"},
+    {file = "python_dotenv-1.2.1.tar.gz", hash = "sha256:42667e897e16ab0d66954af0e60a9caa94f0fd4ecf3aaf6d2d260eec1aa36ad6"},
+]
+
+[package.extras]
+cli = ["click (>=5.0)"]
+
 [[package]]
 name = "pywin32"
 version = "306"
@@ -2613,4 +2751,4 @@ parsing = ["antlr4-python3-runtime", "lark", "latex2sympy"]
 [metadata]
 lock-version = "2.1"
 python-versions = "^3.11"
-content-hash = "10f9e90114dd9d66fe62d35aabc2fee0eb962ff7b99840216a17fb1282a641f4"
+content-hash = "7fe52b482228044b36e97dde750e982e13c7837c6c7f9d6fd45433aeae8c18ea"
diff --git a/pyproject.toml b/pyproject.toml
index 2ae3f68..e7f311a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -43,6 +43,9 @@ pywin32 = { version = "^306", platform = "win32", optional = true }
 ##########################
 poetry-plugin-export = "^1.9.0"
 pytest-asyncio = "^1.2.0"
+pillow = "^12.1.0"
+requests = "^2.32.5"
+dotenv = "^0.9.9"
 
 [tool.poetry.group.dev.dependencies]
 black = "24.8.0"
diff --git a/tests/evaluation/image_upload_test.py b/tests/evaluation/image_upload_test.py
new file mode 100644
index 0000000..06400f2
--- /dev/null
+++ b/tests/evaluation/image_upload_test.py
@@ -0,0 +1,482 @@
+import pytest
+import uuid
+from io import BytesIO
+from unittest.mock import Mock, patch, MagicMock
+from PIL import Image
+import requests
+
+# Import the module to test
+from lf_toolkit.evaluation.image_upload import (
+    generate_file_name,
+    validate_mime_type,
+    get_s3_bucket_uri,
+    upload_image,
+    ImageUploadError,
+    InvalidMimeTypeError,
+    MissingEnvironmentVariableError,
+    MIME_TO_FORMAT,
+    FORMAT_TO_EXTENSION
+)
+
+
+class TestGenerateFileName:
+    """Tests for generate_file_name."""
+
+    def test_generate_file_name_with_jpeg_format(self):
+        """A JPEG image gets a '.jpeg' suffix after the UUID."""
+        fixed_id = uuid.UUID('12345678-1234-5678-1234-567812345678')
+        jpeg_image = Mock(spec=Image.Image, format='JPEG')
+
+        target = 'lf_toolkit.evaluation.image_upload.uuid.uuid4'
+        with patch(target, return_value=fixed_id):
+            generated = generate_file_name(jpeg_image)
+
+        assert generated == '12345678-1234-5678-1234-567812345678.jpeg'
+
+    def test_generate_file_name_with_png_format(self):
+        """A PNG image gets a '.png' suffix after the UUID."""
+        fixed_id = uuid.UUID('abcdef12-3456-7890-abcd-ef1234567890')
+        png_image = Mock(spec=Image.Image, format='PNG')
+
+        target = 'lf_toolkit.evaluation.image_upload.uuid.uuid4'
+        with patch(target, return_value=fixed_id):
+            generated = generate_file_name(png_image)
+
+        assert generated == 'abcdef12-3456-7890-abcd-ef1234567890.png'
+
+    def test_generate_file_name_with_no_format(self):
+        """An image whose format is None falls back to the 'png' suffix."""
+        fixed_id = uuid.UUID('00000000-0000-0000-0000-000000000000')
+        formatless = Mock(spec=Image.Image, format=None)
+
+        target = 'lf_toolkit.evaluation.image_upload.uuid.uuid4'
+        with patch(target, return_value=fixed_id):
+            generated = generate_file_name(formatless)
+
+        assert generated == '00000000-0000-0000-0000-000000000000.png'
+
+    def test_generate_file_name_unique(self):
+        """Back-to-back calls for the same image give different names."""
+        png_image = Mock(spec=Image.Image, format='PNG')
+
+        first = generate_file_name(png_image)
+        second = generate_file_name(png_image)
+
+        # uuid4 is not patched here, so each call draws a fresh identifier.
+        assert first != second
+
+
+class TestValidateMimeType:
+    """Tests for validate_mime_type."""
+
+    def test_valid_jpeg_mime_type(self):
+        """image/jpeg, a JPEG image and a .jpg name agree."""
+        jpeg_image = Mock(spec=Image.Image, format='JPEG')
+
+        outcome = validate_mime_type('image/jpeg', jpeg_image, 'test.jpg')
+
+        assert outcome is True
+
+    def test_valid_png_mime_type(self):
+        """image/png, a PNG image and a .png name agree."""
+        png_image = Mock(spec=Image.Image, format='PNG')
+
+        outcome = validate_mime_type('image/png', png_image, 'test.png')
+
+        assert outcome is True
+
+    def test_invalid_mime_type(self):
+        """A MIME type outside the supported table is rejected."""
+        png_image = Mock(spec=Image.Image, format='PNG')
+
+        with pytest.raises(InvalidMimeTypeError) as caught:
+            validate_mime_type('image/invalid', png_image, 'test.png')
+
+        message = str(caught.value)
+        assert "Invalid MIME type 'image/invalid'" in message
+
+    def test_mime_type_format_mismatch(self):
+        """A supported MIME type that contradicts the image format is rejected."""
+        png_image = Mock(spec=Image.Image, format='PNG')
+
+        with pytest.raises(InvalidMimeTypeError) as caught:
+            validate_mime_type('image/jpeg', png_image, 'test.png')
+
+        message = str(caught.value)
+        assert "does not match image format 'PNG'" in message
+
+    def test_extension_format_mismatch(self):
+        """A filename extension that contradicts the image format is rejected."""
+        jpeg_image = Mock(spec=Image.Image, format='JPEG')
+
+        with pytest.raises(InvalidMimeTypeError) as caught:
+            validate_mime_type('image/jpeg', jpeg_image, 'test.png')
+
+        message = str(caught.value)
+        assert "File extension '.png' does not match format 'JPEG'" in message
+
+    def test_valid_with_no_image_format(self):
+        """An image whose format is None passes with a supported MIME type."""
+        formatless = Mock(spec=Image.Image, format=None)
+
+        # Must return rather than raise when the image carries no format.
+        outcome = validate_mime_type('image/png', formatless, 'test.png')
+
+        assert outcome is True
+
+    def test_valid_webp_mime_type(self):
+        """image/webp, a WEBP image and a .webp name agree."""
+        webp_image = Mock(spec=Image.Image, format='WEBP')
+
+        outcome = validate_mime_type('image/webp', webp_image, 'test.webp')
+
+        assert outcome is True
+
+    def test_jpeg_with_jpg_extension(self):
+        """The short .jpg extension is accepted for a JPEG image."""
+        jpeg_image = Mock(spec=Image.Image, format='JPEG')
+
+        outcome = validate_mime_type('image/jpeg', jpeg_image, 'photo.jpg')
+
+        assert outcome is True
+
+    def test_jpeg_with_jpeg_extension(self):
+        """The long .jpeg extension is accepted for a JPEG image."""
+        jpeg_image = Mock(spec=Image.Image, format='JPEG')
+
+        outcome = validate_mime_type('image/jpeg', jpeg_image, 'photo.jpeg')
+
+        assert outcome is True
+
+
+class TestGetS3BucketUri:
+    """Tests for get_s3_bucket_uri."""
+
+    def test_get_s3_bucket_uri_success(self):
+        """The configured value is returned unchanged after one lookup."""
+        expected = 'https://s3.amazonaws.com/my-bucket'
+        target = 'lf_toolkit.evaluation.image_upload.os.getenv'
+        with patch(target, return_value=expected) as fake_getenv:
+            actual = get_s3_bucket_uri()
+
+        assert actual == expected
+        fake_getenv.assert_called_once_with('S3_BUCKET_URI')
+
+    def test_get_s3_bucket_uri_missing(self):
+        """An unset variable raises with an explanatory message."""
+        target = 'lf_toolkit.evaluation.image_upload.os.getenv'
+
+        with patch(target, return_value=None):
+            with pytest.raises(MissingEnvironmentVariableError) as caught:
+                get_s3_bucket_uri()
+
+        assert "S3_BUCKET_URI environment variable is not set" in str(caught.value)
+
+    def test_get_s3_bucket_uri_empty_string(self):
+        """An empty value is treated the same as an unset variable."""
+        target = 'lf_toolkit.evaluation.image_upload.os.getenv'
+
+        with patch(target, return_value=''):
+            with pytest.raises(MissingEnvironmentVariableError):
+                get_s3_bucket_uri()
+
+
+class TestUploadImage:
+    """Test suite for upload_image function"""
+
+    @patch('lf_toolkit.evaluation.image_upload.requests.post')
+    @patch('lf_toolkit.evaluation.image_upload.os.getenv')
+    @patch('lf_toolkit.evaluation.image_upload.uuid.uuid4')
+    def test_successful_upload(self, mock_uuid, mock_getenv, mock_post):
+        """A 200 response yields the server's URL and one UUID-named POST."""
+        # Setup mocks (patch arguments arrive bottom decorator first)
+        mock_uuid.return_value = uuid.UUID('12345678-1234-5678-1234-567812345678')
+        mock_getenv.return_value = 'https://s3.amazonaws.com/my-bucket'
+
+        mock_response = Mock()
+        mock_response.status_code = 200
+        mock_response.json.return_value = {'url': 'https://s3.amazonaws.com/uploaded-image.jpg'}
+        mock_post.return_value = mock_response
+
+        # Create a real PIL image so img.save() produces actual JPEG bytes
+        img = Image.new('RGB', (100, 100), color='red')
+        img.format = 'JPEG'
+
+        # Execute
+        result = upload_image(img, 'image/jpeg')
+
+        # Verify response and that exactly one request was sent
+        assert result == 'https://s3.amazonaws.com/uploaded-image.jpg'
+        mock_post.assert_called_once()
+        assert mock_post.call_args.kwargs['timeout'] == 30
+
+        # Verify UUID-based filename is used; the file object itself is not inspected
+        upload_kwargs = mock_post.call_args.kwargs
+        filename, _, mime_type = upload_kwargs['files']['file']
+        assert filename == '12345678-1234-5678-1234-567812345678.jpeg'
+        assert mime_type == 'image/jpeg'
+
+    @patch('lf_toolkit.evaluation.image_upload.requests.post')
+    @patch('lf_toolkit.evaluation.image_upload.os.getenv')
+    @patch('lf_toolkit.evaluation.image_upload.uuid.uuid4')
+    def test_upload_with_png_image(self, mock_uuid, mock_getenv, mock_post):
+        """An RGBA PNG uploads under a UUID-based '.png' filename."""
+        stored_url = 'https://storage.example.com/image.png'
+        mock_uuid.return_value = uuid.UUID('aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee')
+        mock_getenv.return_value = 'https://storage.example.com'
+
+        server_reply = Mock(status_code=200)
+        server_reply.json.return_value = {'url': stored_url}
+        mock_post.return_value = server_reply
+
+        translucent_green = (0, 255, 0, 128)
+        png_image = Image.new('RGBA', (50, 50), color=translucent_green)
+        png_image.format = 'PNG'
+
+        returned_url = upload_image(png_image, 'image/png')
+
+        assert returned_url == stored_url
+
+        # The multipart tuple is (filename, file object, content type).
+        sent_name, _sent_body, sent_type = mock_post.call_args[1]['files']['file']
+        assert sent_name == 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee.png'
+        assert sent_type == 'image/png'
+
+    @patch('lf_toolkit.evaluation.image_upload.os.getenv')
+    def test_upload_missing_s3_uri(self, mock_getenv):
+        """Test upload fails when S3_BUCKET_URI is missing"""
+        mock_getenv.return_value = None
+
+        img = Image.new('RGB', (100, 100))
+        img.format = 'JPEG'
+
+        with pytest.raises(MissingEnvironmentVariableError):
+            upload_image(img, 'image/jpeg')
+
+    @patch('lf_toolkit.evaluation.image_upload.os.getenv')
+    def test_upload_invalid_mime_type(self, mock_getenv):
+        """Test upload fails with invalid MIME type"""
+        mock_getenv.return_value = 'https://s3.amazonaws.com/bucket'
+
+        img = Image.new('RGB', (100, 100))
+        img.format = 'JPEG'
+
+        with pytest.raises(InvalidMimeTypeError):
+            upload_image(img, 'image/invalid')
+
+    @patch('lf_toolkit.evaluation.image_upload.requests.post')
+    @patch('lf_toolkit.evaluation.image_upload.os.getenv')
+    @patch('lf_toolkit.evaluation.image_upload.uuid.uuid4')
+    def test_upload_server_error(self, mock_uuid, mock_getenv, mock_post):
+        """Test upload fails when server returns error"""
+        mock_uuid.return_value = uuid.UUID('12345678-1234-5678-1234-567812345678')
+        mock_getenv.return_value = 'https://s3.amazonaws.com/bucket'
+
+        mock_response = Mock()
+        mock_response.status_code = 500
+        mock_response.text = 'Internal Server Error'
+        mock_post.return_value = mock_response
+
+        img = Image.new('RGB', (100, 100))
+        img.format = 'JPEG'
+
+        with pytest.raises(ImageUploadError) as exc_info:
+            upload_image(img, 'image/jpeg')
+
+        assert "Upload failed with status code 500" in str(exc_info.value)
+
+    @patch('lf_toolkit.evaluation.image_upload.requests.post')
+    @patch('lf_toolkit.evaluation.image_upload.os.getenv')
+    @patch('lf_toolkit.evaluation.image_upload.uuid.uuid4')
+    def test_upload_network_error(self, mock_uuid, mock_getenv, mock_post):
+        """Test upload fails on network error"""
+        mock_uuid.return_value = uuid.UUID('12345678-1234-5678-1234-567812345678')
+        mock_getenv.return_value = 'https://s3.amazonaws.com/bucket'
+
+        mock_post.side_effect = requests.exceptions.ConnectionError('Connection failed')
+
+        img = Image.new('RGB', (100, 100))
+        img.format = 'JPEG'
+
+        with pytest.raises(ImageUploadError) as exc_info:
+            upload_image(img, 'image/jpeg')
+
+        assert "Network error" in str(exc_info.value)
+
+    @patch('lf_toolkit.evaluation.image_upload.requests.post')
+    @patch('lf_toolkit.evaluation.image_upload.os.getenv')
+    @patch('lf_toolkit.evaluation.image_upload.uuid.uuid4')
+    def test_upload_timeout_error(self, mock_uuid, mock_getenv, mock_post):
+        """Test upload fails on timeout"""
+        mock_uuid.return_value = uuid.UUID('12345678-1234-5678-1234-567812345678')
+        mock_getenv.return_value = 'https://s3.amazonaws.com/bucket'
+
+        mock_post.side_effect = requests.exceptions.Timeout('Request timed out')
+
+        img = Image.new('RGB', (100, 100))
+        img.format = 'JPEG'
+
+        with pytest.raises(ImageUploadError) as exc_info:
+            upload_image(img, 'image/jpeg')
+
+        assert "Network error" in str(exc_info.value)
+
+    @patch('lf_toolkit.evaluation.image_upload.requests.post')
+    @patch('lf_toolkit.evaluation.image_upload.os.getenv')
+    @patch('lf_toolkit.evaluation.image_upload.uuid.uuid4')
+    def test_upload_mime_type_mismatch(self, mock_uuid, mock_getenv, mock_post):
+        """Test upload fails when MIME type doesn't match image format"""
+        mock_uuid.return_value = uuid.UUID('12345678-1234-5678-1234-567812345678')
+        mock_getenv.return_value = 'https://s3.amazonaws.com/bucket'
+
+        img = Image.new('RGB', (100, 100))
+        img.format = 'PNG'
+
+        with pytest.raises(InvalidMimeTypeError):
+            upload_image(img, 'image/jpeg')
+
+    @patch('lf_toolkit.evaluation.image_upload.requests.post')
+    @patch('lf_toolkit.evaluation.image_upload.os.getenv')
+    @patch('lf_toolkit.evaluation.image_upload.uuid.uuid4')
+    def test_upload_image_no_format(self, mock_uuid, mock_getenv, mock_post):
+        """Test upload with image that has no format (defaults to PNG) uses UUID filename"""
+        mock_uuid.return_value = uuid.UUID('12345678-1234-5678-1234-567812345678')
+        mock_getenv.return_value = 'https://s3.amazonaws.com/bucket'
+
+        mock_response = Mock()
+        mock_response.status_code = 200
+        mock_response.json.return_value = {'url': 'https://s3.amazonaws.com/image.png'}
+        mock_post.return_value = mock_response
+
+        img = Image.new('RGB', (100, 100))
+        img.format = None
+
+        result = upload_image(img, 'image/png')
+
+        assert result == 'https://s3.amazonaws.com/image.png'
+
+        # Verify UUID-based filename with default .png extension
+        call_args = mock_post.call_args
+        filename, file_obj, mime_type = call_args[1]['files']['file']
+        assert filename == '12345678-1234-5678-1234-567812345678.png'
+        assert mime_type == 'image/png'
+
+    @patch('lf_toolkit.evaluation.image_upload.requests.post')
+    @patch('lf_toolkit.evaluation.image_upload.os.getenv')
+    @patch('lf_toolkit.evaluation.image_upload.uuid.uuid4')
+    def test_upload_uses_different_uuid_each_time(self, mock_uuid, mock_getenv, mock_post):
+        """Test that each upload generates a unique UUID-based filename"""
+        mock_getenv.return_value = 'https://s3.amazonaws.com/bucket'
+
+        mock_response = Mock()
+        mock_response.status_code = 200
+        mock_response.json.return_value = {'url': 'https://s3.amazonaws.com/uploaded.jpg'}
+        mock_post.return_value = mock_response
+
+        # First upload with first UUID
+        uuid1 = uuid.UUID('11111111-1111-1111-1111-111111111111')
+        mock_uuid.return_value = uuid1
+
+        img1 = Image.new('RGB', (100, 100))
+        img1.format = 'JPEG'
+        upload_image(img1, 'image/jpeg')
+
+        filename1 = mock_post.call_args[1]['files']['file'][0]
+
+        # Second upload with different UUID
+        uuid2 = uuid.UUID('22222222-2222-2222-2222-222222222222')
+        mock_uuid.return_value = uuid2
+
+        img2 = Image.new('RGB', (100, 100))
+        img2.format = 'JPEG'
+        upload_image(img2, 'image/jpeg')
+
+        filename2 = mock_post.call_args[1]['files']['file'][0]
+
+        # Verify different UUIDs result in different filenames
+        assert filename1 == '11111111-1111-1111-1111-111111111111.jpeg'
+        assert filename2 == '22222222-2222-2222-2222-222222222222.jpeg'
+        assert filename1 != filename2
+
+    @patch('lf_toolkit.evaluation.image_upload.requests.post')
+    @patch('lf_toolkit.evaluation.image_upload.os.getenv')
+    @patch('lf_toolkit.evaluation.image_upload.uuid.uuid4')
+    def test_upload_verifies_correct_file_uploaded(self, mock_uuid, mock_getenv, mock_post):
+        """Test that the correct file data is sent in upload request"""
+        mock_uuid.return_value = uuid.UUID('12345678-1234-5678-1234-567812345678')
+        mock_getenv.return_value = 'https://s3.amazonaws.com/bucket'
+
+        mock_response = Mock()
+        mock_response.status_code = 200
+        mock_response.json.return_value = {'url': 'https://s3.amazonaws.com/image.jpg'}
+        mock_post.return_value = mock_response
+
+        img = Image.new('RGB', (100, 100), color='blue')
+        img.format = 'JPEG'
+
+        upload_image(img, 'image/jpeg')
+
+        # Verify the post was called with correct arguments
+        call_args = mock_post.call_args
+        assert call_args[0][0] == 'https://s3.amazonaws.com/bucket'
+        assert 'files' in call_args[1]
+        assert 'file' in call_args[1]['files']
+
+        filename, file_obj, mime_type = call_args[1]['files']['file']
+        assert filename == '12345678-1234-5678-1234-567812345678.jpeg'
+        assert mime_type == 'image/jpeg'
+
+
+class TestExceptionHierarchy:
+    """Test suite for custom exception classes"""
+
+    def test_image_upload_error_is_exception(self):
+        """Test that ImageUploadError inherits from Exception"""
+        assert issubclass(ImageUploadError, Exception)
+
+    def test_invalid_mime_type_error_is_image_upload_error(self):
+        """Test that InvalidMimeTypeError inherits from ImageUploadError"""
+        assert issubclass(InvalidMimeTypeError, ImageUploadError)
+        assert issubclass(InvalidMimeTypeError, Exception)
+
+    def test_missing_environment_variable_error_is_image_upload_error(self):
+        """Test that MissingEnvironmentVariableError inherits from ImageUploadError"""
+        assert issubclass(MissingEnvironmentVariableError, ImageUploadError)
+        assert issubclass(MissingEnvironmentVariableError, Exception)
+
+    def test_can_raise_and_catch_image_upload_error(self):
+        """Test that custom exceptions can be raised and caught"""
+        with pytest.raises(ImageUploadError):
+            raise ImageUploadError("Test error")
+
+    def test_invalid_mime_type_error_caught_as_image_upload_error(self):
+        """Test that InvalidMimeTypeError can be caught as ImageUploadError"""
+        with pytest.raises(ImageUploadError):
+            raise InvalidMimeTypeError("Invalid MIME")
+
+
+class TestConstants:
+    """Test suite for module constants"""
+
+    def test_mime_to_format_has_expected_types(self):
+        """Test that MIME_TO_FORMAT contains expected image types"""
+        assert 'image/jpeg' in MIME_TO_FORMAT
+        assert 'image/png' in MIME_TO_FORMAT
+        assert 'image/gif' in MIME_TO_FORMAT
+        assert 'image/webp' in MIME_TO_FORMAT
+
+    def test_format_to_extension_has_expected_formats(self):
+        """Test that FORMAT_TO_EXTENSION contains expected formats"""
+        assert 'JPEG' in FORMAT_TO_EXTENSION
+        assert 'PNG' in FORMAT_TO_EXTENSION
+        assert 'GIF' in FORMAT_TO_EXTENSION
+        assert 'WEBP' in FORMAT_TO_EXTENSION
+
+    def test_jpeg_has_multiple_extensions(self):
+        """Test that JPEG format has multiple valid extensions"""
+        assert '.jpg' in FORMAT_TO_EXTENSION['JPEG']
+        assert '.jpeg' in FORMAT_TO_EXTENSION['JPEG']
+
+
+if __name__ == '__main__':
+    pytest.main([__file__, '-v'])

From 36e95aace323dc0d83e9c5d5ee443f3cc412ff22 Mon Sep 17 00:00:00 2001
From: Marcus Messer <marcus@marcusm.co.uk>
Date: Tue, 27 Jan 2026 13:09:20 +0000
Subject: [PATCH 02/22] Switched image upload request from POST to PUT

---
 lf_toolkit/evaluation/image_upload.py |  8 ++-
 tests/evaluation/image_upload_test.py | 70 +++++++++++++--------------
 2 files changed, 42 insertions(+), 36 deletions(-)

diff --git a/lf_toolkit/evaluation/image_upload.py b/lf_toolkit/evaluation/image_upload.py
index 70abd17..19e049d 100644
--- a/lf_toolkit/evaluation/image_upload.py
+++ b/lf_toolkit/evaluation/image_upload.py
@@ -142,7 +142,7 @@ def upload_image(img: Image.Image, mime_type: str) -> Dict:
         buffer.seek(0)
 
         files: Dict[str, tuple] = {'file': (filename, buffer, mime_type)}
-        response: requests.Response = requests.post(url, files=files, timeout=30)
+        response: requests.Response = requests.put(url, files=files, timeout=30)
 
         if response.status_code != 200:
             raise ImageUploadError(
@@ -158,3 +158,9 @@ def upload_image(img: Image.Image, mime_type: str) -> Dict:
     except Exception as e:
         raise ImageUploadError(f"Unexpected error: {str(e)}")
 
+if __name__ == "__main__":
+    img = Image.new('RGB', (100, 100), color='red')
+    img.format = 'JPEG'
+
+    # Execute
+    result = upload_image(img, 'image/jpeg')
diff --git a/tests/evaluation/image_upload_test.py b/tests/evaluation/image_upload_test.py
index 06400f2..a40b88e 100644
--- a/tests/evaluation/image_upload_test.py
+++ b/tests/evaluation/image_upload_test.py
@@ -184,10 +184,10 @@ def test_get_s3_bucket_uri_empty_string(self):
 class TestUploadImage:
     """Test suite for upload_image function"""
 
-    @patch('lf_toolkit.evaluation.image_upload.requests.post')
+    @patch('lf_toolkit.evaluation.image_upload.requests.put')
     @patch('lf_toolkit.evaluation.image_upload.os.getenv')
     @patch('lf_toolkit.evaluation.image_upload.uuid.uuid4')
-    def test_successful_upload(self, mock_uuid, mock_getenv, mock_post):
+    def test_successful_upload(self, mock_uuid, mock_getenv, mock_put):
         """Test successful image upload with UUID-based filename"""
         # Setup mocks
         mock_uuid.return_value = uuid.UUID('12345678-1234-5678-1234-567812345678')
@@ -196,7 +196,7 @@ def test_successful_upload(self, mock_uuid, mock_getenv, mock_post):
         mock_response = Mock()
         mock_response.status_code = 200
         mock_response.json.return_value = {'url': f'https://s3.amazonaws.com/uploaded-image.jpg'}
-        mock_post.return_value = mock_response
+        mock_put.return_value = mock_response
 
         # Create a real PIL image for testing
         img = Image.new('RGB', (100, 100), color='red')
@@ -207,19 +207,19 @@ def test_successful_upload(self, mock_uuid, mock_getenv, mock_post):
 
         # Verify response
         assert result == 'https://s3.amazonaws.com/uploaded-image.jpg'
-        assert mock_post.called
-        assert mock_post.call_args[1]['timeout'] == 30
+        assert mock_put.called
+        assert mock_put.call_args[1]['timeout'] == 30
 
         # Verify UUID-based filename is used
-        call_args = mock_post.call_args
+        call_args = mock_put.call_args
         filename, file_obj, mime_type = call_args[1]['files']['file']
         assert filename == '12345678-1234-5678-1234-567812345678.jpeg'
         assert mime_type == 'image/jpeg'
 
-    @patch('lf_toolkit.evaluation.image_upload.requests.post')
+    @patch('lf_toolkit.evaluation.image_upload.requests.put')
     @patch('lf_toolkit.evaluation.image_upload.os.getenv')
     @patch('lf_toolkit.evaluation.image_upload.uuid.uuid4')
-    def test_upload_with_png_image(self, mock_uuid, mock_getenv, mock_post):
+    def test_upload_with_png_image(self, mock_uuid, mock_getenv, mock_put):
         """Test uploading PNG image with UUID-based filename"""
         mock_uuid.return_value = uuid.UUID('aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee')
         mock_getenv.return_value = 'https://storage.example.com'
@@ -227,7 +227,7 @@ def test_upload_with_png_image(self, mock_uuid, mock_getenv, mock_post):
         mock_response = Mock()
         mock_response.status_code = 200
         mock_response.json.return_value = {'url': 'https://storage.example.com/image.png'}
-        mock_post.return_value = mock_response
+        mock_put.return_value = mock_response
 
         img = Image.new('RGBA', (50, 50), color=(0, 255, 0, 128))
         img.format = 'PNG'
@@ -237,7 +237,7 @@ def test_upload_with_png_image(self, mock_uuid, mock_getenv, mock_post):
         assert result == 'https://storage.example.com/image.png'
 
         # Verify UUID-based filename is used
-        call_args = mock_post.call_args
+        call_args = mock_put.call_args
         filename, file_obj, mime_type = call_args[1]['files']['file']
         assert filename == 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee.png'
         assert mime_type == 'image/png'
@@ -264,10 +264,10 @@ def test_upload_invalid_mime_type(self, mock_getenv):
         with pytest.raises(InvalidMimeTypeError):
             upload_image(img, 'image/invalid')
 
-    @patch('lf_toolkit.evaluation.image_upload.requests.post')
+    @patch('lf_toolkit.evaluation.image_upload.requests.put')
     @patch('lf_toolkit.evaluation.image_upload.os.getenv')
     @patch('lf_toolkit.evaluation.image_upload.uuid.uuid4')
-    def test_upload_server_error(self, mock_uuid, mock_getenv, mock_post):
+    def test_upload_server_error(self, mock_uuid, mock_getenv, mock_put):
         """Test upload fails when server returns error"""
         mock_uuid.return_value = uuid.UUID('12345678-1234-5678-1234-567812345678')
         mock_getenv.return_value = 'https://s3.amazonaws.com/bucket'
@@ -275,7 +275,7 @@ def test_upload_server_error(self, mock_uuid, mock_getenv, mock_post):
         mock_response = Mock()
         mock_response.status_code = 500
         mock_response.text = 'Internal Server Error'
-        mock_post.return_value = mock_response
+        mock_put.return_value = mock_response
 
         img = Image.new('RGB', (100, 100))
         img.format = 'JPEG'
@@ -285,15 +285,15 @@ def test_upload_server_error(self, mock_uuid, mock_getenv, mock_post):
 
         assert "Upload failed with status code 500" in str(exc_info.value)
 
-    @patch('lf_toolkit.evaluation.image_upload.requests.post')
+    @patch('lf_toolkit.evaluation.image_upload.requests.put')
     @patch('lf_toolkit.evaluation.image_upload.os.getenv')
     @patch('lf_toolkit.evaluation.image_upload.uuid.uuid4')
-    def test_upload_network_error(self, mock_uuid, mock_getenv, mock_post):
+    def test_upload_network_error(self, mock_uuid, mock_getenv, mock_put):
         """Test upload fails on network error"""
         mock_uuid.return_value = uuid.UUID('12345678-1234-5678-1234-567812345678')
         mock_getenv.return_value = 'https://s3.amazonaws.com/bucket'
 
-        mock_post.side_effect = requests.exceptions.ConnectionError('Connection failed')
+        mock_put.side_effect = requests.exceptions.ConnectionError('Connection failed')
 
         img = Image.new('RGB', (100, 100))
         img.format = 'JPEG'
@@ -303,15 +303,15 @@ def test_upload_network_error(self, mock_uuid, mock_getenv, mock_post):
 
         assert "Network error" in str(exc_info.value)
 
-    @patch('lf_toolkit.evaluation.image_upload.requests.post')
+    @patch('lf_toolkit.evaluation.image_upload.requests.put')
     @patch('lf_toolkit.evaluation.image_upload.os.getenv')
     @patch('lf_toolkit.evaluation.image_upload.uuid.uuid4')
-    def test_upload_timeout_error(self, mock_uuid, mock_getenv, mock_post):
+    def test_upload_timeout_error(self, mock_uuid, mock_getenv, mock_put):
         """Test upload fails on timeout"""
         mock_uuid.return_value = uuid.UUID('12345678-1234-5678-1234-567812345678')
         mock_getenv.return_value = 'https://s3.amazonaws.com/bucket'
 
-        mock_post.side_effect = requests.exceptions.Timeout('Request timed out')
+        mock_put.side_effect = requests.exceptions.Timeout('Request timed out')
 
         img = Image.new('RGB', (100, 100))
         img.format = 'JPEG'
@@ -321,10 +321,10 @@ def test_upload_timeout_error(self, mock_uuid, mock_getenv, mock_post):
 
         assert "Network error" in str(exc_info.value)
 
-    @patch('lf_toolkit.evaluation.image_upload.requests.post')
+    @patch('lf_toolkit.evaluation.image_upload.requests.put')
     @patch('lf_toolkit.evaluation.image_upload.os.getenv')
     @patch('lf_toolkit.evaluation.image_upload.uuid.uuid4')
-    def test_upload_mime_type_mismatch(self, mock_uuid, mock_getenv, mock_post):
+    def test_upload_mime_type_mismatch(self, mock_uuid, mock_getenv, mock_put):
         """Test upload fails when MIME type doesn't match image format"""
         mock_uuid.return_value = uuid.UUID('12345678-1234-5678-1234-567812345678')
         mock_getenv.return_value = 'https://s3.amazonaws.com/bucket'
@@ -335,10 +335,10 @@ def test_upload_mime_type_mismatch(self, mock_uuid, mock_getenv, mock_post):
         with pytest.raises(InvalidMimeTypeError):
             upload_image(img, 'image/jpeg')
 
-    @patch('lf_toolkit.evaluation.image_upload.requests.post')
+    @patch('lf_toolkit.evaluation.image_upload.requests.put')
     @patch('lf_toolkit.evaluation.image_upload.os.getenv')
     @patch('lf_toolkit.evaluation.image_upload.uuid.uuid4')
-    def test_upload_image_no_format(self, mock_uuid, mock_getenv, mock_post):
+    def test_upload_image_no_format(self, mock_uuid, mock_getenv, mock_put):
         """Test upload with image that has no format (defaults to PNG) uses UUID filename"""
         mock_uuid.return_value = uuid.UUID('12345678-1234-5678-1234-567812345678')
         mock_getenv.return_value = 'https://s3.amazonaws.com/bucket'
@@ -346,7 +346,7 @@ def test_upload_image_no_format(self, mock_uuid, mock_getenv, mock_post):
         mock_response = Mock()
         mock_response.status_code = 200
         mock_response.json.return_value = {'url': 'https://s3.amazonaws.com/image.png'}
-        mock_post.return_value = mock_response
+        mock_put.return_value = mock_response
 
         img = Image.new('RGB', (100, 100))
         img.format = None
@@ -356,22 +356,22 @@ def test_upload_image_no_format(self, mock_uuid, mock_getenv, mock_post):
         assert result == 'https://s3.amazonaws.com/image.png'
 
         # Verify UUID-based filename with default .png extension
-        call_args = mock_post.call_args
+        call_args = mock_put.call_args
         filename, file_obj, mime_type = call_args[1]['files']['file']
         assert filename == '12345678-1234-5678-1234-567812345678.png'
         assert mime_type == 'image/png'
 
-    @patch('lf_toolkit.evaluation.image_upload.requests.post')
+    @patch('lf_toolkit.evaluation.image_upload.requests.put')
     @patch('lf_toolkit.evaluation.image_upload.os.getenv')
     @patch('lf_toolkit.evaluation.image_upload.uuid.uuid4')
-    def test_upload_uses_different_uuid_each_time(self, mock_uuid, mock_getenv, mock_post):
+    def test_upload_uses_different_uuid_each_time(self, mock_uuid, mock_getenv, mock_put):
         """Test that each upload generates a unique UUID-based filename"""
         mock_getenv.return_value = 'https://s3.amazonaws.com/bucket'
 
         mock_response = Mock()
         mock_response.status_code = 200
         mock_response.json.return_value = {'url': 'https://s3.amazonaws.com/uploaded.jpg'}
-        mock_post.return_value = mock_response
+        mock_put.return_value = mock_response
 
         # First upload with first UUID
         uuid1 = uuid.UUID('11111111-1111-1111-1111-111111111111')
@@ -381,7 +381,7 @@ def test_upload_uses_different_uuid_each_time(self, mock_uuid, mock_getenv, mock
         img1.format = 'JPEG'
         upload_image(img1, 'image/jpeg')
 
-        filename1 = mock_post.call_args[1]['files']['file'][0]
+        filename1 = mock_put.call_args[1]['files']['file'][0]
 
         # Second upload with different UUID
         uuid2 = uuid.UUID('22222222-2222-2222-2222-222222222222')
@@ -391,17 +391,17 @@ def test_upload_uses_different_uuid_each_time(self, mock_uuid, mock_getenv, mock
         img2.format = 'JPEG'
         upload_image(img2, 'image/jpeg')
 
-        filename2 = mock_post.call_args[1]['files']['file'][0]
+        filename2 = mock_put.call_args[1]['files']['file'][0]
 
         # Verify different UUIDs result in different filenames
         assert filename1 == '11111111-1111-1111-1111-111111111111.jpeg'
         assert filename2 == '22222222-2222-2222-2222-222222222222.jpeg'
         assert filename1 != filename2
 
-    @patch('lf_toolkit.evaluation.image_upload.requests.post')
+    @patch('lf_toolkit.evaluation.image_upload.requests.put')
     @patch('lf_toolkit.evaluation.image_upload.os.getenv')
     @patch('lf_toolkit.evaluation.image_upload.uuid.uuid4')
-    def test_upload_verifies_correct_file_uploaded(self, mock_uuid, mock_getenv, mock_post):
+    def test_upload_verifies_correct_file_uploaded(self, mock_uuid, mock_getenv, mock_put):
         """Test that the correct file data is sent in upload request"""
         mock_uuid.return_value = uuid.UUID('12345678-1234-5678-1234-567812345678')
         mock_getenv.return_value = 'https://s3.amazonaws.com/bucket'
@@ -409,15 +409,15 @@ def test_upload_verifies_correct_file_uploaded(self, mock_uuid, mock_getenv, moc
         mock_response = Mock()
         mock_response.status_code = 200
         mock_response.json.return_value = {'url': 'https://s3.amazonaws.com/image.jpg'}
-        mock_post.return_value = mock_response
+        mock_put.return_value = mock_response
 
         img = Image.new('RGB', (100, 100), color='blue')
         img.format = 'JPEG'
 
         upload_image(img, 'image/jpeg')
 
-        # Verify the post was called with correct arguments
-        call_args = mock_post.call_args
+        # Verify the put was called with correct arguments
+        call_args = mock_put.call_args
         assert call_args[0][0] == 'https://s3.amazonaws.com/bucket'
         assert 'files' in call_args[1]
         assert 'file' in call_args[1]['files']

From 97373a4696260ccf343afcde572760c298847c5f Mon Sep 17 00:00:00 2001
From: Marcus Messer <marcus@marcusm.co.uk>
Date: Tue, 27 Jan 2026 13:45:20 +0000
Subject: [PATCH 03/22] Fixed issue with request not sending file name and
 updated tests

---
 lf_toolkit/evaluation/image_upload.py | 17 ++++--
 tests/evaluation/image_upload_test.py | 86 +++------------------------
 2 files changed, 20 insertions(+), 83 deletions(-)

diff --git a/lf_toolkit/evaluation/image_upload.py b/lf_toolkit/evaluation/image_upload.py
index 19e049d..9a97973 100644
--- a/lf_toolkit/evaluation/image_upload.py
+++ b/lf_toolkit/evaluation/image_upload.py
@@ -113,7 +113,7 @@ def get_s3_bucket_uri() -> str:
     return s3_uri
 
 
-def upload_image(img: Image.Image, mime_type: str) -> Dict:
+def upload_image(img: Image.Image, mime_type: str) -> str:
     """Upload PIL image with comprehensive MIME type validation
 
     Args:
@@ -130,26 +130,32 @@ def upload_image(img: Image.Image, mime_type: str) -> Dict:
     """
     try:
         # Get URL from environment variable
-        url: str = get_s3_bucket_uri()
+        base_url: str = get_s3_bucket_uri()
 
         filename: str = generate_file_name(img)
 
         validate_mime_type(mime_type, img, filename)
 
+        full_url = base_url + filename
+
         buffer: BytesIO = BytesIO()
         img_format: str = img.format if img.format else 'PNG'
         img.save(buffer, format=img_format)
         buffer.seek(0)
 
-        files: Dict[str, tuple] = {'file': (filename, buffer, mime_type)}
-        response: requests.Response = requests.put(url, files=files, timeout=30)
+        response: requests.Response = requests.put(
+            full_url,
+            data=buffer,
+            headers={'Content-Type': mime_type},
+            timeout=30
+        )
 
         if response.status_code != 200:
             raise ImageUploadError(
                 f"Upload failed with status code {response.status_code}: {response.text}"
             )
 
-        return response.json()['url']
+        return full_url
 
     except (InvalidMimeTypeError, MissingEnvironmentVariableError):
         raise
@@ -164,3 +170,4 @@ def upload_image(img: Image.Image, mime_type: str) -> Dict:
 
     # Execute
     result = upload_image(img, 'image/jpeg')
+    print(result)
diff --git a/tests/evaluation/image_upload_test.py b/tests/evaluation/image_upload_test.py
index a40b88e..866d3b3 100644
--- a/tests/evaluation/image_upload_test.py
+++ b/tests/evaluation/image_upload_test.py
@@ -1,7 +1,6 @@
 import pytest
 import uuid
-from io import BytesIO
-from unittest.mock import Mock, patch, MagicMock
+from unittest.mock import Mock, patch
 from PIL import Image
 import requests
 
@@ -191,11 +190,10 @@ def test_successful_upload(self, mock_uuid, mock_getenv, mock_put):
         """Test successful image upload with UUID-based filename"""
         # Setup mocks
         mock_uuid.return_value = uuid.UUID('12345678-1234-5678-1234-567812345678')
-        mock_getenv.return_value = 'https://s3.amazonaws.com/my-bucket'
+        mock_getenv.return_value = 'https://s3.amazonaws.com/my-bucket/'
 
         mock_response = Mock()
         mock_response.status_code = 200
-        mock_response.json.return_value = {'url': f'https://s3.amazonaws.com/uploaded-image.jpg'}
         mock_put.return_value = mock_response
 
         # Create a real PIL image for testing
@@ -206,27 +204,20 @@ def test_successful_upload(self, mock_uuid, mock_getenv, mock_put):
         result = upload_image(img, 'image/jpeg')
 
         # Verify response
-        assert result == 'https://s3.amazonaws.com/uploaded-image.jpg'
+        assert result == 'https://s3.amazonaws.com/my-bucket/12345678-1234-5678-1234-567812345678.jpeg'
         assert mock_put.called
         assert mock_put.call_args[1]['timeout'] == 30
 
-        # Verify UUID-based filename is used
-        call_args = mock_put.call_args
-        filename, file_obj, mime_type = call_args[1]['files']['file']
-        assert filename == '12345678-1234-5678-1234-567812345678.jpeg'
-        assert mime_type == 'image/jpeg'
-
     @patch('lf_toolkit.evaluation.image_upload.requests.put')
     @patch('lf_toolkit.evaluation.image_upload.os.getenv')
     @patch('lf_toolkit.evaluation.image_upload.uuid.uuid4')
     def test_upload_with_png_image(self, mock_uuid, mock_getenv, mock_put):
         """Test uploading PNG image with UUID-based filename"""
         mock_uuid.return_value = uuid.UUID('aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee')
-        mock_getenv.return_value = 'https://storage.example.com'
+        mock_getenv.return_value = 'https://storage.example.com/'
 
         mock_response = Mock()
         mock_response.status_code = 200
-        mock_response.json.return_value = {'url': 'https://storage.example.com/image.png'}
         mock_put.return_value = mock_response
 
         img = Image.new('RGBA', (50, 50), color=(0, 255, 0, 128))
@@ -234,13 +225,7 @@ def test_upload_with_png_image(self, mock_uuid, mock_getenv, mock_put):
 
         result = upload_image(img, 'image/png')
 
-        assert result == 'https://storage.example.com/image.png'
-
-        # Verify UUID-based filename is used
-        call_args = mock_put.call_args
-        filename, file_obj, mime_type = call_args[1]['files']['file']
-        assert filename == 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee.png'
-        assert mime_type == 'image/png'
+        assert result == 'https://storage.example.com/aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee.png'
 
     @patch('lf_toolkit.evaluation.image_upload.os.getenv')
     def test_upload_missing_s3_uri(self, mock_getenv):
@@ -341,11 +326,10 @@ def test_upload_mime_type_mismatch(self, mock_uuid, mock_getenv, mock_put):
     def test_upload_image_no_format(self, mock_uuid, mock_getenv, mock_put):
         """Test upload with image that has no format (defaults to PNG) uses UUID filename"""
         mock_uuid.return_value = uuid.UUID('12345678-1234-5678-1234-567812345678')
-        mock_getenv.return_value = 'https://s3.amazonaws.com/bucket'
+        mock_getenv.return_value = 'https://s3.amazonaws.com/bucket/'
 
         mock_response = Mock()
         mock_response.status_code = 200
-        mock_response.json.return_value = {'url': 'https://s3.amazonaws.com/image.png'}
         mock_put.return_value = mock_response
 
         img = Image.new('RGB', (100, 100))
@@ -353,50 +337,7 @@ def test_upload_image_no_format(self, mock_uuid, mock_getenv, mock_put):
 
         result = upload_image(img, 'image/png')
 
-        assert result == 'https://s3.amazonaws.com/image.png'
-
-        # Verify UUID-based filename with default .png extension
-        call_args = mock_put.call_args
-        filename, file_obj, mime_type = call_args[1]['files']['file']
-        assert filename == '12345678-1234-5678-1234-567812345678.png'
-        assert mime_type == 'image/png'
-
-    @patch('lf_toolkit.evaluation.image_upload.requests.put')
-    @patch('lf_toolkit.evaluation.image_upload.os.getenv')
-    @patch('lf_toolkit.evaluation.image_upload.uuid.uuid4')
-    def test_upload_uses_different_uuid_each_time(self, mock_uuid, mock_getenv, mock_put):
-        """Test that each upload generates a unique UUID-based filename"""
-        mock_getenv.return_value = 'https://s3.amazonaws.com/bucket'
-
-        mock_response = Mock()
-        mock_response.status_code = 200
-        mock_response.json.return_value = {'url': 'https://s3.amazonaws.com/uploaded.jpg'}
-        mock_put.return_value = mock_response
-
-        # First upload with first UUID
-        uuid1 = uuid.UUID('11111111-1111-1111-1111-111111111111')
-        mock_uuid.return_value = uuid1
-
-        img1 = Image.new('RGB', (100, 100))
-        img1.format = 'JPEG'
-        upload_image(img1, 'image/jpeg')
-
-        filename1 = mock_put.call_args[1]['files']['file'][0]
-
-        # Second upload with different UUID
-        uuid2 = uuid.UUID('22222222-2222-2222-2222-222222222222')
-        mock_uuid.return_value = uuid2
-
-        img2 = Image.new('RGB', (100, 100))
-        img2.format = 'JPEG'
-        upload_image(img2, 'image/jpeg')
-
-        filename2 = mock_put.call_args[1]['files']['file'][0]
-
-        # Verify different UUIDs result in different filenames
-        assert filename1 == '11111111-1111-1111-1111-111111111111.jpeg'
-        assert filename2 == '22222222-2222-2222-2222-222222222222.jpeg'
-        assert filename1 != filename2
+        assert result == 'https://s3.amazonaws.com/bucket/12345678-1234-5678-1234-567812345678.png'
 
     @patch('lf_toolkit.evaluation.image_upload.requests.put')
     @patch('lf_toolkit.evaluation.image_upload.os.getenv')
@@ -404,11 +345,10 @@ def test_upload_uses_different_uuid_each_time(self, mock_uuid, mock_getenv, mock
     def test_upload_verifies_correct_file_uploaded(self, mock_uuid, mock_getenv, mock_put):
         """Test that the correct file data is sent in upload request"""
         mock_uuid.return_value = uuid.UUID('12345678-1234-5678-1234-567812345678')
-        mock_getenv.return_value = 'https://s3.amazonaws.com/bucket'
+        mock_getenv.return_value = 'https://s3.amazonaws.com/bucket/'
 
         mock_response = Mock()
         mock_response.status_code = 200
-        mock_response.json.return_value = {'url': 'https://s3.amazonaws.com/image.jpg'}
         mock_put.return_value = mock_response
 
         img = Image.new('RGB', (100, 100), color='blue')
@@ -416,16 +356,6 @@ def test_upload_verifies_correct_file_uploaded(self, mock_uuid, mock_getenv, moc
 
         upload_image(img, 'image/jpeg')
 
-        # Verify the put was called with correct arguments
-        call_args = mock_put.call_args
-        assert call_args[0][0] == 'https://s3.amazonaws.com/bucket'
-        assert 'files' in call_args[1]
-        assert 'file' in call_args[1]['files']
-
-        filename, file_obj, mime_type = call_args[1]['files']['file']
-        assert filename == '12345678-1234-5678-1234-567812345678.jpeg'
-        assert mime_type == 'image/jpeg'
-
 
 class TestExceptionHierarchy:
     """Test suite for custom exception classes"""

From cdf120cc8687e7e5ef5f08fd073ba46d792e43fa Mon Sep 17 00:00:00 2001
From: Marcus Messer <marcus@marcusm.co.uk>
Date: Wed, 28 Jan 2026 13:37:59 +0000
Subject: [PATCH 04/22] Switched to auto parsing of mime_type

---
 lf_toolkit/evaluation/image_upload.py |  74 +++----------
 tests/evaluation/image_upload_test.py | 149 ++------------------------
 2 files changed, 21 insertions(+), 202 deletions(-)

diff --git a/lf_toolkit/evaluation/image_upload.py b/lf_toolkit/evaluation/image_upload.py
index 9a97973..0642a19 100644
--- a/lf_toolkit/evaluation/image_upload.py
+++ b/lf_toolkit/evaluation/image_upload.py
@@ -13,22 +13,16 @@
     'image/png': ['PNG'],
     'image/gif': ['GIF'],
     'image/bmp': ['BMP'],
-    'image/webp': ['WEBP'],
-    'image/tiff': ['TIFF', 'TIF'],
-    'image/x-icon': ['ICO'],
 }
 
-FORMAT_TO_EXTENSION: Dict[str, List[str]] = {
-    'JPEG': ['.jpg', '.jpeg', '.jpe'],
-    'PNG': ['.png'],
-    'GIF': ['.gif'],
-    'BMP': ['.bmp'],
-    'WEBP': ['.webp'],
-    'TIFF': ['.tiff', '.tif'],
-    'ICO': ['.ico'],
+FORMAT_TO_MIME: Dict[str, str] = {
+    'JPEG': 'image/jpeg',
+    'JPG': 'image/jpeg',
+    'PNG': 'image/png',
+    'GIF': 'image/gif',
+    "bmp": 'image/bmp'
 }
 
-
 class ImageUploadError(Exception):
     """Custom exception for image upload failures"""
     pass
@@ -57,50 +51,6 @@ def generate_file_name(img: Image.Image) -> str:
     format_ext: str = img.format.lower() if img.format else 'png'
     return f"{unique_id}.{format_ext}"
 
-
-def validate_mime_type(mime_type: str, img: Image.Image, filename: str) -> bool:
-    """Validate MIME type against image format and filename
-
-    Args:
-        mime_type: MIME type string to validate
-        img: PIL Image object
-        filename: Name of the file
-
-    Returns:
-        True if validation passes
-
-    Raises:
-        InvalidMimeTypeError: If MIME type is invalid or doesn't match image
-    """
-    if mime_type not in MIME_TO_FORMAT:
-        raise InvalidMimeTypeError(
-            f"Invalid MIME type '{mime_type}'. "
-            f"Supported types: {', '.join(MIME_TO_FORMAT.keys())}"
-        )
-
-    img_format: Optional[str] = img.format.upper() if img.format else None
-
-    if img_format:
-        allowed_formats: List[str] = MIME_TO_FORMAT[mime_type]
-        if img_format not in allowed_formats:
-            raise InvalidMimeTypeError(
-                f"MIME type '{mime_type}' does not match image format '{img_format}'. "
-                f"Expected formats for {mime_type}: {', '.join(allowed_formats)}"
-            )
-
-    file_ext: str = filename[filename.rfind('.'):].lower()
-
-    if img_format and img_format in FORMAT_TO_EXTENSION:
-        valid_extensions: List[str] = FORMAT_TO_EXTENSION[img_format]
-        if file_ext not in valid_extensions:
-            raise InvalidMimeTypeError(
-                f"File extension '{file_ext}' does not match format '{img_format}'. "
-                f"Expected extensions: {', '.join(valid_extensions)}"
-            )
-
-    return True
-
-
 def get_s3_bucket_uri() -> str:
     """Get S3 bucket URI from environment variable"""
     s3_uri: Optional[str] = os.getenv('S3_BUCKET_URI')
@@ -113,12 +63,11 @@ def get_s3_bucket_uri() -> str:
     return s3_uri
 
 
-def upload_image(img: Image.Image, mime_type: str) -> str:
+def upload_image(img: Image.Image) -> str:
     """Upload PIL image with comprehensive MIME type validation
 
     Args:
         img: PIL Image object to upload
-        mime_type: MIME type for the upload
 
     Returns:
         JSON response from the server as a dictionary
@@ -134,10 +83,13 @@ def upload_image(img: Image.Image, mime_type: str) -> str:
 
         filename: str = generate_file_name(img)
 
-        validate_mime_type(mime_type, img, filename)
-
         full_url = base_url + filename
 
+        if img.format is None:
+            img.format = 'PNG'
+
+        mime_type = FORMAT_TO_MIME[img.format.upper()]
+
         buffer: BytesIO = BytesIO()
         img_format: str = img.format if img.format else 'PNG'
         img.save(buffer, format=img_format)
@@ -169,5 +121,5 @@ def upload_image(img: Image.Image, mime_type: str) -> str:
     img.format = 'JPEG'
 
     # Execute
-    result = upload_image(img, 'image/jpeg')
+    result = upload_image(img)
     print(result)
diff --git a/tests/evaluation/image_upload_test.py b/tests/evaluation/image_upload_test.py
index 866d3b3..eefe471 100644
--- a/tests/evaluation/image_upload_test.py
+++ b/tests/evaluation/image_upload_test.py
@@ -7,14 +7,12 @@
 # Import the module to test
 from lf_toolkit.evaluation.image_upload import (
     generate_file_name,
-    validate_mime_type,
     get_s3_bucket_uri,
     upload_image,
     ImageUploadError,
     InvalidMimeTypeError,
     MissingEnvironmentVariableError,
     MIME_TO_FORMAT,
-    FORMAT_TO_EXTENSION
 )
 
 
@@ -65,89 +63,6 @@ def test_generate_file_name_unique(self):
         assert filename1 != filename2
 
 
-class TestValidateMimeType:
-    """Test suite for validate_mime_type function"""
-
-    def test_valid_jpeg_mime_type(self):
-        """Test validation with valid JPEG MIME type"""
-        img = Mock(spec=Image.Image)
-        img.format = 'JPEG'
-
-        result = validate_mime_type('image/jpeg', img, 'test.jpg')
-        assert result is True
-
-    def test_valid_png_mime_type(self):
-        """Test validation with valid PNG MIME type"""
-        img = Mock(spec=Image.Image)
-        img.format = 'PNG'
-
-        result = validate_mime_type('image/png', img, 'test.png')
-        assert result is True
-
-    def test_invalid_mime_type(self):
-        """Test validation with unsupported MIME type"""
-        img = Mock(spec=Image.Image)
-        img.format = 'PNG'
-
-        with pytest.raises(InvalidMimeTypeError) as exc_info:
-            validate_mime_type('image/invalid', img, 'test.png')
-
-        assert "Invalid MIME type 'image/invalid'" in str(exc_info.value)
-
-    def test_mime_type_format_mismatch(self):
-        """Test validation when MIME type doesn't match image format"""
-        img = Mock(spec=Image.Image)
-        img.format = 'PNG'
-
-        with pytest.raises(InvalidMimeTypeError) as exc_info:
-            validate_mime_type('image/jpeg', img, 'test.png')
-
-        assert "does not match image format 'PNG'" in str(exc_info.value)
-
-    def test_extension_format_mismatch(self):
-        """Test validation when file extension doesn't match format"""
-        img = Mock(spec=Image.Image)
-        img.format = 'JPEG'
-
-        with pytest.raises(InvalidMimeTypeError) as exc_info:
-            validate_mime_type('image/jpeg', img, 'test.png')
-
-        assert "File extension '.png' does not match format 'JPEG'" in str(exc_info.value)
-
-    def test_valid_with_no_image_format(self):
-        """Test validation when image has no format attribute"""
-        img = Mock(spec=Image.Image)
-        img.format = None
-
-        # Should not raise when format is None
-        result = validate_mime_type('image/png', img, 'test.png')
-        assert result is True
-
-    def test_valid_webp_mime_type(self):
-        """Test validation with valid WEBP MIME type"""
-        img = Mock(spec=Image.Image)
-        img.format = 'WEBP'
-
-        result = validate_mime_type('image/webp', img, 'test.webp')
-        assert result is True
-
-    def test_jpeg_with_jpg_extension(self):
-        """Test JPEG image with .jpg extension"""
-        img = Mock(spec=Image.Image)
-        img.format = 'JPEG'
-
-        result = validate_mime_type('image/jpeg', img, 'photo.jpg')
-        assert result is True
-
-    def test_jpeg_with_jpeg_extension(self):
-        """Test JPEG image with .jpeg extension"""
-        img = Mock(spec=Image.Image)
-        img.format = 'JPEG'
-
-        result = validate_mime_type('image/jpeg', img, 'photo.jpeg')
-        assert result is True
-
-
 class TestGetS3BucketUri:
     """Test suite for get_s3_bucket_uri function"""
 
@@ -201,7 +116,7 @@ def test_successful_upload(self, mock_uuid, mock_getenv, mock_put):
         img.format = 'JPEG'
 
         # Execute
-        result = upload_image(img, 'image/jpeg')
+        result = upload_image(img)
 
         # Verify response
         assert result == 'https://s3.amazonaws.com/my-bucket/12345678-1234-5678-1234-567812345678.jpeg'
@@ -223,7 +138,7 @@ def test_upload_with_png_image(self, mock_uuid, mock_getenv, mock_put):
         img = Image.new('RGBA', (50, 50), color=(0, 255, 0, 128))
         img.format = 'PNG'
 
-        result = upload_image(img, 'image/png')
+        result = upload_image(img)
 
         assert result == 'https://storage.example.com/aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee.png'
 
@@ -236,18 +151,7 @@ def test_upload_missing_s3_uri(self, mock_getenv):
         img.format = 'JPEG'
 
         with pytest.raises(MissingEnvironmentVariableError):
-            upload_image(img, 'image/jpeg')
-
-    @patch('lf_toolkit.evaluation.image_upload.os.getenv')
-    def test_upload_invalid_mime_type(self, mock_getenv):
-        """Test upload fails with invalid MIME type"""
-        mock_getenv.return_value = 'https://s3.amazonaws.com/bucket'
-
-        img = Image.new('RGB', (100, 100))
-        img.format = 'JPEG'
-
-        with pytest.raises(InvalidMimeTypeError):
-            upload_image(img, 'image/invalid')
+            upload_image(img)
 
     @patch('lf_toolkit.evaluation.image_upload.requests.put')
     @patch('lf_toolkit.evaluation.image_upload.os.getenv')
@@ -266,7 +170,7 @@ def test_upload_server_error(self, mock_uuid, mock_getenv, mock_put):
         img.format = 'JPEG'
 
         with pytest.raises(ImageUploadError) as exc_info:
-            upload_image(img, 'image/jpeg')
+            upload_image(img)
 
         assert "Upload failed with status code 500" in str(exc_info.value)
 
@@ -284,7 +188,7 @@ def test_upload_network_error(self, mock_uuid, mock_getenv, mock_put):
         img.format = 'JPEG'
 
         with pytest.raises(ImageUploadError) as exc_info:
-            upload_image(img, 'image/jpeg')
+            upload_image(img)
 
         assert "Network error" in str(exc_info.value)
 
@@ -302,24 +206,10 @@ def test_upload_timeout_error(self, mock_uuid, mock_getenv, mock_put):
         img.format = 'JPEG'
 
         with pytest.raises(ImageUploadError) as exc_info:
-            upload_image(img, 'image/jpeg')
+            upload_image(img)
 
         assert "Network error" in str(exc_info.value)
 
-    @patch('lf_toolkit.evaluation.image_upload.requests.put')
-    @patch('lf_toolkit.evaluation.image_upload.os.getenv')
-    @patch('lf_toolkit.evaluation.image_upload.uuid.uuid4')
-    def test_upload_mime_type_mismatch(self, mock_uuid, mock_getenv, mock_put):
-        """Test upload fails when MIME type doesn't match image format"""
-        mock_uuid.return_value = uuid.UUID('12345678-1234-5678-1234-567812345678')
-        mock_getenv.return_value = 'https://s3.amazonaws.com/bucket'
-
-        img = Image.new('RGB', (100, 100))
-        img.format = 'PNG'
-
-        with pytest.raises(InvalidMimeTypeError):
-            upload_image(img, 'image/jpeg')
-
     @patch('lf_toolkit.evaluation.image_upload.requests.put')
     @patch('lf_toolkit.evaluation.image_upload.os.getenv')
     @patch('lf_toolkit.evaluation.image_upload.uuid.uuid4')
@@ -335,7 +225,7 @@ def test_upload_image_no_format(self, mock_uuid, mock_getenv, mock_put):
         img = Image.new('RGB', (100, 100))
         img.format = None
 
-        result = upload_image(img, 'image/png')
+        result = upload_image(img)
 
         assert result == 'https://s3.amazonaws.com/bucket/12345678-1234-5678-1234-567812345678.png'
 
@@ -354,7 +244,7 @@ def test_upload_verifies_correct_file_uploaded(self, mock_uuid, mock_getenv, moc
         img = Image.new('RGB', (100, 100), color='blue')
         img.format = 'JPEG'
 
-        upload_image(img, 'image/jpeg')
+        upload_image(img)
 
 
 class TestExceptionHierarchy:
@@ -385,28 +275,5 @@ def test_invalid_mime_type_error_caught_as_image_upload_error(self):
             raise InvalidMimeTypeError("Invalid MIME")
 
 
-class TestConstants:
-    """Test suite for module constants"""
-
-    def test_mime_to_format_has_expected_types(self):
-        """Test that MIME_TO_FORMAT contains expected image types"""
-        assert 'image/jpeg' in MIME_TO_FORMAT
-        assert 'image/png' in MIME_TO_FORMAT
-        assert 'image/gif' in MIME_TO_FORMAT
-        assert 'image/webp' in MIME_TO_FORMAT
-
-    def test_format_to_extension_has_expected_formats(self):
-        """Test that FORMAT_TO_EXTENSION contains expected formats"""
-        assert 'JPEG' in FORMAT_TO_EXTENSION
-        assert 'PNG' in FORMAT_TO_EXTENSION
-        assert 'GIF' in FORMAT_TO_EXTENSION
-        assert 'WEBP' in FORMAT_TO_EXTENSION
-
-    def test_jpeg_has_multiple_extensions(self):
-        """Test that JPEG format has multiple valid extensions"""
-        assert '.jpg' in FORMAT_TO_EXTENSION['JPEG']
-        assert '.jpeg' in FORMAT_TO_EXTENSION['JPEG']
-
-
 if __name__ == '__main__':
     pytest.main([__file__, '-v'])

From fe5df2d0aa710b0fffa658585fc1cf08aefa1b67 Mon Sep 17 00:00:00 2001
From: Marcus Messer <marcus@marcusm.co.uk>
Date: Wed, 28 Jan 2026 17:49:59 +0000
Subject: [PATCH 05/22] Implemented auth for uploading to S3

---
 lf_toolkit/evaluation/image_upload.py | 60 ++++++++++++++--
 poetry.lock                           | 99 ++++++++++++++++++++++++++-
 pyproject.toml                        |  1 +
 3 files changed, 155 insertions(+), 5 deletions(-)

diff --git a/lf_toolkit/evaluation/image_upload.py b/lf_toolkit/evaluation/image_upload.py
index 0642a19..9f86c53 100644
--- a/lf_toolkit/evaluation/image_upload.py
+++ b/lf_toolkit/evaluation/image_upload.py
@@ -1,3 +1,5 @@
+import hashlib
+
 import requests
 import uuid
 import os
@@ -6,6 +8,10 @@
 from PIL import Image
 from dotenv import load_dotenv
 
+from botocore.auth import SigV4Auth
+from botocore.awsrequest import AWSRequest
+from botocore.credentials import Credentials
+
 load_dotenv()
 
 MIME_TO_FORMAT: Dict[str, List[str]] = {
@@ -63,6 +69,49 @@ def get_s3_bucket_uri() -> str:
     return s3_uri
 
 
+def get_aws_signed_request(full_url, buffer, mime_type):
+    credentials = Credentials(
+        access_key=os.environ['AWS_ACCESS_KEY_ID'],
+        secret_key=os.environ['AWS_SECRET_ACCESS_KEY'],
+    )
+
+    if hasattr(buffer, 'read'):
+        # It's a file-like object (BytesIO, etc.)
+        current_pos = buffer.tell()  # Save current position
+        buffer.seek(0)  # Go to start
+        data = buffer.read()  # Read all data
+        buffer.seek(current_pos)  # Restore position
+    else:
+        # It's already bytes
+        data = buffer
+
+        # Calculate content hash and length
+    content_hash = hashlib.sha256(data).hexdigest()
+    content_length = len(data)
+
+    # Create the request for signing with required headers
+    headers = {
+        'Content-Type': mime_type,
+        'Content-Length': str(content_length),
+        'x-amz-content-sha256': content_hash
+    }
+
+    # Create the request for signing
+    aws_request = AWSRequest(
+        method='PUT',
+        url=full_url,
+        data=buffer,
+        headers=headers
+    )
+
+    region = os.environ.get('AWS_REGION', 'eu-west-2')
+
+    # Sign the request
+    SigV4Auth(credentials, 's3', region).add_auth(aws_request)
+
+    return aws_request
+
+
 def upload_image(img: Image.Image) -> str:
     """Upload PIL image with comprehensive MIME type validation
 
@@ -95,10 +144,13 @@ def upload_image(img: Image.Image) -> str:
         img.save(buffer, format=img_format)
         buffer.seek(0)
 
-        response: requests.Response = requests.put(
-            full_url,
-            data=buffer,
-            headers={'Content-Type': mime_type},
+        aws_request = get_aws_signed_request(full_url, buffer, mime_type).prepare()
+
+        response: requests.Response = requests.request(
+            method=aws_request.method,
+            url=aws_request.url,
+            data=aws_request.body,
+            headers=aws_request.headers,
             timeout=30
         )
 
diff --git a/poetry.lock b/poetry.lock
index fa953b9..58fff1f 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -129,6 +129,46 @@ d = ["aiohttp (>=3.7.4) ; sys_platform != \"win32\" or implementation_name != \"
 jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"]
 uvloop = ["uvloop (>=0.15.2)"]
 
+[[package]]
+name = "boto3"
+version = "1.42.36"
+description = "The AWS SDK for Python"
+optional = false
+python-versions = ">=3.9"
+groups = ["main"]
+files = [
+    {file = "boto3-1.42.36-py3-none-any.whl", hash = "sha256:e0ff6f2747bfdec63405b35ea185a7aea35239c3f4fe99e4d29368a6de9c4a84"},
+    {file = "boto3-1.42.36.tar.gz", hash = "sha256:a4eb51105c8c5d7b2bc2a9e2316e69baf69a55611275b9f189c0cf59f1aae171"},
+]
+
+[package.dependencies]
+botocore = ">=1.42.36,<1.43.0"
+jmespath = ">=0.7.1,<2.0.0"
+s3transfer = ">=0.16.0,<0.17.0"
+
+[package.extras]
+crt = ["botocore[crt] (>=1.21.0,<2.0a0)"]
+
+[[package]]
+name = "botocore"
+version = "1.42.36"
+description = "Low-level, data-driven core of boto 3."
+optional = false
+python-versions = ">=3.9"
+groups = ["main"]
+files = [
+    {file = "botocore-1.42.36-py3-none-any.whl", hash = "sha256:2cfae4c482e5e87bd835ab4289b711490c161ba57e852c06b65a03e7c25e08eb"},
+    {file = "botocore-1.42.36.tar.gz", hash = "sha256:2ebd89cc75927944e2cee51b7adce749f38e0cb269a758a6464a27f8bcca65fb"},
+]
+
+[package.dependencies]
+jmespath = ">=0.7.1,<2.0.0"
+python-dateutil = ">=2.1,<3.0.0"
+urllib3 = {version = ">=1.25.4,<2.2.0 || >2.2.0,<3", markers = "python_version >= \"3.10\""}
+
+[package.extras]
+crt = ["awscrt (==0.29.2)"]
+
 [[package]]
 name = "build"
 version = "1.3.0"
@@ -1023,6 +1063,18 @@ files = [
 test = ["async-timeout ; python_version < \"3.11\"", "pytest", "pytest-asyncio (>=0.17)", "pytest-trio", "testpath", "trio"]
 trio = ["trio"]
 
+[[package]]
+name = "jmespath"
+version = "1.1.0"
+description = "JSON Matching Expressions"
+optional = false
+python-versions = ">=3.9"
+groups = ["main"]
+files = [
+    {file = "jmespath-1.1.0-py3-none-any.whl", hash = "sha256:a5663118de4908c91729bea0acadca56526eb2698e83de10cd116ae0f4e97c64"},
+    {file = "jmespath-1.1.0.tar.gz", hash = "sha256:472c87d80f36026ae83c6ddd0f1d05d4e510134ed462851fd5f754c8c3cbb88d"},
+]
+
 [[package]]
 name = "jsonrpcserver"
 version = "5.0.9"
@@ -1826,6 +1878,21 @@ pytest = ">=4.6"
 [package.extras]
 testing = ["fields", "hunter", "process-tests", "pytest-xdist", "virtualenv"]
 
+[[package]]
+name = "python-dateutil"
+version = "2.9.0.post0"
+description = "Extensions to the standard Python datetime module"
+optional = false
+python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7"
+groups = ["main"]
+files = [
+    {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"},
+    {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"},
+]
+
+[package.dependencies]
+six = ">=1.5"
+
 [[package]]
 name = "python-dotenv"
 version = "1.2.1"
@@ -2266,6 +2333,24 @@ files = [
     {file = "rpds_py-0.27.1.tar.gz", hash = "sha256:26a1c73171d10b7acccbded82bf6a586ab8203601e565badc74bbbf8bc5a10f8"},
 ]
 
+[[package]]
+name = "s3transfer"
+version = "0.16.0"
+description = "An Amazon S3 Transfer Manager"
+optional = false
+python-versions = ">=3.9"
+groups = ["main"]
+files = [
+    {file = "s3transfer-0.16.0-py3-none-any.whl", hash = "sha256:18e25d66fed509e3868dc1572b3f427ff947dd2c56f844a5bf09481ad3f3b2fe"},
+    {file = "s3transfer-0.16.0.tar.gz", hash = "sha256:8e990f13268025792229cd52fa10cb7163744bf56e719e0b9cb925ab79abf920"},
+]
+
+[package.dependencies]
+botocore = ">=1.37.4,<2.0a.0"
+
+[package.extras]
+crt = ["botocore[crt] (>=1.37.4,<2.0a.0)"]
+
 [[package]]
 name = "secretstorage"
 version = "3.4.0"
@@ -2295,6 +2380,18 @@ files = [
     {file = "shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de"},
 ]
 
+[[package]]
+name = "six"
+version = "1.17.0"
+description = "Python 2 and 3 compatibility utilities"
+optional = false
+python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7"
+groups = ["main"]
+files = [
+    {file = "six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274"},
+    {file = "six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81"},
+]
+
 [[package]]
 name = "sniffio"
 version = "1.3.1"
@@ -2751,4 +2848,4 @@ parsing = ["antlr4-python3-runtime", "lark", "latex2sympy"]
 [metadata]
 lock-version = "2.1"
 python-versions = "^3.11"
-content-hash = "7fe52b482228044b36e97dde750e982e13c7837c6c7f9d6fd45433aeae8c18ea"
+content-hash = "9dc3f7e12199191cf41834205dbb2705b1e1e4b2dd851b1bb57e312d3c4e8a8b"
diff --git a/pyproject.toml b/pyproject.toml
index e7f311a..e06cf72 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -46,6 +46,7 @@ pytest-asyncio = "^1.2.0"
 pillow = "^12.1.0"
 requests = "^2.32.5"
 dotenv = "^0.9.9"
+boto3 = "^1.42.36"
 
 [tool.poetry.group.dev.dependencies]
 black = "24.8.0"

From e01140997e94e2fa9bf7f9fa82fd38bbe806046d Mon Sep 17 00:00:00 2001
From: Marcus Messer <marcus@marcusm.co.uk>
Date: Wed, 28 Jan 2026 18:10:44 +0000
Subject: [PATCH 06/22] Added session token

---
 lf_toolkit/evaluation/image_upload.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lf_toolkit/evaluation/image_upload.py b/lf_toolkit/evaluation/image_upload.py
index 9f86c53..14e3975 100644
--- a/lf_toolkit/evaluation/image_upload.py
+++ b/lf_toolkit/evaluation/image_upload.py
@@ -73,6 +73,7 @@ def get_aws_signed_request(full_url, buffer, mime_type):
     credentials = Credentials(
         access_key=os.environ['AWS_ACCESS_KEY_ID'],
         secret_key=os.environ['AWS_SECRET_ACCESS_KEY'],
+        token=os.environ.get('AWS_SESSION_TOKEN', None)
     )
 
     if hasattr(buffer, 'read'):

From b375d16f0d55dc092fc30b036415a247e2459eea Mon Sep 17 00:00:00 2001
From: Marcus Messer <marcus@marcusm.co.uk>
Date: Thu, 29 Jan 2026 09:48:26 +0000
Subject: [PATCH 07/22] Added passing of folder name

---
 lf_toolkit/evaluation/image_upload.py |   7 +-
 tests/evaluation/image_upload_test.py | 157 +++++++++++++++++---------
 2 files changed, 110 insertions(+), 54 deletions(-)

diff --git a/lf_toolkit/evaluation/image_upload.py b/lf_toolkit/evaluation/image_upload.py
index 14e3975..802bab2 100644
--- a/lf_toolkit/evaluation/image_upload.py
+++ b/lf_toolkit/evaluation/image_upload.py
@@ -113,10 +113,11 @@ def get_aws_signed_request(full_url, buffer, mime_type):
     return aws_request
 
 
-def upload_image(img: Image.Image) -> str:
+def upload_image(img: Image.Image, folder_name: str) -> str:
     """Upload PIL image with comprehensive MIME type validation
 
     Args:
+        folder_name: name of folder to save image
         img: PIL Image object to upload
 
     Returns:
@@ -133,7 +134,7 @@ def upload_image(img: Image.Image) -> str:
 
         filename: str = generate_file_name(img)
 
-        full_url = base_url + filename
+        full_url = os.path.join(base_url, folder_name, filename)
 
         if img.format is None:
             img.format = 'PNG'
@@ -174,5 +175,5 @@ def upload_image(img: Image.Image) -> str:
     img.format = 'JPEG'
 
     # Execute
-    result = upload_image(img)
+    result = upload_image(img, "eduvision")
     print(result)
diff --git a/tests/evaluation/image_upload_test.py b/tests/evaluation/image_upload_test.py
index eefe471..b4a0125 100644
--- a/tests/evaluation/image_upload_test.py
+++ b/tests/evaluation/image_upload_test.py
@@ -12,7 +12,6 @@
     ImageUploadError,
     InvalidMimeTypeError,
     MissingEnvironmentVariableError,
-    MIME_TO_FORMAT,
 )
 
 
@@ -98,49 +97,74 @@ def test_get_s3_bucket_uri_empty_string(self):
 class TestUploadImage:
     """Test suite for upload_image function"""
 
-    @patch('lf_toolkit.evaluation.image_upload.requests.put')
+    @patch('lf_toolkit.evaluation.image_upload.requests.request')
+    @patch('lf_toolkit.evaluation.image_upload.get_aws_signed_request')
     @patch('lf_toolkit.evaluation.image_upload.os.getenv')
     @patch('lf_toolkit.evaluation.image_upload.uuid.uuid4')
-    def test_successful_upload(self, mock_uuid, mock_getenv, mock_put):
+    def test_successful_upload(self, mock_uuid, mock_getenv, mock_get_aws_signed_request, mock_request):
         """Test successful image upload with UUID-based filename"""
         # Setup mocks
         mock_uuid.return_value = uuid.UUID('12345678-1234-5678-1234-567812345678')
-        mock_getenv.return_value = 'https://s3.amazonaws.com/my-bucket/'
+        mock_getenv.return_value = 'https://s3.amazonaws.com/eduvision'
+
+        # Mock the AWS signed request
+        mock_prepared_request = Mock()
+        mock_prepared_request.method = 'PUT'
+        mock_prepared_request.url = 'https://s3.amazonaws.com/eduvision/eduvision/12345678-1234-5678-1234-567812345678.jpeg'
+        mock_prepared_request.body = b'mock_body'
+        mock_prepared_request.headers = {'Content-Type': 'image/jpeg'}
+
+        mock_aws_request = Mock()
+        mock_aws_request.prepare.return_value = mock_prepared_request
+        mock_get_aws_signed_request.return_value = mock_aws_request
 
         mock_response = Mock()
         mock_response.status_code = 200
-        mock_put.return_value = mock_response
+        mock_request.return_value = mock_response
 
         # Create a real PIL image for testing
         img = Image.new('RGB', (100, 100), color='red')
         img.format = 'JPEG'
 
         # Execute
-        result = upload_image(img)
+        result = upload_image(img, "eduvision")
 
         # Verify response
-        assert result == 'https://s3.amazonaws.com/my-bucket/12345678-1234-5678-1234-567812345678.jpeg'
-        assert mock_put.called
-        assert mock_put.call_args[1]['timeout'] == 30
+        assert result == 'https://s3.amazonaws.com/eduvision/eduvision/12345678-1234-5678-1234-567812345678.jpeg'
+        assert mock_request.called
+        assert mock_request.call_args[1]['timeout'] == 30
 
-    @patch('lf_toolkit.evaluation.image_upload.requests.put')
+    @patch('lf_toolkit.evaluation.image_upload.requests.request')
+    @patch('lf_toolkit.evaluation.image_upload.get_aws_signed_request')
     @patch('lf_toolkit.evaluation.image_upload.os.getenv')
     @patch('lf_toolkit.evaluation.image_upload.uuid.uuid4')
-    def test_upload_with_png_image(self, mock_uuid, mock_getenv, mock_put):
+    def test_upload_with_png(self, mock_uuid, mock_getenv, mock_get_aws_signed_request, mock_request):
         """Test uploading PNG image with UUID-based filename"""
-        mock_uuid.return_value = uuid.UUID('aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee')
-        mock_getenv.return_value = 'https://storage.example.com/'
+        # Setup mocks
+        mock_uuid.return_value = uuid.UUID('12345678-1234-5678-1234-567812345678')
+        mock_getenv.return_value = 'https://s3.amazonaws.com/eduvision'
+
+        # Mock the AWS signed request
+        mock_prepared_request = Mock()
+        mock_prepared_request.method = 'PUT'
+        mock_prepared_request.url = 'https://s3.amazonaws.com/eduvision/eduvision/12345678-1234-5678-1234-567812345678.png'
+        mock_prepared_request.body = b'mock_body'
+        mock_prepared_request.headers = {'Content-Type': 'image/jpeg'}
+
+        mock_aws_request = Mock()
+        mock_aws_request.prepare.return_value = mock_prepared_request
+        mock_get_aws_signed_request.return_value = mock_aws_request
 
         mock_response = Mock()
         mock_response.status_code = 200
-        mock_put.return_value = mock_response
+        mock_request.return_value = mock_response
 
         img = Image.new('RGBA', (50, 50), color=(0, 255, 0, 128))
         img.format = 'PNG'
 
-        result = upload_image(img)
+        result = upload_image(img, "eduvision")
 
-        assert result == 'https://storage.example.com/aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee.png'
+        assert result == 'https://s3.amazonaws.com/eduvision/eduvision/12345678-1234-5678-1234-567812345678.png'
 
     @patch('lf_toolkit.evaluation.image_upload.os.getenv')
     def test_upload_missing_s3_uri(self, mock_getenv):
@@ -151,100 +175,131 @@ def test_upload_missing_s3_uri(self, mock_getenv):
         img.format = 'JPEG'
 
         with pytest.raises(MissingEnvironmentVariableError):
-            upload_image(img)
+            upload_image(img, "eduvision")
 
-    @patch('lf_toolkit.evaluation.image_upload.requests.put')
+    @patch('lf_toolkit.evaluation.image_upload.requests.request')
+    @patch('lf_toolkit.evaluation.image_upload.get_aws_signed_request')
     @patch('lf_toolkit.evaluation.image_upload.os.getenv')
     @patch('lf_toolkit.evaluation.image_upload.uuid.uuid4')
-    def test_upload_server_error(self, mock_uuid, mock_getenv, mock_put):
+    def test_upload_server_error(self, mock_uuid, mock_getenv, mock_get_aws_signed_request, mock_request):
         """Test upload fails when server returns error"""
         mock_uuid.return_value = uuid.UUID('12345678-1234-5678-1234-567812345678')
         mock_getenv.return_value = 'https://s3.amazonaws.com/bucket'
 
+        # Mock the AWS signed request
+        mock_prepared_request = Mock()
+        mock_prepared_request.method = 'PUT'
+        mock_prepared_request.url = 'https://s3.amazonaws.com/bucket/eduvision/12345678-1234-5678-1234-567812345678.jpeg'
+        mock_prepared_request.body = b'mock_body'
+        mock_prepared_request.headers = {'Content-Type': 'image/jpeg'}
+
+        mock_aws_request = Mock()
+        mock_aws_request.prepare.return_value = mock_prepared_request
+        mock_get_aws_signed_request.return_value = mock_aws_request
+
         mock_response = Mock()
         mock_response.status_code = 500
         mock_response.text = 'Internal Server Error'
-        mock_put.return_value = mock_response
+        mock_request.return_value = mock_response
 
         img = Image.new('RGB', (100, 100))
         img.format = 'JPEG'
 
         with pytest.raises(ImageUploadError) as exc_info:
-            upload_image(img)
+            upload_image(img, "eduvision")
 
         assert "Upload failed with status code 500" in str(exc_info.value)
 
-    @patch('lf_toolkit.evaluation.image_upload.requests.put')
+    @patch('lf_toolkit.evaluation.image_upload.requests.request')
+    @patch('lf_toolkit.evaluation.image_upload.get_aws_signed_request')
     @patch('lf_toolkit.evaluation.image_upload.os.getenv')
     @patch('lf_toolkit.evaluation.image_upload.uuid.uuid4')
-    def test_upload_network_error(self, mock_uuid, mock_getenv, mock_put):
+    def test_upload_network_error(self, mock_uuid, mock_getenv, mock_get_aws_signed_request, mock_request):
         """Test upload fails on network error"""
         mock_uuid.return_value = uuid.UUID('12345678-1234-5678-1234-567812345678')
         mock_getenv.return_value = 'https://s3.amazonaws.com/bucket'
 
-        mock_put.side_effect = requests.exceptions.ConnectionError('Connection failed')
+        # Mock the AWS signed request
+        mock_prepared_request = Mock()
+        mock_prepared_request.method = 'PUT'
+        mock_prepared_request.url = 'https://s3.amazonaws.com/bucket/eduvision/12345678-1234-5678-1234-567812345678.jpeg'
+        mock_prepared_request.body = b'mock_body'
+        mock_prepared_request.headers = {'Content-Type': 'image/jpeg'}
+
+        mock_aws_request = Mock()
+        mock_aws_request.prepare.return_value = mock_prepared_request
+        mock_get_aws_signed_request.return_value = mock_aws_request
+
+        mock_request.side_effect = requests.exceptions.ConnectionError('Connection failed')
 
         img = Image.new('RGB', (100, 100))
         img.format = 'JPEG'
 
         with pytest.raises(ImageUploadError) as exc_info:
-            upload_image(img)
+            upload_image(img, "eduvision")
 
         assert "Network error" in str(exc_info.value)
 
-    @patch('lf_toolkit.evaluation.image_upload.requests.put')
+    @patch('lf_toolkit.evaluation.image_upload.requests.request')
+    @patch('lf_toolkit.evaluation.image_upload.get_aws_signed_request')
     @patch('lf_toolkit.evaluation.image_upload.os.getenv')
     @patch('lf_toolkit.evaluation.image_upload.uuid.uuid4')
-    def test_upload_timeout_error(self, mock_uuid, mock_getenv, mock_put):
+    def test_upload_timeout_error(self, mock_uuid, mock_getenv, mock_get_aws_signed_request, mock_request):
         """Test upload fails on timeout"""
         mock_uuid.return_value = uuid.UUID('12345678-1234-5678-1234-567812345678')
         mock_getenv.return_value = 'https://s3.amazonaws.com/bucket'
 
-        mock_put.side_effect = requests.exceptions.Timeout('Request timed out')
+        # Mock the AWS signed request
+        mock_prepared_request = Mock()
+        mock_prepared_request.method = 'PUT'
+        mock_prepared_request.url = 'https://s3.amazonaws.com/bucket/eduvision/12345678-1234-5678-1234-567812345678.jpeg'
+        mock_prepared_request.body = b'mock_body'
+        mock_prepared_request.headers = {'Content-Type': 'image/jpeg'}
+
+        mock_aws_request = Mock()
+        mock_aws_request.prepare.return_value = mock_prepared_request
+        mock_get_aws_signed_request.return_value = mock_aws_request
+
+        mock_request.side_effect = requests.exceptions.Timeout('Request timed out')
 
         img = Image.new('RGB', (100, 100))
         img.format = 'JPEG'
 
         with pytest.raises(ImageUploadError) as exc_info:
-            upload_image(img)
+            upload_image(img, "eduvision")
 
         assert "Network error" in str(exc_info.value)
 
-    @patch('lf_toolkit.evaluation.image_upload.requests.put')
+    @patch('lf_toolkit.evaluation.image_upload.requests.request')
+    @patch('lf_toolkit.evaluation.image_upload.get_aws_signed_request')
     @patch('lf_toolkit.evaluation.image_upload.os.getenv')
     @patch('lf_toolkit.evaluation.image_upload.uuid.uuid4')
-    def test_upload_image_no_format(self, mock_uuid, mock_getenv, mock_put):
+    def test_upload_image_no_format(self, mock_uuid, mock_getenv, mock_get_aws_signed_request, mock_request):
         """Test upload with image that has no format (defaults to PNG) uses UUID filename"""
         mock_uuid.return_value = uuid.UUID('12345678-1234-5678-1234-567812345678')
         mock_getenv.return_value = 'https://s3.amazonaws.com/bucket/'
 
+        # Mock the AWS signed request
+        mock_prepared_request = Mock()
+        mock_prepared_request.method = 'PUT'
+        mock_prepared_request.url = 'https://s3.amazonaws.com/bucket/eduvision/12345678-1234-5678-1234-567812345678.png'
+        mock_prepared_request.body = b'mock_body'
+        mock_prepared_request.headers = {'Content-Type': 'image/png'}
+
+        mock_aws_request = Mock()
+        mock_aws_request.prepare.return_value = mock_prepared_request
+        mock_get_aws_signed_request.return_value = mock_aws_request
+
         mock_response = Mock()
         mock_response.status_code = 200
-        mock_put.return_value = mock_response
+        mock_request.return_value = mock_response
 
         img = Image.new('RGB', (100, 100))
         img.format = None
 
-        result = upload_image(img)
-
-        assert result == 'https://s3.amazonaws.com/bucket/12345678-1234-5678-1234-567812345678.png'
-
-    @patch('lf_toolkit.evaluation.image_upload.requests.put')
-    @patch('lf_toolkit.evaluation.image_upload.os.getenv')
-    @patch('lf_toolkit.evaluation.image_upload.uuid.uuid4')
-    def test_upload_verifies_correct_file_uploaded(self, mock_uuid, mock_getenv, mock_put):
-        """Test that the correct file data is sent in upload request"""
-        mock_uuid.return_value = uuid.UUID('12345678-1234-5678-1234-567812345678')
-        mock_getenv.return_value = 'https://s3.amazonaws.com/bucket/'
-
-        mock_response = Mock()
-        mock_response.status_code = 200
-        mock_put.return_value = mock_response
-
-        img = Image.new('RGB', (100, 100), color='blue')
-        img.format = 'JPEG'
+        result = upload_image(img, "eduvision")
 
-        upload_image(img)
+        assert result == 'https://s3.amazonaws.com/bucket/eduvision/12345678-1234-5678-1234-567812345678.png'
 
 
 class TestExceptionHierarchy:

From fe90a965910c95ece76891a05b436509daef8d97 Mon Sep 17 00:00:00 2001
From: Marcus Messer <marcus@marcusm.co.uk>
Date: Wed, 11 Mar 2026 16:22:36 +0000
Subject: [PATCH 08/22] Added test suite for stream_io

---
 poetry.lock                |  24 +++-
 pyproject.toml             |   1 +
 tests/io/stream_io_test.py | 238 +++++++++++++++++++++++++++++++++++++
 3 files changed, 259 insertions(+), 4 deletions(-)
 create mode 100644 tests/io/stream_io_test.py

diff --git a/poetry.lock b/poetry.lock
index 58fff1f..3fcf25e 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -32,7 +32,7 @@ version = "4.6.0"
 description = "High level compatibility layer for multiple asynchronous event loop implementations"
 optional = false
 python-versions = ">=3.9"
-groups = ["main"]
+groups = ["main", "dev"]
 files = [
     {file = "anyio-4.6.0-py3-none-any.whl", hash = "sha256:c7d2e9d63e31599eeb636c8c5c03a7e108d73b345f064f1c19fdc87b79036a9a"},
     {file = "anyio-4.6.0.tar.gz", hash = "sha256:137b4559cbb034c477165047febb6ff83f390fc3b20bf181c1fc0a728cb8beeb"},
@@ -912,7 +912,7 @@ version = "3.10"
 description = "Internationalized Domain Names in Applications (IDNA)"
 optional = false
 python-versions = ">=3.6"
-groups = ["main"]
+groups = ["main", "dev"]
 files = [
     {file = "idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3"},
     {file = "idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9"},
@@ -1839,6 +1839,22 @@ pluggy = ">=1.5,<2"
 [package.extras]
 dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"]
 
+[[package]]
+name = "pytest-anyio"
+version = "0.0.0"
+description = "The pytest anyio plugin is built into anyio. You don't need this package."
+optional = false
+python-versions = "*"
+groups = ["dev"]
+files = [
+    {file = "pytest-anyio-0.0.0.tar.gz", hash = "sha256:b41234e9e9ad7ea1dbfefcc1d6891b23d5ef7c9f07ccf804c13a9cc338571fd3"},
+    {file = "pytest_anyio-0.0.0-py2.py3-none-any.whl", hash = "sha256:dc8b5c4741cb16ff90be37fddd585ca943ed12bbeb563de7ace6cd94441d8746"},
+]
+
+[package.dependencies]
+anyio = "*"
+pytest = "*"
+
 [[package]]
 name = "pytest-asyncio"
 version = "1.2.0"
@@ -2398,7 +2414,7 @@ version = "1.3.1"
 description = "Sniff out which async library your code is running under"
 optional = false
 python-versions = ">=3.7"
-groups = ["main"]
+groups = ["main", "dev"]
 files = [
     {file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"},
     {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"},
@@ -2848,4 +2864,4 @@ parsing = ["antlr4-python3-runtime", "lark", "latex2sympy"]
 [metadata]
 lock-version = "2.1"
 python-versions = "^3.11"
-content-hash = "9dc3f7e12199191cf41834205dbb2705b1e1e4b2dd851b1bb57e312d3c4e8a8b"
+content-hash = "828a10ad95eed705e623f10d27ef6d21568caf98e05636c91cca9246c34b7b58"
diff --git a/pyproject.toml b/pyproject.toml
index e06cf72..f1c6066 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -59,6 +59,7 @@ pytest-cov = "5.0.0"
 ##########################
 # extras
 ##########################
+pytest-anyio = "^0.0.0"
 
 [tool.poetry.extras]
 parsing = ["antlr4-python3-runtime", "lark", "latex2sympy"]
diff --git a/tests/io/stream_io_test.py b/tests/io/stream_io_test.py
new file mode 100644
index 0000000..d31e8e7
--- /dev/null
+++ b/tests/io/stream_io_test.py
@@ -0,0 +1,238 @@
+import pytest
+import anyio
+
+from lf_toolkit.io.stream_io import StreamIO, PrefixStreamIO, StreamServer
+
+
+@pytest.fixture
+def anyio_backend():
+    return "asyncio"
+
+
+
+def make_framed_message(payload: str) -> bytes:
+    """Wrap a JSON string in Content-Length framing."""
+    body = payload.encode("utf-8")
+    header = f"Content-Length: {len(body)}\r\n\r\n".encode("utf-8")
+    return header + body
+
+
+class FakeStreamIO(StreamIO):
+    """
+    Simulates a bidirectional byte stream.
+    Feed messages via feed(), read responses via responses.
+    """
+
+    def __init__(self):
+        self._buffer = b""
+        self.responses = []
+        self.close_count = 0
+
+    def feed(self, data: bytes):
+        self._buffer += data
+
+    async def read(self, size: int) -> bytes:
+        if not self._buffer:
+            raise anyio.EndOfStream()
+        chunk = self._buffer[:size]
+        self._buffer = self._buffer[size:]
+        return chunk
+
+    async def write(self, data: bytes):
+        self.responses.append(data)
+
+    async def close(self):
+        self.close_count += 1
+
+
+class EchoServer(StreamServer):
+    """
+    Concrete StreamServer for testing.
+    - run() is required by BaseServer (abstract) but not used in tests
+      since we call _handle_client directly.
+    - dispatch() is overridden to echo the raw request back, bypassing
+      the real JsonRpcHandler so tests stay self-contained.
+    """
+
+    async def run(self):
+        pass
+
+    async def dispatch(self, data: str) -> str:
+        return data
+
+
+class BuggyStreamServer(StreamServer):
+    """
+    Reproduces the original bug by overriding _handle_client with
+    close() inside the finally block.
+    """
+
+    async def run(self):
+        pass
+
+    async def dispatch(self, data: str) -> str:
+        return data
+
+    async def _handle_client(self, client: StreamIO):
+        io = self.wrap_io(client)
+        while True:
+            try:
+                data = await io.read(4096)
+                if not data:
+                    break
+                response = await self.dispatch(data.decode("utf-8"))
+                await io.write(response.encode("utf-8"))
+            except anyio.EndOfStream:
+                break
+            except anyio.ClosedResourceError:
+                break
+            except Exception as e:
+                print(f"Exception: {e}")
+            finally:
+                await client.close()  # BUG: closes after every message
+
+
+# ---------------------------------------------------------------------------
+# Tests
+# ---------------------------------------------------------------------------
+
+class TestStreamServer:
+
+    @pytest.fixture
+    def stream(self):
+        return FakeStreamIO()
+
+    @pytest.fixture
+    def server(self):
+        return EchoServer()
+
+    @pytest.fixture
+    def buggy_server(self):
+        return BuggyStreamServer()
+
+    @pytest.mark.anyio
+    async def test_handles_multiple_messages(self, stream, server):
+        """
+        Core fix test: the server must process multiple messages in a single
+        session without closing the connection between them.
+        """
+        stream.feed(make_framed_message('{"command": "eval", "id": 1}'))
+        stream.feed(make_framed_message('{"command": "eval", "id": 2}'))
+        stream.feed(make_framed_message('{"command": "eval", "id": 3}'))
+
+        await server._handle_client(stream)
+
+        assert len(stream.responses) == 3, (
+            f"Expected 3 responses but got {len(stream.responses)}. "
+            "Server likely closed the connection after the first message."
+        )
+
+    @pytest.mark.anyio
+    async def test_closes_only_once(self, stream, server):
+        """
+        The client connection should be closed exactly once — after the loop
+        exits — not once per message.
+        """
+        stream.feed(make_framed_message('{"id": 1}'))
+        stream.feed(make_framed_message('{"id": 2}'))
+
+        await server._handle_client(stream)
+
+        assert stream.close_count == 1, (
+            f"Expected close() to be called once, but it was called "
+            f"{stream.close_count} times. This is the original bug."
+        )
+
+    @pytest.mark.anyio
+    async def test_buggy_server_closes_after_each_message(self, stream, buggy_server):
+        """
+        Demonstrates the original bug: close() in the finally block causes
+        the stream to be closed after every message, not just at the end.
+        """
+        stream.feed(make_framed_message('{"id": 1}'))
+        stream.feed(make_framed_message('{"id": 2}'))
+
+        await buggy_server._handle_client(stream)
+
+        assert stream.close_count > 1, (
+            "Expected buggy server to call close() more than once, "
+            "confirming the bug exists in the original code."
+        )
+
+    @pytest.mark.anyio
+    async def test_single_message(self, stream, server):
+        """A single message round-trip should work correctly."""
+        payload = '{"command": "eval", "response": "test"}'
+        stream.feed(make_framed_message(payload))
+
+        await server._handle_client(stream)
+
+        assert len(stream.responses) == 1
+        assert payload.encode() in stream.responses[0]
+
+    @pytest.mark.anyio
+    async def test_closes_on_empty_stream(self, stream, server):
+        """Server should exit cleanly when the stream ends with no data."""
+        await server._handle_client(stream)
+
+        assert stream.close_count == 1
+
+    @pytest.mark.anyio
+    async def test_response_content(self, stream, server):
+        """Verify the actual response content is correct across messages."""
+        messages = [
+            '{"id": 1, "command": "eval"}',
+            '{"id": 2, "command": "preview"}',
+        ]
+
+        for msg in messages:
+            stream.feed(make_framed_message(msg))
+
+        await server._handle_client(stream)
+
+        assert len(stream.responses) == 2
+        for i, msg in enumerate(messages):
+            assert msg.encode() in stream.responses[i]
+
+
+class TestPrefixStreamIO:
+
+    @pytest.fixture
+    def stream(self):
+        return FakeStreamIO()
+
+    @pytest.mark.anyio
+    async def test_framing_round_trip(self, stream):
+        """PrefixStreamIO correctly encodes and decodes Content-Length framing."""
+        prefix_io = PrefixStreamIO(stream)
+
+        payload = b'{"command": "eval"}'
+        header = f"Content-Length: {len(payload)}\r\n\r\n".encode()
+        stream.feed(header + payload)
+
+        result = await prefix_io.read(4096)
+        assert result == payload
+
+    @pytest.mark.anyio
+    async def test_write_includes_content_length_header(self, stream):
+        """PrefixStreamIO write includes correct Content-Length header."""
+        prefix_io = PrefixStreamIO(stream)
+
+        payload = b'{"result": "ok"}'
+        await prefix_io.write(payload)
+
+        assert len(stream.responses) == 1
+        written = stream.responses[0]
+        assert b"Content-Length:" in written
+        assert f"{len(payload)}".encode() in written
+        assert payload in written
+
+    @pytest.mark.anyio
+    async def test_raises_on_missing_content_length(self, stream):
+        """PrefixStreamIO should raise if Content-Length header is absent."""
+        prefix_io = PrefixStreamIO(stream)
+
+        stream.feed(b"X-Custom-Header: something\r\n\r\n")
+
+        with pytest.raises(ValueError, match="Content-Length"):
+            await prefix_io.read(4096)

From d0a14446fa301f2b8929fbe0c5621a18a36b032b Mon Sep 17 00:00:00 2001
From: Marcus Messer <marcus@marcusm.co.uk>
Date: Wed, 11 Mar 2026 16:31:45 +0000
Subject: [PATCH 09/22] Switched stream_io tests to use the real StdioServer

---
 tests/io/stream_io_test.py | 103 ++++++++-----------------------------
 1 file changed, 21 insertions(+), 82 deletions(-)

diff --git a/tests/io/stream_io_test.py b/tests/io/stream_io_test.py
index d31e8e7..667c779 100644
--- a/tests/io/stream_io_test.py
+++ b/tests/io/stream_io_test.py
@@ -2,6 +2,7 @@
 import anyio
 
 from lf_toolkit.io.stream_io import StreamIO, PrefixStreamIO, StreamServer
+from lf_toolkit.io.stdio_server import StdioServer
 
 
 @pytest.fixture
@@ -9,6 +10,9 @@ def anyio_backend():
     return "asyncio"
 
 
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
 
 def make_framed_message(payload: str) -> bytes:
     """Wrap a JSON string in Content-Length framing."""
@@ -45,58 +49,11 @@ async def close(self):
         self.close_count += 1
 
 
-class EchoServer(StreamServer):
-    """
-    Concrete StreamServer for testing.
-    - run() is required by BaseServer (abstract) but not used in tests
-      since we call _handle_client directly.
-    - dispatch() is overridden to echo the raw request back, bypassing
-      the real JsonRpcHandler so tests stay self-contained.
-    """
-
-    async def run(self):
-        pass
-
-    async def dispatch(self, data: str) -> str:
-        return data
-
-
-class BuggyStreamServer(StreamServer):
-    """
-    Reproduces the original bug by overriding _handle_client with
-    close() inside the finally block.
-    """
-
-    async def run(self):
-        pass
-
-    async def dispatch(self, data: str) -> str:
-        return data
-
-    async def _handle_client(self, client: StreamIO):
-        io = self.wrap_io(client)
-        while True:
-            try:
-                data = await io.read(4096)
-                if not data:
-                    break
-                response = await self.dispatch(data.decode("utf-8"))
-                await io.write(response.encode("utf-8"))
-            except anyio.EndOfStream:
-                break
-            except anyio.ClosedResourceError:
-                break
-            except Exception as e:
-                print(f"Exception: {e}")
-            finally:
-                await client.close()  # BUG: closes after every message
-
-
 # ---------------------------------------------------------------------------
 # Tests
 # ---------------------------------------------------------------------------
 
-class TestStreamServer:
+class TestStdioServer:
 
     @pytest.fixture
     def stream(self):
@@ -104,11 +61,7 @@ def stream(self):
 
     @pytest.fixture
     def server(self):
-        return EchoServer()
-
-    @pytest.fixture
-    def buggy_server(self):
-        return BuggyStreamServer()
+        return StdioServer()
 
     @pytest.mark.anyio
     async def test_handles_multiple_messages(self, stream, server):
@@ -116,9 +69,9 @@ async def test_handles_multiple_messages(self, stream, server):
         Core fix test: the server must process multiple messages in a single
         session without closing the connection between them.
         """
-        stream.feed(make_framed_message('{"command": "eval", "id": 1}'))
-        stream.feed(make_framed_message('{"command": "eval", "id": 2}'))
-        stream.feed(make_framed_message('{"command": "eval", "id": 3}'))
+        stream.feed(make_framed_message('{"jsonrpc":"2.0","method":"eval","params":{},"id":1}'))
+        stream.feed(make_framed_message('{"jsonrpc":"2.0","method":"eval","params":{},"id":2}'))
+        stream.feed(make_framed_message('{"jsonrpc":"2.0","method":"eval","params":{},"id":3}'))
 
         await server._handle_client(stream)
 
@@ -133,8 +86,8 @@ async def test_closes_only_once(self, stream, server):
         The client connection should be closed exactly once — after the loop
         exits — not once per message.
         """
-        stream.feed(make_framed_message('{"id": 1}'))
-        stream.feed(make_framed_message('{"id": 2}'))
+        stream.feed(make_framed_message('{"jsonrpc":"2.0","method":"eval","params":{},"id":1}'))
+        stream.feed(make_framed_message('{"jsonrpc":"2.0","method":"eval","params":{},"id":2}'))
 
         await server._handle_client(stream)
 
@@ -143,32 +96,17 @@ async def test_closes_only_once(self, stream, server):
             f"{stream.close_count} times. This is the original bug."
         )
 
-    @pytest.mark.anyio
-    async def test_buggy_server_closes_after_each_message(self, stream, buggy_server):
-        """
-        Demonstrates the original bug: close() in the finally block causes
-        the stream to be closed after every message, not just at the end.
-        """
-        stream.feed(make_framed_message('{"id": 1}'))
-        stream.feed(make_framed_message('{"id": 2}'))
-
-        await buggy_server._handle_client(stream)
-
-        assert stream.close_count > 1, (
-            "Expected buggy server to call close() more than once, "
-            "confirming the bug exists in the original code."
-        )
-
     @pytest.mark.anyio
     async def test_single_message(self, stream, server):
         """A single message round-trip should work correctly."""
-        payload = '{"command": "eval", "response": "test"}'
-        stream.feed(make_framed_message(payload))
+        stream.feed(make_framed_message('{"jsonrpc":"2.0","method":"eval","params":{},"id":1}'))
 
         await server._handle_client(stream)
 
         assert len(stream.responses) == 1
-        assert payload.encode() in stream.responses[0]
+        # Response is a framed JSON-RPC envelope
+        assert b"Content-Length:" in stream.responses[0]
+        assert b"jsonrpc" in stream.responses[0]
 
     @pytest.mark.anyio
     async def test_closes_on_empty_stream(self, stream, server):
@@ -179,10 +117,10 @@ async def test_closes_on_empty_stream(self, stream, server):
 
     @pytest.mark.anyio
     async def test_response_content(self, stream, server):
-        """Verify the actual response content is correct across messages."""
+        """Verify a response is returned for each message sent."""
         messages = [
-            '{"id": 1, "command": "eval"}',
-            '{"id": 2, "command": "preview"}',
+            '{"jsonrpc":"2.0","method":"eval","params":{},"id":1}',
+            '{"jsonrpc":"2.0","method":"preview","params":{},"id":2}',
         ]
 
         for msg in messages:
@@ -191,8 +129,9 @@ async def test_response_content(self, stream, server):
         await server._handle_client(stream)
 
         assert len(stream.responses) == 2
-        for i, msg in enumerate(messages):
-            assert msg.encode() in stream.responses[i]
+        for response in stream.responses:
+            assert b"Content-Length:" in response
+            assert b"jsonrpc" in response
 
 
 class TestPrefixStreamIO:

From 30809680464bf4091a10488804a24fe0cf0c37a8 Mon Sep 17 00:00:00 2001
From: Marcus Messer <marcus@marcusm.co.uk>
Date: Wed, 11 Mar 2026 16:32:46 +0000
Subject: [PATCH 10/22] Changed how Stdio closes

---
 lf_toolkit/io/stream_io.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lf_toolkit/io/stream_io.py b/lf_toolkit/io/stream_io.py
index 34c835f..265ee9a 100644
--- a/lf_toolkit/io/stream_io.py
+++ b/lf_toolkit/io/stream_io.py
@@ -91,6 +91,7 @@ async def _handle_client(self, client: StreamIO):
                     break
 
                 response = await self.dispatch(data.decode("utf-8"))
+                print(f"Responding: {response}")  # and this
 
                 await io.write(response.encode("utf-8"))
             except anyio.EndOfStream:
@@ -101,5 +102,4 @@ async def _handle_client(self, client: StreamIO):
                 break
             except Exception as e:
                 print(f"Exception: {e}")
-            finally:
-                await client.close()
+        await client.close()

From 286ec2b52110224f8ab8c64737ad34810b31d1ed Mon Sep 17 00:00:00 2001
From: Marcus Messer <marcus@marcusm.co.uk>
Date: Wed, 11 Mar 2026 16:50:58 +0000
Subject: [PATCH 11/22] Changed exception to break instead of printing

---
 lf_toolkit/io/stream_io.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lf_toolkit/io/stream_io.py b/lf_toolkit/io/stream_io.py
index 265ee9a..fffe91d 100644
--- a/lf_toolkit/io/stream_io.py
+++ b/lf_toolkit/io/stream_io.py
@@ -101,5 +101,6 @@ async def _handle_client(self, client: StreamIO):
                 # print("Client disconnected")
                 break
             except Exception as e:
-                print(f"Exception: {e}")
+                # print(f"Exception: {e}")
+                break
         await client.close()

From 313a56d3989bf97d51b6f88c16270a93d008e10c Mon Sep 17 00:00:00 2001
From: Marcus Messer <marcus@marcusm.co.uk>
Date: Wed, 11 Mar 2026 16:50:58 +0000
Subject: [PATCH 12/22] Added debug statements

---
 lf_toolkit/io/stream_io.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lf_toolkit/io/stream_io.py b/lf_toolkit/io/stream_io.py
index 265ee9a..fffe91d 100644
--- a/lf_toolkit/io/stream_io.py
+++ b/lf_toolkit/io/stream_io.py
@@ -101,5 +101,6 @@ async def _handle_client(self, client: StreamIO):
                 # print("Client disconnected")
                 break
             except Exception as e:
-                print(f"Exception: {e}")
+                # print(f"Exception: {e}")
+                break
         await client.close()

From 3056704b9575a4191d40a98d72aa5792613e2b34 Mon Sep 17 00:00:00 2001
From: Marcus Messer <marcus@marcusm.co.uk>
Date: Wed, 11 Mar 2026 17:12:40 +0000
Subject: [PATCH 13/22] Added debug statements

---
 lf_toolkit/io/stream_io.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/lf_toolkit/io/stream_io.py b/lf_toolkit/io/stream_io.py
index fffe91d..b2ddd3e 100644
--- a/lf_toolkit/io/stream_io.py
+++ b/lf_toolkit/io/stream_io.py
@@ -84,23 +84,27 @@ async def _handle_client(self, client: StreamIO):
 
         while True:
             try:
+                import sys
+                print("waiting for data...", file=sys.stderr, flush=True)
                 data = await io.read(4096)
+                print(f"got data: {data[:80]}", file=sys.stderr, flush=True)
 
                 if not data:
-                    # print("Received empty data")
                     break
 
+                print("dispatching...", file=sys.stderr, flush=True)
                 response = await self.dispatch(data.decode("utf-8"))
-                print(f"Responding: {response}")  # and this
+                print(f"got response: {str(response)[:80]}", file=sys.stderr, flush=True)
 
                 await io.write(response.encode("utf-8"))
+                print("wrote response", file=sys.stderr, flush=True)
             except anyio.EndOfStream:
-                # print("Client disconnected")
                 break
             except anyio.ClosedResourceError:
-                # print("Client disconnected")
                 break
             except Exception as e:
-                # print(f"Exception: {e}")
+                import traceback
+                traceback.print_exc(file=sys.stderr)
                 break
+
         await client.close()

From abdb46b94bf2ccbfdb8041d310bd7b0d30c8705f Mon Sep 17 00:00:00 2001
From: Marcus Messer <marcus@marcusm.co.uk>
Date: Wed, 11 Mar 2026 17:20:50 +0000
Subject: [PATCH 14/22] Moved StdioClient instantiation to run

---
 lf_toolkit/io/stdio_server.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lf_toolkit/io/stdio_server.py b/lf_toolkit/io/stdio_server.py
index cbffea2..b47ca77 100644
--- a/lf_toolkit/io/stdio_server.py
+++ b/lf_toolkit/io/stdio_server.py
@@ -37,10 +37,11 @@ class StdioServer(StreamServer):
 
     def __init__(self, handler: Optional[Handler] = None):
         super().__init__(handler)
-        self._client = StdioClient()
+
 
     def wrap_io(self, client: StreamIO) -> StreamIO:
         return PrefixStreamIO(client)
 
     async def run(self):
+        self._client = StdioClient()
         await self._handle_client(self._client)

From a8ec7290a02df1d7ec0501d2f7b20a82f3519ee2 Mon Sep 17 00:00:00 2001
From: Marcus Messer <marcus@marcusm.co.uk>
Date: Wed, 11 Mar 2026 17:57:49 +0000
Subject: [PATCH 15/22] Improved `PrefixStreamIO.read()` to handle partial
 chunk reads and added tests for large payloads and partial chunks.

---
 lf_toolkit/io/stream_io.py |  9 +++++----
 tests/io/stream_io_test.py | 28 ++++++++++++++++++++++++++++
 2 files changed, 33 insertions(+), 4 deletions(-)

diff --git a/lf_toolkit/io/stream_io.py b/lf_toolkit/io/stream_io.py
index b2ddd3e..223465b 100644
--- a/lf_toolkit/io/stream_io.py
+++ b/lf_toolkit/io/stream_io.py
@@ -63,10 +63,11 @@ async def read(self, size: int) -> bytes:
         if content_length == 0:
             raise ValueError("Content-Length header not found or is zero")
 
-        if content_length > size:
-            raise ValueError("Content-Length is larger than the read size")
-
-        return await self.base.read(content_length)
+        data = b""
+        while len(data) < content_length:
+            chunk = await self.base.read(content_length - len(data))
+            data += chunk
+        return data
 
     async def write(self, data: bytes):
         response_headers_str = f"Content-Length: {len(data)}\r\n\r\n"
diff --git a/tests/io/stream_io_test.py b/tests/io/stream_io_test.py
index 667c779..e5be56a 100644
--- a/tests/io/stream_io_test.py
+++ b/tests/io/stream_io_test.py
@@ -175,3 +175,31 @@ async def test_raises_on_missing_content_length(self, stream):
 
         with pytest.raises(ValueError, match="Content-Length"):
             await prefix_io.read(4096)
+
+    @pytest.mark.anyio
+    async def test_large_payload_does_not_raise(self, stream):
+        """Payloads larger than 4096 bytes must be read without raising."""
+        prefix_io = PrefixStreamIO(stream)
+
+        payload = b"x" * 8192
+        header = f"Content-Length: {len(payload)}\r\n\r\n".encode()
+        stream.feed(header + payload)
+
+        result = await prefix_io.read(4096)
+        assert result == payload
+
+    @pytest.mark.anyio
+    async def test_exact_read_of_partial_chunks(self, stream):
+        """All bytes are read even when the underlying stream delivers chunks smaller than content_length."""
+        prefix_io = PrefixStreamIO(stream)
+
+        payload = b"a" * 100
+        header = f"Content-Length: {len(payload)}\r\n\r\n".encode()
+        # Feed header and payload as separate tiny chunks (10 bytes each)
+        full = header + payload
+        for i in range(0, len(full), 10):
+            stream.feed(full[i:i + 10])
+
+        result = await prefix_io.read(4096)
+        assert result == payload
+        assert len(result) == 100

From 7f76f1ba8fb32df78f6acd8a3c81dbde7e47f402 Mon Sep 17 00:00:00 2001
From: Marcus Messer <marcus@marcusm.co.uk>
Date: Thu, 12 Mar 2026 12:52:42 +0000
Subject: [PATCH 16/22] Updated `AWSRequest` to use `data` instead of `buffer`
 in image upload logic

---
 lf_toolkit/evaluation/image_upload.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lf_toolkit/evaluation/image_upload.py b/lf_toolkit/evaluation/image_upload.py
index dff9233..d0d69f4 100644
--- a/lf_toolkit/evaluation/image_upload.py
+++ b/lf_toolkit/evaluation/image_upload.py
@@ -101,7 +101,7 @@ def get_aws_signed_request(full_url, buffer, mime_type):
     aws_request = AWSRequest(
         method='PUT',
         url=full_url,
-        data=buffer,
+        data=data,
         headers=headers
     )
 

From 7181b62225aca760b41c42b820a7b09fc8ceba9d Mon Sep 17 00:00:00 2001
From: Marcus Messer <marcus@marcusm.co.uk>
Date: Thu, 12 Mar 2026 13:30:55 +0000
Subject: [PATCH 17/22] Refactored `jsonrpc_handler` to use `Command` type and
 updated parameter handling

---
 lf_toolkit/io/rpc_handler.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/lf_toolkit/io/rpc_handler.py b/lf_toolkit/io/rpc_handler.py
index fe3fadb..354ae95 100644
--- a/lf_toolkit/io/rpc_handler.py
+++ b/lf_toolkit/io/rpc_handler.py
@@ -4,6 +4,7 @@
 from jsonrpcserver import Success
 from jsonrpcserver import async_dispatch
 
+from ..shared import Command
 from .handler import Handler
 
 
@@ -23,10 +24,10 @@ async def dispatch(self, req: str) -> str:
         )
 
 
-def jsonrpc_handler(handler: Handler, name: str):
+def jsonrpc_handler(handler: Handler, name: Command):
     async def wrapped(req: dict):
         try:
-            result = await handler.handle(name, req)
+            result = await handler.handle(name, {"params": req})
             return Success(result)
         except Exception as e:
             return Error(0, str(e), e)

From 47badae3312b54857f2f30a7170955ee3de61437 Mon Sep 17 00:00:00 2001
From: Marcus Messer <marcus@marcusm.co.uk>
Date: Thu, 12 Mar 2026 15:57:49 +0000
Subject: [PATCH 18/22] Updated `StdioServer` to use binary streams for
 stdin/stdout and added subprocess-based test for framed JSON-RPC
 communication

---
 lf_toolkit/io/stdio_server.py |  5 +++--
 tests/io/stream_io_test.py    | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 35 insertions(+), 2 deletions(-)

diff --git a/lf_toolkit/io/stdio_server.py b/lf_toolkit/io/stdio_server.py
index b47ca77..c6d481d 100644
--- a/lf_toolkit/io/stdio_server.py
+++ b/lf_toolkit/io/stdio_server.py
@@ -16,8 +16,8 @@ class StdioClient(StreamIO):
 
     def __init__(self):
         self.stream = StapledByteStream(
-            FileWriteStream(sys.stdout),
-            FileReadStream(sys.stdin),
+            FileWriteStream(sys.stdout.buffer),
+            FileReadStream(sys.stdin.buffer),
         )
 
     async def read(self, size: int) -> bytes:
@@ -43,5 +43,6 @@ def wrap_io(self, client: StreamIO) -> StreamIO:
         return PrefixStreamIO(client)
 
     async def run(self):
+        print("StdioServer started", file=sys.stderr, flush=True)
         self._client = StdioClient()
         await self._handle_client(self._client)
diff --git a/tests/io/stream_io_test.py b/tests/io/stream_io_test.py
index e5be56a..09bb277 100644
--- a/tests/io/stream_io_test.py
+++ b/tests/io/stream_io_test.py
@@ -1,3 +1,6 @@
+import subprocess
+import sys
+
 import pytest
 import anyio
 
@@ -203,3 +206,32 @@ async def test_exact_read_of_partial_chunks(self, stream):
         result = await prefix_io.read(4096)
         assert result == payload
         assert len(result) == 100
+
+
+class TestStdioServerSubprocess:
+
+    def test_binary_pipe_roundtrip(self):
+        """
+        Spawn the StdioServer as a subprocess and pipe a framed JSON-RPC
+        request to its stdin (as raw bytes). Confirms sys.stdin.buffer /
+        sys.stdout.buffer is used — text-mode streams would break this.
+        """
+        msg = b'{"jsonrpc":"2.0","id":1,"method":"eval","params":{}}'
+        frame = f"Content-Length: {len(msg)}\r\n\r\n".encode() + msg
+
+        proc = subprocess.Popen(
+            [sys.executable, "-c",
+             "import anyio; from lf_toolkit.io.stdio_server import StdioServer; "
+             "anyio.run(StdioServer().run)"],
+            stdin=subprocess.PIPE,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+        )
+
+        stdout, stderr = proc.communicate(input=frame, timeout=5)
+
+        # Must receive a framed response
+        assert b"Content-Length:" in stdout, (
+            f"No framed response received.\nstderr: {stderr.decode()}"
+        )
+        assert b"jsonrpc" in stdout

From e240d70bb1131cedb9b5ee51e2a1244faadf667e Mon Sep 17 00:00:00 2001
From: Marcus Messer <marcus@marcusm.co.uk>
Date: Fri, 13 Mar 2026 08:23:06 +0000
Subject: [PATCH 19/22] Fixed `StdioServer.write` to flush stdout buffer in a
 separate thread using `anyio.to_thread.run_sync`.

---
 lf_toolkit/io/stdio_server.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lf_toolkit/io/stdio_server.py b/lf_toolkit/io/stdio_server.py
index c6d481d..d35f232 100644
--- a/lf_toolkit/io/stdio_server.py
+++ b/lf_toolkit/io/stdio_server.py
@@ -2,6 +2,7 @@
 
 from typing import Optional
 
+import anyio
 from anyio.streams.file import FileReadStream
 from anyio.streams.file import FileWriteStream
 from anyio.streams.stapled import StapledByteStream
@@ -25,7 +26,7 @@ async def read(self, size: int) -> bytes:
 
     async def write(self, data: bytes):
         await self.stream.send(data)
-        await self.stream.flush()
+        await anyio.to_thread.run_sync(sys.stdout.buffer.flush)
 
     async def close(self):
         await self.stream.aclose()

From f90147c1621c432547aaa3c5fce1616d0bbfe61e Mon Sep 17 00:00:00 2001
From: Marcus Messer <marcus@marcusm.co.uk>
Date: Fri, 13 Mar 2026 10:14:18 +0000
Subject: [PATCH 20/22] Refactored `StdioServer` and `StdioClient` to pass and
 use captured `stdout` buffer, ensuring isolated binary protocol handling on
 `stdout`.

---
 lf_toolkit/io/stdio_server.py | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/lf_toolkit/io/stdio_server.py b/lf_toolkit/io/stdio_server.py
index d35f232..7400c17 100644
--- a/lf_toolkit/io/stdio_server.py
+++ b/lf_toolkit/io/stdio_server.py
@@ -1,5 +1,6 @@
 import sys
 
+from typing import BinaryIO
 from typing import Optional
 
 import anyio
@@ -15,9 +16,10 @@
 
 class StdioClient(StreamIO):
 
-    def __init__(self):
+    def __init__(self, stdout_buffer: BinaryIO):
+        self._stdout_buffer = stdout_buffer
         self.stream = StapledByteStream(
-            FileWriteStream(sys.stdout.buffer),
+            FileWriteStream(stdout_buffer),
             FileReadStream(sys.stdin.buffer),
         )
 
@@ -26,7 +28,7 @@ async def read(self, size: int) -> bytes:
 
     async def write(self, data: bytes):
         await self.stream.send(data)
-        await anyio.to_thread.run_sync(sys.stdout.buffer.flush)
+        await anyio.to_thread.run_sync(self._stdout_buffer.flush)
 
     async def close(self):
         await self.stream.aclose()
@@ -35,15 +37,20 @@ async def close(self):
 class StdioServer(StreamServer):
 
     _client: StdioClient
+    _stdout_buffer: BinaryIO
 
     def __init__(self, handler: Optional[Handler] = None):
         super().__init__(handler)
-
+        # Capture the real stdout buffer before redirecting sys.stdout.
+        # Any print() in user code after this point goes to stderr,
+        # keeping the binary Content-Length-framed protocol on fd 1 clean.
+        self._stdout_buffer = sys.stdout.buffer
+        sys.stdout = sys.stderr
 
     def wrap_io(self, client: StreamIO) -> StreamIO:
         return PrefixStreamIO(client)
 
     async def run(self):
         print("StdioServer started", file=sys.stderr, flush=True)
-        self._client = StdioClient()
-        await self._handle_client(self._client)
+        self._client = StdioClient(self._stdout_buffer)
+        await self._handle_client(self._client)
\ No newline at end of file

From 1c71198bbe61b9c108cdadc560847eda820de05d Mon Sep 17 00:00:00 2001
From: Marcus Messer <marcus@marcusm.co.uk>
Date: Fri, 13 Mar 2026 10:14:25 +0000
Subject: [PATCH 21/22] Renamed `file_server.py` to `file_server_test.py` for
 clarity and consistency in test file naming.

---
 tests/io/{file_server.py => file_server_test.py} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename tests/io/{file_server.py => file_server_test.py} (100%)

diff --git a/tests/io/file_server.py b/tests/io/file_server_test.py
similarity index 100%
rename from tests/io/file_server.py
rename to tests/io/file_server_test.py

From 140cf493bd3ab32aa379953fd4ab940d45cb1ecd Mon Sep 17 00:00:00 2001
From: Marcus Messer <marcus@marcusm.co.uk>
Date: Fri, 13 Mar 2026 13:12:53 +0000
Subject: [PATCH 22/22] Removed explicit `stdout_buffer` handling in
 `StdioServer` and `StdioClient`, directly using `sys.stdout.buffer` for
 simplicity and consistency.

---
 lf_toolkit/io/stdio_server.py | 17 +++++------------
 1 file changed, 5 insertions(+), 12 deletions(-)

diff --git a/lf_toolkit/io/stdio_server.py b/lf_toolkit/io/stdio_server.py
index 7400c17..3d6d567 100644
--- a/lf_toolkit/io/stdio_server.py
+++ b/lf_toolkit/io/stdio_server.py
@@ -1,6 +1,5 @@
 import sys
 
-from typing import BinaryIO
 from typing import Optional
 
 import anyio
@@ -16,10 +15,10 @@
 
 class StdioClient(StreamIO):
 
-    def __init__(self, stdout_buffer: BinaryIO):
-        self._stdout_buffer = stdout_buffer
+    def __init__(self):
+        self._stdout_buffer = sys.stdout.buffer
         self.stream = StapledByteStream(
-            FileWriteStream(stdout_buffer),
+            FileWriteStream(self._stdout_buffer),
             FileReadStream(sys.stdin.buffer),
         )
 
@@ -37,20 +36,14 @@ async def close(self):
 class StdioServer(StreamServer):
 
     _client: StdioClient
-    _stdout_buffer: BinaryIO
 
     def __init__(self, handler: Optional[Handler] = None):
         super().__init__(handler)
-        # Capture the real stdout buffer before redirecting sys.stdout.
-        # Any print() in user code after this point goes to stderr,
-        # keeping the binary Content-Length-framed protocol on fd 1 clean.
-        self._stdout_buffer = sys.stdout.buffer
-        sys.stdout = sys.stderr
 
     def wrap_io(self, client: StreamIO) -> StreamIO:
         return PrefixStreamIO(client)
 
     async def run(self):
         print("StdioServer started", file=sys.stderr, flush=True)
-        self._client = StdioClient(self._stdout_buffer)
-        await self._handle_client(self._client)
\ No newline at end of file
+        self._client = StdioClient()
+        await self._handle_client(self._client)