@@ -14,6 +14,7 @@
 from io import BytesIO
 import aiohttp
 import PIL.Image
+import base64

 # Import LangChain components with Vertex AI
 from langchain_google_vertexai import ChatVertexAI
@@ -136,21 +137,31 @@ def generate_vision_with_langchain(img, prompt):
     """
     Generate an image vision result using LangChain with a Vertex AI model.
     """
-    # Convert PIL Image to bytes
+    # Convert PIL Image to base64 encoded string
     img_byte_arr = BytesIO()
     img.save(img_byte_arr, format=img.format or 'JPEG')
     img_bytes = img_byte_arr.getvalue()
+    base64_image = base64.b64encode(img_bytes).decode('utf-8')

-    # Create a message with both text and image using proper Vertex AI format
+    # Create a message with both text and image using the correct Vertex AI format
     message = HumanMessage(
         content=[
             {"type": "text", "text": prompt},
-            # Use blob for binary data
-            {"type": "image_url", "image_url": {"blob": img_bytes}}
+            {
+                "type": "image",
+                "source": {
+                    "type": "base64",
+                    "media_type": "image/jpeg",
+                    "data": base64_image
+                }
+            }
         ]
     )

-    # Call the vision model
-    response = vision_model.invoke([message])
-
-    return response.content
+    try:
+        # Call the vision model
+        response = vision_model.invoke([message])
+        return response.content
+    except Exception as e:
+        print(f"Error in vision model: {str(e)}")
+        return f"I encountered an error processing this image: {str(e)}"
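
Two notes on the replacement payload. First, `media_type` is hardcoded to `"image/jpeg"` while `img.save(..., format=img.format or 'JPEG')` falls back to the image's own format, so a PNG input would be mislabeled; saving unconditionally with `format='JPEG'` would keep the two consistent. Second, the block shown is the Anthropic-style base64 source format, and whether `ChatVertexAI` accepts it depends on the `langchain-google-vertexai` version. Below is a minimal alternative sketch using the OpenAI-style data-URL form that LangChain documents for Gemini models; it assumes the same `prompt`, `base64_image`, and `vision_model` variables as in the patched function.

```python
from langchain_core.messages import HumanMessage

# OpenAI-style content block: the image travels as a base64 data URL.
message = HumanMessage(
    content=[
        {"type": "text", "text": prompt},
        {
            "type": "image_url",
            "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
        },
    ]
)
response = vision_model.invoke([message])
```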