diff --git a/flowllm/core/enumeration/__init__.py b/flowllm/core/enumeration/__init__.py index f765040..e234c5c 100644 --- a/flowllm/core/enumeration/__init__.py +++ b/flowllm/core/enumeration/__init__.py @@ -1,12 +1,14 @@ """Core enumeration module.""" from .chunk_enum import ChunkEnum +from .content_block_type import ContentBlockType from .http_enum import HttpEnum from .registry_enum import RegistryEnum from .role import Role __all__ = [ "ChunkEnum", + "ContentBlockType", "HttpEnum", "RegistryEnum", "Role", diff --git a/flowllm/core/enumeration/content_block_type.py b/flowllm/core/enumeration/content_block_type.py new file mode 100644 index 0000000..cc9a42f --- /dev/null +++ b/flowllm/core/enumeration/content_block_type.py @@ -0,0 +1,12 @@ +"""Content block type enumeration for multimodal content.""" + +from enum import Enum + + +class ContentBlockType(str, Enum): + """Enumeration of content block types in multimodal responses.""" + + TEXT = "text" + IMAGE_URL = "image_url" + AUDIO = "audio" + VIDEO = "video" diff --git a/flowllm/core/llm/lite_llm.py b/flowllm/core/llm/lite_llm.py index 62c1ecc..24f2962 100644 --- a/flowllm/core/llm/lite_llm.py +++ b/flowllm/core/llm/lite_llm.py @@ -110,7 +110,8 @@ def stream_chat( **self.kwargs, **kwargs, } - logger.info(f"LiteLLM.stream_chat: {chat_kwargs}") + log_kwargs = {k: v for k, v in chat_kwargs.items() if k != "messages"} + logger.info(f"LiteLLM.stream_chat: {log_kwargs}") for i in range(self.max_retries): try: @@ -216,7 +217,8 @@ async def astream_chat( **self.kwargs, **kwargs, } - logger.info(f"LiteLLM.astream_chat: {chat_kwargs}") + log_kwargs = {k: v for k, v in chat_kwargs.items() if k != "messages"} + logger.info(f"LiteLLM.astream_chat: {log_kwargs}") for i in range(self.max_retries): try: diff --git a/flowllm/core/llm/openai_compatible_llm.py b/flowllm/core/llm/openai_compatible_llm.py index 5f28088..22ec71e 100644 --- a/flowllm/core/llm/openai_compatible_llm.py +++ b/flowllm/core/llm/openai_compatible_llm.py @@ 
-106,7 +106,8 @@ def stream_chat( **self.kwargs, **kwargs, } - logger.info(f"OpenAICompatibleLLM.stream_chat: {chat_kwargs}") + log_kwargs = {k: v for k, v in chat_kwargs.items() if k != "messages"} + logger.info(f"OpenAICompatibleLLM.stream_chat: {log_kwargs}") for i in range(self.max_retries): try: @@ -207,7 +208,8 @@ async def astream_chat( **self.kwargs, **kwargs, } - logger.info(f"OpenAICompatibleLLM.astream_chat: {chat_kwargs}") + log_kwargs = {k: v for k, v in chat_kwargs.items() if k != "messages"} + logger.info(f"OpenAICompatibleLLM.astream_chat: {log_kwargs}") for i in range(self.max_retries): try: diff --git a/flowllm/core/schema/message.py b/flowllm/core/schema/message.py index 20f0680..2587ec5 100644 --- a/flowllm/core/schema/message.py +++ b/flowllm/core/schema/message.py @@ -6,7 +6,7 @@ from pydantic import BaseModel, ConfigDict, Field, model_validator from .tool_call import ToolCall -from ..enumeration import Role +from ..enumeration import ContentBlockType, Role class ContentBlock(BaseModel): @@ -35,7 +35,7 @@ class ContentBlock(BaseModel): model_config = ConfigDict(extra="allow") - type: str = Field(default="") + type: ContentBlockType = Field(default=ContentBlockType.TEXT) content: str | dict | list = Field(default="") @model_validator(mode="before") @@ -44,14 +44,15 @@ def init_block(cls, data: dict): """Initialize content block by extracting content based on type field.""" result = data.copy() content_type = data.get("type", "") - result["content"] = data[content_type] + if content_type and content_type in data: + result["content"] = data[content_type] return result def simple_dump(self) -> dict: """Convert ContentBlock to a simple dictionary format.""" result = { - "type": self.type, - self.type: self.content, + "type": self.type.value, + self.type.value: self.content, **self.model_extra, } diff --git a/tests/test_vision_audio.py b/tests/test_vision_audio.py index 1da70f4..3decc9a 100644 --- a/tests/test_vision_audio.py +++ b/tests/test_vision_audio.py @@ -14,7 
+14,7 @@ def encode_image(image_path): return base64.b64encode(image_file.read()).decode("utf-8") # 将xxxx/eagle.png替换为你本地图像的绝对路径 - base64_image = encode_image("/Users/yuli/workspace/ReMe/20251128144329.jpg") + base64_image = encode_image("/Users/yuli/Documents/20251128144329.jpg") llm = OpenAICompatibleLLM(model_name="qwen3-vl-plus") messages = [ @@ -50,7 +50,7 @@ def encode_audio(audio_path): return base64.b64encode(audio_file.read()).decode("utf-8") # 请将 ABSOLUTE_PATH/welcome.mp3 替换为本地音频的绝对路径 - audio_file_path = "/Users/yuli/workspace/ReMe/recordings/111.wav" + audio_file_path = "/Users/yuli/Documents/111.wav" base64_audio = encode_audio(audio_file_path) messages = [