import asyncio

from autogen_agentchat.agents import AssistantAgent
from autogen_agentchat.messages import MultiModalMessage, TextMessage
from autogen_core import Image
from autogen_core.models import ModelFamily
from autogen_ext.models.ollama import OllamaChatCompletionClient

from . import config
from .mcp_tools import load_mcp_tools


class AvatarAgentService:
    """Multimodal assistant service backed by an Ollama model via AutoGen.

    The Ollama model client is created eagerly in ``__init__``; the agent
    itself (which requires asynchronously loading MCP tools) is built on
    first use, guarded by an :class:`asyncio.Lock` so that concurrent
    callers end up sharing a single agent instance.
    """

    def __init__(self) -> None:
        # The model client is cheap to construct, so build it up front.
        self._model_client = OllamaChatCompletionClient(
            model=config.OLLAMA_MODEL,
            model_info={
                "vision": True,
                "function_calling": True,
                "json_output": True,
                "family": ModelFamily.UNKNOWN,
                "structured_output": True,
            },
        )
        # Populated lazily by _get_agent(); stays None until first use.
        self._agent: AssistantAgent | None = None
        self._agent_lock = asyncio.Lock()

    async def _create_agent(self) -> AssistantAgent:
        """Build the assistant agent, wiring in any available MCP tools."""
        discovered_tools = await load_mcp_tools()
        # With no tools there is nothing to reflect on, so both the tools
        # argument and reflect_on_tool_use track the (possibly empty) list.
        return AssistantAgent(
            name="avatar",
            model_client=self._model_client,
            system_message=config.SYSTEM_MESSAGE,
            tools=discovered_tools or None,
            reflect_on_tool_use=bool(discovered_tools),
        )

    async def _get_agent(self) -> AssistantAgent:
        """Return the shared agent, creating it exactly once under the lock."""
        existing = self._agent
        if existing is not None:
            # Fast path: already built, no need to take the lock.
            return existing
        async with self._agent_lock:
            # Re-check inside the lock — another task may have finished
            # building the agent while we were waiting to acquire it.
            if self._agent is None:
                self._agent = await self._create_agent()
            return self._agent

    async def reply(self, user_text: str, image_b64: str) -> str:
        """Send text plus a base64-encoded image to the agent; return its text reply.

        Streams the agent run and keeps the most recent ``TextMessage``
        emitted by the "avatar" agent. Returns an empty string if no such
        message is produced.
        """
        agent = await self._get_agent()
        user_image = Image.from_base64(image_b64)
        multimodal_task = MultiModalMessage(source="user", content=[user_text, user_image])

        latest_text = ""
        async for event in agent.run_stream(task=multimodal_task):
            if not isinstance(event, TextMessage):
                continue
            if event.source == "avatar":
                latest_text = event.content
        return latest_text