57 lines
1.9 KiB
Python
57 lines
1.9 KiB
Python
import asyncio
|
|
|
|
from autogen_agentchat.agents import AssistantAgent
|
|
from autogen_agentchat.messages import MultiModalMessage, TextMessage
|
|
from autogen_core import Image
|
|
from autogen_core.models import ModelFamily
|
|
from autogen_ext.models.ollama import OllamaChatCompletionClient
|
|
|
|
from . import config
|
|
from .mcp_tools import load_mcp_tools
|
|
|
|
|
|
class AvatarAgentService:
    """Lazily-initialized chat agent backed by an Ollama model.

    The underlying ``AssistantAgent`` needs async setup (MCP tool loading),
    so it is created on first use and cached; creation is guarded by an
    ``asyncio.Lock`` so concurrent first callers share a single instance.
    """

    def __init__(self) -> None:
        # The model client is cheap to construct, so build it eagerly;
        # the agent itself requires awaiting tool discovery and is deferred.
        self._model_client = OllamaChatCompletionClient(
            model=config.OLLAMA_MODEL,
            # Ollama models are not in autogen's known-model registry, so
            # their capabilities must be declared explicitly here.
            model_info={
                "vision": True,
                "function_calling": True,
                "json_output": True,
                "family": ModelFamily.UNKNOWN,
                "structured_output": True,
            },
        )
        # Cached agent instance; None until first _get_agent() call.
        self._agent: AssistantAgent | None = None
        # Serializes lazy agent creation across concurrent callers.
        self._agent_lock = asyncio.Lock()

    async def _create_agent(self) -> AssistantAgent:
        """Build the assistant agent, attaching any available MCP tools."""
        tools = await load_mcp_tools()
        return AssistantAgent(
            name="avatar",
            model_client=self._model_client,
            system_message=config.SYSTEM_MESSAGE,
            # AssistantAgent expects None rather than an empty tool list.
            tools=tools or None,
            # Only reflect on tool output when tools actually exist.
            reflect_on_tool_use=bool(tools),
        )

    async def _get_agent(self) -> AssistantAgent:
        """Return the cached agent, creating it exactly once.

        The unlocked fast path avoids lock overhead after initialization;
        the re-check inside the lock prevents two concurrent first callers
        from each building an agent.
        """
        if self._agent is not None:
            return self._agent
        async with self._agent_lock:
            if self._agent is None:
                self._agent = await self._create_agent()
            return self._agent

    async def reply(self, user_text: str, image_b64: str | None = None) -> str:
        """Run one conversational turn and return the agent's final text.

        Args:
            user_text: The user's message.
            image_b64: Optional base64-encoded image to attach. When omitted
                or empty, a text-only message is sent (previously an image
                was mandatory and a falsy value would crash decoding).

        Returns:
            The content of the last ``TextMessage`` emitted by the "avatar"
            agent during the run, or ``""`` if none was produced.
        """
        agent = await self._get_agent()

        content: list[str | Image] = [user_text]
        if image_b64:
            content.append(Image.from_base64(image_b64))
        task = MultiModalMessage(source="user", content=content)

        ai_response = ""
        async for message in agent.run_stream(task=task):
            # run_stream yields intermediate events and a final TaskResult;
            # keep only text from this agent — the last one wins.
            if isinstance(message, TextMessage) and message.source == "avatar":
                ai_response = message.content

        return ai_response
|