feat: 添加天气、网页搜索mcp

This commit is contained in:
gouhanke
2026-03-05 18:45:04 +08:00
parent 9375dc29ee
commit 5b2573c0ca
12 changed files with 313 additions and 2 deletions

View File

@@ -3,7 +3,7 @@
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>VLM 虚拟主播</title>
<title>VLM </title>
<style>
body { font-family: sans-serif; display: flex; flex-direction: column; align-items: center; background-color: #f0f2f5; margin-top: 50px; }
.container { display: flex; gap: 20px; }
@@ -21,7 +21,7 @@
</head>
<body>
<h2>🤖 多模态虚拟主播 (VLM)</h2>
<h2>🤖 VLM</h2>
<div class="container">
<div class="video-box">

23
requirements.txt Normal file
View File

@@ -0,0 +1,23 @@
# FastAPI 和服务器
fastapi>=0.104.0
uvicorn[standard]>=0.24.0
websockets>=12.0
# AutoGen 相关
autogen-agentchat>=0.4.0
autogen-core>=0.4.0
autogen-ext[ollama]>=0.4.0
# 语音处理
openai-whisper>=20231117
pyttsx3>=2.90
edge-tts>=6.1.9
# MCP 工具依赖
httpx>=0.25.0
beautifulsoup4>=4.12.0
lxml>=5.0.0
# 其他工具
python-multipart>=0.0.6
pydantic>=2.5.0

View File

@@ -5,6 +5,7 @@ from autogen_core.models import ModelFamily
from autogen_ext.models.ollama import OllamaChatCompletionClient
from . import config
from .mcp_tools import get_weather, web_search
class AvatarAgentService:
@@ -19,10 +20,18 @@ class AvatarAgentService:
"structured_output": True,
},
)
# 定义可用的 MCP 工具
tools = [
get_weather,
web_search,
]
self._agent = AssistantAgent(
name="avatar",
model_client=model_client,
system_message=config.SYSTEM_MESSAGE,
tools=tools,
)
async def reply(self, user_text: str, image_b64: str) -> str:

View File

@@ -0,0 +1,6 @@
"""MCP 工具模块"""
from .weather import get_weather
from .web_search import web_search
__all__ = ["get_weather", "web_search"]

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,92 @@
"""天气查询工具"""
import httpx
from typing import Optional
async def get_weather(
    city: str,
    units: str = "metric",
    language: str = "zh_cn"
) -> dict:
    """Fetch current weather for a city via the wttr.in JSON API.

    Args:
        city: City name; Chinese (e.g. "北京", "上海") or English
            (e.g. "Beijing", "London").
        units: Unit system, "metric" (Celsius) or "imperial" (Fahrenheit).
            Defaults to "metric".
        language: Language code for weather descriptions. Defaults to "zh_cn".

    Returns:
        dict: On success, contains:
            - city: resolved city name
            - country: country name
            - temperature: current temperature
            - description: weather description
            - humidity: humidity percentage
            - wind_speed: wind speed in km/h
            - feels_like: apparent temperature
            - temp_min / temp_max: today's min/max temperature
            - uv_index: UV index
        On failure, contains "error" and "city" instead.

    Example:
        >>> result = await get_weather("北京")
        >>> print(result['temperature'])
        15
    """
    # wttr.in is used as a free, key-less alternative to OpenWeatherMap.
    base_url = "https://wttr.in"
    params = {
        "format": "j1",  # JSON response format
        "lang": language,
    }
    # Unit hint for wttr.in (actual selection happens below via *_C/*_F keys).
    if units == "imperial":
        params["u"] = "f"  # Fahrenheit
    else:
        params["u"] = "c"  # Celsius (default)
    try:
        async with httpx.AsyncClient(timeout=10.0) as client:
            response = await client.get(f"{base_url}/{city}", params=params)
            response.raise_for_status()
            data = response.json()
            # Current conditions and resolved location.
            current = data.get("current_condition", [{}])[0]
            location = data.get("nearest_area", [{}])[0]
            metric = units != "imperial"
            temp_key = "temp_C" if metric else "temp_F"
            feels_key = "FeelsLikeC" if metric else "FeelsLikeF"
            cur_temp = float(current.get(temp_key, 0))
            # The daily forecast section carries the real min/max; fall back
            # to the current temperature when it is absent.
            today = data.get("weather", [{}])[0]
            min_key = "mintempC" if metric else "mintempF"
            max_key = "maxtempC" if metric else "maxtempF"
            return {
                "city": location.get("areaName", [{}])[0].get("value", city),
                "country": location.get("country", [{}])[0].get("value", ""),
                "temperature": cur_temp,
                "description": current.get("weatherDesc", [{}])[0].get("value", "未知"),
                "humidity": int(current.get("humidity", 0)),
                "wind_speed": float(current.get("windspeedKmph", 0)),
                "feels_like": float(current.get(feels_key, 0)),
                "temp_min": float(today.get(min_key, cur_temp)),
                "temp_max": float(today.get(max_key, cur_temp)),
                "uv_index": int(current.get("uvIndex", 0)),
            }
    except httpx.TimeoutException:
        return {
            "error": "请求超时,请稍后重试",
            "city": city
        }
    except httpx.HTTPError as e:
        return {
            "error": f"获取天气信息失败: {str(e)}",
            "city": city
        }
    except (KeyError, IndexError, ValueError) as e:
        return {
            "error": f"解析天气数据失败: {str(e)}",
            "city": city
        }

View File

@@ -0,0 +1,181 @@
"""网络搜索工具"""
import httpx
from typing import List, Dict, Optional
async def web_search(
    query: str,
    num_results: int = 10,
    language: str = "zh-CN"
) -> dict:
    """Run a web search against DuckDuckGo's HTML endpoint (no API key).

    Args:
        query: Search keywords.
        num_results: Number of results to return; clamped to 1..20.
            Defaults to 10.
        language: Region/language code, e.g. "zh-CN" or "en-US".

    Returns:
        dict: Contains "query", "total_results" and "results", where each
        result holds "title", "url", "snippet" and "source". On failure,
        an "error" message is included and "results" is empty.
    """
    # Clamp the requested count into the supported range.
    num_results = min(max(1, num_results), 20)

    base_url = "https://html.duckduckgo.com/html/"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }
    form = {
        "q": query,
        "kl": language,
    }

    def _extract(entry):
        # Pull title/link/snippet/source out of one result <div>; None if unusable.
        anchor = entry.find('a', class_='result__a')
        if not anchor:
            return None
        snippet_node = entry.find('a', class_='result__snippet')
        source_node = entry.find('span', class_='result__url')
        return {
            "title": anchor.get_text(strip=True),
            "url": anchor.get('href', ''),
            "snippet": snippet_node.get_text(strip=True) if snippet_node else "",
            "source": source_node.get_text(strip=True) if source_node else "",
        }

    try:
        async with httpx.AsyncClient(timeout=15.0) as client:
            response = await client.post(base_url, data=form, headers=headers)
            response.raise_for_status()

            from bs4 import BeautifulSoup
            soup = BeautifulSoup(response.text, 'html.parser')

            hits = []
            for entry in soup.find_all('div', class_='result')[:num_results]:
                # Best-effort: a malformed result entry is skipped, not fatal.
                try:
                    item = _extract(entry)
                except Exception:
                    continue
                if item is not None:
                    hits.append(item)

            return {
                "query": query,
                "total_results": len(hits),
                "results": hits,
            }
    except httpx.TimeoutException:
        return {
            "error": "请求超时,请稍后重试",
            "query": query,
            "results": []
        }
    except httpx.HTTPError as e:
        return {
            "error": f"搜索失败: {str(e)}",
            "query": query,
            "results": []
        }
    except Exception as e:
        return {
            "error": f"解析搜索结果失败: {str(e)}",
            "query": query,
            "results": []
        }
async def search_url(url: str) -> dict:
    """Fetch a web page and return its title plus plain-text body.

    Args:
        url: Page URL to fetch; redirects are followed.

    Returns:
        dict: Contains "url", "title" and "content" (body text truncated
        to 5000 characters). On failure, "url" plus an "error" message.

    Example:
        >>> result = await search_url("https://example.com")
        >>> print(result['title'])
        Example Domain
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }
    try:
        async with httpx.AsyncClient(timeout=15.0, follow_redirects=True) as client:
            response = await client.get(url, headers=headers)
            response.raise_for_status()

            from bs4 import BeautifulSoup
            soup = BeautifulSoup(response.text, 'html.parser')

            # Page title, empty string when the document has none.
            title_node = soup.find('title')
            page_title = "" if title_node is None else title_node.get_text(strip=True)

            # Remove non-content elements before extracting visible text.
            for node in soup(["script", "style"]):
                node.decompose()

            body_text = soup.get_text(separator='\n', strip=True)
            return {
                "url": url,
                "title": page_title,
                "content": body_text[:5000],  # cap payload size
            }
    except httpx.TimeoutException:
        return {
            "error": "请求超时",
            "url": url
        }
    except httpx.HTTPError as e:
        return {
            "error": f"获取网页失败: {str(e)}",
            "url": url
        }
    except Exception as e:
        return {
            "error": f"解析网页失败: {str(e)}",
            "url": url
        }

Binary file not shown.

Binary file not shown.