feat: 添加天气、网页搜索mcp
This commit is contained in:
181
server/mcp_tools/web_search.py
Normal file
181
server/mcp_tools/web_search.py
Normal file
@@ -0,0 +1,181 @@
|
||||
"""网络搜索工具"""
|
||||
import httpx
|
||||
from typing import List, Dict, Optional
|
||||
|
||||
|
||||
async def web_search(
    query: str,
    num_results: int = 10,
    language: str = "zh-CN"
) -> dict:
    """
    Run a web search and return structured results.

    Uses the DuckDuckGo HTML endpoint (free, no API key required).

    Args:
        query: Search query keywords.
        num_results: Number of results to return; clamped to the range
            1..20 (default 10).
        language: Search locale, default Chinese ("zh-CN"); values such
            as "en-US" are also supported.

    Returns:
        dict: On success:
            - query: the search keywords
            - total_results: number of results actually returned
              (length of ``results``, not an engine-side estimate)
            - results: list of dicts, each with:
                - title: page title
                - url: page link (redirect wrapper removed)
                - snippet: page summary
                - source: source site
        On failure: a dict with "error", "query" and an empty "results".

    Example:
        >>> result = await web_search("Python 异步编程")
        >>> for item in result['results']:
        ...     print(f"{item['title']}: {item['url']}")

        >>> result = await web_search("latest AI news", num_results=5)
        >>> print(len(result['results']))
        5
    """
    from urllib.parse import parse_qs, unquote, urlsplit

    def _unwrap_url(href: str) -> str:
        # DuckDuckGo HTML results link through a redirect of the form
        # //duckduckgo.com/l/?uddg=<url-encoded target>; surface the real URL.
        if "duckduckgo.com/l/" in href:
            target = parse_qs(urlsplit(href).query).get("uddg", [""])[0]
            if target:
                return unquote(target)
        return href

    # Clamp the requested result count into the supported range.
    num_results = min(max(1, num_results), 20)

    # DuckDuckGo HTML endpoint: free, no API key needed.
    base_url = "https://html.duckduckgo.com/html/"

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }

    params = {
        "q": query,
        "kl": language,
    }

    try:
        async with httpx.AsyncClient(timeout=15.0) as client:
            response = await client.post(base_url, data=params, headers=headers)
            response.raise_for_status()

            # Parse the HTML result page (bs4 imported lazily, matching the
            # file's existing style).
            from bs4 import BeautifulSoup
            soup = BeautifulSoup(response.text, 'html.parser')

            results = []
            result_divs = soup.find_all('div', class_='result')

            for div in result_divs[:num_results]:
                try:
                    # Title and link.
                    title_elem = div.find('a', class_='result__a')
                    if not title_elem:
                        continue  # placeholder/ad blocks lack a title link

                    title = title_elem.get_text(strip=True)
                    url = _unwrap_url(title_elem.get('href', ''))

                    # Snippet.
                    snippet_elem = div.find('a', class_='result__snippet')
                    snippet = snippet_elem.get_text(strip=True) if snippet_elem else ""

                    # Source site.
                    source_elem = div.find('span', class_='result__url')
                    source = source_elem.get_text(strip=True) if source_elem else ""

                    results.append({
                        "title": title,
                        "url": url,
                        "snippet": snippet,
                        "source": source,
                    })
                except Exception:
                    # Best-effort scrape: skip any malformed result block.
                    continue

            return {
                "query": query,
                "total_results": len(results),
                "results": results,
            }

    except httpx.TimeoutException:
        return {
            "error": "请求超时,请稍后重试",
            "query": query,
            "results": []
        }
    except httpx.HTTPError as e:
        return {
            "error": f"搜索失败: {str(e)}",
            "query": query,
            "results": []
        }
    except Exception as e:
        return {
            "error": f"解析搜索结果失败: {str(e)}",
            "query": query,
            "results": []
        }
|
||||
|
||||
|
||||
async def search_url(url: str, max_length: int = 5000) -> dict:
    """
    Fetch a web page and return a plain-text summary of its content.

    Args:
        url: The URL to fetch (redirects are followed).
        max_length: Maximum number of characters of body text to return.
            Defaults to 5000, matching the previous hard-coded limit.

    Returns:
        dict: On success:
            - url: the requested URL
            - title: page <title> text ("" if absent)
            - content: page body text, truncated to ``max_length`` characters
        On failure: a dict with "url" and an "error" message instead.

    Example:
        >>> result = await search_url("https://example.com")
        >>> print(result['title'])
        Example Domain
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }

    try:
        async with httpx.AsyncClient(timeout=15.0, follow_redirects=True) as client:
            response = await client.get(url, headers=headers)
            response.raise_for_status()

            # Parse the HTML (bs4 imported lazily, matching the file's style).
            from bs4 import BeautifulSoup
            soup = BeautifulSoup(response.text, 'html.parser')

            # Page title.
            title = soup.find('title')
            title_text = title.get_text(strip=True) if title else ""

            # Strip non-content elements before extracting text.
            for tag in soup(["script", "style"]):
                tag.decompose()

            # Body text, one line per text node.
            content = soup.get_text(separator='\n', strip=True)

            return {
                "url": url,
                "title": title_text,
                "content": content[:max_length],  # cap payload size
            }

    except httpx.TimeoutException:
        return {
            "error": "请求超时",
            "url": url
        }
    except httpx.HTTPError as e:
        return {
            "error": f"获取网页失败: {str(e)}",
            "url": url
        }
    except Exception as e:
        return {
            "error": f"解析网页失败: {str(e)}",
            "url": url
        }
|
||||
Reference in New Issue
Block a user