使用 Ray Serve 部署多个 MCP 服务#
本教程将使用 Ray Serve 部署两个 MCP 服务(Brave Search 和 Fetch),并利用自动伸缩、CPU 分配以及无缝多服务路由等功能。
结合 Anyscale,这种设置允许您以最小的开销运行生产级服务,按需自动配置计算资源,并在不停机的情况下部署更新。无论您是扩展单个模型还是在多个模型之间路由,这种模式都提供了一条清晰、可扩展的部署路径。
添加更多 MCP 服务也非常简单—只需为每个新服务调用 build_mcp_deployment 并将其绑定到路由器中即可。
以下架构图说明了使用 Ray Serve 部署多个 MCP Docker 镜像

先决条件#
Ray Serve,已包含在基础 Docker 镜像中
Podman
在您的环境中设置的 Brave API 密钥(BRAVE_API_KEY)
MCP Python 库
依赖项#
为 Ray Serve 部署构建 Docker 镜像
在本教程中,您需要使用 本代码库中包含的 Dockerfile 为 Anyscale 上的部署构建 Docker 镜像。
原因是,当您从工作区终端运行 apt-get install -y podman(例如安装系统软件包)时,它仅存在于 Ray 头节点中,而不会传播到您的 Ray 工作节点。
构建 Docker 镜像后,请导航到工作区的 **Dependencies** 选项卡,选择您刚刚创建的相应镜像,并设置 **BRAVE_API_KEY** 环境变量。
注意 此 Docker 镜像仅用于使用 Ray Serve 部署 MCP。请确保您的 MCP Docker 镜像(如 docker.io/mcp/brave-search)已发布到您自己的私有注册表或公共注册表。
常见问题#
FileNotFoundError: [Errno 2] No such file or directory
通常表示 Podman 未正确安装。请验证 Podman 的安装。
KeyError: 'BRAVE_API_KEY'
请确保您已在环境中导出 BRAVE_API_KEY 或将其包含在您的依赖项配置中。
1. 创建部署文件#
将以下代码保存为 multi_mcp_ray_serve.py
import asyncio
import logging
import os
from contextlib import AsyncExitStack
from typing import Any, Dict, List, Optional
from fastapi import FastAPI, HTTPException, Request
from ray import serve
from ray.serve.handle import DeploymentHandle
from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client
logger = logging.getLogger("multi_mcp_serve")
def _podman_args(
    image: str,
    *,
    extra_args: Optional[List[str]] = None,
    env: Optional[Dict[str, str]] = None,
) -> List[str]:
    """Build the argument list for ``podman run`` launching *image*.

    Every entry of ``env`` becomes a ``-e KEY=VALUE`` flag, except ``PATH``
    (in any casing), which is never forwarded. ``extra_args`` are inserted
    just before the image name so they apply to the ``run`` invocation.
    """
    cmd: List[str] = ["run", "-i", "--rm"]
    for name, val in (env or {}).items():
        # The host's PATH is meaningless inside the container; skip it.
        if name.upper() != "PATH":
            cmd.extend(["-e", f"{name}={val}"])
    cmd.extend(extra_args or [])
    cmd.append(image)
    return cmd
class _BaseMCP:
    """Base class for Ray Serve replicas that proxy a containerized MCP server.

    Subclasses (created by ``build_mcp_deployment``) override ``_PODMAN_ARGS``
    and ``_ENV``. On construction the replica launches the MCP container via
    ``podman`` and opens a stdio client session to it in a background task.
    """

    _PODMAN_ARGS: List[str] = []
    _ENV: Dict[str, str] = {}

    def __init__(self):
        # Start the podman/MCP handshake in the background; every public
        # method awaits _ensure_ready() before touching self.session.
        self._ready = asyncio.create_task(self._startup())

    async def _startup(self):
        """Launch the MCP server container and initialize the client session."""
        params = StdioServerParameters(
            command="podman",
            args=self._PODMAN_ARGS,
            env=self._ENV,
        )
        self._stack = AsyncExitStack()
        # stdio_client yields the (read, write) stream pair for the container.
        read_stream, write_stream = await self._stack.enter_async_context(stdio_client(params))
        self.session = await self._stack.enter_async_context(
            ClientSession(read_stream, write_stream)
        )
        await self.session.initialize()
        logger.info("%s replica ready", type(self).__name__)

    async def _ensure_ready(self):
        # Awaiting the startup task re-raises any startup failure here.
        await self._ready

    async def list_tools(self) -> List[Dict[str, Any]]:
        """Return the MCP server's tools as plain JSON-serializable dicts."""
        await self._ensure_ready()
        resp = await self.session.list_tools()
        return [
            {"name": t.name, "description": t.description, "input_schema": t.inputSchema}
            for t in resp.tools
        ]

    async def call_tool(self, tool_name: str, tool_args: Dict[str, Any]) -> Any:
        """Invoke a single MCP tool by name with the given arguments."""
        await self._ensure_ready()
        return await self.session.call_tool(tool_name, tool_args)

    async def aclose(self) -> None:
        """Close the MCP session and tear down the container resources."""
        if hasattr(self, "_stack"):
            await self._stack.aclose()

    def __del__(self):
        # BUG FIX: the original declared ``async def __del__``. Python invokes
        # __del__ synchronously, so that coroutine was never awaited and the
        # AsyncExitStack never closed (leaking the podman process). Schedule
        # the async cleanup on the running loop instead, best-effort.
        if hasattr(self, "_stack"):
            try:
                loop = asyncio.get_event_loop()
                if loop.is_running():
                    loop.create_task(self.aclose())
            except RuntimeError:
                # No usable event loop (e.g. interpreter shutdown); give up.
                pass
def build_mcp_deployment(
    *,
    name: str,
    docker_image: str,
    num_replicas: int = 3,
    num_cpus: float = 0.5,
    autoscaling_config: Optional[Dict[str, Any]] = None,
    server_command: Optional[str] = None,
    extra_podman_args: Optional[List[str]] = None,
    env: Optional[Dict[str, str]] = None,
) -> serve.Deployment:
    """Create a Ray Serve deployment that runs *docker_image* via podman.

    Scaling policy: when ``autoscaling_config`` is provided, Ray Serve
    autoscales between its ``min_replicas`` and ``max_replicas``; otherwise
    the deployment is pinned at ``num_replicas`` fixed replicas.
    """
    runtime_env = env or {}
    run_args = _podman_args(docker_image, extra_args=extra_podman_args, env=runtime_env)
    if server_command:
        run_args.append(server_command)

    # Assemble the keyword arguments for the @serve.deployment decorator.
    options: Dict[str, Any] = {
        "name": name,
        "ray_actor_options": {"num_cpus": num_cpus},
    }
    if autoscaling_config:
        options["autoscaling_config"] = autoscaling_config
    else:
        options["num_replicas"] = num_replicas

    @serve.deployment(**options)
    class MCP(_BaseMCP):
        _PODMAN_ARGS = run_args
        _ENV = runtime_env

    return MCP
# -------------------------
# HTTP router code
# -------------------------
api = FastAPI()

@serve.deployment
@serve.ingress(api)
class Router:
    """HTTP front-end that fans requests out to the named MCP deployment."""

    def __init__(self,
                 brave_search: DeploymentHandle,
                 fetch: DeploymentHandle) -> None:
        self._mcps = {"brave_search": brave_search, "fetch": fetch}

    def _resolve(self, mcp_name: str) -> DeploymentHandle:
        # Shared lookup: unknown service names surface as HTTP 404.
        handle = self._mcps.get(mcp_name)
        if not handle:
            raise HTTPException(404, f"MCP {mcp_name} not found")
        return handle

    @api.get("/{mcp_name}/tools")
    async def list_tools_http(self, mcp_name: str):
        """GET /{mcp_name}/tools -- list the tools the MCP service exposes."""
        handle = self._resolve(mcp_name)
        try:
            tools = await handle.list_tools.remote()
        except Exception as exc:
            logger.exception("Listing tools failed")
            raise HTTPException(500, str(exc))
        return {"tools": tools}

    @api.post("/{mcp_name}/call")
    async def call_tool_http(self, mcp_name: str, request: Request):
        """POST /{mcp_name}/call -- invoke one tool; body carries name + args."""
        handle = self._resolve(mcp_name)
        body = await request.json()
        tool_name = body.get("tool_name")
        tool_args = body.get("tool_args")
        if tool_name is None or tool_args is None:
            raise HTTPException(400, "Missing 'tool_name' or 'tool_args'")
        try:
            result = await handle.call_tool.remote(tool_name, tool_args)
        except Exception as exc:
            logger.exception("Tool call failed")
            raise HTTPException(500, str(exc))
        return {"result": result}
# -------------------------
# Binding deployments
# -------------------------
# Fail fast: the Brave Search container cannot start without its API key.
if "BRAVE_API_KEY" not in os.environ:
    raise RuntimeError("BRAVE_API_KEY must be set before `serve run`.")

# Brave Search: autoscaled between 1 and 5 replicas, targeting roughly
# 10 concurrent requests per replica.
BraveSearch = build_mcp_deployment(
    name="brave_search",
    docker_image="docker.io/mcp/brave-search",
    env={"BRAVE_API_KEY": os.environ["BRAVE_API_KEY"]},
    num_cpus=0.2,
    autoscaling_config={"min_replicas": 1, "max_replicas": 5, "target_ongoing_requests": 10},
)

# Fetch: pinned at exactly two replicas.
Fetch = build_mcp_deployment(
    name="fetch",
    docker_image="docker.io/mcp/fetch",
    num_replicas=2,
    num_cpus=0.2,
)

brave_search_handle = BraveSearch.bind()
fetch_handle = Fetch.bind()
app = Router.bind(brave_search_handle, fetch_handle)
您可以以编程方式运行应用程序,在工作区中启动它
serve.run(app)
或者,您可以使用下一节所示的命令行来运行它。
注意
在 Ray 集群上,请使用 **Podman** 而不是 Docker 来运行和管理容器。这种方法符合 Ray Serve 多应用容器部署文档 中提供的指南。
此外,对于像 "docker.io/mcp/brave-search" 这样的镜像,请明确包含 "docker.io/" 前缀,以确保 Podman 正确识别镜像 URI。
本教程仅将 num_cpus 参数传递给 ray_actor_options。您可以随意修改代码,以包含此处概述的其他支持的参数:https://docs.rayai.org.cn/en/latest/serve/resource-allocation.html
自动伸缩参数在 autoscaling_config 中作为示例提供。有关在 Ray Serve 部署中配置自动伸缩的更多详细信息,请参阅:
https://docs.rayai.org.cn/en/latest/serve/configure-serve-deployment.html
https://docs.rayai.org.cn/en/latest/serve/autoscaling-guide.html
https://docs.rayai.org.cn/en/latest/serve/advanced-guides/advanced-autoscaling.html#serve-advanced-autoscaling
2. 在工作区中使用 Ray Serve 运行服务#
您可以在终端中运行以下命令,使用 Ray Serve 部署服务
serve run multi_mcp_ray_serve:app
这将在 http://localhost:8000 上启动服务。
3. 测试服务#
import requests
from pprint import pprint
# Configuration.
# BUG FIX: the original read ``"https://:8000"`` -- the host was lost during
# doc extraction, leaving an invalid URL. `serve run` listens on
# http://localhost:8000 by default.
BASE_URL = "http://localhost:8000"  # Local tooling API base URL
def list_tools(service: str):
    """Return the tool list advertised by *service* (GET /{service}/tools)."""
    response = requests.get(f"{BASE_URL}/{service}/tools")
    response.raise_for_status()
    return response.json()["tools"]
def call_tool(service: str, tool_name: str, tool_args: dict):
    """Invoke *tool_name* on *service* with *tool_args* (POST /{service}/call)."""
    response = requests.post(
        f"{BASE_URL}/{service}/call",
        json={"tool_name": tool_name, "tool_args": tool_args},
    )
    response.raise_for_status()
    return response.json()["result"]
# Smoke-test the locally served MCP router: list each service's tools,
# then exercise one tool per service.

# List Brave Search tools.
print("=== Brave Search: Available Tools ===")
brave_tools = list_tools("brave_search")
pprint(brave_tools)

# Run a query via Brave Search.
# Assumes the first advertised tool is the web-search tool -- TODO confirm
# against the image's tool ordering.
search_tool = brave_tools[0]["name"]
print(f"\nUsing tool '{search_tool}' to search for best tacos in Los Angeles...")
search_result = call_tool(
    service="brave_search",
    tool_name=search_tool,
    tool_args={"query": "best tacos in Los Angeles"}
)
print("Web Search Results:")
pprint(search_result)

# List Fetch tools.
print("\n=== Fetch Service: Available Tools ===")
fetch_tools = list_tools("fetch")
pprint(fetch_tools)

# Fetch a URL.
fetch_tool = fetch_tools[0]["name"]
print(f"\nUsing tool '{fetch_tool}' to fetch https://example.com...")
fetch_result = call_tool(
    service="fetch",
    tool_name=fetch_tool,
    tool_args={"url": "https://example.com"}
)
print("Fetch Results:")
pprint(fetch_result)
4. 使用 Anyscale 服务进行生产部署#
对于生产部署,请使用 Anyscale Service 将 Ray Serve 应用程序部署到专用集群,而无需修改代码。Anyscale 可确保可伸缩性、容错性和负载均衡,使服务能够抵抗节点故障、高流量和滚动更新。
使用以下命令部署服务
anyscale service deploy multi_mcp_ray_serve:app --name=multi_mcp_tool_service
注意
此 Anyscale 服务从工作区拉取关联的依赖项、计算配置和服务配置。要显式定义这些,您可以使用 -f 标志从 config.yaml 文件部署。有关详细信息,请参阅 ServiceConfig 参考。
5. 查询生产服务#
部署时,您会将服务公开到一个可公开访问的 IP 地址,您可以向该地址发送请求。
在上一单元格的输出中,复制您的 API_KEY 和 BASE_URL。例如,这些值看起来如下:
BASE_URL = "https://multi-mcp-tool-service-jgz99.cld-kvedzwag2qa8i5bj.s.anyscaleuserdata.com"
TOKEN = "z3RIKzZwHDF9sV60o7M48WsOY1Z50dsXDrWRbxHYtPQ"
在以下 Python 请求对象中填写 BASE_URL 和 API_KEY 的占位符值
import requests
from pprint import pprint
# Service configuration -- substitute the values printed when you deployed
# the Anyscale service.
BASE_URL = "https://multi-mcp-tool-service-jgz99.cld-kvedzwag2qa8i5bj.s.anyscaleuserdata.com"  # Replace with your own URL
TOKEN = "z3RIKzZwHDF9sV60o7M48WsOY1Z50dsXDrWRbxHYtPQ"  # Replace with your own token
# Bearer-token header sent with every request to the service.
HEADERS = {"Authorization": f"Bearer {TOKEN}"}
def list_tools(service: str):
    """Return the tool list advertised by *service*, authenticating with HEADERS."""
    response = requests.get(f"{BASE_URL}/{service}/tools", headers=HEADERS)
    response.raise_for_status()
    return response.json()["tools"]
def call_tool(service: str, tool_name: str, tool_args: dict):
    """Invoke *tool_name* on *service* with *tool_args*, authenticating with HEADERS."""
    response = requests.post(
        f"{BASE_URL}/{service}/call",
        json={"tool_name": tool_name, "tool_args": tool_args},
        headers=HEADERS,
    )
    response.raise_for_status()
    return response.json()["result"]
# Smoke-test the deployed Anyscale service: list each service's tools,
# then exercise one tool per service over the authenticated endpoint.

# List Brave Search tools.
print("=== Brave Search: Available Tools ===")
brave_tools = list_tools("brave_search")
pprint(brave_tools)

# Perform a search for "best tacos in Los Angeles".
# Assumes the first advertised tool is the web-search tool -- TODO confirm
# against the image's tool ordering.
search_tool = brave_tools[0]["name"]
print(f"\nUsing tool '{search_tool}' to search for best tacos in Los Angeles...")
search_result = call_tool(
    service="brave_search",
    tool_name=search_tool,
    tool_args={"query": "best tacos in Los Angeles"}
)
print("Web Search Results:")
pprint(search_result)

# List Fetch tools.
print("\n=== Fetch Service: Available Tools ===")
fetch_tools = list_tools("fetch")
pprint(fetch_tools)

# Fetch the content of example.com
fetch_tool = fetch_tools[0]["name"]
print(f"\nUsing tool '{fetch_tool}' to fetch https://example.com...")
fetch_result = call_tool(
    service="fetch",
    tool_name=fetch_tool,
    tool_args={"url": "https://example.com"}
)
print("Fetch Results:")
pprint(fetch_result)