15 lines
384 B
Bash
15 lines
384 B
Bash
#!/bin/bash
#
# Start the vLLM OpenAI-compatible API server.
#
# Usage: bash start_vllm.sh [extra api_server arguments...]
#
# Extra arguments are optional; when given, they are appended after the
# defaults below and handed to the server unchanged.

set -euo pipefail

# Default server arguments. They live in an array so every flag and value
# stays a separate word without relying on backslash line continuations,
# which break silently if anything ends up between the continued lines.
server_args=(
  --model Qwen/Qwen3-VL-8B-Instruct
  # NOTE(review): allows the model repository to supply code that is run
  # locally -- confirm the model source is trusted.
  --trust-remote-code
  # NOTE(review): 0.0.0.0 listens on every network interface -- confirm
  # the port is not reachable from untrusted networks.
  --host 0.0.0.0
  --port 8000
  --gpu-memory-utilization 0.85
  --max-model-len 32000
  --enable-auto-tool-choice
  --tool-call-parser hermes
  --uvicorn-log-level warning
)

# exec replaces this shell with the server process, so signals sent to the
# script (e.g. Ctrl-C, SIGTERM from a supervisor) reach the server directly
# and the script's exit status is the server's exit status.
exec python -m vllm.entrypoints.openai.api_server "${server_args[@]}" "$@"