#!/bin/bash
# Launch the vLLM OpenAI-compatible API server.
# Usage: bash start_vllm.sh
#
# Requires a `python` on PATH that has the `vllm` package installed.

set -euo pipefail

# Server options are kept in an array so that every flag and value is passed
# as exactly one argument, with no reliance on backslash line continuations.
vllm_args=(
  # Model identifier to serve.
  --model Qwen/Qwen3-VL-8B-Instruct
  # Allow the custom code shipped with the model repository to be executed.
  --trust-remote-code
  # Port for the HTTP API server.
  --port 8000
  # Fraction of GPU memory the engine is allowed to use.
  --gpu-memory-utilization 0.85
  # Upper bound on the model context length.
  --max-model-len 32000
  # Tool calling: automatic tool choice, parsed with the "hermes" parser.
  --enable-auto-tool-choice
  --tool-call-parser hermes
  # Only show uvicorn log messages at warning level or above.
  --uvicorn-log-level warning
)

# exec replaces this shell with the server process, so signals (Ctrl-C,
# SIGTERM) and the exit status go straight to / come straight from vLLM.
exec python -m vllm.entrypoints.openai.api_server "${vllm_args[@]}"