Skip to content

Docker — LLM 服务容器化部署

LLM API 服务 Dockerfile

dockerfile
# syntax=docker/dockerfile:1
# Dockerfile — LLM API service (uvicorn/FastAPI)
FROM python:3.11-slim

# Working directory (created automatically by WORKDIR)
WORKDIR /app

# Install curl for the HEALTHCHECK below.
# FIX: python:3.11-slim does NOT ship curl, so the original health check
# always failed and the container was permanently marked unhealthy.
# OS packages change rarely, so this layer sits before the pip layer.
RUN apt-get update && apt-get install -y --no-install-recommends curl \
    && rm -rf /var/lib/apt/lists/*

# Install Python dependencies first (cache layer: only rebuilt when
# requirements.txt changes, not on every source edit)
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt \
    -i https://pypi.tuna.tsinghua.edu.cn/simple

# Copy application code — keep a .dockerignore excluding .git, .env,
# __pycache__ etc. so secrets/junk are not baked into the image
COPY . .

# Runtime environment:
#   PYTHONUNBUFFERED=1        — flush stdout/stderr immediately (no lost logs)
#   PYTHONDONTWRITEBYTECODE=1 — do not write .pyc files into the image
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1

# Run as a dedicated non-root user (least privilege)
RUN useradd --system --uid 10001 --create-home appuser
USER appuser

# Documentation only — the port is actually published via compose "ports"
EXPOSE 8000

# Cheap liveness probe against the app's own /health endpoint
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD curl -fsS http://localhost:8000/health || exit 1

# Exec form: uvicorn is PID 1 and receives SIGTERM from `docker stop`
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000", "--workers", "2"]

GPU 支持(本地模型推理)

dockerfile
# syntax=docker/dockerfile:1
# GPU Dockerfile — local model inference on CUDA.
# NOTE(review): the CUDA 12.1 runtime must match the CUDA build of the
# wheels in requirements-gpu.txt (e.g. torch cu121) — confirm before bumping.
FROM nvidia/cuda:12.1.0-cudnn8-runtime-ubuntu22.04

# FIX: the original installed python3.11 but then used pip3/python3, which
# on Ubuntu 22.04 resolve to the DEFAULT Python 3.10 — dependencies were
# installed into 3.10 and the 3.11 package was dead weight. Install only the
# distro default toolchain and use it consistently.
# DEBIAN_FRONTEND is set inline (not via ENV) so it does not leak into runtime.
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
    curl \
    python3 \
    python3-pip \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Dependency layer first for cache reuse
COPY requirements-gpu.txt .
RUN pip3 install --no-cache-dir -r requirements-gpu.txt \
    -i https://pypi.tuna.tsinghua.edu.cn/simple

COPY . .

# Unbuffered output so inference logs stream immediately
ENV PYTHONUNBUFFERED=1

# Documentation only; publish via `docker run -p` / compose
EXPOSE 8000

# Exec form keeps uvicorn as PID 1 (clean SIGTERM handling).
# Running the GPU container requires nvidia-container-toolkit on the host.
CMD ["python3", "-m", "uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]

Docker Compose — 完整 LLM 服务栈

yaml
# docker-compose.yml — full LLM service stack
# NOTE: the top-level `version:` key is obsolete in Compose v2 and has been
# removed (Compose emits a warning and ignores it).

services:
  # LLM API service
  llm-api:
    build: .
    # NOTE(review): a fixed host port prevents `--scale llm-api=3` — only one
    # replica can bind 8000. Remove this mapping and route through nginx when
    # scaling.
    ports:
      - "8000:8000"
    environment:
      - DASHSCOPE_API_KEY=${DASHSCOPE_API_KEY}
      # FIX: Redis is started with --requirepass, so the URL must carry the
      # password (redis://:password@host:port) or every connection is refused.
      - REDIS_URL=redis://:${REDIS_PASSWORD}@redis:6379
      # FIX: Mongo is initialised with a root user/password; an unauthenticated
      # URL fails once auth is enabled.
      - MONGODB_URL=mongodb://admin:${MONGO_PASSWORD}@mongo:27017
    depends_on:
      - redis
      - mongo
    volumes:
      - ./models:/app/models  # mount local model weights
    restart: unless-stopped
    networks:
      - llm-network

  # Redis (session store + cache)
  redis:
    image: redis:7-alpine
    # SECURITY: no host port mapping — Redis is reachable only on the internal
    # compose network; publishing 6379 exposes the password to brute force.
    volumes:
      - redis_data:/data
    command: redis-server --appendonly yes --requirepass ${REDIS_PASSWORD}
    restart: unless-stopped
    networks:
      - llm-network

  # MongoDB (conversation persistence)
  mongo:
    image: mongo:7
    # SECURITY: no host port mapping — internal network access only.
    environment:
      - MONGO_INITDB_ROOT_USERNAME=admin
      - MONGO_INITDB_ROOT_PASSWORD=${MONGO_PASSWORD}
    volumes:
      - mongo_data:/data/db
    restart: unless-stopped
    networks:
      - llm-network

  # Nginx reverse proxy (the single public entry point)
  nginx:
    image: nginx:alpine
    ports:
      - "80:80"
      - "443:443"
    volumes:
      # Mounted as the MAIN config — the file must contain events{}/http{} blocks
      - ./nginx.conf:/etc/nginx/nginx.conf:ro
      - ./ssl:/etc/nginx/ssl:ro
    depends_on:
      - llm-api
    restart: unless-stopped
    networks:
      - llm-network

  # Chroma vector database
  chroma:
    image: chromadb/chroma:latest
    ports:
      - "8001:8000"
    volumes:
      - chroma_data:/chroma/chroma
    restart: unless-stopped
    networks:
      - llm-network

volumes:
  redis_data:
  mongo_data:
  chroma_data:

networks:
  llm-network:
    driver: bridge

Nginx 配置

nginx
# nginx.conf — complete MAIN configuration (compose mounts this file at
# /etc/nginx/nginx.conf).
# FIX: `upstream` and `server` are http-context directives. The original file
# contained them at top level, so nginx refused to start with
# "'upstream' directive is not allowed here". A main config must provide the
# events{} and http{} wrappers.
events {
    worker_connections 1024;
}

http {
    upstream llm_api {
        # Docker's embedded DNS resolves the compose service name
        server llm-api:8000;
    }

    server {
        listen 80;
        server_name your-domain.com;

        # SSE streaming endpoint: buffering/caching must be off so tokens
        # are flushed to the client as they are generated
        location /chat/stream {
            proxy_pass http://llm_api;
            proxy_http_version 1.1;
            # Clear Connection so keep-alive to the upstream works
            proxy_set_header Connection '';
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_buffering off;
            proxy_cache off;
            # allow long-running generation streams
            proxy_read_timeout 300s;
            chunked_transfer_encoding on;
        }

        # All other API traffic
        location / {
            proxy_pass http://llm_api;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_read_timeout 120s;
        }
    }
}

常用命令

bash
# Build images and start the whole stack in the background
docker compose up -d --build

# Follow the API service logs
docker compose logs -f llm-api

# Open a shell in the running container for debugging
docker compose exec llm-api bash

# Scale out to 3 API instances.
# NOTE(review): this fails while the service publishes a fixed host port
# (e.g. "8000:8000") — only one replica can bind it. Drop the host mapping
# and route through the nginx proxy instead.
docker compose up -d --scale llm-api=3

# Stop and remove containers, networks AND named volumes.
# WARNING: -v deletes the data volumes — Redis/Mongo/Chroma data is lost.
docker compose down -v

.env 文件

bash
# .env — local development secrets, consumed by docker compose variable
# substitution. NEVER commit this file: add it to .gitignore AND .dockerignore
# (the Dockerfile's `COPY . .` would otherwise bake it into the image).
DASHSCOPE_API_KEY=sk-xxx
REDIS_PASSWORD=your-redis-password
MONGO_PASSWORD=your-mongo-password
SERVICE_API_KEY=your-service-key

安全注意

  • 永远不要将 .env 文件提交到 Git(加入 .gitignore);同时将其加入 .dockerignore,否则 Dockerfile 中的 `COPY . .` 会把密钥打包进镜像层
  • 生产环境使用 Docker Secrets 或 Kubernetes Secrets 管理密钥
  • GPU 容器需要安装 nvidia-container-toolkit

本站内容由 褚成志 整理编写,仅供学习参考