Kubernetes — LLM 服务编排
LLM API 服务部署
yaml
# deployment.yaml
# Deployment for the LLM API service (3 replicas, probed on /health).
apiVersion: apps/v1
kind: Deployment
metadata:
  name: llm-api
  namespace: finance-ai
spec:
  replicas: 3
  selector:
    matchLabels:
      app: llm-api
  template:
    metadata:
      labels:
        app: llm-api
    spec:
      containers:
        - name: llm-api
          image: your-registry/llm-api:v1.0.0
          ports:
            - containerPort: 8000
          env:
            # API key is injected from the llm-secrets Secret, never inlined
            - name: DASHSCOPE_API_KEY
              valueFrom:
                secretKeyRef:
                  name: llm-secrets
                  key: dashscope-api-key
            - name: REDIS_URL
              value: "redis://redis-service:6379"
          resources:
            requests:
              memory: "512Mi"
              cpu: "250m"
            limits:
              memory: "2Gi"
              cpu: "1000m"
          # Restart the container if /health stops responding
          livenessProbe:
            httpGet:
              path: /health
              port: 8000
            initialDelaySeconds: 30
            periodSeconds: 10
          # Gate traffic until /health responds
          readinessProbe:
            httpGet:
              path: /health
              port: 8000
            initialDelaySeconds: 5
            periodSeconds: 5
---
# Cluster-internal Service fronting the llm-api pods.
apiVersion: v1
kind: Service
metadata:
  name: llm-api-service
  namespace: finance-ai
spec:
  selector:
    app: llm-api
  ports:
    - port: 80          # service port
      targetPort: 8000  # containerPort of llm-api
  type: ClusterIP

Secrets 管理
bash
# Create the Secret that the Deployment references (llm-secrets).
# NOTE(review): sk-xxx / your-password are placeholders — substitute real
# values; do not commit them to version control.
kubectl create secret generic llm-secrets \
--from-literal=dashscope-api-key=sk-xxx \
--from-literal=redis-password=your-password \
-n finance-ai

HPA 自动扩缩容
yaml
# hpa.yaml
# Autoscales the llm-api Deployment between 2 and 10 replicas
# on CPU (70%) and memory (80%) utilization.
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: llm-api-hpa
  namespace: finance-ai
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: llm-api
  minReplicas: 2
  maxReplicas: 10
  metrics:
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: 80

Ingress 配置
yaml
# ingress.yaml
# NGINX ingress routing api.finance-ai.com to llm-api-service.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: llm-api-ingress
  namespace: finance-ai
  annotations:
    # Long read timeout for slow LLM responses
    nginx.ingress.kubernetes.io/proxy-read-timeout: "300"
    # Buffering must be off so SSE streaming is delivered incrementally
    nginx.ingress.kubernetes.io/proxy-buffering: "off"
spec:
  rules:
    - host: api.finance-ai.com
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: llm-api-service
                port:
                  number: 80

常用命令
bash
# Deploy the manifests into the finance-ai namespace
kubectl apply -f deployment.yaml -n finance-ai
# Inspect pod status and stream logs
kubectl get pods -n finance-ai
kubectl logs -f deployment/llm-api -n finance-ai
# Rolling update to a new image tag
kubectl set image deployment/llm-api llm-api=your-registry/llm-api:v1.1.0 -n finance-ai
# Manual scale — NOTE(review): the HPA above may override this replica count
kubectl scale deployment llm-api --replicas=5 -n finance-ai