# OpenAI SDK 规范

## 统一接口设计

OpenAI SDK 已成为 LLM API 的事实标准,国内主流模型(Qwen、DeepSeek、Kimi 等)以及本地 Ollama 均提供兼容接口,只需切换 `base_url` 与 `api_key` 即可复用同一套代码。
python
from openai import OpenAI, AsyncOpenAI
# 各平台配置
PROVIDERS = {
"openai": {
"api_key": "sk-xxx",
"base_url": None # 默认
},
"qwen": {
"api_key": "sk-xxx",
"base_url": "https://dashscope.aliyuncs.com/compatible-mode/v1"
},
"deepseek": {
"api_key": "sk-xxx",
"base_url": "https://api.deepseek.com"
},
"kimi": {
"api_key": "sk-xxx",
"base_url": "https://api.moonshot.cn/v1"
},
"ollama": {
"api_key": "ollama",
"base_url": "http://localhost:11434/v1"
}
}
def get_client(provider: str) -> OpenAI:
config = PROVIDERS[provider]
return OpenAI(**{k: v for k, v in config.items() if v is not None})完整 API 参考
python
client = get_client("qwen")
# Chat Completions
response = client.chat.completions.create(
model="qwen-turbo",
messages=[
{"role": "system", "content": "系统提示"},
{"role": "user", "content": "用户消息"},
{"role": "assistant", "content": "助手回复"}, # 多轮历史
{"role": "user", "content": "继续提问"}
],
temperature=0.7,
max_tokens=2048,
stream=False,
tools=[...],
response_format={"type": "json_object"}
)
# 响应结构
print(response.id) # 请求ID
print(response.model) # 实际使用的模型
print(response.choices[0].message.content) # 回复内容
print(response.choices[0].finish_reason) # stop/length/tool_calls
print(response.usage.prompt_tokens) # 输入token数
print(response.usage.completion_tokens) # 输出token数
print(response.usage.total_tokens) # 总token数Embeddings API
python
response = client.embeddings.create(
model="text-embedding-v3",
input=["文本1", "文本2"], # 支持批量
dimensions=512 # 可选维度
)
embeddings = [item.embedding for item in response.data]
print(f"向量维度: {len(embeddings[0])}")异步客户端
python
import asyncio
from openai import AsyncOpenAI
async_client = AsyncOpenAI(
api_key="sk-xxx",
base_url="https://dashscope.aliyuncs.com/compatible-mode/v1"
)
async def batch_process(queries: list[str]) -> list[str]:
"""并发处理多个查询"""
tasks = [
async_client.chat.completions.create(
model="qwen-turbo",
messages=[{"role": "user", "content": q}]
)
for q in queries
]
responses = await asyncio.gather(*tasks)
return [r.choices[0].message.content for r in responses]
# 并发处理 10 个查询
queries = [f"分析第{i}个企业的风险" for i in range(10)]
results = asyncio.run(batch_process(queries))