# LoRA / QLoRA: Low-Resource Fine-Tuning
## LoRA vs QLoRA
| Approach | VRAM needed (7B model) | Training speed | Quality |
|---|---|---|---|
| Full fine-tuning | ~80 GB | slow | best |
| LoRA (fp16) | ~14 GB | medium | good |
| QLoRA (4-bit) | ~6 GB | slower | close to LoRA |
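Most of the VRAM gap in the table comes from optimizer and gradient state: only the small adapter matrices train. Here is a back-of-the-envelope sketch of the trainable-parameter count for the LoRA configuration used below (r=16 over all seven linear projections); the layer sizes are assumptions taken from the public Qwen2-7B config, so substitute your own if you use another base:

```python
# Rough LoRA trainable-parameter count for Qwen2-7B
# (hidden 3584, MLP 18944, GQA k/v width 512, 28 layers: from the model config)
r = 16
hidden, inter, kv, layers = 3584, 18944, 512, 28

projections = [
    (hidden, hidden),  # q_proj
    (hidden, kv),      # k_proj
    (hidden, kv),      # v_proj
    (hidden, hidden),  # o_proj
    (hidden, inter),   # gate_proj
    (hidden, inter),   # up_proj
    (inter, hidden),   # down_proj
]
# Each LoRA pair adds r * (d_in + d_out) parameters per module
total = layers * sum(r * (d_in + d_out) for d_in, d_out in projections)
print(f"trainable LoRA params: {total / 1e6:.1f}M")  # ~40M, well under 1% of 7B
```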
## Complete QLoRA Workflow
```python
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TrainingArguments
from peft import LoraConfig, prepare_model_for_kbit_training
from trl import SFTTrainer
from datasets import Dataset
import torch
# 1. Quantization config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",              # 4-bit NormalFloat from the QLoRA paper
    bnb_4bit_compute_dtype=torch.bfloat16,  # matmuls are computed in bf16
    bnb_4bit_use_double_quant=True,         # quantize the quantization constants too
)
# 2. Load the base model in 4-bit
model_name = "Qwen/Qwen2-7B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)
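# prepare_model_for_kbit_training freezes the quantized weights, upcasts the
# norm layers to fp32 for numerical stability, and enables gradient checkpointing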
model = prepare_model_for_kbit_training(model)
# 3. LoRA config
peft_config = LoraConfig(
    r=16,                          # adapter rank
    lora_alpha=32,                 # effective scaling = lora_alpha / r = 2
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj"],  # all linear projections
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)
# 4. Prepare the dataset
def format_chat(example):
    """Render one sample into Qwen2's ChatML format."""
    return {
        "text": f"<|im_start|>system\nYou are a financial assistant.<|im_end|>\n"
                f"<|im_start|>user\n{example['instruction']}<|im_end|>\n"
                f"<|im_start|>assistant\n{example['output']}<|im_end|>"
    }
raw_data = [
    {"instruction": "Explain the non-performing loan ratio", "output": "The non-performing loan ratio is..."},
    {"instruction": "Analyze credit risk", "output": "Credit risk mainly includes..."},
]
dataset = Dataset.from_list(raw_data).map(format_chat)
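# Note: the Qwen2 tokenizer ships a ChatML chat template, so
# tokenizer.apply_chat_template would produce the same layout;
# the manual f-string above just keeps the format explicit.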
# 5. Train
training_args = TrainingArguments(
    output_dir="./qlora-output",
    num_train_epochs=3,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=8,   # effective batch size = 2 * 8 = 16 per device
    learning_rate=2e-4,
    bf16=True,
    logging_steps=10,
    save_strategy="epoch",
    warmup_ratio=0.05,
    lr_scheduler_type="cosine",
    report_to="none",
)
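# Note: this call signature matches older trl releases (~0.7/0.8). Recent trl
# moves dataset_text_field / max_seq_length into SFTConfig and replaces the
# tokenizer argument with processing_class; adjust if you are on a newer version.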
trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
    peft_config=peft_config,   # SFTTrainer wraps the model with the LoRA adapters itself
    dataset_text_field="text",
    max_seq_length=2048,
    tokenizer=tokenizer,
)
trainer.train()
trainer.save_model("./qlora-weights")   # saves only the LoRA adapter, not the full model
```

## Merge Weights and Export
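Merging folds the low-rank updates into a full-precision copy of the base weights. One caveat: training saw the nf4-quantized base, so the merged fp16 model can differ very slightly from running the adapter on top of the quantized model; in practice the gap is usually negligible.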
```python
from peft import PeftModel
# Reload the base model in fp16 (reuses model_name, torch, and AutoModelForCausalLM from above)
base_model = AutoModelForCausalLM.from_pretrained(
    model_name, torch_dtype=torch.float16, device_map="auto"
)
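# The adapters are merged into full-precision weights, which is why the base is
# reloaded in fp16 here rather than in 4-bit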
# Load the LoRA adapters and merge them into the base
model = PeftModel.from_pretrained(base_model, "./qlora-weights")
merged_model = model.merge_and_unload()  # returns a plain transformers model with the adapters folded in
# Save the merged model and tokenizer
merged_model.save_pretrained("./merged-model")
tokenizer.save_pretrained("./merged-model")
# The merged folder can be served directly with vLLM:
# python -m vllm.entrypoints.openai.api_server --model ./merged-model
```
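As a quick sanity check before serving, the merged model can be reloaded with plain transformers and prompted once. A minimal sketch, assuming the merged model was saved to ./merged-model as above (the prompt and generation settings are arbitrary):

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

tok = AutoTokenizer.from_pretrained("./merged-model")
model = AutoModelForCausalLM.from_pretrained(
    "./merged-model", torch_dtype="auto", device_map="auto"
)

# The saved tokenizer keeps Qwen2's ChatML chat template, so apply_chat_template
# reproduces the format used during fine-tuning
messages = [{"role": "user", "content": "Explain the non-performing loan ratio"}]
inputs = tok.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)

out = model.generate(inputs, max_new_tokens=128)
print(tok.decode(out[0][inputs.shape[-1]:], skip_special_tokens=True))
```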