References: https://github.com/huggingface/peft
/hiyouga/LLaMA-Factory
1. Fine-tuning ChatGLM2 with the LLaMA-Factory web UI
A similar tool is Firefly (流萤), which focuses on Chinese fine-tuning. To fine-tune ChatGLM2 you need the complete, up-to-date model files, not a quantized model. In testing, training only runs smoothly with more than roughly 20 GB of GPU memory; on a single T4 the run hit out-of-memory mid-training and 4-bit quantization had to be enabled (see the 4-bit loading sketch at the end of this section).
1) Download the code from GitHub
2) Launch the web UI:
CUDA_VISIBLE_DEVICES=0 python src/train_web.py
3) Download the latest ChatGLM2 model files: /THUDM/chatglm2-6b/tree/main
4) Configure the parameters and click Start to begin training
GPU usage:
The UI estimates that 3 epochs will take about 20 hours:
Multi-GPU training: /hiyouga/LLaMA-Factory#distributed-training
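For reference, the 4-bit quantization mentioned above corresponds to loading the model through bitsandbytes at the transformers level. Below is a minimal sketch, assuming bitsandbytes and accelerate are installed; LLaMA-Factory exposes the same thing as a quantization-bit option in the training UI.
# Minimal sketch: load ChatGLM2 in 4-bit via bitsandbytes to reduce GPU memory usage.
# Assumes the bitsandbytes and accelerate packages are installed.
import torch
from transformers import AutoModel, AutoTokenizer, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,  # compute in fp16 while weights stay 4-bit
)
tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True)
model = AutoModel.from_pretrained(
    "THUDM/chatglm2-6b",
    trust_remote_code=True,
    quantization_config=bnb_config,
    device_map="auto",
)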
2. LoRA fine-tuning on your own dataset
Reference: /hai4321/article/details/132072382
With a small dataset, training only takes a few minutes, but the results are not great; raising the learning rate and training for more epochs helps.
self_cognition.json download: /hiyouga/LLaMA-Factory/blob/main/data/self_cognition.json (the other bundled datasets are under /hiyouga/LLaMA-Factory/blob/main/data/)
Instruction format:
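Each record is an instruction/input/output object. A minimal illustration is shown below; the sample values are mine, mirroring the <NAME>/<AUTHOR> placeholders that the conversion script afterwards rewrites.
# Illustrative instruction-format records (values are made up; the real file uses
# <NAME>/<AUTHOR> placeholders which the script below replaces).
sample_records = [
    {
        "instruction": "你好,请介绍一下你自己",
        "input": "",
        "output": "您好,我是 <NAME>,一个由 <AUTHOR> 开发的 AI 助手。",
    },
]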
#!/usr/bin/python
# -*- coding: UTF-8 -*-
# Read self_cognition.json, replace the name placeholders, and write the converted file
import json

# Replace the <NAME>/<AUTHOR> placeholders in each record
def process_data(item):
    # print(type(item), item)
    # item['instruction'] = item['instruction'].replace(' ', '')
    # item['input'] = item['input'].replace(' ', '')
    item['output'] = item['output'].replace(' <NAME>', '小**能').replace('<AUTHOR>', '**智能')
    return item

# Read the JSON list from the self_cognition file
with open(r"C:\Users\loong\Downloads\self_cognition.json", 'r', encoding='utf-8') as f:
    data = json.load(f)

new_json = []
for item in data:
    process_item = process_data(item)
    new_json.append(process_item)
    # Alternatively, write one JSON object per line:
    # json_file.write(json.dumps(process_item, ensure_ascii=False) + '\n')

json_str = json.dumps(new_json, indent=4, ensure_ascii=False)  # indent pretty-prints the output
print(json_str)
with open(r'C:\Users\loong\Downloads\self_cognition_train2.json', 'w', encoding='utf-8') as json_file:
    json_file.write(json_str)
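For the converted file to show up as a selectable dataset in LLaMA-Factory, it also has to be registered in data/dataset_info.json inside the repository. A rough sketch follows; the entry name and column mapping are illustrative, so compare them with the entries already present in that file.
# Sketch: register the converted file in LLaMA-Factory's data/dataset_info.json.
# The entry name and column mapping below are illustrative; copy the converted
# file into the data/ folder first.
import json

info_path = "data/dataset_info.json"  # inside the LLaMA-Factory checkout
with open(info_path, "r", encoding="utf-8") as f:
    dataset_info = json.load(f)

dataset_info["self_cognition_new"] = {
    "file_name": "self_cognition_train2.json",
    "columns": {"prompt": "instruction", "query": "input", "response": "output"},
}

with open(info_path, "w", encoding="utf-8") as f:
    json.dump(dataset_info, f, indent=2, ensure_ascii=False)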
Loading the fine-tuned model for inference:
Set the template value to chatglm2, otherwise the output is garbled. The LoRA weights are saved under the saves folder: saves/ChatGLM2-6B-Chat/lora/test
python src/cli_demo.py \
--model_name_or_path /mnt/data/chatglm/chatglm2-6b_new \
--template chatglm2 \
--finetuning_type lora \
--checkpoint_dir /mnt/data**/LLaMA-Factory-main/saves/ChatGLM2-6B-Chat/lora/test
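If you would rather ship a single merged model than load the adapter every time, PEFT can fold the LoRA weights back into the base weights. A minimal sketch using the paths from above follows; the output directory name is mine.
# Minimal sketch: merge the LoRA adapter into the ChatGLM2 base weights and save a
# standalone model. Paths reuse the ones above; the output directory name is arbitrary.
from transformers import AutoModel, AutoTokenizer
from peft import PeftModel

base_path = "/mnt/data/chatglm/chatglm2-6b_new"
adapter_path = "saves/ChatGLM2-6B-Chat/lora/test"

tokenizer = AutoTokenizer.from_pretrained(base_path, trust_remote_code=True)
model = AutoModel.from_pretrained(base_path, trust_remote_code=True)
model = PeftModel.from_pretrained(model, adapter_path)
model = model.merge_and_unload()  # fold the LoRA deltas into the base weights
model.save_pretrained("chatglm2-6b-lora-merged")
tokenizer.save_pretrained("chatglm2-6b-lora-merged")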
3. A simple LoRA fine-tuning example
LoRA is usually used to fine-tune large language models; the example below applies it to an ordinary NLP text model. You can install the peft library, which already implements LoRA.
import argparse
import os

import torch
from torch.optim import AdamW
from torch.utils.data import DataLoader
from peft import (
    get_peft_config,
    get_peft_model,
    get_peft_model_state_dict,
    set_peft_model_state_dict,
    LoraConfig,
    PeftType,
    PrefixTuningConfig,
    PromptEncoderConfig,
)
import evaluate
from datasets import load_dataset
from transformers import AutoModelForSequenceClassification, AutoTokenizer, get_linear_schedule_with_warmup, set_seed
from tqdm import tqdm

batch_size = 32
model_name_or_path = "roberta-large"
task = "mrpc"
peft_type = PeftType.LORA
device = "cuda"
num_epochs = 10

# Decoder-only models need left padding; encoder models pad on the right
if any(k in model_name_or_path for k in ("gpt", "opt", "bloom")):
    padding_side = "left"
else:
    padding_side = "right"

## Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, padding_side=padding_side)
if getattr(tokenizer, "pad_token_id") is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id

## Load the fine-tuning data (GLUE MRPC) and its metric
datasets = load_dataset("glue", task)
metric = evaluate.load("glue", task)

def tokenize_function(examples):
    # max_length=None => use the model max length (it's actually the default)
    outputs = tokenizer(examples["sentence1"], examples["sentence2"], truncation=True, max_length=None)
    return outputs

tokenized_datasets = datasets.map(
    tokenize_function,
    batched=True,
    remove_columns=["idx", "sentence1", "sentence2"],
)

# We also rename the 'label' column to 'labels', which is the expected name for labels
# by the models of the transformers library
tokenized_datasets = tokenized_datasets.rename_column("label", "labels")

def collate_fn(examples):
    return tokenizer.pad(examples, padding="longest", return_tensors="pt")

# Instantiate dataloaders.
train_dataloader = DataLoader(tokenized_datasets["train"], shuffle=True, collate_fn=collate_fn, batch_size=batch_size)
eval_dataloader = DataLoader(
    tokenized_datasets["validation"], shuffle=False, collate_fn=collate_fn, batch_size=batch_size
)

## Configure LoRA and wrap the base model with it
peft_config = LoraConfig(task_type="SEQ_CLS", inference_mode=False, r=8, lora_alpha=16, lora_dropout=0.1)
lr = 3e-4
model = AutoModelForSequenceClassification.from_pretrained(model_name_or_path, return_dict=True)
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

## Optimizer, scheduler, and the training loop
optimizer = AdamW(params=model.parameters(), lr=lr)

# Instantiate scheduler
lr_scheduler = get_linear_schedule_with_warmup(
    optimizer=optimizer,
    num_warmup_steps=0.06 * (len(train_dataloader) * num_epochs),
    num_training_steps=(len(train_dataloader) * num_epochs),
)

# model.to(device)  ## run on GPU
for epoch in range(num_epochs):
    model.train()
    for step, batch in enumerate(tqdm(train_dataloader)):
        # batch.to(device)  ## run on GPU
        outputs = model(**batch)
        loss = outputs.loss
        loss.backward()
        optimizer.step()
        lr_scheduler.step()
        optimizer.zero_grad()

    model.eval()
    for step, batch in enumerate(tqdm(eval_dataloader)):
        # batch.to(device)  ## run on GPU
        with torch.no_grad():
            outputs = model(**batch)
        predictions = outputs.logits.argmax(dim=-1)
        predictions, references = predictions, batch["labels"]
        metric.add_batch(
            predictions=predictions,
            references=references,
        )

    eval_metric = metric.compute()
    print(f"epoch {epoch}:", eval_metric)

## Save the LoRA adapter and tokenizer
tokenizer.save_pretrained('roberta-large-lore')
model.save_pretrained('roberta-large-lore')
Training progress:
Saved model files:
Loading the LoRA adapter for inference:
import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from tqdm import tqdm

peft_model_id = "roberta-large-lore"  ## the folder the LoRA adapter was saved to above
config = PeftConfig.from_pretrained(peft_model_id)
inference_model = AutoModelForSequenceClassification.from_pretrained(config.base_model_name_or_path)
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)

# Load the LoRA model
inference_model = PeftModel.from_pretrained(inference_model, peft_model_id)
# inference_model.to(device)  ## run on GPU
inference_model.eval()

# eval_dataloader and metric come from the training script above
for step, batch in enumerate(tqdm(eval_dataloader)):
    # batch.to(device)  ## run on GPU
    with torch.no_grad():
        outputs = inference_model(**batch)
    predictions = outputs.logits.argmax(dim=-1)
    predictions, references = predictions, batch["labels"]
    metric.add_batch(
        predictions=predictions,
        references=references,
    )

eval_metric = metric.compute()
print(eval_metric)
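Besides re-running the full evaluation set, the loaded model can be queried on a single sentence pair directly. A quick illustration with two made-up sentences; for GLUE MRPC, label 1 means the pair is a paraphrase.
# Quick single-pair check with the loaded LoRA model (illustrative sentences).
sent1 = "The company posted strong quarterly results."
sent2 = "Quarterly earnings for the company were strong."
enc = tokenizer(sent1, sent2, return_tensors="pt")
with torch.no_grad():
    logits = inference_model(**enc).logits
print(logits.argmax(dim=-1).item())  # 1 = paraphrase, 0 = not, per GLUE MRPC labels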
Loading example 2:
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
from peft import PeftModel

tokenizer = AutoTokenizer.from_pretrained("baichuan-inc/baichuan-7B", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained("baichuan-inc/baichuan-7B", device_map="auto", trust_remote_code=True)
model = PeftModel.from_pretrained(model, "hiyouga/baichuan-7b-sft")
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

query = "晚上睡不着怎么办"
inputs = tokenizer(["<human>:{}\n<bot>:".format(query)], return_tensors="pt")
inputs = inputs.to("cuda")
generate_ids = model.generate(**inputs, max_new_tokens=256, streamer=streamer)
Example 3:
Reference: /hiyouga/LLaMA-Factory/blob/cae47379079ff811aa385c297481a27020a8da6b/scripts/loftq_init.py#L13
from peft import AutoPeftModelForCausalLM, PeftModel
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

tokenizer = AutoTokenizer.from_pretrained("/ai/loong/Qwen1.5-7B-Chat")
model = AutoModelForCausalLM.from_pretrained("/ai/loong/Qwen1.5-7B-Chat", trust_remote_code=True, torch_dtype="auto")
model = PeftModel.from_pretrained(model, "/ai/loong/output/checkpoint-300")

# %%timeit -n 3 -r 1  (Jupyter cell magic used to time generation; remove it outside a notebook)
model.eval()
inputs = tokenizer("你是谁", return_tensors="pt")
outputs = model.generate(input_ids=inputs["input_ids"], max_new_tokens=500)
print(tokenizer.batch_decode(outputs, skip_special_tokens=True)[0])
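The snippet above imports AutoPeftModelForCausalLM but never uses it. As an alternative, it can read the base model recorded in the adapter config and attach the adapter in a single call; a sketch with the same paths:
# Alternative sketch: AutoPeftModelForCausalLM reads adapter_config.json in the
# checkpoint, loads the base model referenced there, and attaches the adapter.
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer

model = AutoPeftModelForCausalLM.from_pretrained("/ai/loong/output/checkpoint-300", torch_dtype="auto")
tokenizer = AutoTokenizer.from_pretrained("/ai/loong/Qwen1.5-7B-Chat")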
Reference: https:///p/638058537
from peft import PeftModel
from transformers import GenerationConfig, LlamaForCausalLM, LlamaTokenizer
import torch

# create tokenizer
base_model = "timdettmers/guanaco-33b-merged"
tokenizer = LlamaTokenizer.from_pretrained(base_model)

# base model
model = LlamaForCausalLM.from_pretrained(
    base_model,
    torch_dtype=torch.float16,
    device_map="auto",
)

# LORA PEFT adapters
adapter_model = "lyogavin/Anima33B"
model = PeftModel.from_pretrained(
    model,
    adapter_model,
    # torch_dtype=torch.float16,
)
model.eval()

# prompt
prompt = "中国的首都是哪里?"
inputs = tokenizer(prompt, return_tensors="pt")

# Generate
generate_ids = model.generate(**inputs, max_new_tokens=30)
print(tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0])
# output: '中国的首都是哪里?\n中国的首都是北京。\n北京位于中国北部,是中国历史悠'