I have the Python 3 code below. I'm using it to PEFT fine-tune a FLAN-T5 model with LoRA to summarize text. I first reduced the precision with bitsandbytes so that the model can fit on my single GPU. When I evaluate the original model with ROUGE against the human baseline summaries, and then compare it to each of the PEFT adapter models I train below, they all get exactly the same ROUGE scores (a sketch of the evaluation step is at the end, after the training code). I'm wondering: since I've reduced the precision of the model weights so much with bitsandbytes, does PEFT fine-tuning have no effect? Can you see any other reason PEFT would have no effect for the range of rank, epochs, and max_steps I've used below?
code:
from datasets import load_dataset
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, GenerationConfig, TrainingArguments, Trainer
import torch
import time
import evaluate
import pandas as pd
import numpy as np
import datetime
import logging
# ### Load Dataset and LLM
huggingface_dataset_name = "knkarthick/dialogsum"
dataset = load_dataset(huggingface_dataset_name)
dataset  # displays the splits in a notebook; a no-op in a plain script
# Hugging Face API key (kept in a local config module)
from config import api_key
# loading pretrained model
# set quantization configuration to load large model with less GPU memory
# this requires the `bitsandbytes` library
from torch import cuda, bfloat16
import transformers
device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'
bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,                # store weights in 4-bit
    bnb_4bit_quant_type='nf4',        # NormalFloat4 data type
    bnb_4bit_use_double_quant=True,   # also quantize the quantization constants
    bnb_4bit_compute_dtype=bfloat16   # run matmuls in bfloat16
)
model_id = 'google/flan-t5-base'
hf_auth = api_key
model_config = transformers.AutoConfig.from_pretrained(
model_id,
use_auth_token=hf_auth
)
original_model = AutoModelForSeq2SeqLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    config=model_config,
    quantization_config=bnb_config,
    device_map='auto',
    use_auth_token=hf_auth,
    cache_dir='/home/username/stuff/username_storage/LLM/weights/huggingface/hub/',
    torch_dtype=torch.bfloat16,
)
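# sanity check that the 4-bit load actually shrank the model;
# get_memory_footprint() reports the bytes used by the weights
print(f'memory footprint: {original_model.get_memory_footprint() / 1e6:.1f} MB')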
tokenizer = AutoTokenizer.from_pretrained(model_id)
index = 200
dialogue = dataset['test'][index]['dialogue']
summary = dataset['test'][index]['summary']
prompt = f"""
Summarize the following conversation.
{dialogue}
Summary:
"""
inputs = tokenizer(prompt, return_tensors='pt')
output = tokenizer.decode(
original_model.generate(
inputs["input_ids"].cuda(),
max_new_tokens=200,
)[0],
skip_special_tokens=True
)
dash_line = '-' * 100  # separator line for printing
# updated 11/1/23 to ensure using gpu
def tokenize_function(example):
    start_prompt = 'Summarize the following conversation.\n\n'
    end_prompt = '\n\nSummary: '
    prompt = [start_prompt + dialogue + end_prompt for dialogue in example["dialogue"]]
    example['input_ids'] = tokenizer(prompt, padding="max_length", truncation=True, return_tensors="pt").input_ids.cuda()
    example['labels'] = tokenizer(example["summary"], padding="max_length", truncation=True, return_tensors="pt").input_ids.cuda()
    return example
# The dataset contains 3 different splits: train, validation, and test.
# tokenize_function handles all data across all splits in batches.
tokenized_datasets = dataset.map(tokenize_function, batched=True)
tokenized_datasets = tokenized_datasets.remove_columns(['id', 'topic', 'dialogue', 'summary',])
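# quick look at the processed dataset: each split should now contain
# only the model inputs (input_ids and labels)
print(tokenized_datasets)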
def pipeline_bnb_peft_lora(rank,
name,
train_epochs,
max_steps,
original_model,
data):
from peft import LoraConfig, get_peft_model, TaskType
    lora_config = LoraConfig(
        r=rank,                           # LoRA rank
        lora_alpha=32,                    # scaling factor for the LoRA updates
        target_modules=["q", "v"],        # T5 attention query/value projections
        lora_dropout=0.05,
        bias="none",
        task_type=TaskType.SEQ_2_SEQ_LM   # FLAN-T5 is a seq2seq model
    )
# Add LoRA adapter layers/parameters to the original LLM to be trained.
peft_model = get_peft_model(original_model,
lora_config)
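    # sanity check that LoRA attached trainable parameters: only a small
    # fraction of the total parameter count should be trainable
    peft_model.print_trainable_parameters()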
    # ### Train PEFT Adapter
    # Define training arguments and create a `Trainer` instance.
    output_dir = '/home/username/stuff/username_storage/LLM/PEFT/train_args/' + name
peft_training_args = TrainingArguments(
output_dir=output_dir,
per_device_train_batch_size=1,
learning_rate=1e-3, # Higher learning rate than full fine-tuning.
num_train_epochs=train_epochs, # updated 12/19/23 train on higher number of epochs
max_steps=max_steps,
fp16=True
)
peft_trainer = Trainer(
model=peft_model,
args=peft_training_args,
train_dataset=data,
)
peft_trainer.train()
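    # diagnostic: peft initializes every lora_B matrix to zeros, so if each
    # lora_B is still all zeros after training, the adapter is a no-op and
    # will produce the same outputs (and ROUGE scores) as the base model
    for n, p in peft_model.named_parameters():
        if 'lora_B' in n and p.detach().abs().sum().item() == 0:
            logger.warning(name + ': ' + n + ' is still all zeros after training')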
    peft_model_path = "/home/username/stuff/username_storage/LLM/PEFT/" + name
peft_trainer.model.save_pretrained(peft_model_path)
tokenizer.save_pretrained(peft_model_path)
# add a timestamp to the log name
ts = datetime.datetime.now().isoformat()
# logging.basicConfig(filename='example.log', level=logging.DEBUG)
logging.basicConfig(filename='/mnt/data/sda/user_storage/username_storage/LLM/error_logs' + ts + '.log',
                    level=logging.DEBUG,
                    format='%(asctime)s %(levelname)s %(name)s %(message)s')
logger = logging.getLogger(__name__)
rank_list=[4,8,16,32]
epoch_list=[1,5,10,20]
max_step_list=[1,5,10,50]
# test rank
for x in rank_list:
    run_name = 'testrank011224_' + str(x)
    try:
        pipeline_bnb_peft_lora(rank=x,
                               name=run_name,
                               train_epochs=1,
                               max_steps=1,
                               original_model=original_model,
                               data=tokenized_datasets["train"])
    except Exception as err:
        logger.error('pipeline_bnb_peft_lora ' + run_name + ' failed: ' + str(err))
# test epoch
for x in epoch_list:
    run_name = 'testepoch011224_' + str(x)
    try:
        pipeline_bnb_peft_lora(rank=4,
                               name=run_name,
                               train_epochs=x,
                               max_steps=1,
                               original_model=original_model,
                               data=tokenized_datasets["train"])
    except Exception as err:
        logger.error('pipeline_bnb_peft_lora ' + run_name + ' failed: ' + str(err))
# test max_steps
for x in max_step_list:
    run_name = 'testmaxsteps011224_' + str(x)
    try:
        pipeline_bnb_peft_lora(rank=4,
                               name=run_name,
                               train_epochs=1,
                               max_steps=x,
                               original_model=original_model,
                               data=tokenized_datasets["train"])
    except Exception as err:
        logger.error('pipeline_bnb_peft_lora ' + run_name + ' failed: ' + str(err))
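For reference, the ROUGE comparison described at the top looks roughly like this. It's a minimal sketch rather than my exact evaluation script: the adapter path and the 10-example test sample are placeholders, and it assumes the `evaluate` library's rouge metric and peft's `PeftModel.from_pretrained`.

rouge = evaluate.load('rouge')

def summarize(model, dialogue):
    prompt = f'Summarize the following conversation.\n\n{dialogue}\n\nSummary: '
    input_ids = tokenizer(prompt, return_tensors='pt').input_ids.cuda()
    output_ids = model.generate(input_ids=input_ids, max_new_tokens=200)[0]
    return tokenizer.decode(output_ids, skip_special_tokens=True)

dialogues = dataset['test'][:10]['dialogue']
human_baseline = dataset['test'][:10]['summary']

# score the base model first, before any adapter has been attached
original_preds = [summarize(original_model, d) for d in dialogues]
print('original:', rouge.compute(predictions=original_preds, references=human_baseline))

# then load one saved adapter (placeholder path) on top of the base model
from peft import PeftModel
adapter_path = '/home/username/stuff/username_storage/LLM/PEFT/testrank011224_4'
peft_model = PeftModel.from_pretrained(original_model, adapter_path)
peft_preds = [summarize(peft_model, d) for d in dialogues]
print('peft:', rouge.compute(predictions=peft_preds, references=human_baseline))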