Using PEFT after bitsandbytes seems to have no effect on LLM


I have the Python 3 code below. I'm using it to PEFT fine-tune a flan-t5 model with LoRA to summarize text. I first reduced the precision with bitsandbytes so that the model fits on my single GPU. When I evaluate the original model with ROUGE against the human-baseline summaries and then compare it to each of the PEFT adapter models trained below, they all get exactly the same ROUGE scores. Could it be that, because I've reduced the precision of the model weights so much with bitsandbytes, PEFT fine-tuning has no effect? Can you see any other reason PEFT would have no effect for the range of rank, epochs, and max_steps I've used below? (A sketch of how I load the adapters back for evaluation is at the end, after the main script.)
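
For reference, this is roughly how I score the summaries. My exact evaluation script isn't included below, so treat this as a sketch: model_summaries and human_baseline_summaries are placeholder lists of generated and reference summaries built from dataset['test'].

import evaluate

rouge = evaluate.load("rouge")

# model_summaries: summaries generated by the model under test (placeholder name)
# human_baseline_summaries: reference summaries taken from dataset['test'] (placeholder name)
results = rouge.compute(
    predictions=model_summaries,
    references=human_baseline_summaries,
    use_stemmer=True,
)
print(results)  # rouge1 / rouge2 / rougeL / rougeLsum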

Full fine-tuning code:

from datasets import load_dataset
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, GenerationConfig, TrainingArguments, Trainer
import torch
import time
import evaluate
import pandas as pd
import numpy as np
import datetime
import logging


# ### Load Dataset and LLM


huggingface_dataset_name = "knkarthick/dialogsum"

dataset = load_dataset(huggingface_dataset_name)

dataset




# need huggingface apikey
from config import api_key

apikey=api_key


# loading pretrained model 

# set quantization configuration to load large model with less GPU memory
# this requires the `bitsandbytes` library

from torch import cuda, bfloat16
import transformers

device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'


bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=bfloat16
)

model_name='google/flan-t5-base'

model_id='google/flan-t5-base'

hf_auth = apikey
model_config = transformers.AutoConfig.from_pretrained(
    model_id,
    use_auth_token=hf_auth
)





original_model = AutoModelForSeq2SeqLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    config=model_config,
    quantization_config=bnb_config,
    device_map='auto',
    use_auth_token=hf_auth,
    cache_dir='/home/username/stuff/username_storage/LLM/weights/huggingface/hub/',
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_name)





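# quick zero-shot sanity check: summarize one test example with the quantized base model before any fine-tuning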
index = 200

dialogue = dataset['test'][index]['dialogue']
summary = dataset['test'][index]['summary']

prompt = f"""
Summarize the following conversation.

{dialogue}

Summary:
"""

inputs = tokenizer(prompt, return_tensors='pt')
output = tokenizer.decode(
    original_model.generate(
        inputs["input_ids"].cuda(),
        max_new_tokens=200,
    )[0],
    skip_special_tokens=True
)

dash_line = '-'.join('' for x in range(100))


# updated 11/1/23 to ensure using gpu
def tokenize_function(example):
    start_prompt = 'Summarize the following conversation.\n\n'
    end_prompt = '\n\nSummary: '
    prompt = [start_prompt + dialogue + end_prompt for dialogue in example["dialogue"]]
    example['input_ids'] = tokenizer(prompt, padding="max_length", truncation=True, return_tensors="pt").input_ids.cuda()
    example['labels'] = tokenizer(example["summary"], padding="max_length", truncation=True, return_tensors="pt").input_ids.cuda()

    return example

# The dataset actually contains 3 diff splits: train, validation, test.
# The tokenize_function code is handling all data across all splits in batches.
tokenized_datasets = dataset.map(tokenize_function, batched=True)
tokenized_datasets = tokenized_datasets.remove_columns(['id', 'topic', 'dialogue', 'summary',])




def pipeline_bnb_peft_lora(rank,
                           name,
                           train_epochs,
                           max_steps,
                           original_model,
                           data):
    
    
    from peft import LoraConfig, get_peft_model, TaskType

    lora_config = LoraConfig(
        r=rank,  # Rank
        lora_alpha=32,
        target_modules=["q", "v"],
        lora_dropout=0.05,
        bias="none",
        task_type=TaskType.SEQ_2_SEQ_LM  # FLAN-T5
    )


    # Add LoRA adapter layers/parameters to the original LLM to be trained.



    peft_model = get_peft_model(original_model,
                                lora_config)
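    # only the injected LoRA matrices on the q and v projections are trainable;
    # the quantized base weights stay frozen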
    
    # ### Train PEFT Adapter
    #
    # Define training arguments and create `Trainer` instance.


    output_dir = f'/home/username/stuff/username_storage/LLM/PEFT/train_args/{name}'

    peft_training_args = TrainingArguments(
        output_dir=output_dir,

        per_device_train_batch_size=1, 
        learning_rate=1e-3, # Higher learning rate than full fine-tuning.

        num_train_epochs=train_epochs, # updated 12/19/23 train on higher number of epochs
        max_steps=max_steps,
        fp16=True
    )

    peft_trainer = Trainer(
        model=peft_model,
        args=peft_training_args,
        train_dataset=data,
    )



    peft_trainer.train()

    peft_model_path="/home/username/stuff/username_storage/LLM/PEFT/"+name

    peft_trainer.model.save_pretrained(peft_model_path)
    tokenizer.save_pretrained(peft_model_path)



# adding a timestamp to logname
ts=str(datetime.datetime.now().isoformat())  

# logging.basicConfig(filename='example.log',level=logging.DEBUG)
logging.basicConfig(filename='/mnt/data/sda/user_storage/username_storage/LLM/error_logs'+ts+'.log', level=logging.DEBUG, 
                    format='%(asctime)s %(levelname)s %(name)s %(message)s')

logger=logging.getLogger(__name__)


rank_list=[4,8,16,32]
epoch_list=[1,5,10,20]
max_step_list=[1,5,10,50]


# test rank
for x in rank_list:

    name = 'testrank011224_' + str(x)

    try:
        pipeline_bnb_peft_lora(rank=x,
                               name=name,
                               train_epochs=1,
                               max_steps=1,
                               original_model=original_model,
                               data=tokenized_datasets["train"])

    except Exception as err:
        logger.error('pipeline_bnb_peft_lora ' + name + ' failed: ' + str(err))



# test epoch
for x in epoch_list:

    name = 'testepoch011224_' + str(x)

    try:
        pipeline_bnb_peft_lora(rank=4,
                               name=name,
                               train_epochs=x,
                               max_steps=1,
                               original_model=original_model,
                               data=tokenized_datasets["train"])

    except Exception as err:
        logger.error('pipeline_bnb_peft_lora ' + name + ' failed: ' + str(err))


# test max_steps
for x in max_step_list:

    name = 'testmaxsteps011224_' + str(x)

    try:
        pipeline_bnb_peft_lora(rank=4,
                               name=name,
                               train_epochs=1,
                               max_steps=x,
                               original_model=original_model,
                               data=tokenized_datasets["train"])

    except Exception as err:
        logger.error('pipeline_bnb_peft_lora ' + name + ' failed: ' + str(err))
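
In case it matters, this is roughly how I load a saved adapter back for evaluation against the original model. Again a sketch, not my exact script: the adapter path shown is one of the names generated above, and PeftModel.from_pretrained is the peft call I use.

from peft import PeftModel

# reload the quantized base model the same way as above, then attach one trained adapter
base_model = AutoModelForSeq2SeqLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map='auto',
    torch_dtype=torch.bfloat16,
)

peft_model_path = "/home/username/stuff/username_storage/LLM/PEFT/testrank011224_4"
eval_peft_model = PeftModel.from_pretrained(base_model, peft_model_path, is_trainable=False)

# summaries generated here are what I score with ROUGE against the human baseline
output = tokenizer.decode(
    eval_peft_model.generate(inputs["input_ids"].cuda(), max_new_tokens=200)[0],
    skip_special_tokens=True,
)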