I have the Python 3 code below. I'm using it to PEFT fine-tune a FLAN-T5 model with LoRA to summarize text. I first reduced the precision with bitsandbytes so that the model can fit on my single GPU. When I evaluate the original model with ROUGE against the human baseline summaries, and then compare it to each of the PEFT adapter models I train below, they all get exactly the same ROUGE scores (a sketch of the evaluation step is at the end, after the training code). I'm wondering: since I've reduced the precision of the model weights so much with bitsandbytes, does PEFT fine-tuning have no effect? Can you see any other reason PEFT would have no effect for the range of rank, epochs, and max_steps I've used below?
code:
from datasets import load_dataset
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, GenerationConfig, TrainingArguments, Trainer
import torch
import time
import evaluate
import pandas as pd
import numpy as np
import datetime
import logging
# ### Load Dataset and LLM
huggingface_dataset_name = "knkarthick/dialogsum"
dataset = load_dataset(huggingface_dataset_name)
dataset  # displays the splits in a notebook; a no-op in a plain script
# Hugging Face API key (kept in a local config module)
from config import api_key
# loading pretrained model
# set quantization configuration to load large model with less GPU memory
# this requires the `bitsandbytes` library
from torch import cuda, bfloat16
import transformers
device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'
bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,                # store weights in 4-bit
    bnb_4bit_quant_type='nf4',        # NormalFloat4 data type
    bnb_4bit_use_double_quant=True,   # also quantize the quantization constants
    bnb_4bit_compute_dtype=bfloat16   # run matmuls in bfloat16
)
model_id = 'google/flan-t5-base'
hf_auth = api_key
model_config = transformers.AutoConfig.from_pretrained(
model_id,
use_auth_token=hf_auth
)
original_model = AutoModelForSeq2SeqLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    config=model_config,
    quantization_config=bnb_config,
    device_map='auto',
    use_auth_token=hf_auth,
    cache_dir='/home/username/stuff/username_storage/LLM/weights/huggingface/hub/',
    torch_dtype=torch.bfloat16,
)
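# sanity check that the 4-bit load actually shrank the model;
# get_memory_footprint() reports the bytes used by the weights
print(f'memory footprint: {original_model.get_memory_footprint() / 1e6:.1f} MB')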
tokenizer = AutoTokenizer.from_pretrained(model_id)
index = 200
dialogue = dataset['test'][index]['dialogue']
summary = dataset['test'][index]['summary']
prompt = f"""
Summarize the following conversation.
{dialogue}
Summary:
"""
inputs = tokenizer(prompt, return_tensors='pt')
output = tokenizer.decode(
original_model.generate(
inputs["input_ids"].cuda(),
max_new_tokens=200,
)[0],
skip_special_tokens=True
)
dash_line = '-' * 100  # separator line for printing
# updated 11/1/23 to ensure using gpu
def tokenize_function(example):
    start_prompt = 'Summarize the following conversation.\n\n'
    end_prompt = '\n\nSummary: '
    prompt = [start_prompt + dialogue + end_prompt for dialogue in example["dialogue"]]
    example['input_ids'] = tokenizer(prompt, padding="max_length", truncation=True, return_tensors="pt").input_ids.cuda()
    example['labels'] = tokenizer(example["summary"], padding="max_length", truncation=True, return_tensors="pt").input_ids.cuda()
    return example
# The dataset contains 3 different splits: train, validation, and test.
# tokenize_function handles all data across all splits in batches.
tokenized_datasets = dataset.map(tokenize_function, batched=True)
tokenized_datasets = tokenized_datasets.remove_columns(['id', 'topic', 'dialogue', 'summary',])
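# quick look at the processed dataset: each split should now contain
# only the model inputs (input_ids and labels)
print(tokenized_datasets)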
def pipeline_bnb_peft_lora(rank,
name,
train_epochs,
max_steps,
original_model,
data):
from peft import LoraConfig, get_peft_model, TaskType
    lora_config = LoraConfig(
        r=rank,                           # LoRA rank
        lora_alpha=32,                    # scaling factor for the LoRA updates
        target_modules=["q", "v"],        # T5 attention query/value projections
        lora_dropout=0.05,
        bias="none",
        task_type=TaskType.SEQ_2_SEQ_LM   # FLAN-T5 is a seq2seq model
    )
# Add LoRA adapter layers/parameters to the original LLM to be trained.
peft_model = get_peft_model(original_model,
lora_config)
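    # sanity check that LoRA attached trainable parameters: only a small
    # fraction of the total parameter count should be trainable
    peft_model.print_trainable_parameters()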
    # ### Train PEFT Adapter
    # Define training arguments and create a `Trainer` instance.
    output_dir = '/home/username/stuff/username_storage/LLM/PEFT/train_args/' + name
peft_training_args = TrainingArguments(
output_dir=output_dir,
per_device_train_batch_size=1,
learning_rate=1e-3, # Higher learning rate than full fine-tuning.
num_train_epochs=train_epochs, # updated 12/19/23 train on higher number of epochs
max_steps=max_steps,
fp16=True
)
peft_trainer = Trainer(
model=peft_model,
args=peft_training_args,
train_dataset=data,
)
peft_trainer.train()
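    # diagnostic: peft initializes every lora_B matrix to zeros, so if each
    # lora_B is still all zeros after training, the adapter is a no-op and
    # will produce the same outputs (and ROUGE scores) as the base model
    for n, p in peft_model.named_parameters():
        if 'lora_B' in n and p.detach().abs().sum().item() == 0:
            logger.warning(name + ': ' + n + ' is still all zeros after training')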
    peft_model_path = "/home/username/stuff/username_storage/LLM/PEFT/" + name
peft_trainer.model.save_pretrained(peft_model_path)
tokenizer.save_pretrained(peft_model_path)
# add a timestamp to the log name
ts = datetime.datetime.now().isoformat()
# logging.basicConfig(filename='example.log', level=logging.DEBUG)
logging.basicConfig(filename='/mnt/data/sda/user_storage/username_storage/LLM/error_logs' + ts + '.log',
                    level=logging.DEBUG,
                    format='%(asctime)s %(levelname)s %(name)s %(message)s')
logger = logging.getLogger(__name__)
rank_list=[4,8,16,32]
epoch_list=[1,5,10,20]
max_step_list=[1,5,10,50]
# test rank
for x in rank_list:
    run_name = 'testrank011224_' + str(x)
    try:
        pipeline_bnb_peft_lora(rank=x,
                               name=run_name,
                               train_epochs=1,
                               max_steps=1,
                               original_model=original_model,
                               data=tokenized_datasets["train"])
    except Exception as err:
        logger.error('pipeline_bnb_peft_lora ' + run_name + ' failed: ' + str(err))
# test epoch
for x in epoch_list:
    run_name = 'testepoch011224_' + str(x)
    try:
        pipeline_bnb_peft_lora(rank=4,
                               name=run_name,
                               train_epochs=x,
                               max_steps=1,
                               original_model=original_model,
                               data=tokenized_datasets["train"])
    except Exception as err:
        logger.error('pipeline_bnb_peft_lora ' + run_name + ' failed: ' + str(err))
# test max_steps
for x in max_step_list:
    run_name = 'testmaxsteps011224_' + str(x)
    try:
        pipeline_bnb_peft_lora(rank=4,
                               name=run_name,
                               train_epochs=1,
                               max_steps=x,
                               original_model=original_model,
                               data=tokenized_datasets["train"])
    except Exception as err:
        logger.error('pipeline_bnb_peft_lora ' + run_name + ' failed: ' + str(err))
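For reference, the ROUGE comparison described at the top looks roughly like this. It's a minimal sketch rather than my exact evaluation script: the adapter path and the 10-example test sample are placeholders, and it assumes the `evaluate` library's rouge metric and peft's `PeftModel.from_pretrained`.

rouge = evaluate.load('rouge')

def summarize(model, dialogue):
    prompt = f'Summarize the following conversation.\n\n{dialogue}\n\nSummary: '
    input_ids = tokenizer(prompt, return_tensors='pt').input_ids.cuda()
    output_ids = model.generate(input_ids=input_ids, max_new_tokens=200)[0]
    return tokenizer.decode(output_ids, skip_special_tokens=True)

dialogues = dataset['test'][:10]['dialogue']
human_baseline = dataset['test'][:10]['summary']

# score the base model first, before any adapter has been attached
original_preds = [summarize(original_model, d) for d in dialogues]
print('original:', rouge.compute(predictions=original_preds, references=human_baseline))

# then load one saved adapter (placeholder path) on top of the base model
from peft import PeftModel
adapter_path = '/home/username/stuff/username_storage/LLM/PEFT/testrank011224_4'
peft_model = PeftModel.from_pretrained(original_model, adapter_path)
peft_preds = [summarize(peft_model, d) for d in dialogues]
print('peft:', rouge.compute(predictions=peft_preds, references=human_baseline))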