I have been trying to fine-tune a T5 model on a dataset, but whether I follow the step-by-step guide provided by Hugging Face or use my own Trainer class, the code fails and always returns the same error when calling trainer.train(), seemingly while computing the loss.
My code is based on the step-by-step guide found on the Hugging Face hub here.
So far this is my code:
import transformers

# only show errors from the transformers library
transformers.logging.set_verbosity_error()
from datasets import load_dataset
canard_train_augm = load_dataset("gaussalgo/Canard_Wiki-augmented", split="train")
canard_test_augm = load_dataset("gaussalgo/Canard_Wiki-augmented", split="test")
from transformers import AutoTokenizer
model_name = "t5-small"
tokenizer = AutoTokenizer.from_pretrained(model_name)
def preprocess_function(examples):
    # build the encoder input from the question and its supporting context,
    # and pass the gold rewrite as the second text argument to the tokenizer
    combined_input = examples["Question"] + ": " + examples["true_contexts"]
    return tokenizer(combined_input, examples["Rewrite"], max_length=512, padding="max_length", truncation=True, return_tensors="pt")
tokenized_train = canard_train_augm.map(preprocess_function)
tokenized_test = canard_test_augm.map(preprocess_function)
from transformers import DataCollatorForSeq2Seq
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model_name)
import evaluate
metric = evaluate.load("sacrebleu")
import numpy as np
def postprocess_text(preds, labels):
    preds = [pred.strip() for pred in preds]
    labels = [[label.strip()] for label in labels]
    return preds, labels
def compute_metrics(eval_preds):
    preds, labels = eval_preds
    if isinstance(preds, tuple):
        preds = preds[0]
    decoded_preds = tokenizer.batch_decode(preds, skip_special_tokens=True)
    # replace the -100 values used to mask the loss before decoding the labels
    labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)
    decoded_preds, decoded_labels = postprocess_text(decoded_preds, decoded_labels)
    result = metric.compute(predictions=decoded_preds, references=decoded_labels)
    result = {"bleu": result["score"]}
    prediction_lens = [np.count_nonzero(pred != tokenizer.pad_token_id) for pred in preds]
    result["gen_len"] = np.mean(prediction_lens)
    result = {k: round(v, 4) for k, v in result.items()}
    return result
from transformers import AutoModelForSeq2SeqLM, Seq2SeqTrainingArguments, Seq2SeqTrainer
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
training_args = Seq2SeqTrainingArguments(
output_dir="wtf",
evaluation_strategy="epoch",
learning_rate=2e-5,
per_device_train_batch_size=8,
per_device_eval_batch_size=8,
weight_decay=0.01,
save_total_limit=3,
num_train_epochs=2,
predict_with_generate=True,
fp16=True,
)
trainer = Seq2SeqTrainer(
model=model,
args=training_args,
train_dataset=tokenized_train,
eval_dataset=tokenized_test,
tokenizer=tokenizer,
data_collator=data_collator,
compute_metrics=compute_metrics,
)
trainer.train()
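In case it is relevant, this is how I have been inspecting a single mapped example (just a quick sanity check I added myself, not something from the guide):

# hypothetical debugging snippet: peek at what the mapped dataset actually stores
sample = tokenized_train[0]
print(sample.keys())
print(type(sample["input_ids"]), len(sample["input_ids"]))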
I tried several variations, including my own customized Trainer class, but I always ended up with the same issue, even when running the exact code from the Hugging Face step-by-step guide.
The error happens when calling trainer.train() and returns the following:
ValueError: too many values to unpack (expected 2)
I followed the exact same format as the documentation, and I believe something goes wrong when the loss is computed, but I have not been able to put my finger on it. Any help would be greatly appreciated.
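One thing I am second-guessing is how I tokenize the targets. From my (possibly wrong) reading of the seq2seq examples, the labels should be produced with the tokenizer's text_target argument rather than passed as a second positional text, and without return_tensors inside map, letting the data collator pad and convert to tensors per batch. This is only a sketch of what I would try next under that assumption (preprocess_function_v2 is my own name, not from the guide):

def preprocess_function_v2(examples):
    # same encoder input as before: the question plus its supporting context
    combined_input = examples["Question"] + ": " + examples["true_contexts"]
    # tokenize the rewrite as the target so the tokenizer fills in "labels";
    # padding and tensor conversion are left to DataCollatorForSeq2Seq at batch time
    return tokenizer(combined_input, text_target=examples["Rewrite"], max_length=512, truncation=True)

I am not sure whether this is actually related to the ValueError, so confirmation either way would help.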