I am trying to build a LoRA model for sentiment analysis as part of an academic project. However, when training the model, I keep getting an error related to computing the loss. I have tried several different ways of computing the loss, but nothing has worked.
Any help would be greatly appreciated. The GitHub link is here: https://github.com/therrief87/udacity/blob/main/v4%20Udacity%20Lightweight%20Fine%20Tuning%20(GPT2)%20Smaller%20Training%20Data%20(1).ipynb
The code for training and computing the metrics is also below:
training_args = TrainingArguments(
    output_dir='C:/Users/felix/Downloads',
    evaluation_strategy='epoch',
    save_strategy='epoch',
    learning_rate=0.2,
    per_device_train_batch_size=18,
    per_device_eval_batch_size=18,
    num_train_epochs=1,
    load_best_model_at_end=True,
    weight_decay=0.1,
    remove_unused_columns=False,
    # label_names="labels"
)
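For context, my understanding of the TrainingArguments documentation is that with load_best_model_at_end=True and no metric_for_best_model set, the Trainer defaults to "loss" and therefore expects an "eval_loss" key in the evaluation metrics when it saves a checkpoint. One variation I have been wondering about is naming the metric explicitly, so that checkpoint selection uses the accuracy returned by my compute_metrics (just below). This is only a minimal sketch of the two extra arguments, and I have not confirmed that it avoids the error:

from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir='C:/Users/felix/Downloads',
    evaluation_strategy='epoch',
    save_strategy='epoch',
    load_best_model_at_end=True,
    metric_for_best_model='accuracy',  # Trainer will look up 'eval_accuracy' in the metrics
    greater_is_better=True,            # higher accuracy means a better checkpoint
)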
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = torch.from_numpy(predictions)  # Convert predictions to a tensor
    labels = torch.from_numpy(labels).long()  # Convert labels to a tensor
    loss = nn.CrossEntropyLoss()(predictions, labels)  # Calculate the evaluation loss
    accuracy = (torch.argmax(predictions, axis=1) == labels).float().mean()  # Calculate the accuracy
    # Print the metrics dictionary for debugging
    metrics = {"eval_loss": loss.item(), "accuracy": accuracy.item()}
    print("Metrics:", metrics)
    return metrics
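As far as I understand, the Trainer itself prefixes whatever keys compute_metrics returns with "eval_" (unless a key already starts with it), so a plain-key version of the same function should end up being reported as eval_loss / eval_accuracy. A simplified sketch with the same logic as above, not what is currently in the notebook:

import torch
import torch.nn as nn  # same imports the notebook already uses

def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = torch.from_numpy(predictions)
    labels = torch.from_numpy(labels).long()
    loss = nn.CrossEntropyLoss()(predictions, labels)
    accuracy = (torch.argmax(predictions, dim=1) == labels).float().mean()
    # Plain keys; the Trainer should prefix these to eval_loss and eval_accuracy
    return {"loss": loss.item(), "accuracy": accuracy.item()}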
trainer = Trainer(
    model=lora_model,
    args=training_args,
    train_dataset=random_train_samples,
    eval_dataset=new_dataset['test'],
    tokenizer=tokenizer,
    data_collator=DataCollatorWithPadding(tokenizer=tokenizer),
    compute_metrics=compute_metrics,
)

trainer.train()
The error is below:
KeyError Traceback (most recent call last)
Cell In[45], line 10
1 trainer = Trainer(
2 model=lora_model,
3 args = training_args,
(...)
8 compute_metrics = compute_metrics
9 )
---> 10 trainer.train()
File ~\anaconda3\Lib\site-packages\transformers\trainer.py:1555, in Trainer.train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)
1553 hf_hub_utils.enable_progress_bars()
1554 else:
-> 1555 return inner_training_loop(
1556 args=args,
1557 resume_from_checkpoint=resume_from_checkpoint,
1558 trial=trial,
1559 ignore_keys_for_eval=ignore_keys_for_eval,
1560 )
File ~\anaconda3\Lib\site-packages\transformers\trainer.py:1944, in Trainer._inner_training_loop(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)
1941 self.control.should_training_stop = True
1943 self.control = self.callback_handler.on_epoch_end(args, self.state, self.control)
-> 1944 self._maybe_log_save_evaluate(tr_loss, model, trial, epoch, ignore_keys_for_eval)
1946 if DebugOption.TPU_METRICS_DEBUG in self.args.debug:
1947 if is_torch_tpu_available():
1948 # tpu-comment: Logging debug metrics for PyTorch/XLA (compile, execute times, ops, etc.)
File ~\anaconda3\Lib\site-packages\transformers\trainer.py:2267, in Trainer._maybe_log_save_evaluate(self, tr_loss, model, trial, epoch, ignore_keys_for_eval)
2264 self.lr_scheduler.step(metrics[metric_to_check])
2266 if self.control.should_save:
-> 2267 self._save_checkpoint(model, trial, metrics=metrics)
2268 self.control = self.callback_handler.on_save(self.args, self.state, self.control)
File ~\anaconda3\Lib\site-packages\transformers\trainer.py:2383, in Trainer._save_checkpoint(self, model, trial, metrics)
2381 if not metric_to_check.startswith("eval_"):
2382 metric_to_check = f"eval_{metric_to_check}"
-> 2383 metric_value = metrics[metric_to_check]
2385 operator = np.greater if self.args.greater_is_better else np.less
2386 if (
2387 self.state.best_metric is None
2388 or self.state.best_model_checkpoint is None
2389 or operator(metric_value, self.state.best_metric)
2390 ):
KeyError: 'eval_loss'
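In case it is useful, a quick way to see which keys actually end up in the metrics dictionary would be a standalone evaluation with the same trainer (sketch only, I have not included its output here):

metrics = trainer.evaluate()
print(sorted(metrics.keys()))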
Thank you for any help.