I am using hugging face Roberta to classify multi-class dataset, but now I got an error “Target size (torch.Size([8])) must be the same as input size (torch.Size([8, 15]))”. I am not sure what should I do now, could anyone provide some suggestions? Below is my codes and you can also find the error message in the bottom:

from datasets import load_dataset
from transformers import RobertaTokenizerFast, Trainer, DataCollatorWithPadding


dataset = load_dataset('csv', data_files=data_path,split = 'train')
train_testvalid = dataset.train_test_split(test_size=0.2)
test_valid = train_testvalid['test'].train_test_split(test_size=0.2)

checkpoint = "roberta-base"
tokenizer = RobertaTokenizerFast.from_pretrained(checkpoint)


def tokenization(example):
    return tokenizer(example['text'], truncation=True,max_length = 256, padding = True)


train_data = test_valid['train']
test_data= test_valid['test']

train_data = train_data.map(tokenization, batched = True)
test_data = test_data.map(tokenization, batched = True)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)




train_data = train_data.remove_columns(["Unnamed: 0", "text"])
test_data = test_data.remove_columns(["Unnamed: 0", "text"])
train_data.set_format("torch")
test_data.set_format("torch")
train_data.column_names



from transformers import TrainingArguments

training_args = TrainingArguments("test-trainer")




from transformers import RobertaForSequenceClassification

model = RobertaForSequenceClassification.from_pretrained(checkpoint, num_labels=15)



from sklearn.metrics import accuracy_score, precision_recall_fscore_support
def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='micro')
    acc = accuracy_score(labels, preds)
    return {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall
    }




from transformers import Trainer

trainer = Trainer(
    model,
    training_args,
    compute_metrics=compute_metrics,
    train_dataset=train_data,
    eval_dataset=test_data,
    data_collator=data_collator,
    tokenizer=tokenizer,
)

trainer.train()

ValueError                                Traceback (most recent call last)

<ipython-input-9-3435b262f1ae> in <module>
----> 1 trainer.train()

8 frames

/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py in binary_cross_entropy_with_logits(input, target, weight, size_average, reduce, reduction, pos_weight)
   3146 
   3147     if not (target.size() == input.size()):
-> 3148         raise ValueError("Target size ({}) must be the same as input size ({})".format(target.size(), input.size()))
   3149 
   3150     return torch.binary_cross_entropy_with_logits(input, target, weight, pos_weight, reduction_enum)

ValueError: Target size (torch.Size([8])) must be the same as input size (torch.Size([8, 15]))
1

There are 1 best solutions below

0
On

I got the same error. For me the issue was that my "label" column was a Float-Type. Converting it to Int solved this problem for me:

df.label = df.label.astype(int)