I'm working on a sentiment analysis project and I'm running into the issue "ValueError: Expected input batch_size (168) to match target batch_size (1)." in loss = criterion(output.view(-1, output.shape[-1]), hidden_emotion.repeat(output.shape[1], 1).view(-1)) I've tried a lot of things and couldn't solve it, does anyone know how to fix it
My code is as follows:
nlp = spacy.load("en_core_web_sm")
def tokenize(text):
return [token.text for token in nlp.tokenizer(text)]
TEXT = Field(sequential=True, tokenize=tokenize, lower=True)
LABEL = Field(sequential=False, use_vocab=False, dtype=torch.float)
with open("AI_daliydia.json", "r") as file:
data = json.load(file)
dialogs = []
hide_emotion = []
emotions = []
for row in data["rows"]:
dialog = row["row"]["dialog"]
hide_emotion_dialog = row["row"]["hide_emotion"]
emotion_dialog = row["row"]["emotion"]
dialogs.extend(dialog)
hide_emotion.extend(hide_emotion_dialog)
emotions.extend(emotion_dialog)
fields = [("text", TEXT), ("hide_emotion", LABEL), ("emotion", LABEL)]
examples = [torchtext.data.Example.fromlist([dialogs[i], hide_emotion[i], emotions[i]], fields) for i in
range(len(dialogs))]
dataset = torchtext.data.Dataset(examples, fields)
train_data, val_data, test_data = dataset.split(split_ratio=[0.7, 0.2, 0.1])
BATCH_SIZE = 185
train_iterator, val_iterator, test_iterator = BucketIterator.splits(
(train_data, val_data, test_data),
batch_size=BATCH_SIZE,
sort_key=lambda x: len(x.text),
sort_within_batch=False
)
TEXT.build_vocab(train_data, vectors=None, unk_init=None, min_freq=1,
specials_first=False, vectors_cache=None, specials=['<unk>', '<pad>', '<sos>', '<eos>'])
class SentimentAnalysisModel(nn.Module):
def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, num_layers=1, bidirectional=False):
super(SentimentAnalysisModel, self).__init__()
self.embedding = nn.Embedding(vocab_size, embedding_dim)
self.rnn = nn.LSTM(embedding_dim, hidden_dim, num_layers=num_layers, bidirectional=bidirectional,
batch_first=True)
self.fc1 = nn.Linear(hidden_dim * 2 if bidirectional else hidden_dim, 128)
self.fc2 = nn.Linear(128, output_dim)
def forward(self, text):
embedded = self.embedding(text)
output, _ = self.rnn(embedded)
output = self.fc1(output)
output = F.relu(output)
output = self.fc2(output)
return output
model = SentimentAnalysisModel(vocab_size=len(TEXT.vocab),
embedding_dim=100,
hidden_dim=256,
output_dim=8,
bidirectional=True)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())
def train(model, iterator, optimizer, criterion):
model.train()
epoch_loss = 0
for batch in iterator:
text = batch.text
hidden_emotion = batch.hide_emotion.view(-1, 1).long()
optimizer.zero_grad()
output = model(text)
print(f"output.shape: {output.shape}")
print(f"hidden_emotion.shape: {hidden_emotion.shape}")
loss = criterion(output.view(-1, output.shape[-1]), hidden_emotion.repeat(output.shape[1], 1).view(-1))
loss.backward()
optimizer.step()
epoch_loss += loss.item()
return epoch_loss / len(iterator)
def evaluate(model, iterator, criterion):
model.eval()
epoch_loss = 0
with torch.no_grad():
for batch in iterator:
text = batch.text
hidden_emotion = batch.hide_emotion.view(-1, 1).long()
output = model(text)
loss = criterion(output.view(-1, output.shape[-1]), hidden_emotion.squeeze())
epoch_loss += loss.item()
return epoch_loss / len(iterator)
N_EPOCHS = 10
for epoch in range(N_EPOCHS):
train_loss = train(model, train_iterator, optimizer, criterion)
val_loss = evaluate(model, val_iterator, criterion)
print(f'Epoch: {epoch + 1:02}')
print(f'\tTrain Loss: {train_loss:.3f} | Train PPL: {math.exp(train_loss):7.3f}')
print(f'\t Val. Loss: {val_loss:.3f} | Val. PPL: {math.exp(val_loss):7.3f}')
test_loss = evaluate(model, test_iterator, criterion)
print(f'Test Loss: {test_loss:.3f} | Test PPL: {math.exp(test_loss):7.3f}')
Full error log:
output.shape: torch.Size([30, 7, 8])
hidden_emotion.shape: torch.Size([7, 1])
Traceback (most recent call last):
File "c:\Users\user\OneDrive\桌面\Zu_bot\AI\Zu_AI(en2).py", line 136, in <module>
train_loss = train(model, train_iterator, optimizer, criterion)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "c:\Users\user\OneDrive\桌面\Zu_bot\AI\Zu_AI(en2).py", line 104, in train
loss = criterion(output.view(-1, output.shape[-1]), hidden_emotion.repeat(output.shape[1], 1).view(-1))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\user\AppData\Local\Programs\Python\Python311\Lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\user\AppData\Local\Programs\Python\Python311\Lib\site-packages\torch\nn\modules\loss.py", line 1174, in forward
return F.cross_entropy(input, target, weight=self.weight,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\user\AppData\Local\Programs\Python\Python311\Lib\site-packages\torch\nn\functional.py", line 3029, in cross_entropy
return torch._C._nn.cross_entropy_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index, label_smoothing)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ValueError: Expected input batch_size (210) to match target batch_size (49).
Please let me know how to fix this. Thanks, Vinny