I used a pretrained RoBERTa model, and this is my RoBERTa model class. The pretrained model is https://huggingface.co/rinna/japanese-roberta-base, and I installed sentencepiece:
!pip install sentencepiece
import torch
from transformers import RobertaForMaskedLM

class RoBERTaClass(torch.nn.Module):
    def __init__(self, pretrained, drop_rate, output_size):
        super().__init__()
        self.roberta = RobertaForMaskedLM.from_pretrained(pretrained)
        self.drop = torch.nn.Dropout(drop_rate)
        self.fc = torch.nn.Linear(768, output_size)

    def forward(self, ids, mask):
        _, out = self.roberta(ids, attention_mask=mask, return_dict=False)
        out = self.fc(self.drop(out))
        return out
/usr/local/lib/python3.7/dist-packages/transformers/tokenization_utils_base.py:2269: FutureWarning: The `pad_to_max_length` argument is deprecated and will be removed in a future version, use `padding=True` or `padding='longest'` to pad to the longest sequence in the batch, or use `padding='max_length'` to pad to a max length. In this case, you can give a specific length with `max_length` (e.g. `max_length=45`) or leave max_length to None to pad to the maximal input size of the model (e.g. 512 for Bert).
FutureWarning,
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-20-49ec9d027d5c> in <module>()
12
13 # Train the model
---> 14 log = train_model(dataset_train, dataset_valid, BATCH_SIZE, model, criterion, optimizer, NUM_EPOCHS, device=device)
15
16 # Compute the accuracy
<ipython-input-16-7edee8da91ec> in forward(self, ids, mask)
7
8 def forward(self, ids, mask):
----> 9 _, out = self.roberta(ids, attention_mask=mask,return_dict=False)
10 out = self.fc(self.drop(out))
11 return out
ValueError: not enough values to unpack (expected 2, got 1)
What should I do? I don't know where I should fix it.
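For reference, `RobertaForMaskedLM` with `return_dict=False` returns a 1-tuple holding only the masked-LM vocabulary logits, so `_, out = ...` has no second value to unpack. Below is a minimal sketch of one possible fix, assuming the goal is classification on a pooled sentence embedding; it swaps in the bare encoder `RobertaModel`, which with `return_dict=False` returns `(last_hidden_state, pooler_output)`:

import torch
from transformers import RobertaModel

class RoBERTaClass(torch.nn.Module):
    def __init__(self, pretrained, drop_rate, output_size):
        super().__init__()
        # The bare encoder returns (last_hidden_state, pooler_output)
        # when return_dict=False, so the two-value unpack works
        self.roberta = RobertaModel.from_pretrained(pretrained)
        self.drop = torch.nn.Dropout(drop_rate)
        self.fc = torch.nn.Linear(768, output_size)

    def forward(self, ids, mask):
        _, out = self.roberta(ids, attention_mask=mask, return_dict=False)
        out = self.fc(self.drop(out))
        return out

Note that when loading a masked-LM checkpoint this way, the pooler weights may be freshly initialized, so taking the [CLS] hidden state instead (index `[:, 0]` of the first return value) is a common alternative.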
And after I changed the model like this, I ran into another problem:
class RoBERTaClass(torch.nn.Module):
    def __init__(self, pretrained, drop_rate, output_size):
        super().__init__()
        self.roberta = RobertaForMaskedLM.from_pretrained(pretrained)
        self.drop = torch.nn.Dropout(drop_rate)
        self.fc = torch.nn.Linear(768, output_size)  # 768 dims to match RoBERTa's output

    def forward(self, ids, mask):
        out = self.roberta(ids, attention_mask=mask, return_dict=False)
        out = self.fc(self.drop(out))
        return out
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-24-49ec9d027d5c> in <module>()
12
13 # Train the model
---> 14 log = train_model(dataset_train, dataset_valid, BATCH_SIZE, model, criterion, optimizer, NUM_EPOCHS, device=device)
15
16 # Compute the accuracy
/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py in dropout(input, p, training, inplace)
1277 if p < 0.0 or p > 1.0:
1278 raise ValueError("dropout probability has to be between 0 and 1, " "but got {}".format(p))
-> 1279 return _VF.dropout_(input, p, training) if inplace else _VF.dropout(input, p, training)
1280
1281
TypeError: dropout(): argument 'input' (position 1) must be Tensor, not tuple
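This second error has the same root cause: even without tuple unpacking, the call with `return_dict=False` still returns a tuple, and that whole tuple is handed to `Dropout`, which only accepts tensors. An illustrative check (assuming `model`, `ids`, and `mask` are the objects from the question):

outputs = model.roberta(ids, attention_mask=mask, return_dict=False)
print(type(outputs), len(outputs))  # <class 'tuple'> 1
print(outputs[0].shape)             # (batch, seq_len, vocab_size): masked-LM logits

Indexing `outputs[0]` would silence the TypeError, but the last dimension is the vocabulary size, not 768, so it still would not fit `Linear(768, output_size)`; switching to `RobertaModel` as sketched above addresses both problems.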
And this is my CreateDataset class. Where do I need to fix it? Thank you for answering.
from torch.utils.data import Dataset

class CreateDataset(Dataset):
    def __init__(self, X, y, tokenizer, max_len):
        self.X = X
        self.y = y
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):  # value returned by len(Dataset)
        return len(self.y)

    def __getitem__(self, index):  # value returned by Dataset[index]
        text = self.X[index]
        inputs = self.tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=self.max_len,
            pad_to_max_length=True
        )
        ids = inputs['input_ids']
        mask = inputs['attention_mask']
        return {
            'ids': torch.LongTensor(ids),
            'mask': torch.LongTensor(mask),
            'labels': torch.Tensor(self.y[index])
        }
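Separately, the FutureWarning quoted above comes from `pad_to_max_length=True`, which is deprecated in transformers. A sketch of the updated `encode_plus` call, keeping everything else from the question unchanged:

inputs = self.tokenizer.encode_plus(
    text,
    add_special_tokens=True,
    max_length=self.max_len,
    padding='max_length',  # replaces the deprecated pad_to_max_length=True
    truncation=True        # truncate sequences longer than max_len
)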