I have a list of samples that I want to convert into a Hugging Face dataset for training a model. I followed some tips, and here is my code:
from datasets import Dataset
class MkqaChineseDataset(Dataset):
    """Index-addressable wrapper around a list of pre-tokenized samples.

    Each sample is expected to be a mapping with ``"input_ids"`` and
    ``"attention_mask"`` entries. Indexing returns those two fields plus
    ``"labels"``, which is the same object as ``"input_ids"``.

    The samples are stored in ``_samples`` rather than ``data`` because
    ``datasets.Dataset`` defines ``data`` as a read-only property; assigning
    ``self.data`` raises ``AttributeError: can't set attribute``.

    NOTE(review): ``Dataset.__init__`` is not called (it requires an
    ``arrow_table`` argument), so only ``len()`` and indexing are provided
    here. Other ``datasets.Dataset`` APIs presumably will not work on this
    object -- prefer ``Dataset.from_list(data)`` when a real Arrow-backed
    dataset is needed.
    """

    def __init__(self, data):
        """Keep a reference to ``data``, a sequence of sample mappings."""
        # super().__init__() is intentionally skipped: calling it without
        # arguments fails with
        # "TypeError: __init__() missing 1 required positional argument: 'arrow_table'".
        self._samples = data

    def __len__(self):
        """Return the number of stored samples."""
        return len(self._samples)

    def __getitem__(self, idx):
        """Return the sample at ``idx`` with ``labels`` mirroring ``input_ids``."""
        sample = self._samples[idx]
        return {
            "input_ids": sample["input_ids"],
            "attention_mask": sample["attention_mask"],
            "labels": sample["input_ids"],
        }
# Toy pre-tokenized samples used to exercise MkqaChineseDataset.
# torch.tensor(...) infers an integer dtype (int64) from the integer literals,
# whereas torch.Tensor(...) always builds float32 tensors; token ids must be
# integer-typed to be usable as embedding indices.
buffer_test = [
    {
        "input_ids": torch.tensor([9437, 29, 210]),
        "attention_mask": torch.tensor([1, 1, 1]),
    },
    {
        "input_ids": torch.tensor([37, 9, 211]),
        "attention_mask": torch.tensor([1, 1, 1]),
    },
    {
        "input_ids": torch.tensor([937, 19, 212]),
        "attention_mask": torch.tensor([1, 1, 1]),
    },
]
print(buffer_test)

mkqa = MkqaChineseDataset(buffer_test)
# Check that the wrapper is recognized as an instance of the imported Dataset.
res = isinstance(mkqa, Dataset)
print(res)
However, it raises an AttributeError:
self.data = data
AttributeError: can't set attribute
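The error occurs because `datasets.Dataset` exposes `data` as a read-only property, so it cannot be assigned in a subclass. Instead of subclassing, you can use `Dataset.from_list` to build the dataset directly from your list of dicts, e.g. in your case: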