Loading the accuracy metric with evaluate sometimes fails: TypeError: 'NoneType' object is not callable

583 Views Asked by At

I'm using BERT and other encoder models for text classification tasks, but when I try to load the accuracy metric with Hugging Face's evaluate library, it sometimes fails with: TypeError: 'NoneType' object is not callable. I have been searching the net for a long time without success. Please help, or share some ideas on how to solve this. Thanks in advance.

This is the traceback of the error:

─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮
│ /home/ubuntu/Bill_PyCharm/absa-three/yasi_encoder/yasi_roberta.py:94 in <module>                 │
│                                                                                                  │
│    91 test_dataset = datasets.Dataset.from_dict(train_ds.get_dataset())                          │
│    92                                                                                            │
│    93 """## Train Loop"""                                                                        │
│ ❱  94 accuracy = evaluate.load("accuracy")                                                       │
│    95 # accuracy = evaluate.load("../evaluate/accuracy.py")                                      │
│    96 # recall = evaluate.load("recall")                                                         │
│    97 # precision = evaluate.load("precision")                                                   │
│                                                                                                  │
│ /home/ubuntu/anaconda3/lib/python3.9/site-packages/evaluate/loading.py:778 in load               │
│                                                                                                  │
│   775 │   │   path, module_type=module_type, revision=revision, download_config=download_confi   │
│   776 │   ).module_path                                                                          │
│   777 │   evaluation_cls = import_main_class(evaluation_module)                                  │
│ ❱ 778 │   evaluation_instance = evaluation_cls(                                                  │
│   779 │   │   config_name=config_name,                                                           │
│   780 │   │   process_id=process_id,                                                             │
│   781 │   │   num_process=num_process,                                                           │
╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
TypeError: 'NoneType' object is not callable

This is the source code:

"""# Loading the Libraries & Models"""
import pandas as pd
import numpy as np
import evaluate
import torch
from datasets import Dataset
import datasets
from torch.utils.data import Dataset
from transformers import (AutoTokenizer,
                          AutoModelForSequenceClassification,
                          TrainingArguments,
                          Trainer)

# Run on the GPU when torch reports one as available, otherwise on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f"Device: {device}")

# Pretrained checkpoint to load; output_dir is derived from its name.
check_point = "xlm-roberta-base"
# Alternative checkpoint: "hfl/chinese-roberta-wwm-ext"
output_dir = f"./models/yasi/{check_point}"
tokenizer = AutoTokenizer.from_pretrained(check_point)
# Sequence-classification model with two labels, moved to the selected device.
model = AutoModelForSequenceClassification.from_pretrained(
    check_point,
    num_labels=2,
)
model = model.to(device)

import pandas as pd
from datasets import Dataset
from sklearn.model_selection import train_test_split

import json

# Read the configuration file.
config_file = '../HyperParameter/config.json'
with open(config_file, 'r') as f:
    config = json.load(f)
# Pull the required settings out of the configuration.
batch_size, dataset, epoch = (
    config[key] for key in ('batch_size', 'dataset', 'epoch')
)

# Load the tab-separated dataset.
data = pd.read_csv(dataset, sep='\t')

# Random split: hold out 20% of the rows first, then cut the held-out part
# in half to obtain the dev and test sets (80/10/10 overall).
train_data, remaining_data = train_test_split(
    data, random_state=42, test_size=0.2
)
dev_data, test_data = train_test_split(
    remaining_data, random_state=42, test_size=0.5
)

# Convert each split into a Dataset object.
train_df, dev_df, test_df = (
    Dataset.from_pandas(frame) for frame in (train_data, dev_data, test_data)
)


class YasiDataset(Dataset):
    """Tokenize the ``sentence`` field of every row once and keep the labels.

    The whole split is tokenized eagerly in ``__init__``. The tokenizer
    output, with a ``labels`` entry added, is available via ``get_dataset()``.

    Args:
        df: Iterable of rows, each indexable by the keys ``"sentence"`` and
            ``"label"``.
        tokenizer: Callable tokenizer, e.g. the object returned by
            ``AutoTokenizer.from_pretrained``. It is called with the list of
            sentences and must return a mapping that supports item assignment.
    """

    def __init__(self, df, tokenizer: "AutoTokenizer"):
        # NOTE: the previous `super(YasiDataset).__init__()` only created an
        # unbound super object and never initialized the base class, so it
        # has been removed. The base initializer is intentionally not called:
        # this file imports the name `Dataset` from more than one package,
        # and this class relies only on the attributes set below.
        self.sentence = []
        labels = []

        # Walk the rows once, collecting the text and its label in parallel.
        for row in df:
            self.sentence.append(row["sentence"])
            labels.append(row["label"])

        self.labels = torch.tensor(labels)
        self.tokenizer_output = tokenizer(self.sentence,
                                          padding=True,
                                          truncation=True,
                                          max_length=512,  # maximum sequence length
                                          return_tensors='pt',
                                          return_token_type_ids=True,
                                          return_attention_mask=True,
                                          )
        # Store the labels next to the encoded inputs under the 'labels' key.
        self.tokenizer_output['labels'] = self.labels

    def __len__(self):
        """Return the number of examples (one per collected sentence)."""
        # The old expression, len(self.tokenizer_output.shape[0]), could never
        # succeed: it took len() of an integer. Count the stored rows instead.
        return len(self.sentence)

    def get_dataset(self):
        """Return the tokenizer output, including the added ``labels`` entry."""
        return self.tokenizer_output


# Tokenize each split with the shared tokenizer.
train_ds, dev_ds, test_ds = (
    YasiDataset(split, tokenizer) for split in (train_df, dev_df, test_df)
)

# Rebuild a datasets.Dataset from each tokenized split.
train_dataset, dev_dataset, test_dataset = (
    datasets.Dataset.from_dict(wrapper.get_dataset())
    for wrapper in (train_ds, dev_ds, test_ds)
)

# ## Train Loop
# NOTE: with evaluate 0.1.2 this call was reported to raise
# "TypeError: 'NoneType' object is not callable"; upgrading the package
# (pip install --upgrade evaluate) resolved it.
accuracy = evaluate.load("accuracy")

I have been searching the net for a long time without success. Please help, or share some ideas on how to solve this.

1

There is 1 best solution below

0
bill yao On BEST ANSWER

I have now found what was wrong. The version of evaluate installed on my computer was 0.1.2, which is too old; the fix is to upgrade the package with the following command: pip install --upgrade evaluate