tensorflow bert 'tuple' object has no attribute problem

16 Views Asked by At

from tensorflow_text.tools.wordpiece_vocab import bert_vocab_from_dataset as bert_vocab import pathlib import tensorflow as tf

BATCH_SIZE = 1028

text_dataset = tf.keras.utils.text_dataset_from_directory(pathlib.Path('path').parent, labels="inferred", label_mode="int", batch_size=BATCH_SIZE) text_examples = tf.keras.utils.text_dataset_from_directory(pathlib.Path('anotherpath').parent, subset='both', labels="inferred", label_mode="int", batch_size=BATCH_SIZE, validation_split=0.2, seed=2)

bert_tokenizer_params=dict(lower_case=True) reserved_tokens=["[PAD]", "[UNK]", "[START]", "[END]"]

bert_vocab_args = dict( # Arguments for text.BertTokenizer bert_tokenizer_params=bert_tokenizer_params, vocab_size=1048576, reserved_tokens=reserved_tokens, # Arguments for wordpiece_vocab.wordpiece_tokenizer_learner_lib.learn learn_params={}, )

train_examles = text_dataset, text_examples myvocab = bert_vocab.bert_vocab_from_dataset( train_examles, **bert_vocab_args)

def write_vocab_file(filepath, vocab): with open(filepath, 'w') as f: for token in vocab: print(token, file=f)

write_vocab_file('vocab.txt', myvocab)

Traceback (most recent call last): File ".py", line 28, in myvocab = bert_vocab.bert_vocab_from_dataset( File ".venv\lib\site-packages\tensorflow_text\tools\wordpiece_vocab\bert_vocab_from_dataset.py", line 82, in bert_vocab_from_dataset element_spec = dataset.element_spec AttributeError: 'tuple' object has no attribute 'element_spec'

try write vocab from https://www.tensorflow.org/text/guide/subwords_tokenizer?hl=ru, but cant.

0

There are 0 best solutions below