I'm facing difficulties training multiple models with Keras and Hugging Face Transformers. Below are the code snippets for the models and the error encountered during training:
- Model 1: LSTM Model
from tensorflow import keras
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SpatialDropout1D, LSTM, Dense

(X_train, y_train), (X_test, y_test) = keras.datasets.imdb.load_data()
X_train = X_train[:2500]
y_train = y_train[:2500]
X_test = X_test[:500]
y_test = y_test[:500]

def dekodeeri(tekstijada):
    # Helper function to turn the integer sequences back into words
    word_index = keras.datasets.imdb.get_word_index()
    index_word = {0: "<PAD>", 1: "[START]", 2: "[OOV]", 3: "<UNUSED>"}
    for (word, i) in word_index.items():
        index_word[i + 3] = word
    return " ".join(index_word[i] for i in tekstijada)
print(X_train.shape,y_train.shape)
print(X_test.shape,y_test.shape)
print()
print(X_train[0])
print(dekodeeri(X_train[0]))
print(y_train[0])
max_features = 100000
maxlen = 500
# Padding sequences
print('Pad sequences (samples x time)')
X_train = sequence.pad_sequences(X_train, maxlen=maxlen)
X_test = sequence.pad_sequences(X_test, maxlen=maxlen)
print('X_train shape:', X_train.shape)
print('X_test shape:', X_test.shape)
model = Sequential()
model.add(Embedding(max_features, 256))
model.add(SpatialDropout1D(0.4))
model.add(LSTM(100, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# Train the model
model.fit(X_train, y_train, batch_size=64, epochs=5, validation_data=(X_test, y_test))
- Model 2: DistilBERT Model
import tensorflow as tf
from transformers import TFDistilBertModel, DistilBertConfig
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, GlobalAveragePooling1D
# Define input shape
input_layer = Input(shape=(maxlen,), dtype=tf.int32)
# Load DistilBERT model
config = DistilBertConfig(dropout=0.2, attention_dropout=0.2)
distil_bert_model = TFDistilBertModel.from_pretrained('distilbert-base-uncased', config=config)
# Freeze DistilBERT layers
for layer in distil_bert_model.layers:
    layer.trainable = False
# Get DistilBERT output
distil_bert_output = distil_bert_model(input_layer)[0]
# Add pooling layer
pooled_output = GlobalAveragePooling1D()(distil_bert_output)
# Add dense layer for classification
output_layer = Dense(1, activation='sigmoid')(pooled_output)
# Create model
model_2_1 = Model(inputs=input_layer, outputs=output_layer)
# Compile the model
model_2_1.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# Train the model
model_2_1.fit(X_train_padded, y_train, batch_size=64, epochs=5, validation_data=(X_test_padded, y_test))
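Note: X_train_padded and X_test_padded come from a preprocessing step that isn't included above. Purely for illustration, a minimal sketch of one way such arrays could be built, assuming the decoded reviews are re-tokenized with DistilBertTokenizerFast rather than reusing the IMDB integer indices (this is an assumption for readers, not necessarily my exact code):

# Illustrative sketch only (assumption): building padded DistilBERT inputs
# from the decoded IMDB reviews, reusing the dekodeeri() helper from Model 1.
# (X_train here is already zero-padded, so the decoded strings contain "<PAD>" tokens.)
from transformers import DistilBertTokenizerFast

tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')

train_texts = [dekodeeri(seq) for seq in X_train]
test_texts = [dekodeeri(seq) for seq in X_test]

train_enc = tokenizer(train_texts, padding='max_length', truncation=True,
                      max_length=maxlen, return_tensors='np')
test_enc = tokenizer(test_texts, padding='max_length', truncation=True,
                     max_length=maxlen, return_tensors='np')

X_train_padded = train_enc['input_ids'].astype('int32')
X_test_padded = test_enc['input_ids'].astype('int32')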
Error Encountered:
InvalidArgumentError: Graph execution error.

InvalidArgumentError                      Traceback (most recent call last)
in <cell line: 36>()
     34 model_2_1.summary()
     35 # Getting the evaluation results
---> 36 model_2_1.fit(X_train_padded, y_train, batch_size=64, epochs=5, validation_data=(X_test_padded, y_test))

1 frames
/usr/local/lib/python3.10/dist-packages/tensorflow/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
     52 try:
Similar issues were encountered with Model 3 and Model 4 as well.
I've checked that the input data shapes, data types, and preprocessing steps are correct; the kinds of checks I ran look roughly like the sketch below.
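A minimal sketch of those sanity checks (the expected values in the comments assume the same 2500/500 slices and maxlen=500 used above; everything else is illustrative):

import numpy as np

# Sanity checks on the padded inputs fed to model_2_1 (illustrative only)
print(X_train_padded.shape, X_train_padded.dtype)   # expected roughly (2500, 500), int32
print(X_test_padded.shape, X_test_padded.dtype)     # expected roughly (500, 500), int32
print(y_train.shape, y_train.dtype)                 # expected roughly (2500,)
# The largest token id must stay below the embedding / vocabulary size,
# otherwise an InvalidArgumentError (index out of range) is raised at fit time.
print(np.max(X_train_padded), np.max(X_test_padded))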