Keras Tuner is crashing Google Colab Pro


Google Colab Pro crashes and restarts the kernel.

It worked for a while, running three different trials, before it crashed. Since then it crashes immediately.

Here is the code:

import os

import pandas as pd   

train_info = pd.read_csv("/content/drive/MyDrive/train_info.csv")
test_info = pd.read_csv("/content/drive/MyDrive/test_info.csv")

train_folder = "/content/train"
test_folder = "/content/test/"


import keras

import kerastuner

from kerastuner.tuners import BayesianOptimization
from kerastuner.engine.hypermodel import HyperModel
from kerastuner.engine.hyperparameters import HyperParameters

from keras.layers import (Input, Lambda, Dense, Flatten, BatchNormalization,
                          Dropout, PReLU, GlobalAveragePooling2D, LeakyReLU,
                          MaxPooling2D)
from keras.models import Model
from keras.applications.resnet_v2 import ResNet152V2, preprocess_input
from keras import applications

from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator
from keras.losses import sparse_categorical_crossentropy

from keras.callbacks import (ReduceLROnPlateau, ModelCheckpoint,
                             EarlyStopping, TensorBoard)

import tensorflow_addons as tfa

from sklearn.metrics import confusion_matrix
import numpy as np
import matplotlib.pyplot as plt

num_classes = 423
epochs = 1
batch_size = 32
img_height = 224
img_width = 224
IMAGE_SIZE = [img_height, img_width]

_train_generator = ImageDataGenerator(
        rotation_range=180,
        zoom_range=0.2,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.3,
        horizontal_flip=True,
        vertical_flip=True,
        preprocessing_function=preprocess_input)


_val_generator = ImageDataGenerator(
        preprocessing_function=preprocess_input)


train_generator = _train_generator.flow_from_dataframe(
    dataframe=train_info, directory=train_folder,
    x_col="filename", y_col="artist", seed=42,
    batch_size=batch_size, shuffle=True,
    class_mode="sparse", target_size=IMAGE_SIZE)

valid_generator = _val_generator.flow_from_dataframe(
    dataframe=test_info, directory=test_folder,
    x_col="filename", y_col="artist", seed=42,
    batch_size=batch_size, shuffle=True,
    class_mode="sparse", target_size=IMAGE_SIZE)

hp = HyperParameters()

def model_builder(hp):

  dropout_1 = hp.Float('dropout_1',
          min_value=0.0,
          max_value=0.5,
          default=0.25,
          step=0.05)
  dropout_2 = hp.Float('dropout_2',
          min_value=0.0,
          max_value=0.5,
          default=0.25,
          step=0.05)
 
  input_tensor = Input(shape=train_generator.image_shape)

  base_model = applications.ResNet152(weights='imagenet',
                                      include_top=False,
                                      input_tensor=input_tensor)

  for layer in base_model.layers[:]:
    layer.trainable = True

  x = Flatten()(base_model.output)

  x = Dense(units=hp.Int('units_' + "dense1", 0, 1024, step=256), 
      kernel_regularizer=keras.regularizers.L2(l2=0.01), 
      kernel_initializer=keras.initializers.HeNormal(), 
      kernel_constraint=keras.constraints.UnitNorm(axis=0))(x)
  
  x = PReLU()(x)
  x = BatchNormalization()(x)
  x = Dropout(rate=dropout_1)(x)

  x = Dense(units=hp.Int('units_' + "dense2", 0, 512, step=32), 
      kernel_regularizer=keras.regularizers.L2(l2=0.01),           
      kernel_initializer=keras.initializers.HeNormal(), 
      kernel_constraint=keras.constraints.UnitNorm(axis=0))(x)

  x = PReLU()(x)
  x = BatchNormalization()(x)
  x = Dropout(rate=dropout_2)(x)
  
  predictions = Dense(num_classes, activation= 'softmax')(x)
  model = Model(inputs = base_model.input, outputs = predictions)

  opt = tfa.optimizers.LazyAdam(lr=0.000074)

  model.compile(
  loss='sparse_categorical_crossentropy',
  optimizer=opt,
  metrics=['accuracy']
  )
  return model
 
earlyStopping = EarlyStopping(monitor='val_loss', patience=5,
                              verbose=0, mode='min')

reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2,
                              verbose=1, min_delta=1e-4, mode='min')


from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

tuner = BayesianOptimization(
    model_builder,
    max_trials=100,
    executions_per_trial=1,
    hyperparameters=hp,
    allow_new_entries=True,
    directory = "/content/drive/MyDrive/output",
    project_name = "ArtNet23_new",
    objective='val_accuracy')


tuner.search(train_generator,
             validation_data=valid_generator,
             epochs=1,
             verbose=2)

The kernel keeps crashing and restarting. Here is the session log:


Dec 14, 2020, 6:26:08 PM    WARNING    WARNING:root:kernel 33453c78-47cf-4698-a36b-130de118e8d7 restarted
Dec 14, 2020, 6:26:08 PM    INFO       KernelRestarter: restarting kernel (1/5), keep random ports
Dec 14, 2020, 6:26:07 PM    WARNING    2020-12-14 17:26:07.984952: F ./tensorflow/core/util/gpu_launch_config.h:129] Check failed: work_element_count > 0 (0 vs. 0)

Anyone know how to solve this? I have had this problem for a couple of weeks now.

1 Answer

Depending on the outcome you're looking to achieve, you'll need to decrease your total memory usage/footprint. However you want to 'trim' the model is entirely up to you.

I would suggest starting here. (This is extreme, but it lets you quickly determine whether memory is the issue.)

  # These should all be your initial settings + scaling.
  # Change scalar to any multiple of 2 and it will reduce
  # the memory consumption of your network.
  # e.g. scalar = 4 gives neurons_1 = 256
  #   neurons       = total nodes
  #   energy        = steps
  #   splitting     = neuron scaling
  #   lack of sleep = step scaling
  scalar        = 1
  neurons_1     = 1024 // scalar
  neurons_2     = 512 // scalar
  energy_1      = 256 // scalar
  energy_2      = 32 // scalar
  splitting_1   = neurons_1 // 8 if neurons_1 >= 8 else 4
  splitting_2   = neurons_2 // 4 if neurons_2 >= 4 else 2
  lack_of_sleep = splitting_1 // 4 if splitting_1 >= 8 else 4

  x = Dense(units=hp.Int('units_' + "dense1",
                         0,
                         neurons_1 // splitting_1,
                         step=energy_1 // lack_of_sleep),
            kernel_regularizer=keras.regularizers.L2(l2=0.01),
            kernel_initializer=keras.initializers.HeNormal(),
            kernel_constraint=keras.constraints.UnitNorm(axis=0))(x)
  x = PReLU()(x)
  x = BatchNormalization()(x)
  x = Dropout(rate=dropout_1)(x)
  x = Dense(units=hp.Int('units_' + "dense2",
                         0,
                         neurons_2 // splitting_2,
                         step=energy_2 // lack_of_sleep),
            kernel_regularizer=keras.regularizers.L2(l2=0.01),
            kernel_initializer=keras.initializers.HeNormal(),
            kernel_constraint=keras.constraints.UnitNorm(axis=0))(x)

Start with scalar = 8. If it works, change scalar to 4. If that works, change scalar to 2. If it doesn't work, delete this test code and then adjust your parameters accordingly.
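
If you want to confirm that memory really is the limiting factor, you can watch GPU memory usage around a trial. Here is a minimal sketch, assuming Colab's standard NVIDIA GPU runtime (where nvidia-smi is on the PATH); gpu_memory_report is just an illustrative helper name:

  # Minimal sketch: print GPU memory usage so you can see whether a
  # trial is exhausting the card. Assumes an NVIDIA GPU runtime with
  # nvidia-smi on the PATH (true for Colab GPU instances).
  import subprocess

  def gpu_memory_report():
      out = subprocess.run(
          ["nvidia-smi", "--query-gpu=memory.used,memory.total",
           "--format=csv"],
          capture_output=True, text=True)
      print(out.stdout)

  gpu_memory_report()  # call before and after building one trial's model

If usage sits near the card's total as soon as a trial's model is built, the crash is likely memory-related, and trimming the model as above should help.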