I want to verify that PBT is working correctly by having it find the optimal hidden layer sizes for a model in Ray Tune, but the hidden layer sizes it finds are not optimal. The results are not even suboptimal. I set the stopping criterion to a validation loss of 5, but the returned model's validation loss is more than 20. It is also notable that the whole computation over the training data of shape (700000, 7) and ground truth of shape (700000, 4) takes less than 5 minutes (for 1 epoch).
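To be explicit about the stopping condition I intend: each trial should stop once the reported error drops to 5. Written as a function-based stopper, which Tune also accepts in place of the dict form (the metric name is the one my trainable reports), that is roughly:

def stop_fn(trial_id, result):
    # Stop this trial once the reported error angle is 5 or below.
    return result["ErrorAngle"] <= 5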
The network is:
def build_model(self, config):
    [x_acc, x_gyro, mag, x_fs], [quat] = training_date()
    Att_quat = Att_q(quat)
    [self.x_gyro_t, self.x_acc_t, mag, self.fs_t], [q_t] = data_oxiod_test()
    self.Att_quat_t = Att_q(q_t)
    self.x_acc, self.x_gyro, self.x_fs, self.Att_quat = shuffle(
        x_acc, x_gyro, x_fs, Att_quat)
    accl = Input(shape=(3,))
    gyrol = Input(shape=(3,))
    fsl = Input(shape=(1,))
    Acc = Reshape((3, 1))(accl)
    Gyro = Reshape((3, 1))(gyrol)
    ALSTM = LSTM(config["ALSTM"], return_sequences=True)(Acc)
    if config["ALSTM_num"] > 1:
        for i in range(config["ALSTM_num"] - 1):
            ALSTM = LSTM(config["ALSTM_%d" % i],
                         return_sequences=True)(ALSTM)
    GLSTM = LSTM(config["GLSTM"], return_sequences=True)(Gyro)
    if config["GLSTM_num"] > 1:
        for i in range(config["GLSTM_num"] - 1):
            GLSTM = LSTM(config["GLSTM_%d" % i],
                         return_sequences=True)(GLSTM)
    fsdense = Dense(config["fsdense"])(fsl)
    AG = concatenate([ALSTM, GLSTM], axis=2)
    AG = Dropout(0.2)(AG)
    AG = LSTM(config["AGLSTM"], return_sequences=True)(AG)
    AG = Dropout(0.2)(AG)
    AGDense = Dense(config["AGDense"])(AG)
    AGDense = Flatten()(AGDense)
    AGF = concatenate([AGDense, fsdense], axis=1)
    quat_p = Dense(4, activation="linear")(AGF)
    model = Model(inputs=[accl, gyrol, fsl], outputs=quat_p)
    model.compile(optimizer=keras.optimizers.Adam(
        learning_rate=config['lr']),
        loss=QQuat_mult,
        metrics=[Quat_error_angle]
    )
    return model
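For context, build_model is called from my Trainable subclass, and PBT relies on checkpointing to copy weights between trials. BroadModel follows the class API roughly like this (a simplified sketch of my actual class; the fit/evaluate wiring is abbreviated):

import os
from ray import tune

class BroadModel(tune.Trainable):
    def setup(self, config):
        # build_model also loads and shuffles the data (see above)
        self.model = self.build_model(config)

    def step(self):
        self.model.fit(
            [self.x_acc, self.x_gyro, self.x_fs], self.Att_quat,
            batch_size=self.config["batch_size"],
            epochs=self.config["epochs"],
            verbose=0)
        # evaluate() returns [QQuat_mult loss, Quat_error_angle metric]
        _, angle = self.model.evaluate(
            [self.x_acc_t, self.x_gyro_t, self.fs_t], self.Att_quat_t,
            verbose=0)
        return {"ErrorAngle": angle}

    def save_checkpoint(self, checkpoint_dir):
        # PBT clones the weights of better trials through these two hooks
        self.model.save_weights(os.path.join(checkpoint_dir, "model.h5"))

    def load_checkpoint(self, checkpoint_dir):
        self.model.load_weights(os.path.join(checkpoint_dir, "model.h5"))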
And the PBT parameters are as follows:
perturbation_interval = 6
pbt = PopulationBasedTraining(
    perturbation_interval=perturbation_interval,
    hyperparam_mutations={
        # "dropout": lambda: np.random.uniform(0, 1),
        "lr": lambda: np.random.uniform(0.00001, 0.1),
        "AGDense": tune.choice(np.arange(32, 256, 5)),
        "AGLSTM": tune.choice(np.arange(32, 256, 5)),
        "ALSTM": tune.choice(np.arange(32, 256, 5)),
        "GLSTM": tune.choice(np.arange(32, 256, 5)),
        "fsdense": tune.choice(np.arange(32, 256, 5)),
        "ALSTM_num": tune.choice([0, 1, 2]),
        "ALSTM_0": tune.choice(np.arange(32, 256, 5)),
        "ALSTM_1": tune.choice(np.arange(32, 256, 5)),
        "GLSTM_num": tune.choice([0, 1, 2]),
        "GLSTM_0": tune.choice(np.arange(32, 256, 5)),
        "GLSTM_1": tune.choice(np.arange(32, 256, 5)),
    },
    synch=False,
)
resources_per_trial = {"cpu": 4, "gpu": 1}
tuner = tune.Tuner(
    tune.with_resources(
        BroadModel,
        resources=resources_per_trial),
    run_config=air.RunConfig(
        name="BroadPBT",
        stop={"ErrorAngle": 5}
    ),
    tune_config=tune.TuneConfig(
        reuse_actors=False,
        scheduler=pbt,
        metric="ErrorAngle",
        mode="min",
        num_samples=2,
    ),
    param_space={
        "finish_fast": True,
        "batch_size": 500,
        "epochs": 1,
        "dropout": 0.2,
        "lr": tune.uniform(0.00001, 0.1),
        "AGDense": tune.choice(np.arange(32, 256, 5)),
        "AGLSTM": tune.choice(np.arange(32, 256, 5)),
        "ALSTM": tune.choice(np.arange(32, 256, 5)),
        "GLSTM": tune.choice(np.arange(32, 256, 5)),
        "fsdense": tune.choice(np.arange(32, 256, 5)),
        "ALSTM_num": tune.choice([0, 1, 2]),
        "ALSTM_0": tune.choice(np.arange(32, 256, 5)),
        "ALSTM_1": tune.choice(np.arange(32, 256, 5)),
        "GLSTM_num": tune.choice([0, 1, 2]),
        "GLSTM_0": tune.choice(np.arange(32, 256, 5)),
        "GLSTM_1": tune.choice(np.arange(32, 256, 5)),
    },
)
pbt_results = tuner.fit()
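After fit() returns, I read off the best trial with the standard ResultGrid accessor, roughly:

best = pbt_results.get_best_result(metric="ErrorAngle", mode="min")
print(best.config)                 # the layer sizes PBT settled on
print(best.metrics["ErrorAngle"])  # comes out above 20, not near 5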
The results are far from the optimal value. Have I done anything wrong?