Ray Tune | Find optimal network hidden size using PBT

133 views

I intend to develop a model to test whether PBT is working correctly, and I want to find the optimal hidden-layer size via PBT in Ray Tune, but the hidden-layer sizes found by PBT are not optimal — the results are not even suboptimal. I set the stop parameter to val_loss = 5, but the returned model's val_loss is more than 20. It is also noticeable that the whole computation for the training data (700000, 7) and ground truth (700000, 4) takes less than 5 minutes (for 1 epoch).

The network is

def build_model(self, config):
        """Build and compile a Keras model that predicts a quaternion from IMU data.

        Architecture: two parallel LSTM stacks (accelerometer and gyroscope
        branches, each optionally deepened via config), concatenated and passed
        through a further LSTM + Dense head, then merged with a Dense-transformed
        sampling-frequency input before a final 4-unit linear output.

        Args:
            config: hyperparameter dict; reads keys "ALSTM", "ALSTM_num",
                "ALSTM_0"/"ALSTM_1", "GLSTM", "GLSTM_num", "GLSTM_0"/"GLSTM_1",
                "AGLSTM", "AGDense", "fsdense", "lr".

        Returns:
            A compiled Keras ``Model`` with inputs ``[accl, gyrol, fsl]`` and a
            single 4-dimensional linear output (predicted quaternion).

        NOTE(review): loading the full training and test datasets inside the
        model builder means every tuning trial re-reads the data — consider
        hoisting the data loading out of this method.
        """
        # Load training data and convert the ground-truth quaternions.
        # NOTE(review): `training_date` is presumably a typo for `training_data`
        # elsewhere in the project — confirm the helper's actual name.
        [x_acc, x_gyro, mag, x_fs], [quat] = training_date()
        Att_quat = Att_q(quat)
        # Load the held-out test set onto the instance for later evaluation.
        # NOTE(review): `mag` is overwritten here and never used — confirm
        # the magnetometer channel is intentionally discarded.
        [self.x_gyro_t, self.x_acc_t, mag, self.fs_t], [q_t] = data_oxiod_test()
        self.Att_quat_t = Att_q(q_t)

        # Shuffle the training arrays in unison and keep them on the instance
        # so the training step can consume them.
        self.x_acc, self.x_gyro, self.x_fs, self.Att_quat = shuffle(
            x_acc, x_gyro, x_fs, Att_quat)
        # Model inputs: 3-axis accelerometer, 3-axis gyroscope, scalar
        # sampling frequency.
        # NOTE(review): `Input((3))` passes the int 3, not the tuple (3,) —
        # it appears to work here, but `Input((3,))` would be unambiguous.
        accl = Input((3))
        gyrol = Input((3))
        fsl = Input((1))

        # Treat each 3-axis sample as a length-3 sequence for the LSTMs.
        Acc = Reshape((3, 1))(accl)
        Gyro = Reshape((3, 1))(gyrol)

        # Accelerometer branch: base LSTM plus optional extra layers.
        # NOTE(review): with ALSTM_num == 2 the loop runs range(1), so only
        # "ALSTM_0" is ever read — the "ALSTM_1" search-space key is dead.
        # Confirm whether ALSTM_num means total layers or extra layers.
        ALSTM = LSTM(config["ALSTM"], return_sequences=True)(Acc)
        if config["ALSTM_num"] > 1:
            for i in range(config["ALSTM_num"]-1):
                ALSTM = LSTM(config["ALSTM_%d" % i],
                             return_sequences=True)(ALSTM)
        # Gyroscope branch: mirrors the accelerometer branch (same caveat
        # applies to "GLSTM_1").
        GLSTM = LSTM(config["GLSTM"], return_sequences=True)(Gyro)
        if config["GLSTM_num"] > 1:
            for i in range(config["GLSTM_num"]-1):
                GLSTM = LSTM(config["GLSTM_%d" % i],
                             return_sequences=True)(GLSTM)

        # Embed the scalar sampling frequency.
        fsdense = Dense(config["fsdense"])(fsl)

        # Fuse the two IMU branches along the feature axis, then refine.
        AG = concatenate([ALSTM, GLSTM], axis=2)
        AG = Dropout(0.2)(AG)
        AG = LSTM(config["AGLSTM"], return_sequences=True)(AG)
        AG = Dropout(0.2)(AG)
        AGDense = Dense(config["AGDense"])(AG)
        AGDense = Flatten()(AGDense)
        # Append the frequency embedding and regress the 4 quaternion
        # components with a linear head.
        AGF = concatenate([AGDense, fsdense], axis=1)
        quat_p = Dense(4, activation="linear")(AGF)
        model = Model(inputs=[accl, gyrol, fsl], outputs=quat_p)
        # Custom quaternion-multiplication loss and angular-error metric,
        # both defined elsewhere in the project.
        model.compile(optimizer=keras.optimizers.Adam(
            learning_rate=config['lr']),
            loss=QQuat_mult,
            metrics=[Quat_error_angle]
        )
        return model

And the PBT parameters are as follows

perturbation_interval = 6
    pbt = PopulationBasedTraining(
        perturbation_interval=perturbation_interval,
        hyperparam_mutations={
            # "dropout": lambda: np.random.uniform(0, 1),
            "lr": lambda: np.random.uniform(0.00001, 0.1),
            "AGDense": tune.choice(mp.arange(32, 256, 5)),
            "AGLSTM": tune.choice(mp.arange(32, 256, 5)),
            "ALSTM": tune.choice(mp.arange(32, 256, 5)),
            "GLSTM": tune.choice(mp.arange(32, 256, 5)),
            "fsdense": tune.choice(mp.arange(32, 256, 5)),
            "ALSTM_num": tune.choice([0, 1, 2]),
            "ALSTM_0": tune.choice(mp.arange(32, 256, 5)),
            "ALSTM_1": tune.choice(mp.arange(32, 256, 5)),
            "GLSTM_num": tune.choice([0, 1, 2]),
            "GLSTM_0": tune.choice(mp.arange(32, 256, 5)),
            "GLSTM_1": tune.choice(mp.arange(32, 256, 5)),
        },
        synch=False,
    )
    resources_per_trial = {"cpu": 4, "gpu": 1}
    tuner = tune.Tuner(
        tune.with_resources(
            BroadModel,
            resources=resources_per_trial),
        run_config=air.RunConfig(
            name="BroadPBT",
            stop={"ErrorAngle": 5}
        ),
        tune_config=tune.TuneConfig(
            reuse_actors=False,
            scheduler=pbt,
            metric="ErrorAngle",
            # mode is a f
            mode="min",
            num_samples=2,



        ),
        param_space={
            "finish_fast": True,
            "batch_size": 500,
            "epochs": 1,
            "dropout": 0.2,
            "lr": lambda: np.random.uniform(0.00001, 0.1),
            "AGDense": tune.choice(mp.arange(32, 256, 5)),
            "AGLSTM": tune.choice(mp.arange(32, 256, 5)),
            "ALSTM": tune.choice(mp.arange(32, 256, 5)),
            "GLSTM": tune.choice(mp.arange(32, 256, 5)),
            "fsdense": tune.choice(mp.arange(32, 256, 5)),
            "ALSTM_num": tune.choice([0, 1, 2]),
            "ALSTM_0": tune.choice(mp.arange(32, 256, 5)),
            "ALSTM_1": tune.choice(mp.arange(32, 256, 5)),
            "GLSTM_num": tune.choice([0, 1, 2]),
            "GLSTM_0": tune.choice(mp.arange(32, 256, 5)),
            "GLSTM_1": tune.choice(mp.arange(32, 256, 5)),

        },
    )
    pbt_results = tuner.fit()

The results are far from the optimal value — have I done something wrong?

0

There are 0 best solutions below