MLflow experiment remaining time?

18 Views Asked by At

I am fitting an imblearn pipeline using RandomUnderSampler, a feature selector, and MLPClassifier on my data in Databricks. Is there a way to know how much time it will take for the fit to complete?

Data specs: 10M rows, 5K unique features, trimmed down with SelectKBest according to the parameter grid below. 108 fits in total per GridSearchCV (36 parameter combinations × 3 CV folds).

# Search space for GridSearchCV: 3 * 2 * 1 * 1 * 6 = 36 combinations.
# Keys are '<step>__<param>' targets into the imblearn Pipeline.
param_grid = [
    dict(
        fs__k=[500, 1000, 1500],                          # SelectKBest feature count
        mlp__hidden_layer_sizes=[(100, 50, 25, 10, 5),
                                 (200, 50, 10, 5, 2)],
        mlp__activation=['relu'],
        mlp__solver=['adam'],
        mlp__alpha=np.logspace(-1, 3, 6),                 # L2 penalty: 0.1 .. 1000
    ),
]

# Look up the MLflow experiment and echo its metadata.
# get_experiment_by_name returns None (it does not raise) when the name is
# unknown, so fail fast with a clear message instead of an AttributeError below.
experiment = mlflow.get_experiment_by_name(EXPERIMENT_NAME)
if experiment is None:
    raise ValueError(f"No MLflow experiment named {EXPERIMENT_NAME!r}")
print(f"Name: {experiment.name}")
print(f"Experiment_id: {experiment.experiment_id}")
print(f"Artifact Location: {experiment.artifact_location}")
print(f"Tags: {experiment.tags}")
print(f"Lifecycle_stage: {experiment.lifecycle_stage}")
print(f"Creation timestamp: {experiment.creation_time}")

with mlflow.start_run(run_name=RUN_NAME):

    # --- Pipeline definition ---------------------------------------------
    # Undersample the majority class, select the k best features by mutual
    # information, then fit an MLP. The imblearn Pipeline applies the
    # sampler only during fit, never during predict.
    random_state = 45
    rus = RandomUnderSampler(sampling_strategy='majority', replacement=False,
                             random_state=random_state)
    fs = SelectKBest(score_func=mutual_info_classif)
    mlp = MLPClassifier(early_stopping=True, verbose=True)

    basepipe = Pipeline([('rus', rus), ('fs', fs), ('mlp', mlp)], verbose=True)

    # NOTE: do NOT pre-fit individual steps (fit_resample / fit_transform)
    # here. GridSearchCV clones and refits the entire pipeline for every
    # parameter combination and fold, so pre-fitting on 10M rows is very
    # expensive dead work whose results are never used — it was removed.

    # --- Grid search ------------------------------------------------------
    # 36 parameter combinations * 3 folds = 108 fits; n_jobs=-1 uses all
    # cores on the driver. `scorer` is defined elsewhere in this notebook.
    mlp_grid = GridSearchCV(basepipe, param_grid=param_grid, scoring=scorer,
                            n_jobs=-1, cv=3, verbose=5)
    print("Fitting the Grid and Estimator")
    mlp_grid.fit(X=x_train_transformed, y=y_train)

    # --- Evaluation on the held-out test set ------------------------------
    print("Prediction on test set with best fitted estimator")
    y_pred_mlp = mlp_grid.predict(x_test_transformed)

    class_report_mlp = metrics.classification_report(y_true=y_test, y_pred=y_pred_mlp)
    print("Classification Report MLP:\n", class_report_mlp)
    # Precision of class '0' (labels are strings here — TODO confirm dtype of y).
    best_score_mlp = metrics.precision_score(y_test, y_pred_mlp, labels=['0'],
                                             pos_label='0')
    print("Best Score MLP :\n", best_score_mlp, '\n', "*" * 80)

    # --- MLflow logging ---------------------------------------------------
    mlflow.log_param('training data shape', x_train_transformed.shape)
    mlflow.log_param('test data shape', x_test_transformed.shape)
    mlflow.log_param('cat_features', CAT_COL)
    mlflow.log_param('num_features', NUM_COL)
    mlflow.log_param('multi_cat_features', MULTI_LABEL_COL)
    mlflow.log_param('best_features_no', mlp_grid.best_params_['fs__k'])
    mlflow.log_param('best_layer', mlp_grid.best_params_['mlp__hidden_layer_sizes'])
    mlflow.log_param('best_activation', mlp_grid.best_params_['mlp__activation'])
    mlflow.log_param('best_solver', mlp_grid.best_params_['mlp__solver'])
    mlflow.log_param('best_alpha', mlp_grid.best_params_['mlp__alpha'])
    mlflow.log_metric('best_precision_minority_class', best_score_mlp)
    mlflow.sklearn.log_model(mlp_grid.best_estimator_, "best_model")
    # No mlflow.end_run() needed: the `with` context manager ends the run
    # automatically (and marks it FAILED if an exception escapes).

0

There are 0 best solutions below