I'm programming multivariate regression models in Python where I need to use regularization. I'm using the sklearn estimators Ridge, Lasso, ElasticNet and HuberRegressor with GridSearchCV to find the best fitting parameters, then I extract those best parameters (last 3 lines of the code). But now, with the fitted model, I have two main doubts:
- How can I predict new data using only the fitted parameters? Is it the same as OLS linear regression, where I multiply the coefficients by the data? Note: I need to store the information of the fitted model in SQL and then use it in other notebooks.
- Do I have to normalize the data before doing this? I normalize the data before fitting the model (normalize=True). To do this, do I need to store the mean and standard deviation of the data used to fit the model, and then normalize the new data with them?
Python script:
# Tune four regularized linear regressors with a grid search over their
# hyper-parameters, then print each refitted best estimator together with its
# coefficients and intercept.
#
# NOTE(review): the `normalize=` argument of Ridge/Lasso/ElasticNet was
# deprecated in scikit-learn 1.0 and removed in 1.2 -- confirm the installed
# version; on newer releases the features have to be scaled explicitly before
# fitting instead.
# NOTE(review): HuberRegressor is built without any `normalize` argument, so
# the features reach it exactly as they are in X_train.
candidate_models = {
    'Ridge': (
        Ridge(normalize=True),
        {'alpha': np.logspace(-5, 3, 10)},
    ),
    'Lasso': (
        Lasso(normalize=True),
        {'alpha': np.logspace(-5, 3, 10)},
    ),
    'ElasticNet': (
        ElasticNet(normalize=True),
        {
            'alpha': np.logspace(-5, 3, 10),
            # l1_ratio is the L1/L2 mixing weight and must stay within [0, 1];
            # the previous grid, np.logspace(0, 3, 10), ran from 1 up to 1000.
            'l1_ratio': np.linspace(0.1, 1.0, 10),
        },
    ),
    'HuberRegressor': (
        HuberRegressor(),
        {
            'alpha': np.logspace(-5, 3, 10),
            'epsilon': np.logspace(0, 3, 10),
        },
    ),
    # 'SCAD' is not part of the comparison yet.
}

for Reg_model, (base_estimator, param_grid) in candidate_models.items():
    # Every model shares the same search settings. Optional extras for
    # GridSearchCV: n_jobs=multiprocessing.cpu_count() - 1 and
    # return_train_score=True.
    model = GridSearchCV(
        estimator=base_estimator,
        param_grid=param_grid,
        scoring='neg_root_mean_squared_error',
        cv=time_split,
        verbose=0,
        refit=True,
    )
    model.fit(X=X_train, y=y_train)

    # refit=True retrains the best parameter combination, so the fitted
    # parameters are read from best_estimator_. For HuberRegressor the
    # outlier mask is available there as well (best_estimator_.outliers_).
    print(model.best_estimator_)
    print(model.best_estimator_.coef_)
    print(model.best_estimator_.intercept_)