I am applying a randomized search on hyper parameters of anSGDClassifier. However, I am not sure why randomizedsearch_estimator.fit(x_train, y_train) is not outputting correct values.
from constants import (SPLITS_NUM, SEED, N_JOBS, PROBLEM_METRIC)
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import (KFold, RandomizedSearchCV)
from scipy.stats import (randint, uniform)
def randomized_search(estimator, param_distributions, x_train, y_train,
x_validation, y_validation):
kfold = KFold(n_splits=SPLITS_NUM, shuffle=True, random_state=SEED)
randomizedsearch_estimator = RandomizedSearchCV(estimator,
param_distributions,
cv=kfold,
return_train_score=True,
n_jobs=N_JOBS,
scoring=PROBLEM_METRIC)
search = randomizedsearch_estimator.fit(x_train, y_train)
print(f"Best estimator:\n{search.best_estimator_} \
\nBest parameters:\n{search.best_params_} \
\nBest cross-validation score: {search.best_score_:.3f} \
\nBest test score: {search.score(x_validation, y_validation):.3f}\n\n")
def searching_list():
return [(SGDClassifier(random_state=SEED, learning_rate='optimal', class_weight='balanced'), {
'alpha': uniform(0.15, 0.25),
'l1_ratio': uniform(0.002, 0.008),
'max_iter': randint(45000, 55000),
'tol': uniform(0.04, 0.12),
'epsilon': uniform(45000, 55000),
'power_t': uniform(-100000, -50000),
'loss': [
'hinge', 'log_loss', 'modified_huber', 'squared_hinge', 'perceptron', 'squared_error',
'huber', 'epsilon_insensitive', 'squared_epsilon_insensitive'
],
'penalty': ['l2', 'l1', 'elasticnet']
})]
def parameter_initializer(features_train, target_train, features_validation,
target_validation):
for model, distribution in searching_list():
randomized_search(model, distribution, features_train, target_train, features_validation,
target_validation)
Traceback (most recent call last):
File "c:\Users\username\Desktop\some-calculator\graph-analyzer\graph_analyzer.py", line 197, in <module>
main()
File "c:\Users\username\Desktop\some-calculator\graph-analyzer\graph_analyzer.py", line 191, in main
predicted_class = node_class_predictor(new_graph)
File "c:\Users\username\Desktop\some-calculator\graph-analyzer\utilities_module.py", line 3379, in node_class_predictor
x_test, y_test, clf = custom_classifier(emb_df)
File "c:\Users\username\Desktop\some-calculator\graph-analyzer\utilities_module.py", line 3337, in custom_classifier
parameter_initializer(x_train, y_train, x_test, y_test)
File "c:\Users\username\Desktop\some-calculator\graph-analyzer\utilities_module.py", line 3129, in parameter_initializer
randomized_search(model, distribution, features_train, target_train, features_validation,
File "c:\Users\username\Desktop\some-calculator\graph-analyzer\utilities_module.py", line 3079, in randomized_search
search = randomizedsearch_estimator.fit(x_train, y_train)
File "C:\ProgramData\Anaconda3\envs\tf\lib\site-packages\sklearn\model_selection\_search.py", line 875, in fit
self._run_search(evaluate_candidates)
File "C:\ProgramData\Anaconda3\envs\tf\lib\site-packages\sklearn\model_selection\_search.py", line 1749, in _run_search
evaluate_candidates(
File "C:\ProgramData\Anaconda3\envs\tf\lib\site-packages\sklearn\model_selection\_search.py", line 811, in evaluate_candidatesates
candidate_params = list(candidate_params)
File "C:\ProgramData\Anaconda3\envs\tf\lib\site-packages\sklearn\model_selection\_search.py", line 324, in __iter__
params[k] = v.rvs(random_state=rng)
File "C:\ProgramData\Anaconda3\envs\tf\lib\site-packages\scipy\stats\_distn_infrastructure.py", line 473, in rvs
return self.dist.rvs(*self.args, **kwds)
File "C:\ProgramData\Anaconda3\envs\tf\lib\site-packages\scipy\stats\_distn_infrastructure.py", line 1068, in rvs
raise ValueError("Domain error in arguments.")
ValueError: Domain error in arguments.
when i got domain error it was due to me entering hyperparameter ranges in reverse order e.g., optimize__lr:reciprocal(3e-3,3e-4) and when changed to reciprocal(3e-4,3e-1) it was solved. so i suggest tinkering with the hyperparameter values and reversing them