# hyperparam_optim.py
import json
import os
from typing import Any, Dict, List

import optuna
import pandas as pd
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor
from sklearn.ensemble import HistGradientBoostingRegressor

from main_funcs import cross_val_QWK


def objective(trial: optuna.Trial, reg: str, X: pd.DataFrame, y: pd.Series, num_cols: List[str], cat_cols: List[str], cv: int, verbose: bool = False) -> float:
    '''
    Defines the Optuna objective for hyperparameter optimization of a given
    regressor model. Currently, the supported regressors are
        (i)   XGBoost
        (ii)  LightGBM
        (iii) CatBoost
        (iv)  HistGradBoost
    '''
    if reg == 'XGBoost':
        # Hyperparameters for the XGBoost regressor
        params = {
            'objective': trial.suggest_categorical('objective', ['reg:squarederror', 'reg:absoluteerror', 'reg:pseudohubererror']),
            'n_estimators': trial.suggest_int('n_estimators', 100, 500),
            'max_depth': trial.suggest_int('max_depth', 5, 10),
            'learning_rate': trial.suggest_float('learning_rate', 1e-3, 0.1, log=True),
            'gamma': trial.suggest_float('gamma', 0.0, 5.0),
            'reg_alpha': trial.suggest_float('reg_alpha', 1, 5),
            'reg_lambda': trial.suggest_float('reg_lambda', 1, 5),
            'subsample': trial.suggest_float('subsample', 0.5, 1.0),
            'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0)
        }
        model = XGBRegressor(**params)
    elif reg == 'LightGBM':
        # Hyperparameters for the LightGBM regressor
        params = {
            'objective': trial.suggest_categorical('objective', ['regression', 'poisson', 'quantile']),
            'verbosity': -1,
            'n_estimators': trial.suggest_int('n_estimators', 100, 500),
            'max_depth': trial.suggest_int('max_depth', 5, 10),
            'learning_rate': trial.suggest_float('learning_rate', 1e-3, 0.1, log=True),
            'reg_alpha': trial.suggest_float('reg_alpha', 1, 5),
            'reg_lambda': trial.suggest_float('reg_lambda', 1, 5),
            'subsample': trial.suggest_float('subsample', 0.5, 1.0),
            'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0)
        }
        model = LGBMRegressor(**params)
    elif reg == 'CatBoost':
        # Hyperparameters for the CatBoost regressor
        params = {
            'objective': trial.suggest_categorical('objective', ['RMSE', 'Poisson', 'Quantile']),
            'iterations': trial.suggest_int('iterations', 200, 500),
            'depth': trial.suggest_int('depth', 4, 10),
            'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.1, log=True),
            'l2_leaf_reg': trial.suggest_float('l2_leaf_reg', 1, 5),
            'subsample': trial.suggest_float('subsample', 0.5, 1.0)
        }
        model = CatBoostRegressor(**params, verbose=0)
    elif reg == 'HistGradBoost':
        # Hyperparameters for the HistGradientBoosting regressor
        params = {
            'loss': trial.suggest_categorical('loss', ['squared_error', 'absolute_error', 'poisson']),
            'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.1, log=True),
            'max_iter': trial.suggest_int('max_iter', 100, 500),
            'max_leaf_nodes': trial.suggest_int('max_leaf_nodes', 25, 50),
            'max_depth': trial.suggest_int('max_depth', 5, 15),
            'l2_regularization': trial.suggest_float('l2_regularization', 1, 5),
            'max_features': trial.suggest_float('max_features', 0.5, 1.0)
        }
        model = HistGradientBoostingRegressor(**params)
    else:
        raise ValueError(f'Unsupported regressor type: {reg}')

    val_QWK = cross_val_QWK(model, X, y, num_cols, cat_cols, cv, optimize_mode=True)
    return val_QWK
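
# A quick way to smoke-test the objective above without running a full study
# (a sketch, assuming X, y, num_cols, cat_cols and cv are already defined):
# optuna.trial.FixedTrial replays a fixed parameter set through the
# suggest_* calls, so the objective can be evaluated once in isolation.
#
#     fixed = optuna.trial.FixedTrial({
#         'objective': 'reg:squarederror', 'n_estimators': 200, 'max_depth': 6,
#         'learning_rate': 0.05, 'gamma': 0.0, 'reg_alpha': 1.0,
#         'reg_lambda': 1.0, 'subsample': 0.8, 'colsample_bytree': 0.8
#     })
#     print(objective(fixed, 'XGBoost', X, y, num_cols, cat_cols, cv))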


def hyperparam_optim(reg: str, X: pd.DataFrame, y: pd.Series, num_cols: List[str], cat_cols: List[str], n_trials: int = 50, cv: int = 5) -> Dict[str, Any]:
    '''
    Runs an Optuna study over the objective function defined above. The best
    parameters are returned as a dictionary and also written, in JSON format,
    to ./model_hyperparameters/<reg>.txt.
    '''
    study = optuna.create_study(direction='maximize')
    study.optimize(
        lambda trial: objective(trial, reg, X, y, num_cols, cat_cols, cv),
        n_trials=n_trials
    )
    print(f'Best parameters for {reg}: {study.best_params}')
    print(f'Best QWK score: {study.best_value}')
    # Make sure the output directory exists before writing the results
    os.makedirs('./model_hyperparameters', exist_ok=True)
    with open(f'./model_hyperparameters/{reg}.txt', 'w') as f:
        json.dump(study.best_params, f)
    return study.best_params
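

# A minimal usage sketch, not part of the original module: the CSV path and
# the 'target' column name below are placeholder assumptions, and the
# numeric/categorical split mirrors what cross_val_QWK expects.
if __name__ == '__main__':
    df = pd.read_csv('train.csv')    # hypothetical training data
    y = df['target']                 # hypothetical target column
    X = df.drop(columns=['target'])
    # Treat object-dtype columns as categorical, the rest as numeric
    cat_cols = X.select_dtypes(include='object').columns.tolist()
    num_cols = [c for c in X.columns if c not in cat_cols]
    best_params = hyperparam_optim('XGBoost', X, y, num_cols, cat_cols, n_trials=50, cv=5)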