-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun_lgbm_optuna.py
96 lines (80 loc) · 2.58 KB
/
run_lgbm_optuna.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import argparse
from datetime import datetime as dt
from lightgbm import LGBMRegressor
import numpy as np
import pandas as pd
from sklearn.metrics import r2_score
from sklearn.model_selection import KFold, learning_curve
import yaml
# from models import lgbm as my_lgbm
from cv import r2_cv
from param_tuning.optimizer import LGBMRegressorOptimizer
from preprocessing import load_x, load_y
from utils import print_exit, print_float
# Project convention: this script is a flat sequence of top-level statements.
# Do not define any functions here (and therefore no `main` function either).

# Timestamp taken once at start-up. It is reused further down in the names of
# both the submission file and the saved config file.
now = dt.now().strftime('%Y-%m-%d-%H-%M-%S')

# Command line: a single optional --config option giving the YAML config path.
parser = argparse.ArgumentParser()
parser.add_argument('--config', default='./configs/default.yml')
options = parser.parse_args()

# Decode the YAML explicitly as UTF-8 instead of relying on the platform's
# default text encoding.
with open(options.config, 'r', encoding='utf-8') as config_file:
    config = yaml.safe_load(config_file)

# Settings consumed by the rest of the script.
features = config['extracted_features']
col_id_name = config['col_id_name']
col_target_name = config['col_target_name']
dropped_ids = config['dropped_ids']
lgbm_params = config['lgbm_params']
n_folds = config['cv']['n_folds']
# Search space handed to the hyper-parameter optimizer.
param_candidates = lgbm_params['candidates']
# Load the feature sets; `load_x` returns a mapping that is indexed here by
# 'train' and 'test'.
feature_sets = load_x(features, dropped_ids)
X_train_all, X_test = feature_sets['train'], feature_sets['test']
y_train_all = load_y(col_id_name, col_target_name, dropped_ids)

# Fixed (non-tuned) constructor arguments come from the config.
reg_params = lgbm_params['instance']
base_regressor = LGBMRegressor(
    random_state=reg_params['random_state'],
    silent=reg_params['silent'],
)

# Run the hyper-parameter search over `param_candidates`.
# NOTE(review): the search strategy and the use of `n_folds` live inside
# LGBMRegressorOptimizer and are not visible from this script.
best_params = LGBMRegressorOptimizer(
    base_regressor,
    X_train_all,
    y_train_all,
    n_folds,
    param_candidates,
).optimize()
# Only these four tuned values are taken from the optimizer's result; every
# other constructor argument keeps its default or comes from the config.
tuned_kwargs = {
    name: best_params[name]
    for name in ('boosting_type', 'learning_rate', 'lambda_l1', 'lambda_l2')
}
regressor_with_optimized_params = LGBMRegressor(
    **tuned_kwargs,
    # default params
    random_state=reg_params['random_state'],
    silent=reg_params['silent'],
)

# Train on the full training set.
regressor_with_optimized_params.fit(X_train_all, y_train_all)

# Predict, then undo the log scale with expm1.
# NOTE(review): presumably `load_y` returns a log1p-transformed target -
# confirm against preprocessing.
y_pred = np.expm1(regressor_with_optimized_params.predict(X_test))

# Evaluate: mean of the scores returned by r2_cv. This runs after fit and
# predict on purpose, so the predictions above cannot be affected by it.
score = r2_cv(
    regressor_with_optimized_params, X_train_all, y_train_all, n_folds
).mean()
# Build the submission frame: the id column of the test set plus predictions.
sub_df = pd.DataFrame(
    pd.read_feather('data/input/test.feather')[col_id_name]
)
sub_df[col_target_name] = y_pred

# Both output files carry the same "<start timestamp>_<mean CV score>" tag so
# that a submission can be matched to the config that produced it.
run_tag = '{time}_{score:.5f}'.format(time=now, score=score)

sub_df.to_csv('./data/output/sub_{}.csv'.format(run_tag), index=False)

# Snapshot the configuration used for this run.
# NOTE(review): this dumps `config` exactly as it was loaded; the tuned
# values in `best_params` are not part of the saved file.
config_file_name = './configs/{}.yml'.format(run_tag)
with open(config_file_name, 'w', encoding='utf-8') as config_file:
    yaml.dump(config, config_file)