核心函数
def train_model(x_train, y_train, x_test, model_name, cv_state=True):
    """Train a regression model and predict on the test set.

    Parameters
    ----------
    x_train : np.ndarray
        Training features, 2-D array of shape [n_samples_train, n_features].
    y_train : np.ndarray
        Training targets, 1-D array of shape [n_samples_train].
    x_test : np.ndarray
        Test features, 2-D array of shape [n_samples_test, n_features].
    model_name : str
        Which model to use: 'random forest' or 'XGB'.
    cv_state : bool, default True
        If True, run GridSearchCV to pick hyper-parameters; otherwise use
        the fixed parameters hard-coded below (edit them to tune manually).

    Returns
    -------
    np.ndarray
        Predicted y values for x_test.

    Raises
    ------
    ValueError
        If model_name is not one of the supported names.
    """
    if model_name not in ('random forest', 'XGB'):
        # Fail fast with a clear message instead of a NameError later on
        # the never-assigned `model` variable.
        raise ValueError(f"unknown model_name: {model_name!r}")

    if cv_state:
        # cv_state=True: use GridSearchCV (10-fold, MSE scoring) to pick
        # the best hyper-parameters.
        if model_name == 'random forest':
            # 'squared_error' is the sklearn>=1.0 name for the old 'mse'
            # criterion (deprecated in 1.0, removed in 1.2).
            model = RandomForestRegressor(random_state=1, criterion='squared_error')
            # Cap max_features at the actual feature count: asking for more
            # features than the data has makes sklearn raise.
            n_features = x_train.shape[1]
            parameters = [{
                "max_features": range(1, min(32, n_features + 1), 3),
                "min_samples_leaf": range(1, 20, 3),
                "max_depth": range(1, 20, 3),
            }]
        else:  # 'XGB'
            model = xgb.XGBRegressor(random_state=1)
            parameters = [{
                "eta": [0.3, 0.2, 0.1],
                "max_depth": [3, 5, 6, 10, 20],
                "n_estimators": [100, 200, 500],
                "gamma": [0, 0.1, 0.2, 0.5, 1],
            }]
        grid = GridSearchCV(model, parameters, cv=10,
                            scoring="neg_mean_squared_error", verbose=10)
        grid.fit(x_train, y_train)
        print('best_params_=', grid.best_params_)
        print('best_score_=', grid.best_score_)
        # GridSearchCV's default refit=True has already refit the best
        # parameter combination on the full training set, so reuse that
        # estimator instead of rebuilding and refitting an identical model.
        regr = grid.best_estimator_
    else:
        # cv_state=False: fixed parameters, no search.
        if model_name == 'random forest':
            model = RandomForestRegressor(random_state=1, criterion='squared_error',
                                          max_depth=7, max_features=31,
                                          min_samples_leaf=10)
        else:  # 'XGB'
            model = xgb.XGBRegressor(random_state=1, gamma=0.1,
                                     max_depth=3, n_estimators=100)
        regr = model.fit(x_train, y_train)

    y_predictoftest = regr.predict(x_test)
    return y_predictoftest
在调用train_model前你需要额外做的一些事情:
1. 加载必要的库:
import numpy as np
import pandas as pd
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestRegressor
import xgboost as xgb
from sklearn.model_selection import StratifiedKFold
2. 根据自己的数据处理数据
x_train,y_train, x_test,y_test, 类型描述可看train_model中描述
3. 调用函数
y_predictoftest=train_model(x_train,y_train, x_test,'XGB',cv_state=True)
如果想调用XGB,就用'XGB';
如果想调用random forest,就用'random forest'
cv_state: 是否需要GridSearchCV进行调参。
4. 如果想自己调参,可以参阅官网
scikit-learn 官网Random Forest 部分:
分类器:
sklearn.ensemble.RandomForestClassifier — scikit-learn 1.1.1 documentation
回归器:
sklearn.ensemble.RandomForestRegressor — scikit-learn 1.1.1 documentation
XGBoost :
版权声明:本文内容由互联网用户自发贡献,该文观点仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 举报,一经查实,本站将立刻删除。
文章由极客之音整理,本文链接:https://www.bmabk.com/index.php/post/73506.html