1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# Hold out 30% of the rows for evaluation. The fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    df_x,
    df_y,
    test_size=0.3,
    random_state=7,
)
def R_2(y, y_pred):
    """Return the coefficient of determination (R^2) of predictions.

    The score is ``1 - SSE / SST``, where SSE is the sum of squared
    residuals between ``y_pred`` and ``y`` and SST is the sum of squared
    deviations of ``y`` from its own mean.

    Args:
        y: Iterable of observed target values.
        y_pred: Iterable of predicted values, aligned element-wise with ``y``.

    Returns:
        The R^2 score. When ``y`` is constant (SST is zero) the ratio is
        undefined, so 1.0 is returned for a perfect fit and 0.0 otherwise.

    Raises:
        ValueError: If ``y`` is empty or the two inputs differ in length.
    """
    actual = list(y)
    predicted = list(y_pred)
    if not actual:
        raise ValueError("R_2 requires at least one observation")
    if len(actual) != len(predicted):
        # zip() would silently truncate and report a misleading score.
        raise ValueError(
            "y and y_pred must have the same length "
            f"(got {len(actual)} and {len(predicted)})"
        )

    y_mean = sum(actual) / len(actual)
    sst = sum((value - y_mean) ** 2 for value in actual)
    sse = sum((pred - value) ** 2 for pred, value in zip(predicted, actual))

    if sst == 0:
        # Constant target: avoid dividing by zero.
        return 1.0 if sse == 0 else 0.0
    return 1 - sse / sst
def _plot_fit_panel(position, predicted, actual, y_label,
                    predicted_label, actual_label):
    """Draw one predicted-vs-actual line panel in a 1x2 subplot grid."""
    plt.subplot(1, 2, position)
    sample_index = list(range(1, len(predicted) + 1))
    plt.plot(sample_index, predicted, label=predicted_label, marker='o')
    plt.plot(sample_index, actual, label=actual_label,
             linestyle='--', marker='o')
    plt.ylabel(y_label, fontsize=15)
    plt.legend(fontsize=15)
    plt.xticks(fontsize=12)
    plt.yticks(fontsize=12)


def xgboost_plot(i='数值', n=0, y_train=None, y_test=None, x_train=None,
                 x_test=None, model_output=False, m=False, scale=False):
    """Fit an XGBoost regressor on one target column, plot and score it.

    Fits the model, draws predicted-vs-actual curves for the training and
    test data side by side, draws the feature-importance chart, and prints
    ``[r2_train, r2_test]``.

    Args:
        i: Label of the output variable, used for the y-axis of both panels.
        n: Positional index of the target column inside ``y_train`` /
            ``y_test`` (selected with ``.iloc[:, n]``); defaults to the
            first column.
        y_train: Training targets; must support ``.iloc[:, n]``.
        y_test: Test targets; must support ``.iloc[:, n]``.
        x_train: Training features.
        x_test: Test features.
        model_output: When true, return the fitted model.
        m: When true, use the tuned configuration (1000 estimators,
            learning rate 0.05, 4 jobs, early stopping after 5 rounds
            evaluated on the test data) instead of the default regressor.
        scale: When true, standardize the features with a scaler fitted on
            the training features only.

    Returns:
        The fitted model if ``model_output`` is true, otherwise ``None``.

    Raises:
        TypeError: If any of the four data arguments is omitted.
    """
    # The data arguments default to None only so the signature is valid
    # after the defaulted ``i`` and ``n``; they are still required.
    required = (
        ("y_train", y_train),
        ("y_test", y_test),
        ("x_train", x_train),
        ("x_test", x_test),
    )
    missing = [name for name, value in required if value is None]
    if missing:
        raise TypeError(
            "xgboost_plot() missing required argument(s): "
            + ", ".join(missing)
        )

    if scale:
        scaler = StandardScaler()
        x_train = scaler.fit_transform(x_train)
        x_test = scaler.transform(x_test)

    target_train = y_train.iloc[:, n]
    target_test = y_test.iloc[:, n]

    if m:
        xgb = XGBRegressor(n_estimators=1000, learning_rate=0.05, n_jobs=4)
        # NOTE(review): early stopping is evaluated on the test data, so the
        # test score is not fully independent of training.
        # NOTE(review): newer XGBoost releases expect early_stopping_rounds
        # on the constructor rather than fit() -- confirm against the
        # installed version.
        model_xgb = xgb.fit(
            x_train,
            target_train,
            early_stopping_rounds=5,
            eval_set=[(x_test, target_test)],
            verbose=False,
        )
    else:
        # Original author's note: the XGBoost result is the same with or
        # without standardization.
        model_xgb = XGBRegressor().fit(x_train, target_train)

    y_pred = model_xgb.predict(x_test)
    y_pred_train = model_xgb.predict(x_train)

    plt.figure(figsize=(30, 9), dpi=200)
    _plot_fit_panel(1, y_pred_train.tolist(), target_train.tolist(), i,
                    '训练集的预测值', '训练集的真实值')
    _plot_fit_panel(2, y_pred.tolist(), target_test.tolist(), i,
                    '验证集的预测值', '验证集的真实值')

    # Feature-importance chart.
    plot_importance(model_xgb)
    plt.show()

    r2_train = R_2(target_train, y_pred_train)
    r2_test = R_2(target_test, y_pred)
    print([r2_train, r2_test])

    if model_output:
        return model_xgb
    return None
|