Deep Learning商户续约预测(KNN模型)

代码实现如下

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
import numpy as np

import pandas as pd

import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split,cross_val_score

from sklearn.neighbors import KNeighborsClassifier

from sklearn.metrics import confusion_matrix,accuracy_score,precision_score,recall_score,f1_score,make_scorer

import seaborn as sns

# Matplotlib display settings, applied in one call:
#   * font.sans-serif    -> a font with CJK glyphs, so Chinese text in labels
#                           is not rendered as empty boxes
#   * axes.unicode_minus -> False, so the minus sign on axis ticks is drawn
#                           with a glyph this font actually provides
plt.rcParams.update({
    'font.sans-serif': ['FZSongYi-Z13S'],
    'axes.unicode_minus': False,
})

1
2
3
4
# Load the merchant records used for training and evaluation from the CSV
# file in the current working directory.
data = pd.read_csv(
    "华南地区.csv"
)

# Bare expression: in a notebook cell this displays the loaded table.
data

1
2
3
4
5
6
7
8
9
10
11
12
# Split the data into a training set and a test set.

# Feature matrix: the three numeric columns used as model inputs.
x = data[["注册时长", "营收收入", "成本"]]

# Target vector. Select the column with a single bracket so `y` is a 1-D
# Series; a one-column DataFrame (double brackets) makes scikit-learn emit a
# DataConversionWarning on every fit because it expects a 1-D target.
y = data["是否续约"]

# Hold out 30% of the rows for testing. No random_state is passed, so the
# row assignment differs between runs while the split sizes stay the same.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)

# Report the number of rows in each split.
print(len(x_train))
print(len(x_test))

1
2
3
4
5
6
7
8
9
10
# Build a K-nearest-neighbours classifier with K = 3 and train it on the
# training split. `fit` returns the estimator itself, so construction and
# training can be chained.
knnModel = KNeighborsClassifier(n_neighbors=3).fit(x_train, y_train)

# Score the fitted model on the held-out test split and report it.
score = knnModel.score(x_test, y_test)
print(f"模型得分:{score}")

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# Model evaluation on the held-out test split.

# Predict a label for every test row.
y_test_predict = knnModel.predict(x_test)

# Bare expression: in a notebook cell this displays the predictions.
y_test_predict

# Confusion matrix with a fixed class order, so row/column 0 is "续约" and
# row/column 1 is "不续约".
class_labels = ["续约", "不续约"]
c_matrix = confusion_matrix(
    y_true=y_test, y_pred=y_test_predict, labels=class_labels
)

# Show the confusion matrix as a heat map.
#   fmt="d"       -> print the cell counts as plain integers; the default
#                    ".2g" format switches to scientific notation for counts
#                    of 100 or more
#   x/yticklabels -> label the axes with the class names instead of 0/1
sns.heatmap(
    c_matrix,
    annot=True,
    fmt="d",
    xticklabels=class_labels,
    yticklabels=class_labels,
)

# Accuracy over both classes.
acc = accuracy_score(y_true=y_test, y_pred=y_test_predict)

# Precision, recall and F1, all treating "续约" as the positive class.
precision = precision_score(y_true=y_test, y_pred=y_test_predict, pos_label="续约")
recall = recall_score(y_true=y_test, y_pred=y_test_predict, pos_label="续约")
F1 = f1_score(y_true=y_test, y_pred=y_test_predict, pos_label="续约")

print("准确率:{},精确率:{},召回率:{},F1:{}".format(acc, precision, recall, F1))

1
2
3
4
5
6
7
8
9
10
11
12
# K-fold cross-validation of the current model on the full data set.

# Ten folds give ten scores; with no `scoring` argument the estimator's own
# default score is used, which for this classifier is accuracy.
kcvs = cross_val_score(estimator=knnModel, X=x, y=y, cv=10)

# Bare expression: in a notebook cell this displays the ten fold scores.
kcvs

# Average of the ten fold scores.
kcvs_mean = kcvs.mean()

# Bare expression: in a notebook cell this displays the mean score.
kcvs_mean

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
# Sweep K and record the mean 10-fold cross-validation score of four metrics
# for each K, then plot the four curves against K.

# K values that were evaluated.
ks = []
# Mean accuracy per K.
acc_means = []
# Mean precision per K.
precision_means = []
# Mean recall per K.
recall_means = []
# Mean F1 per K.
f1_means = []

# Candidate K values: 2 through 29 inclusive. Shared by the loop and by the
# x-axis ticks so the two cannot drift apart.
k_values = range(2, 30)

# The scorers do not depend on K, so build them once outside the loop.
# Precision, recall and F1 treat "续约" as the positive class.
acc_scorer = make_scorer(accuracy_score)
precision_scorer = make_scorer(precision_score, pos_label="续约")
recall_scorer = make_scorer(recall_score, pos_label="续约")
f1_scorer = make_scorer(f1_score, pos_label="续约")

for k in k_values:
    # Remember the current K and build a fresh, unfitted model for it.
    ks.append(k)
    knnModel = KNeighborsClassifier(n_neighbors=k)

    # 10-fold cross-validated accuracy; keep the mean over the folds.
    acc_kcvs = cross_val_score(knnModel, x, y, cv=10, scoring=acc_scorer)
    acc_means.append(acc_kcvs.mean())

    # 10-fold cross-validated precision; keep the mean over the folds.
    precision_kcvs = cross_val_score(
        knnModel, x, y, cv=10, scoring=precision_scorer
    )
    precision_means.append(precision_kcvs.mean())

    # 10-fold cross-validated recall; keep the mean over the folds.
    recall_kcvs = cross_val_score(knnModel, x, y, cv=10, scoring=recall_scorer)
    recall_means.append(recall_kcvs.mean())

    # 10-fold cross-validated F1; keep the mean over the folds.
    f1_kcvs = cross_val_score(knnModel, x, y, cv=10, scoring=f1_scorer)
    f1_means.append(f1_kcvs.mean())

# Collect the per-K means into one table. This rebinds `score`, which
# earlier held a single float.
score = pd.DataFrame({
    "k": ks,
    "precision": precision_means,
    "acc": acc_means,
    "recall": recall_means,
    "f1": f1_means,
})

# One line per metric against K; figsize=(10,5) sets the canvas size.
score.plot(x="k", y=["precision", 'acc', "recall", "f1"], figsize=(10, 5))

# Put an x-axis tick on every evaluated K.
plt.xticks(k_values)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
# Predict labels for a second, unlabeled data set.

# Train a K = 17 classifier on the complete first data set (all rows of x/y,
# not only the training split). `fit` returns the estimator itself.
# NOTE(review): K = 17 is presumably taken from the K sweep plot - confirm.
knnModel = KNeighborsClassifier(n_neighbors=17).fit(x, y)

# Load the data set to predict on and print its summary statistics.
data_north = pd.read_csv("华北地区.csv")
north_summary = data_north.describe()
print(north_summary)

# Use the same three feature columns the model was trained on and store the
# predicted label in a new column.
# NOTE(review): the column name "预测预约" looks like a typo for "预测续约";
# it is left unchanged because it is part of the produced table.
north_features = data_north[["注册时长", "营收收入", "成本"]]
data_north["预测预约"] = knnModel.predict(north_features)

# Bare expression: in a notebook cell this displays the table with predictions.
data_north

尾巴