Python 实现 KS 曲线,相关使用方法请参考上篇博客《R 语言实现 KS 曲线》。
代码如下:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
|
####################### PlotKS ##########################
def PlotKS(preds, labels, n, asc):
    """Compute a KS table at n quantile points and draw the KS chart.

    The function relies on the names ``DataFrame``, ``Series``, ``np`` and
    ``plt`` being available in the enclosing module.

    Parameters
    ----------
    preds : array-like
        Predicted values, one per observation.
    labels : array-like
        Binary outcome per observation: 1 marks a "bad" case, 0 a "good" one.
    n : number
        Number of quantile points to sample from the cumulative curves
        (must be at least 1).
    asc : {0, 1}
        1 when ``preds`` is a score (observations are ranked ascending),
        0 when ``preds`` is a probability (observations are ranked descending).

    Returns
    -------
    DataFrame
        Columns ``tile``, ``cumsum_good``, ``cumsum_bad`` and ``ks``. The
        first row is the all-zero origin, followed by one row per quantile
        point.

    Raises
    ------
    ValueError
        If ``asc`` is not 0 or 1, if ``n`` is below 1, or if the input is
        empty.
    """
    if asc not in (0, 1):
        raise ValueError('asc must be 1 (preds is a score) or 0 (preds is a probability)')
    if n < 1:
        raise ValueError('n must be at least 1')

    ksds = DataFrame({'bad': labels, 'pred': preds})
    if len(ksds) == 0:
        raise ValueError('preds and labels must not be empty')
    ksds['good'] = 1 - ksds.bad

    pred_ascending = (asc == 1)

    # Ordering 1: among tied predictions, good cases (bad == 0) come first.
    ksds1 = ksds.sort_values(by=['pred', 'bad'], ascending=[pred_ascending, True])
    ksds1.index = range(len(ksds1))
    cumsum_good1 = 1.0 * ksds1.good.cumsum() / ksds1.good.sum()
    cumsum_bad1 = 1.0 * ksds1.bad.cumsum() / ksds1.bad.sum()

    # Ordering 2: among tied predictions, bad cases (bad == 1) come first.
    ksds2 = ksds.sort_values(by=['pred', 'bad'], ascending=[pred_ascending, False])
    ksds2.index = range(len(ksds2))
    cumsum_good2 = 1.0 * ksds2.good.cumsum() / ksds2.good.sum()
    cumsum_bad2 = 1.0 * ksds2.bad.cumsum() / ksds2.bad.sum()

    # Average the two orderings so the tie-break direction does not bias the
    # curves. Building a fresh frame avoids assigning into a slice of ksds1.
    curve = DataFrame({
        'cumsum_good': (cumsum_good1 + cumsum_good2) / 2,
        'cumsum_bad': (cumsum_bad1 + cumsum_bad2) / 2,
    })
    curve['ks'] = curve['cumsum_bad'] - curve['cumsum_good']
    total = len(curve)
    curve['tile'] = 1.0 * np.arange(1, total + 1) / total

    # Quantile points 1/n, 2/n, ..., ending at exactly 1. Dividing integers
    # by n keeps every point exact; a float-step arange can produce an extra
    # element or slightly inflated values that shift the ceil'd row index.
    qe = list(np.arange(1, np.ceil(n)) / n)
    qe.append(1)

    ks_index = Series(curve.index).quantile(q=qe)
    ks_index = list(np.ceil(ks_index).astype(int))

    columns = ['tile', 'cumsum_good', 'cumsum_bad', 'ks']
    sampled = curve.loc[ks_index, columns]

    # Prepend the origin so every plotted curve starts at (0, 0).
    origin = np.array([[0, 0, 0, 0]])
    ksds = DataFrame(np.concatenate([origin, sampled.values], axis=0), columns=columns)

    ks_row = ksds.ks.idxmax()
    ks_value = ksds.ks.max()
    ks_pop = ksds.tile[ks_row]
    print('ks_value is ' + str(np.round(ks_value, 4)) + ' at pop = ' + str(np.round(ks_pop, 4)))

    # chart
    plt.plot(ksds.tile, ksds.cumsum_good, label='cum_good',
             color='blue', linestyle='-', linewidth=2)
    plt.plot(ksds.tile, ksds.cumsum_bad, label='cum_bad',
             color='red', linestyle='-', linewidth=2)
    plt.plot(ksds.tile, ksds.ks, label='ks',
             color='green', linestyle='-', linewidth=2)

    # Dashed guides marking the point of maximum separation.
    plt.axvline(ks_pop, color='gray', linestyle='--')
    plt.axhline(ks_value, color='green', linestyle='--')
    plt.axhline(ksds.loc[ks_row, 'cumsum_good'], color='blue', linestyle='--')
    plt.axhline(ksds.loc[ks_row, 'cumsum_bad'], color='red', linestyle='--')
    plt.title('KS=%s ' % np.round(ks_value, 4) +
              'at Pop=%s' % np.round(ks_pop, 4), fontsize=15)
    return ksds
####################### over ##########################
|
作图效果如下:
以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持服务器之家。
原文链接:https://www.cnblogs.com/bigdatafengkong/p/9079093.html