风控IV、AUC、KS python计算代码

一、IV计算代码




def cal_iv(df, label_col, feat_cols, bin=10):
    """Compute the Information Value (IV) of features against a binary label.

    Each feature is sorted ascending and cut by row rank into consecutive
    bins of ``len(rows) // bin`` rows. A bin that contains no positive row
    (label == 1) is merged into the next bin that has one; trailing bins
    without positives are merged into the last bin that has one, so every
    row contributes to exactly one IV term.

    Args:
        df: pandas DataFrame holding the feature columns and the label column.
        label_col: name of the label column. Rows with label == 1 are counted
            as positives and rows with label == 0 as negatives.
        feat_cols: iterable of feature column names to evaluate.
        bin: target number of equal-frequency bins, must be >= 1. (The name
            shadows the builtin but is kept so keyword callers keep working.)

    Returns:
        DataFrame with columns ``feature`` and ``iv`` (rounded to 3 decimals),
        sorted by ``iv`` descending. Features with fewer than 10 distinct
        non-null values are omitted. Features whose remaining rows contain
        only one label class get ``NaN`` because IV is undefined there.

    Raises:
        ValueError: if ``bin`` is smaller than 1.
    """
    import math

    import pandas as pd

    if bin < 1:
        raise ValueError("bin must be a positive integer")

    eps = 1e-13  # keeps the log ratio finite when a bin has no negatives
    min_distinct = 10  # features with fewer distinct values are skipped
    results = []

    for col in feat_cols:
        # dropna() returns a new frame; the result has to be kept.
        data = df[[col, label_col]].dropna()
        if data[col].nunique() < min_distinct:
            continue

        data = data.sort_values(by=col, ascending=True)
        labels = data[label_col]
        total_neg = float((labels == 0).sum())
        total_pos = float((labels == 1).sum())
        if total_neg == 0 or total_pos == 0:
            # IV needs both classes; report NaN instead of dividing by zero.
            results.append({'feature': col, 'iv': float('nan')})
            continue

        # At least one row per bin, even when bin exceeds the row count.
        bin_size = max(1, len(data) // bin)

        # Walk the rank-ordered bins and merge positive-free bins forward.
        merged_bins = []  # (index of the last raw bin, merged rows)
        pending = []
        for group, start in enumerate(range(0, len(data), bin_size)):
            chunk = data.iloc[start:start + bin_size]
            pending.append(chunk)
            if (chunk[label_col] == 1).any():
                merged_bins.append((group, pd.concat(pending, axis=0)))
                pending = []
        if pending:
            # total_pos > 0 guarantees merged_bins is non-empty here.
            last_group, last_rows = merged_bins[-1]
            merged_bins[-1] = (
                last_group,
                pd.concat([last_rows] + pending, axis=0),
            )

        terms = []
        for group, rows in merged_bins:
            pos_rate = ((rows[label_col] == 1).sum() + eps) / total_pos
            neg_rate = ((rows[label_col] == 0).sum() + eps) / total_neg
            term = (pos_rate - neg_rate) * math.log(pos_rate / neg_rate)
            if term > 1:
                # Diagnostic: a single bin dominating the IV is suspicious.
                print('group', group, rows.shape[0], rows[label_col].mean())
            terms.append(term)

        results.append({'feature': col, 'iv': round(float(sum(terms)), 3)})

    # Explicit columns keep the result well-formed when nothing qualified.
    ranked = pd.DataFrame(results, columns=['feature', 'iv'])
    return ranked.sort_values(by='iv', ascending=False)

方法调用：

cal_iv(df_temp, Y, feas_list, bin=10)

参数说明：df_temp 为数据集（DataFrame）；Y 为是否逾期标签的列名（取值 0/1）；feas_list 为需要计算 IV 的变量名列表。

2、按月和不同Y 下的 IV计算




def iv_distr_v2(df, flag,  feas=feas_list):
    """Compute feature IVs per label column and per value of ``flag``.

    For each of the label columns ``mob1_15``, ``mob1_30``, ``mob2_30`` and
    ``mob3_30``, rows with a null label are discarded, the remainder is split
    by the distinct values of ``df[flag]``, and ``cal_iv`` is run on every
    slice with 10 bins.

    Args:
        df: pandas DataFrame containing the four label columns, the ``flag``
            column and the feature columns.
        flag: name of the column used to split the data (e.g. a month column).
        feas: feature column names passed to ``cal_iv``. The default is the
            module-level ``feas_list`` as it exists when this function is
            defined.

    Returns:
        DataFrame stacking all ``cal_iv`` results, with two extra columns:
        ``month`` (the ``flag`` value of the slice) and ``Y`` (the label
        column name). An empty DataFrame is returned when no slice exists.
    """
    import pandas as pd

    label_cols = ['mob1_15', 'mob1_30', 'mob2_30', 'mob3_30']
    frames = []

    # Loop over the different labels (Y).
    for label in label_cols:
        print(label)
        labelled = df[df[label].notnull()]

        for period in labelled[flag].unique():
            subset = labelled.loc[labelled[flag] == period]
            print(labelled.shape, subset.shape)

            # Use the caller-supplied feature list, not the global one.
            iv_table = cal_iv(subset, label, feas, bin=10)
            iv_table['month'] = period
            iv_table['Y'] = label
            frames.append(iv_table)

    if not frames:
        return pd.DataFrame()
    # Concatenate once instead of growing a frame inside the loop.
    return pd.concat(frames, axis=0)

调用代码：

iv_all = iv_distr_v2(df, flag= 'month',feas=feas_list, )

二、KS、AUC调用代码，只对值越大逾期越高的分数有效

import numpy as np
import pandas as pd
from sklearn.metrics import roc_auc_score, roc_curve

## Compute AUC
def cal_auc(df, y_true, y_prob):
    """Return the ROC AUC of score column ``y_prob`` against label ``y_true``.

    Args:
        df: table-like object indexable by column name (e.g. a DataFrame).
        y_true: name of the column holding the true binary labels.
        y_prob: name of the column holding the scores; higher scores are
            expected to mean a higher probability of the positive class.

    Returns:
        The AUC as returned by ``roc_auc_score``, or ``None`` when it cannot
        be computed (for example a missing column, or input that
        ``roc_auc_score`` rejects).
    """
    try:
        return roc_auc_score(df[y_true], df[y_prob])
    except Exception:
        # Best-effort metric: callers get None instead of an exception, but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        return None

## Compute KS
def cal_ks(df, y_true, y_prob):
    """Return the KS statistic of score column ``y_prob`` against ``y_true``.

    KS is taken as the maximum of ``tpr - fpr`` over the thresholds produced
    by ``roc_curve``.

    Args:
        df: table-like object indexable by column name (e.g. a DataFrame).
        y_true: name of the column holding the true binary labels.
        y_prob: name of the column holding the scores; higher scores are
            expected to mean a higher probability of the positive class.

    Returns:
        The KS value, or ``None`` when it cannot be computed (for example a
        missing column, or input that ``roc_curve`` rejects).
    """
    try:
        fpr, tpr, _thresholds = roc_curve(df[y_true], df[y_prob])
        return max(tpr - fpr)
    except Exception:
        # Best-effort metric: callers get None instead of an exception, but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        return None

秒客网

风控IV、AUC、KS python计算代码

相关文章