风控IV、AUC、KS python计算代码

时间:2024-10-30 07:01:31

一、IV计算代码

  def cal_iv(df, label_col, feat_cols, bin=10):
      """Compute the Information Value (IV) of features against a 0/1 label.

      For every column in ``feat_cols`` the rows are sorted by the feature
      value and cut into consecutive equal-sized groups of
      ``int(n_rows / bin)`` rows. Groups that contain no positive label
      (``label == 1``) are pooled with the following groups until a positive
      shows up, so that the positive share of a bin is never zero. The IV of
      a feature is the sum over bins of
      ``(p1 - p0) * ln(p1 / p0)``, where ``p1`` / ``p0`` are the bin's share
      of all positives / all negatives, rounded to 3 decimals.

      Args:
          df: pandas DataFrame holding the features and the label.
          label_col: name of the label column (values 0 and 1).
          feat_cols: iterable of feature column names.
          bin: target number of equal-frequency groups (must be > 0).

      Returns:
          DataFrame with columns ``feature`` and ``iv`` sorted by ``iv``
          in descending order. Features with fewer than 10 distinct
          non-null values are skipped. The frame is empty (but has both
          columns) when every feature is skipped.

      Raises:
          ValueError: if ``bin`` is not positive.
      """
      # Local imports keep the snippet usable on its own.
      import math

      import pandas as pd

      if bin <= 0:
          raise ValueError("bin must be a positive number")

      eps = 1e-13  # keeps the logarithm finite when a bin has no negatives
      min_unique = 10  # features with fewer distinct values are not binned
      rows = []
      for col in feat_cols:
          # Rows with a missing feature or label must not land in any bin.
          data = df[[col, label_col]].dropna()
          if data[col].nunique() < min_unique:
              continue

          labels = data.sort_values(by=col, ascending=True)[label_col].to_numpy()
          n = len(labels)
          count_0 = float((labels == 0).sum())
          count_1 = float((labels == 1).sum())
          if count_0 == 0 or count_1 == 0:
              # Only one class present: the class shares are undefined, so
              # report "no information" instead of dividing by zero.
              rows.append({'feature': col, 'iv': 0.0})
              continue

          # At least one row per group, even when there are fewer rows than
          # requested bins.
          divs = max(int(n / bin), 1)

          ivs = []
          start = 0  # first row of the bin currently being accumulated
          for i in range(math.ceil(n / divs)):
              stop = min((i + 1) * divs, n)
              chunk = labels[start:stop]
              if not (chunk == 1).any():
                  # No positives yet: keep pooling with the next group.
                  continue
              yi = float((chunk == 1).sum()) + eps
              ni = float((chunk == 0).sum()) + eps
              p1 = yi / count_1
              p0 = ni / count_0
              iv = (p1 - p0) * math.log(p1 / p0)
              if iv > 1:
                  # Diagnostic output for bins that dominate the IV.
                  print('group', i, len(chunk), chunk.mean())
              ivs.append(iv)
              start = stop
          # NOTE: trailing groups without any positive are never closed into
          # a bin and therefore do not contribute to the IV.

          rows.append({'feature': col, 'iv': round(float(sum(ivs)), 3)})

      if not rows:
          return pd.DataFrame(columns=['feature', 'iv'])
      df_re = pd.DataFrame(rows)[['feature', 'iv']]
      return df_re.sort_values(by='iv', ascending=False)

方法调用:

cal_iv(df_temp, Y, feas_list, bin=10)

参数说明:df_temp 为数据集(DataFrame);Y 为"是否逾期"标签列的列名;feas_list 为需要计算 IV 的变量(列名)列表。

2、按月份、按不同 Y 标签分别计算 IV

  def iv_distr_v2(df, flag, feas=feas_list,
                  y_cols=('mob1_15', 'mob1_30', 'mob2_30', 'mob3_30')):
      """Compute feature IVs separately for every label column and every
      distinct value of ``flag``.

      For each label column in ``y_cols`` the rows where that label is null
      are removed, the remainder is split by the distinct values of the
      ``flag`` column, and ``cal_iv`` is run on each slice with ``bin=10``.

      Args:
          df: pandas DataFrame holding features, label columns and ``flag``.
          flag: name of the column used to split the data (e.g. a month).
          feas: feature column names passed on to ``cal_iv``.
          y_cols: label column names to evaluate.

      Returns:
          DataFrame made of the stacked ``cal_iv`` results with two extra
          columns: ``month`` (the ``flag`` value of the slice) and ``Y``
          (the label column used). An empty DataFrame when no slice was
          evaluated.
      """
      # Local import keeps the snippet usable on its own.
      import pandas as pd

      parts = []
      # Loop over the different label (Y) columns.
      for y_col in y_cols:
          print(y_col)
          labelled = df[df[y_col].notnull()]
          for value in list(labelled[flag].unique()):
              subset = labelled.loc[labelled[flag] == value]
              print(labelled.shape, subset.shape)
              # Use the caller-supplied feature list, not a module global.
              iv_part = cal_iv(subset, y_col, feas, bin=10)
              iv_part['month'] = value
              iv_part['Y'] = y_col
              parts.append(iv_part)

      if not parts:
          return pd.DataFrame()
      # Concatenate once instead of growing the frame inside the loop.
      return pd.concat(parts, axis=0)

调用代码:

iv_all = iv_distr_v2(df, flag= 'month',feas=feas_list, )

二、KS、AUC 计算代码(只对"分数越大、逾期概率越高"的分数有效)

import numpy as np
import pandas as pd
from sklearn.metrics import roc_auc_score, roc_curve

## Compute AUC
def cal_auc(df, y_true, y_prob):
    """Return the ROC AUC of score column ``y_prob`` against label ``y_true``.

    Args:
        df: DataFrame containing both columns.
        y_true: name of the label column.
        y_prob: name of the score column.

    Returns:
        The value returned by ``roc_auc_score``, or ``None`` when it cannot
        be computed (any exception raised while selecting the columns or
        scoring is swallowed on purpose).
    """
    try:
        return roc_auc_score(df[y_true], df[y_prob])
    except Exception:
        # Best-effort by design: callers get None for slices that cannot be
        # scored. Catching Exception (not a bare except) lets
        # KeyboardInterrupt / SystemExit propagate.
        return None

## Compute KS
def cal_ks(df, y_true, y_prob):
    """Return the KS statistic of score column ``y_prob`` against ``y_true``.

    KS is taken as the maximum of ``tpr - fpr`` over the thresholds returned
    by ``roc_curve``.

    Args:
        df: DataFrame containing both columns.
        y_true: name of the label column.
        y_prob: name of the score column.

    Returns:
        The maximum gap between TPR and FPR, or ``None`` when it cannot be
        computed (any exception raised while selecting the columns or
        building the curve is swallowed on purpose).
    """
    try:
        fpr, tpr, _thresholds = roc_curve(df[y_true], df[y_prob])
        return (tpr - fpr).max()
    except Exception:
        # Best-effort by design: callers get None for slices that cannot be
        # scored. Catching Exception (not a bare except) lets
        # KeyboardInterrupt / SystemExit propagate.
        return None