量化交易之python篇 - 统计学习导论python版（第二章习题）

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_boston

from public_module.tqz_extern.tools.pandas_operator.pandas_operator import pandas

# 第二章第八题
def charpter_2_test8():
    """Chapter 2, exercise 8: explore the College data set.

    Reads ``data/College.csv``, derives an ``Elite`` flag, discretises the
    ``Enroll``, ``PhD`` and ``Terminal`` columns into equal-width labelled
    bins, and shows one bar chart of bin counts per discretised column.
    """
    # Append the 'Unnamed: 0' column to the index as a second level and name
    # that level 'Name' (presumably it holds the college names -- confirm
    # against the CSV). verify_integrity rejects duplicate index entries.
    frame = pd.read_csv(r'data/College.csv')
    frame = frame.set_index(['Unnamed: 0'], append=True, verify_integrity=True)
    frame.rename_axis([None, 'Name'], inplace=True)

    # 'Elite' is 'Yes' where Top10perc exceeds 50, otherwise 'No'.
    is_elite = frame['Top10perc'] > 50
    frame['Elite'] = np.where(is_elite, 'Yes', 'No')

    # Column -> bin labels; the number of equal-width bins is the number of
    # labels given for that column.
    bin_specs = (
        ('Enroll', ['Low', 'Medium', 'High']),
        ('PhD', ['Very low', 'Low', 'Medium', 'High', 'Very High']),
        ('Terminal', ['Very low', 'Low', 'High', 'Very High']),
    )
    for column, labels in bin_specs:
        frame[column] = pd.cut(frame[column], bins=len(labels), labels=labels)

    # One bar chart per binned column, laid out on a 2x2 grid.
    figure = plt.figure()
    for slot, (column, _labels) in enumerate(bin_specs, start=1):
        plt.subplot(2, 2, slot)
        counts = frame[column].value_counts()
        counts.plot.bar(title=column)

    figure.subplots_adjust(hspace=1)

    plt.show()


def charpter_2_test9(path=r'data/Auto.csv'):
    """Chapter 2, exercise 9: summarise the numeric columns of the Auto data.

    Args:
        path: CSV file to read. Defaults to ``data/Auto.csv``. The file must
            contain a ``horsepower`` column.

    Returns:
        A DataFrame indexed by numeric column name with the columns
        ``mean``, ``range`` (max - min) and ``std``, computed over the rows
        that have no missing values.
    """
    auto = pd.read_csv(path)

    # Missing horsepower values are written as '?', which makes pandas load
    # the whole column as strings. Merely replacing '?' with NaN leaves the
    # dtype as object, so describe() would silently skip the column. Coerce
    # to numeric instead: '?' (and any other non-numeric text) becomes NaN.
    auto['horsepower'] = pd.to_numeric(auto['horsepower'], errors='coerce')
    auto = auto.dropna()

    # describe() reports per-column statistics; transpose so each row is a
    # column of the data set, then derive the range from max and min.
    summary = auto.describe().T
    summary['range'] = summary['max'] - summary['min']
    return summary[['mean', 'range', 'std']]


def charpter_2_test10():
    """Chapter 2, exercise 10: correlations and ordering of the Boston data.

    Builds a DataFrame from scikit-learn's Boston housing data set (feature
    columns plus a ``target`` column).

    Returns:
        A tuple ``(corr_matrix, ranked)`` where ``corr_matrix`` is the
        pairwise correlation matrix of all columns (pandas' default method,
        Pearson) and ``ranked`` is the data sorted in descending order by
        ``CRIM``, then ``TAX``, then ``PTRATIO``.
    """
    # Load the data set once and reuse it, rather than calling the loader
    # separately for the data, the feature names and the target.
    # NOTE(review): load_boston was deprecated and later removed from
    # scikit-learn -- confirm the installed version still provides it.
    dataset = load_boston()

    boston = pd.DataFrame(dataset.data, columns=dataset.feature_names)
    boston['target'] = dataset.target
    # boston.shape[0] is the number of rows, boston.shape[1] the number of
    # columns.

    corr_matrix = boston.corr()
    ranked = boston.sort_values(by=['CRIM', 'TAX', 'PTRATIO'], ascending=False)
    return corr_matrix, ranked


if __name__ == '__main__':
    # Running the file directly does nothing; call one of the
    # charpter_2_test* functions here to run an exercise.
    pass

秒客网

量化交易之python篇 - 统计学习导论python版（第二章习题）

相关文章

​量化交易之python篇 - 统计学习导论python版（第二章习题）

相关文章

量化交易之python篇 - 统计学习导论python版（第二章习题）