001_python实现数据分析

一、

# coding:utf8

# !/usr/bin/python

# import numpy as np

import pandas as pd

import np

def example2():
    """Print the ``describe()`` summary of a small numeric ``Series``.

    Builds ``pd.Series([1, 2, 3])`` and writes its summary statistics to
    standard output.

    :return: None
    """
    s = pd.Series([1, 2, 3])

    # A parenthesised single-argument print behaves identically as a
    # Python 2 print statement and as a Python 3 print() call.
    print(s.describe())

    # Output recorded by the original author:
    #
    #     count    3.0
    #     mean     2.0
    #     std      1.0
    #     min      1.0
    #     25%      1.5
    #     50%      2.0
    #     75%      2.5
    #     max      3.0
    #     dtype: float64

def example3():
    """Print the ``describe()`` summary of a ``Series`` of strings.

    Builds ``pd.Series(['a', 'a', 'b', 'c'])`` and writes its summary to
    standard output.

    :return: None
    """
    s = pd.Series(['a', 'a', 'b', 'c'])

    # A parenthesised single-argument print behaves identically as a
    # Python 2 print statement and as a Python 3 print() call.
    print(s.describe())

    # Output recorded by the original author:
    #
    #     count     4
    #     unique    3
    #     top       a
    #     freq      2
    #     dtype: object

def example4():
    """Print the ``describe()`` summary of a ``Series`` of timestamps.

    Builds a three-element ``Series`` of ``numpy.datetime64`` values (one
    date in 2000 and the same 2010 date twice) and writes its summary to
    standard output.

    :return: None
    """
    # Import numpy here so the function does not rely on the file-level
    # ``import np``, which only works where a module literally named ``np``
    # is installed.
    import numpy as np

    s = pd.Series([
        np.datetime64("2000-01-01"),
        np.datetime64("2010-01-01"),
        np.datetime64("2010-01-01"),
    ])

    # A parenthesised single-argument print behaves identically as a
    # Python 2 print statement and as a Python 3 print() call.
    print(s.describe())

    # Output recorded by the original author.
    # NOTE(review): the rows reported for datetime data may differ between
    # pandas versions -- confirm against the installed release.
    #
    #     count                       3
    #     unique                      2
    #     top       2010-01-01 00:00:00
    #     freq                        2
    #     first     2000-01-01 00:00:00
    #     last      2010-01-01 00:00:00
    #     dtype: object

def example5():
    """Print the default ``describe()`` summary of a mixed-type ``DataFrame``.

    The frame has one categorical, one numeric and one string column. Per
    the original author's note, ``describe()`` by default returns only the
    numeric fields.

    :return: None
    """
    df = pd.DataFrame({
        'categorical': pd.Categorical(['d', 'e', 'f']),
        'numeric': [1, 2, 3],
        'object': ['a', 'b', 'c'],
    })

    # A parenthesised single-argument print behaves identically as a
    # Python 2 print statement and as a Python 3 print() call.
    print(df.describe())

    # Further variations kept from the original for reference (not run).
    # The builtin ``object`` is used where the original wrote ``np.object``,
    # an alias that current NumPy releases no longer provide.
    #
    # Describing all columns of a ``DataFrame`` regardless of data type.
    #     print(df.describe(include='all'))
    # Describing a column from a ``DataFrame`` by accessing it as an attribute.
    #     print(df.numeric.describe())
    # Including only numeric columns in a ``DataFrame`` description.
    #     print(df.describe(include=[np.number]))
    # Including only string columns in a ``DataFrame`` description.
    #     print(df.describe(include=[object]))
    # Including only categorical columns from a ``DataFrame`` description.
    #     print(df.describe(include=['category']))
    # Excluding numeric columns from a ``DataFrame`` description.
    #     print(df.describe(exclude=[np.number]))
    # Excluding object columns from a ``DataFrame`` description.
    #     print(df.describe(exclude=[object]))

def example1():
    """Print several aggregates computed over a ``describe()`` summary table.

    Builds a two-column ``DataFrame`` from a nested dict, takes its
    ``describe()`` summary once, and prints ``count``, ``median``, ``mode``,
    ``std``, ``var`` and ``skew`` of that summary, each under a Chinese
    label.

    NOTE(review): every aggregate is applied to the ``describe()`` result
    (the table of count/mean/std/min/quartiles/max), not to ``df2`` itself,
    so e.g. ``count()`` reports the number of summary rows. This matches the
    output recorded with the original script; confirm whether statistics of
    the raw data were intended instead.

    :return: None
    """
    dic1 = {
        '000': {'a': 1, 'b': 2, 'c': 3},
        '001': {'d': 4, 'e': 5, 'f': 6},
    }
    df2 = pd.DataFrame(dic1)

    # describe() of df2, as recorded by the original author:
    #
    #            000  001
    #     count  3.0  3.0
    #     mean   2.0  5.0
    #     std    1.0  1.0
    #     min    1.0  4.0
    #     25%    1.5  4.5
    #     50%    2.0  5.0
    #     75%    2.5  5.5
    #     max    3.0  6.0

    # Compute the summary once; the original recomputed it for every line.
    summary = df2.describe()

    # Parenthesised single-argument prints behave identically as Python 2
    # print statements and as Python 3 print() calls.
    print("返回非NAN数据项数量=>count()\n{count}\n".format(count=summary.count()))
    print("返回中位数,等价第50位百分位数的值=>median()\n{median}\n".format(median=summary.median()))
    print("返回数据的众值=>mode()\n{mode}\n".format(mode=summary.mode()))
    print("返回数据的标准差(描述离散度)=>std()\n{std}\n".format(std=summary.std()))
    print("返回方差=>var()\n{var}\n".format(var=summary.var()))
    print("偏态系数(skewness,表示数据分布的对称程度)=>skew()\n{skew}\n".format(skew=summary.skew()))

def main():

    """Script entry point: runs the ``example1`` demonstration only."""

    example1()

# Call main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':

    main()

运行 example1() 的输出=>

返回非NAN数据项数量=>count()

000    8

001    8

dtype: int64

返回中位数,等价第50位百分位数的值=>median()

000    2.00

001    4.75

dtype: float64

返回数据的众值=>mode()

   000  001

0  1.0  5.0

1  2.0  NaN

2  3.0  NaN

返回数据的标准差(描述离散度)=>std()

000    0.801784

001    1.603567

dtype: float64

返回方差=>var()

000    0.642857

001    2.571429

dtype: float64

偏态系数(skewness,表示数据分布的对称程度)=>skew()

000    0.000000

001   -1.299187

dtype: float64

秒客网

001_python实现数据分析

相关文章