pandas中的series数据类型详解

本文详细介绍了pandas中的Series数据类型，分享给大家，具体如下：

									import pandas as pd

									import numpy as np

									import names

									'''

									写在前面的话：

									  1、series与array类型的不同之处为series有索引，而另一个没有;series中的数据必须是一维的，而array类型不一定

									  2、可以把series看成一个定长的有序字典，可以通过shape,index,values等得到series的属性

									'''

									# 1. Creating a Series
									#
									# (1) From a list or a NumPy array
									#     - Without an `index` argument the labels are the integers 0..N-1 (s1).
									#     - Pass `index=` to supply explicit labels (s2).
									#     - Per the recorded output below, the Series is not a copy of the
									#       array: writing through the Series also changed n1 (s3).
									#       NOTE(review): whether the constructor shares or copies the array
									#       depends on the pandas version / copy settings -- confirm.
									# (2) From a dict
									#     - Keys become the index labels, values become the data (s4).
									n1 = np.array([1, 4, 5, 67, 7, 43])
									s1 = pd.Series(data=n1)
									# print(s1)
									# recorded values: 1, 4, 5, 67, 7, 43
									# dtype: int32

									s2 = pd.Series(data=n1, index=['a', 'b', 'c', 'd', 'e', 'f'])
									# print(s2)
									# a     1
									# b     4
									# c     5
									# d    67
									# e     7
									# f    43
									# dtype: int32

									# print(n1)
									# [ 1  4  5 67  7 43]

									# Overwrite the element labelled 2, then bind a second name to the same
									# Series object (plain assignment, so s3 is s1 -- no copy is made).
									s1[2] = 100
									s3 = s1
									# print(s3)
									# recorded values: 1, 4, 100, 67, 7, 43
									# dtype: int32

									# print(n1)
									# [  1   4 100  67   7  43]

									# Build a dict whose keys are surnames from names.get_last_name() (randomly
									# generated English names, so the labels differ between runs) and whose
									# values are the integers 10..14, then turn it into a Series.
									dict1 = {names.get_last_name(): number for number in range(10, 15)}
									s4 = pd.Series(dict1)
									# print(s4)  -- one recorded run:
									# Poole     10
									# Allen     11
									# Davis     12
									# Roland    13
									# Brehm     14
									# dtype: int64

									# 2. Indexing a Series
									#
									# (1) Plain [] with an index label, e.g. s7. The original s6 used plain []
									#     with an integer position; that positional fallback on a non-integer
									#     index is deprecated in pandas 2.x (integer keys are to be treated as
									#     labels), so s6 now goes through .iloc.
									# (2) .loc[] -- explicit (label-based) indexing. It only accepts labels
									#     that exist in the index, never positions, e.g. s8 (recommended).
									# (3) .iloc[] -- implicit (integer-position) indexing, e.g. s9
									#     (recommended).

									# `index` must be a flat list of labels. Wrapping it in another list
									# (index=[list('abcdefgh')]) makes pandas build a one-level MultiIndex, and
									# then s5['c'] / s5.loc['c'] return a one-row Series instead of the value.
									s5 = pd.Series(np.array([1, 5, 9, 7, 6, 4, 52, 8]), index=list('abcdefgh'))
									# print(s5)
									# a     1
									# b     5
									# c     9
									# d     7
									# e     6
									# f     4
									# g    52
									# h     8
									# dtype: int32

									# Third element by position.
									s6 = s5.iloc[2]
									# print(s6)
									# 9

									# Element labelled 'c' via plain [].
									s7 = s5['c']
									# print(s7)
									# 9

									# Element labelled 'c' via .loc.
									s8 = s5.loc['c']
									# print(s8)
									# 9

									# Element at position 2 via .iloc.
									s9 = s5.iloc[2]
									# print(s9)
									# 9

									# 3. Slicing a Series
									#
									# (1) Slicing works much like list slicing; .loc[:] and .iloc[:] are the
									#     recommended forms (s10, s11), although plain [:] also works.
									#     As the recorded output shows, the label slice 'b':'g' includes the
									#     end label 'g', while the position slice 1:7 stops before position 7.
									# (2) For a very long Series, .head() / .tail() give just the first /
									#     last 5 rows.

									# `index` must be a flat list of labels; index=[list('abcdefgh')] would
									# create a one-level MultiIndex whose entries are 1-tuples such as ('b',).
									s5 = pd.Series(np.array([1, 5, 9, 7, 6, 4, 52, 8]), index=list('abcdefgh'))
									# print(s5)
									# a     1
									# b     5
									# c     9
									# d     7
									# e     6
									# f     4
									# g    52
									# h     8
									# dtype: int32

									# Label-based slice.
									s10 = s5.loc['b':'g']
									# print(s10)
									# b     5
									# c     9
									# d     7
									# e     6
									# f     4
									# g    52
									# dtype: int32

									# Position-based slice selecting the same rows.
									s11 = s5.iloc[1:7]
									# print(s11)
									# b     5
									# c     9
									# d     7
									# e     6
									# f     4
									# g    52
									# dtype: int32

									# 4. About NaN
									#
									# (1) NaN stands for a missing value but is not the same thing as None.
									#     Their types differ: None is <class 'NoneType'>, NaN is
									#     <class 'float'>.
									# (2) Missing data can be detected with pd.isnull() / pd.notnull(), or
									#     with the Series' own isnull() / notnull() methods.

									# print(type(None), type(np.nan))
									# <class 'NoneType'> <class 'float'>

									# Both None and np.nan show up as NaN in the resulting float64 Series.
									s12 = pd.Series(data=[1, 2, None, np.nan], index=list('烽火雷电'))
									# print(s12)
									# 烽    1.0
									# 火    2.0
									# 雷    NaN
									# 电    NaN
									# dtype: float64

									# print(pd.isnull(s12))
									# 烽    False
									# 火    False
									# 雷     True
									# 电     True
									# dtype: bool

									# print(pd.notnull(s12))
									# 烽     True
									# 火     True
									# 雷    False
									# 电    False
									# dtype: bool

									# print(s12.notnull())
									# 烽     True
									# 火     True
									# 雷    False
									# 电    False
									# dtype: bool

									# print(s12.isnull())
									# 烽    False
									# 火    False
									# 雷     True
									# 电     True
									# dtype: bool

									# Keep only the non-missing values by using the boolean mask as a filter.
									# print(s12[s12.notnull()])
									# 烽    1.0
									# 火    2.0
									# dtype: float64

									# The `name` attribute of a Series: once set, it is shown in the printed
									# footer next to the dtype.
									s12.name = '风水'
									# print(s12)
									# 烽    1.0
									# 火    2.0
									# 雷    NaN
									# 电    NaN
									# Name: 风水, dtype: float64
以上就是本文的全部内容，希望对大家的学习有所帮助，也希望大家多多支持服务器之家。
原文链接：https://www.cnblogs.com/xshan/p/10289588.html
秒客网

pandas中的series数据类型详解

相关文章