数据结构是计算机存储和组织数据的方式。Python中有三类四种内建的数据结构,分别是序列(List、Tuple)、映射(Dictionary)以及集合(Set)。
此外,数据分析库NumPy和pandas还提供了ndarray、Series、DataFrame等数据类型。在程序中经常需要在这些数据类型之间相互转换,以满足函数对参数类型的要求。
import pandas as pd
import numpy as np
from pandas import Series,DataFrame
# Demonstrates conversions between list, numpy.ndarray, pandas.Series and
# pandas.DataFrame. Every result is bound to a module-level name of the form
# <source>_to_<target> so it can be inspected interactively.

# --- Sample objects used by the conversions below ---
arr = np.arange(8)
s = Series(np.arange(8), index=list('abcdefgh'))
df = DataFrame(np.arange(40).reshape(8, 5), columns=list('abcde'), index=range(8))
lst = [['apple', '3.5', 2], ['oringe', '9.9', '1'], ['banana', '4.9', '1.5'], ['grape', '12', '2']]
# Alternative input made of tuples instead of inner lists:
# lst1 = [('apple','3.5',2),('oringe','9.9','1'),('banana','4.9','1.5'),('grape','12','2')]

# --- list -> ndarray / Series / DataFrame ---
# The rows mix str and int, so NumPy coerces every element to a string dtype.
list_to_arr = np.array(lst)
# Each inner list becomes one (object) element of the Series.
list_to_series = Series(lst, index=list('abcd'))
list_to_df = DataFrame(lst, columns=['name', 'price', 'number'], index=range(len(lst)))

# --- ndarray -> list / Series / DataFrame ---
arr_to_list = arr.tolist()
arr_to_series = Series(arr, index=range(len(arr)))
arr_to_df = DataFrame(arr, columns=['a'], index=range(len(arr)))

# --- Series -> ndarray / list / DataFrame ---
# Series.as_matrix() was removed in pandas 1.0; to_numpy() is its replacement.
series_to_arr = s.to_numpy()
series_to_list = s.tolist()
# A list of Series is stacked row-wise: 2 rows, one column per index label.
series_to_df1 = pd.DataFrame([s, s])
# to_frame() keeps the Series as a single column.
series_to_df2 = s.to_frame()
# axis=1 places the Series side by side; axis=0 would return a longer Series.
series_to_df3 = pd.concat([s, s], axis=1)

# --- DataFrame -> ndarray / list / Series ---
# DataFrame.as_matrix() was removed in pandas 1.0; to_numpy() is its replacement.
df_to_arr1 = df.to_numpy()
# Column / row access on the resulting array:
# df_to_arr1[:, 1]
# df_to_arr1[1, :]
df_to_arr2 = df.values
df_to_arr3 = np.array(df)
# Nested list, one inner list per DataFrame row.
df_to_list1 = np.array(df).tolist()
# Flat list holding only the first column's value of every row.
df_to_list2 = [row[0] for row in df.values]
# Selecting a single column yields a Series.
df_to_series = df['a']