Python基础(Numpy,Pandas,Matplotlib)

时间:2021-01-03 21:26:49

一.Numpy/Scipy

 

 1 #coding=utf-8
 2 import numpy
 3 import scipy
 4 
 5 x = numpy.ones((3, 4))
 6 print x
 7 """
 8 [[ 1.  1.  1.  1.]
 9  [ 1.  1.  1.  1.]
10  [ 1.  1.  1.  1.]]
11 """
12 
13 y = numpy.array([[1, 2], [3, 4]])
14 print y
15 """
16 [[1 2]
17  [3 4]]
18  """
19 
20 print numpy.linalg.det(y) #-2.0
21 
22 print numpy.arange(1, 5, 0.5) #[ 1.   1.5  2.   2.5  3.   3.5  4.   4.5]
23 
24 a = numpy.array([[5, 5, 5], [5, 5, 5]])
25 b = numpy.array([[2, 2, 2], [2, 2, 2]])
26 print a * b
27 """
28 [[10 10 10]
29  [10 10 10]]
30 """
31 
32 print a.sum() #30
33 print a.sum(axis=0) #[10 10 10]
34 print a.sum(axis=1) #[15 15]
35 
36 a = numpy.array([1, 3, 5])
37 b = numpy.array([2, 4, 6])
38 c = numpy.array([7, 8, 9])
39 print numpy.where(a > 2, b, c) #[7 4 6] Numpy.where函数是三元表达式x if condition else y的矢量化版本a > 2 [False, True, True]

 

 

 1 #coding=utf-8
 2 import numpy
 3 import scipy
 4 
 5 def fun(x, y):
 6     return (x + 1) * (y + 1)
 7 
 8 a = numpy.fromfunction(fun, (9, 9))
 9 print a
10 """
11 [[  1.   2.   3.   4.   5.   6.   7.   8.   9.]
12  [  2.   4.   6.   8.  10.  12.  14.  16.  18.]
13  [  3.   6.   9.  12.  15.  18.  21.  24.  27.]
14  [  4.   8.  12.  16.  20.  24.  28.  32.  36.]
15  [  5.  10.  15.  20.  25.  30.  35.  40.  45.]
16  [  6.  12.  18.  24.  30.  36.  42.  48.  54.]
17  [  7.  14.  21.  28.  35.  42.  49.  56.  63.]
18  [  8.  16.  24.  32.  40.  48.  56.  64.  72.]
19  [  9.  18.  27.  36.  45.  54.  63.  72.  81.]]
20  """
21 
22 a = numpy.array([[1, 2, 3]])
23 b = numpy.array([[3, 4, 5]])
24 print numpy.add(a, b) #[[4 6 8]]
25 print numpy.multiply(a, b) #[[ 3  8 15]]

 

 

 1 #coding=utf-8
 2 from scipy.cluster.vq import *
 3 import numpy as np
 4 import matplotlib.pyplot as plt
 5 
 6 class1 = 1.5 * np.random.randn(100, 2) #标准正态分布randn
 7 class2 = np.random.randn(100, 2) + np.array([8, 8])
 8 
 9 features=np.vstack((class1,class2))
10 centroids,variance=kmeans(features,2)
11 code,distance=vq(features,centroids)
12 
13 plt.figure()
14 ndx=np.where(code==1)[0]
15 plt.plot(features[ndx,0],features[ndx,1],'*')
16 ndx=np.where(code==0)[0]
17 plt.plot(features[ndx,0],features[ndx,1],'r.')
18 plt.plot(centroids[:,0],centroids[:,1],'go')
19 plt.axis('off')
20 plt.show()

 

python基础(Numpy,Pandas,Matplotlib,

 

二.Pandas

 1 #coding=utf-8
 2 from pandas import Series
 3 import pandas as pd
 4 a = Series([3, 5, 7], index=['a', 'b', 'c'])
 5 print a['a'] #3
 6 
 7 data = {'a':1, 'b':2, 'c':3}
 8 sindex = ['a', 'b', 'd']
 9 Ser = Series(data, index=sindex)
10 print Ser
11 """
12 a     1
13 b     2
14 d   NaN
15 dtype: float64
16 """
17 print Series.isnull(Ser)
18 """
19 a    False
20 b    False
21 d     True
22 dtype: bool
23 """
24 print a
25 """
26 a    3
27 b    5
28 c    7
29 dtype: int64
30 """
31 
32 b = {'a':2, 'b':3, 'd':5}
33 print Series(a) + Series(b)
34 """
35 a     5
36 b     8
37 c   NaN
38 d   NaN
39 dtype: float64
40 """
41 data = {'Name':['a', 'b', 'c'], 'Num':[1, 2, 3]}
42 a = pd.DataFrame(data)
43 """
44   Name  Num
45 0    a    1
46 1    b    2
47 2    c    3
48 """
49 print a['Name']
50 print a.Name
51 """
52 0    a
53 1    b
54 2    c
55 Name: Name, dtype: object
56 """
57 print a[0:2]
58 print a[a.index < 2]
59 """
60   Name  Num
61 0    a    1
62 1    b    2
63 """
64 print a.ix[1]
65 """
66 Name    b
67 Num     2
68 """
69 del a['Name']
70 print a
71 """
72    Num
73 0    1
74 1    2
75 2    3
76 """

 

三.Matplotlib

 

  1 #coding=utf-8
  2 import pandas as pd
  3 from matplotlib.finance import quotes_historical_yahoo  # NOTE(review): matplotlib.finance was deprecated in Matplotlib 2.0 and later removed, so this import fails on current releases
  4 from datetime import date
  5 today = date.today()
  6 start = (today.year - 1, today.month, today.day)  # (year, month, day) one year back; NOTE(review): not a valid date when today is Feb 29
  7 quote = quotes_historical_yahoo('AXP', start, today)  # quotes for ticker 'AXP' between start and today
  8 fields = ['date', 'open', 'close', 'high', 'low', 'volume']
  9 df = pd.DataFrame(quote, index=range(1, len(quote) + 1), columns=fields)  # rows labelled 1..len(quote)
 10 print df.head(10) #first 10 rows; df.tail(10) would show the last 10
 11 """
 12       date       open      close       high        low   volume
 13 1   735663  79.412407  79.097240  79.924559  78.851015  6530200
 14 2   735666  78.939660  79.294224  79.609392  78.742676  5846100
 15 3   735667  78.526003  77.915364  78.565393  77.678990  7525000
 16 4   735668  78.200986  78.250226  78.545699  77.915364  4546200
 17 5   735669  78.890411  80.328364  80.761722  78.821469  9386600
 18 6   735670  80.269272  79.382861  80.417009  78.742676  6919400
 19 7   735673  79.658632  80.269272  80.417009  79.530598  5295100
 20 8   735674  79.973799  79.835914  79.983650  79.333620  4258200
 21 9   735675  79.363166  80.623836  81.076889  79.057843  6449400
 22 10  735676  80.604134  80.308669  80.722325  79.717731  4677000
 23 """
 24 list1 = []  # formatted date labels, one per quote row
 25 for i in range(0, len(quote)):
 26     x = date.fromordinal(int(quote[i][0]))  # field 0 holds the date as a Gregorian ordinal
 27     y = date.strftime(x, '%y-%m-%d')  # two-digit year, e.g. 15-03-09
 28     list1.append(y)
 29 df = pd.DataFrame(quote, index=list1, columns=fields)  # rebuild the frame indexed by those labels
 30 df = df.drop(['date'], axis=1)  # the date is now carried by the index
 31 print df
 32 """
 33                open      close       high        low    volume
 34 15-03-06  79.412407  79.097240  79.924559  78.851015   6530200
 35 15-03-09  78.939660  79.294224  79.609392  78.742676   5846100
 36 15-03-10  78.526003  77.915364  78.565393  77.678990   7525000
 37 15-03-11  78.200986  78.250226  78.545699  77.915364   4546200
 38 15-03-12  78.890411  80.328364  80.761722  78.821469   9386600
 39 15-03-13  80.269272  79.382861  80.417009  78.742676   6919400
 40 15-03-16  79.658632  80.269272  80.417009  79.530598   5295100
 41 15-03-17  79.973799  79.835914  79.983650  79.333620   4258200
 42 15-04-01  77.128302  77.997907  78.383301  76.930659   6163500
 43 15-04-02  77.997907  78.758811  78.827984  77.642158   5695200
 44 ........
 45 16-03-04  58.439999  58.290001  58.650002  57.810001   5407400
 46 
 47 [252 rows x 5 columns]
 48 """
 49 print df.loc[:, ['open', 'close']]  # every row, only the open and close columns
 50 """
 51                open      close
 52 15-03-09  78.939660  79.294224
 53 15-03-24  80.801118  80.141238
 54 15-04-16  78.492002  79.954528
 55 15-04-17  77.474167  76.406919
 56 15-04-20  76.476093  76.317986
 57 ...             ...        ...
 58 16-03-03  57.160000  58.090000
 59 16-03-04  58.439999  58.290001
 60 
 61 [251 rows x 2 columns]
 62 """
 63 print df.loc['15-03-09':'15-03-20', ['open']]  # label-based slice; both end labels are included
 64 """
 65                open
 66 15-03-09  78.939660
 67 15-03-10  78.526003
 68 15-03-11  78.200986
 69 15-03-12  78.890411
 70 15-03-13  80.269272
 71 15-03-16  79.658632
 72 15-03-17  79.973799
 73 15-03-18  79.363166
 74 15-03-19  80.604134
 75 15-03-20  80.554894
 76 """
 77 print df.iloc[1:10, 1]  # positional: rows 1-9 of column 1 (close)
 78 """
 79 15-03-10    77.915364
 80 15-03-11    78.250226
 81 15-03-12    80.328364
 82 15-03-13    79.382861
 83 15-03-16    80.269272
 84 15-03-17    79.835914
 85 15-03-18    80.623836
 86 15-03-19    80.308669
 87 15-03-20    81.451148
 88 """
 89 print df.at['15-03-09', 'open']#78.9396603442
 90 print df.iat[0, 0] #78.9396603442
 91 print len(df[df.close > df.open]) #120
 92 print df.sort(columns='open')
 93 """
 94                open      close       high        low    volume
 95 16-02-11  51.220001  51.110001  51.590000  50.270000   9142900
 96 16-02-12  51.880001  52.660000  52.730000  51.639999   6083400
 97 16-02-09  52.259998  52.630001  53.020000  51.910000   8455800
 98 16-03-04  58.439999  58.290001  58.650002  57.810001   5407400
 99 ...             ...        ...        ...        ...       ...
100 15-03-24  80.801118  80.141238  80.919309  80.042742   5217800
101 15-03-23  81.451148  80.958698  82.278467  80.958698   7291700
102 """
103 print df.sort_index()  # rows ordered by index label
104 """
105                open      close       high        low    volume
106 15-03-09  78.939660  79.294224  79.609392  78.742676   5846100
107 15-03-27  77.216088  76.792580  77.452460  76.536504   9022600
108 15-03-30  77.166838  76.871373  77.560799  76.822125   7285200
109 15-03-31  76.970187  77.197475  77.602629  76.703380   5918300
110 ...             ...        ...        ...        ...       ...
111 16-03-02  56.880001  57.119999  57.230000  56.570000   7264700
112 16-03-03  57.160000  58.090000  58.180000  57.160000   6841300
113 16-03-04  58.439999  58.290001  58.650002  57.810001   5407400
114 """
115 
116 import numpy
117 status = numpy.sign(numpy.diff(df.close))# diff returns the differences between adjacent elements, e.g. [1., 4., 7.] -> [3., 3.]; sign then maps each difference to -1, 0 or 1
118 print status
119 """
120 [-1.  1.  1. -1.  1. -1.  1. -1.  1. -1. -1. -1. -1. -1.  1.  1.  1.  1.
121  -1. -1.  1.  1.  1. -1.  1.  1.  1. -1. -1.  1.  1. -1. -1. -1.  0. -1.
122   1.  1.  1. -1.  1.  1.  1.  1. -1.  1.  1. -1. -1.  1. -1. -1.  1. -1.
123  -1. -1. -1. -1.  1.  1. -1.  1. -1.  1.  1.  1. -1. -1.  1.  1.  1. -1.
124   1.  1. -1. -1.  0. -1.  1.  1. -1. -1. -1. -1.  1.  1.  1.  1. -1.  1.
125   1.  1. -1.  1. -1. -1. -1.  1.  1.  1. -1. -1. -1. -1. -1.  1.  1.  1.
126  -1. -1.  1.  1.  1. -1. -1. -1. -1. -1.  1.  1. -1.  1. -1.  1.  1. -1.
127   1. -1.  1.  1. -1.  1.  1. -1. -1.  1. -1. -1. -1.  1. -1. -1.  1. -1.
128   1.  1. -1.  1.  0.  1. -1. -1. -1.  1.  1. -1. -1. -1. -1.  1. -1. -1.
129   1. -1. -1.  1.  1.  1. -1.  1. -1. -1. -1. -1. -1.  1. -1.  1.  1. -1.
130  -1. -1.  1.  1. -1.  1. -1. -1.  1. -1. -1. -1.  1. -1. -1.  1.  1. -1.
131  -1.  1.  1.  1.  1. -1.  1. -1. -1. -1. -1. -1. -1. -1.  1.  1. -1.  1.
132  -1. -1.  1. -1. -1. -1.  1. -1. -1.  1.  1. -1.  1.  1. -1. -1.  1. -1.
133  -1.  1.  1.  1.  1.  1.  1. -1. -1.  1. -1.  1.  1.  1.  1.  1.]
134 """
135 month = []  # month part of every index label
136 for m in df.index:
137     month.append(m.split('-')[1])  # labels are 'yy-mm-dd', so field 1 is the month
138 df['month'] = month
139 print df.groupby('month').count()  # number of rows per month
140 """
141 month   open  close  high  low  volume                              
142 01       19     19    19   19      19
143 02       20     20    20   20      20
144 03       21     21    21   21      21
145 04       21     21    21   21      21
146 05       20     20    20   20      20
147 06       22     22    22   22      22
148 07       22     22    22   22      22
149 08       21     21    21   21      21
150 09       21     21    21   21      21
151 10       22     22    22   22      22
152 11       20     20    20   20      20
153 12       22     22    22   22      22
154 """
155 print df.groupby('month').sum().volume  # total volume per month
156 """
157 month
158 01       236344300
159 02       158919800
160 03       152726400
161 04       133853700
162 05       103420200
163 06       141794200
164 07       117895600
165 08       155122400
166 09       110385200
167 10       122095600
168 11       106839600
169 12       124219300
170 """
171 print df.groupby('month').mean()  # per-month mean of each column
172 """
173 month     open      close       high        low           volume                                                            
174 01     61.257880  60.911867  61.901466  60.234361  12439173.684211
175 02     53.843500  53.965001  54.380000  53.313500   7945990.000000
176 03     75.035891  75.053161  75.592068  74.455664   7272685.714286
177 04     77.464754  77.617688  78.067080  77.056302   6373985.714286
178 05     78.718298  78.723239  79.189665  78.307704   5171010.000000
179 06     78.962402  78.858566  79.455639  78.498764   6445190.909091
180 07     76.915968  76.806860  77.310469  76.371782   5358890.909091
181 08     77.227940  77.476384  78.140478  76.495359   7386780.952381
182 09     74.696736  74.632971  75.198348  74.107744   5256438.095238
183 10     75.153440  75.341586  75.773063  74.656811   5549800.000000
184 11     72.393654  72.297575  72.752583  71.910767   5341980.000000
185 12     69.739317  69.640658  70.224466  69.159582   5646331.818182
186 """
187 print df.groupby('month').min()  # per-month minimum of each column
188 print df.groupby('month').max()  # per-month maximum of each column
189 
190 data = {'Name':['a', 'b', 'c'], 'month':['01', '02', '03']}
191 a = pd.DataFrame(data)  # small lookup frame keyed by month
192 print pd.concat([df, a], ignore_index=True)  # stacks a's rows under df's with a fresh 0..n-1 index; columns missing on one side are NaN
193 """
194      Name      close       high       low  month      open    volume
195 0    NaN  79.294224  79.609392  78.742676    03  78.939660   5846100
196 1    NaN  77.915364  78.565393  77.678990    03  78.526003   7525000
197 2    NaN  78.250226  78.545699  77.915364    03  78.200986   4546200
198 3    NaN  80.328364  80.761722  78.821469    03  78.890411   9386600
199 ..   ...        ...        ...        ...   ...        ...       ...
200 249  NaN  58.090000  58.180000  57.160000    03  57.160000   6841300
201 250  NaN  58.290001  58.650002  57.810001    03  58.439999   5407400
202 251    a        NaN        NaN        NaN    01        NaN       NaN
203 252    b        NaN        NaN        NaN    02        NaN       NaN
204 253    c        NaN        NaN        NaN    03        NaN       NaN
205 """
206 print pd.merge(df, a, on='month')  # joins on the shared month column; only months present in both frames remain
207 """
208          open      close       high        low    volume month Name
209 0   78.939660  79.294224  79.609392  78.742676   5846100    03    c
210 1   78.526003  77.915364  78.565393  77.678990   7525000    03    c
211 2   78.200986  78.250226  78.545699  77.915364   4546200    03    c
212 3   78.890411  80.328364  80.761722  78.821469   9386600    03    c
213 4   80.269272  79.382861  80.417009  78.742676   6919400    03    c
214 5   79.658632  80.269272  80.417009  79.530598   5295100    03    c
215 6   79.973799  79.835914  79.983650  79.333620   4258200    03    c
216 7   79.363166  80.623836  81.076889  79.057843   6449400    03    c
217 8   80.604134  80.308669  80.722325  79.717731   4677000    03    c
218 9   80.554894  81.451148  81.805713  80.269272   9338100    03    c
219 10  81.451148  80.958698  82.278467  80.958698   7291700    03    c
220 11  80.801118  80.141238  80.919309  80.042742   5217800    03    c
221 12  80.190478  78.900263  80.239726  78.821469   8908000    03    c
222 13  78.466905  77.294880  78.575245  76.713786  16181300    03    c
223 14  77.216088  76.792580  77.452460  76.536504   9022600    03    c
224 15  77.166838  76.871373  77.560799  76.822125   7285200    03    c
225 16  76.970187  77.197475  77.602629  76.703380   5918300    03    c
226 17  56.029999  56.799999  56.840000  55.619999   9147000    03    c
227 18  56.880001  57.119999  57.230000  56.570000   7264700    03    c
228 19  57.160000  58.090000  58.180000  57.160000   6841300    03    c
229 20  58.439999  58.290001  58.650002  57.810001   5407400    03    c
230 21  67.793285  67.295464  67.882897  66.479038   9248300    01    a
231 22  67.076429  66.260002  67.414943  65.383832  10809200    01    a
232 23  65.239998  64.419998  65.550003  64.239998   9752200    01    a
233 24  63.310001  63.840000  64.250000  63.080002  11323900    01    a
234 25  64.180000  63.630001  64.410004  63.570000  10003600    01    a
235 26  63.740002  64.050003  64.209999  63.099998   8157100    01    a
236 27  64.800003  64.400002  64.900002  63.599998   7560000    01    a
237 28  63.650002  62.849998  64.370003  62.230000  11291200    01    a
238 29  62.849998  63.290001  63.750000  62.369999   6664000    01    a
239 30  62.029999  62.910000  63.049999  61.500000   8643800    01    a
240 31  63.410000  62.639999  63.790001  62.240002   7336000    01    a
241 32  61.540001  63.029999  63.540001  61.290001   9026000    01    a
242 33  63.000000  62.639999  64.320000  62.509998   8832500    01    a
243 34  58.389999  55.060001  58.889999  54.139999  43731600    01    a
244 35  54.459999  55.020000  55.770000  54.139999  18498300    01    a
245 36  55.200001  55.090000  55.740002  54.959999  12834600    01    a
246 37  55.369999  54.520000  55.820000  54.419998  10852000    01    a
247 38  54.680000  52.880001  54.759998  52.150002  17859200    01    a
248 39  53.180000  53.500000  53.709999  53.049999  13920800    01    a
249 40  53.410000  54.700001  54.990002  53.000000   9860300    02    b
250 41  54.330002  53.660000  54.389999  53.490002  11664200    02    b
251 42  54.000000  54.110001  54.290001  52.830002   9728400    02    b
252 43  54.080002  54.380001  54.459999  53.810001   6870600    02    b
253 44  54.720001  53.980000  54.849998  53.810001   9091000    02    b
254 45  53.240002  52.400002  53.450001  52.230000  11815800    02    b
255 46  52.259998  52.630001  53.020000  51.910000   8455800    02    b
256 47  53.000000  52.290001  53.430000  52.279999   7040100    02    b
257 48  51.220001  51.110001  51.590000  50.270000   9142900    02    b
258 49  51.880001  52.660000  52.730000  51.639999   6083400    02    b
259 50  53.009998  53.180000  53.480000  52.730000   6945000    02    b
260 51  53.500000  53.610001  54.000000  53.299999   8610900    02    b
261 52  53.500000  54.150002  54.349998  53.450001   8502900    02    b
262 53  54.250000  54.709999  55.029999  54.029999   6549100    02    b
263 54  54.709999  55.630001  55.630001  54.709999   6535700    02    b
264 55  55.520000  55.110001  55.599998  54.959999   5866700    02    b
265 56  54.450001  54.639999  54.869999  53.560001   5585000    02    b
266 57  54.779999  55.389999  55.389999  54.299999   4379200    02    b
267 58  55.709999  55.380001  55.900002  55.150002   5885500    02    b
268 59  55.299999  55.580002  56.150002  54.810001  10307300    02    b
269 """
 1 import matplotlib.pyplot as plt
 2 closeMeansOK = df.groupby('month').mean().close
 3 listOKIndex = closeMeansOK.index
 4 listOK = []
 5 for i in range(0, 12):
 6     listOK.append(closeMeansOK[i])
 7 plt.figure(figsize=(8, 6), dpi=100) #大小 精度
 8 p1 = plt.subplot(221)
 9 p2 = plt.subplot(222)
10 p3 = plt.subplot(223)
11 p4 = plt.subplot(224)
12 p1.plot(listOKIndex, listOK, 'rD') #r:红,D:宝石
13 
14 t = numpy.arange(0, 4, 0.1)
15 p2.plot(t, t, t, t + 2, t, t ** 2, 'g--') #g:绿色,--:虚线
16 
17 p3.plot(listOKIndex, listOK, 'o') #散点图
18 p3.set_title("123")
19 p3.set_xlabel("x")
20 p3.set_ylabel('y')
21 
22 data = numpy.random.randint(1, 11, 5)
23 x = numpy.arange(len(data))
24 p4.plot(x, data, color = 'r')
25 p4.bar(x, data, alpha = .5, color = 'g')
26 
27 plt.show()

python基础(Numpy,Pandas,Matplotlib,