numpy 数组与类型

时间:2021-05-12 21:27:44

numpy 的基本类型:

     

# Basic NumPy array creation and element types.
# print(x) with a single argument behaves the same on Python 2 and Python 3.
import numpy as np

# A 1-D array holding the integers 0..4.
k = np.arange(5)
print(k)  # [0 1 2 3 4]
# Platform default integer: the article observed int32; most 64-bit
# platforms report int64.
print(k.dtype)
print(k.shape)  # (5,) -- a one-element tuple: one dimension of length 5

# Three equal-length 1-D arrays stacked into a 2-D array.
m = np.array([np.arange(4), np.arange(4), np.arange(4)])
print(m)  # [[0 1 2 3] [0 1 2 3] [0 1 2 3]]
print(m.shape)  # (3, 4)

# arange accepts an explicit element type.
k = np.arange(10, dtype=np.uint16)
print(k)  # [0 1 2 3 4 5 6 7 8 9]
# 'D' is the one-character type code for double-precision complex.
h = np.arange(7, dtype='D')
print(h)  # [0.+0.j 1.+0.j 2.+0.j 3.+0.j 4.+0.j 5.+0.j 6.+0.j]
# 下面是支持类型
# bool
# Boolean (True or False) stored as a byte
# intp
# Platform integer (normally either int32 or int64)
# int8
# Byte (-128 to 127)
# int16
# Integer (-32768 to 32767)
# int32
# Integer (-2 ** 31 to 2 ** 31 -1)
# int64
# Integer (-2 ** 63 to 2 ** 63 -1)
# uint8
# Unsigned integer (0 to 255)
# uint16
# Unsigned integer (0 to 65535)
# uint32
# Unsigned integer (0 to 2 ** 32 - 1)
# uint64
# Unsigned integer (0 to 2 ** 64 - 1)
# float16
# Half precision float: sign bit, 5 bits exponent, 10 bits mantissa
# float32
# Single precision float: sign bit, 8 bits exponent, 23 bits mantissa
# float64 or float
# Double precision float: sign bit, 11 bits exponent, 52 bits mantissa
# complex64
# Complex number, represented by two 32-bit floats (real and imaginary components)
# complex128 or complex
# Complex number, represented by two 64-bit floats (real and imaginary components)
# 可以使用缩写方式
# integer ---> i
# Unsigned integer ---> u
# Single precision float ---> f
# Double precision float ---> d
# bool ---> ?(注意:作为 dtype 字符代码时 'b' 表示 int8;'b' 只是 dtype.kind 中布尔类型的标记)
# complex ---> D
# string ---> S
对多维数组的处理:

numpy 数组与类型

关于转置与修改维数:

numpy 数组与类型

其中关键的是多维转一维:ravel() 与 flatten() 的区别在于,flatten() 总是返回一份新的拷贝(分配新内存),而 ravel() 在可能的情况下只返回原数组的视图,修改视图会影响原数组。

矩阵分块,合并

numpy 数组与类型

numpy 数组与类型

数组的切割:

numpy 数组与类型

数组的属性:

numpy 数组与类型

从文件中加载数据,及常用的算术:

import numpy as np
# Saving/loading text files and common array statistics.
# Every print uses the single-argument form so the script runs unchanged on
# Python 2 and Python 3.

# 3x3 identity matrix: [[1. 0. 0.] [0. 1. 0.] [0. 0. 1.]]
k = np.eye(3)
np.savetxt("hell.txt", k)
# hell.txt then contains:
# 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00
# 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00
# 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00

# data.csv holds comma-separated values, with rows such as:
# AAPL,28-01-2011, ,344.17,344.4,333.53,336.1,21144800
# AAPL2,28-01-20112, ,344.172,344.42,333.532,336.12,211448002
# usecols picks columns by zero-based index; unpack=True returns one array
# per selected column. For example, to read columns 3, 4 and 7 into c, v, k:
#     c, v, k = np.loadtxt("data.csv", delimiter=',', usecols=(3, 4, 7), unpack=True)
c, v = np.loadtxt("data.csv", delimiter=',', usecols=(6, 7), unpack=True)

# Volume-weighted average price (VWAP): every price is weighted by the
# number of shares sold at that price, e.g.
#   price  shares sold
#     2        3
#     3       10
#     4        7
# vwap = (2*3 + 3*10 + 4*7) / (3 + 10 + 7); the share counts are the weights.
vwamp = np.average(c, weights=v)
print(vwamp)

# The same value computed by hand. The accumulators are deliberately not
# named `sum`, which would shadow the builtin.
weighted_total = 0.0
weight_total = 0.0
for price, volume in zip(c, v):
    weighted_total += price * volume
    weight_total += volume
print(weighted_total / weight_total)

# The unweighted average is simply the mean.
print(np.mean(c) == np.average(c, weights=None))
print(np.max(c))  # maximum
print(np.min(c))  # minimum
print(np.ptp(c))  # peak to peak: maximum - minimum
print(np.median(c))  # middle value of the sorted data

# Cross-check the median by hand. np.sort replaces np.msort, which was
# removed in NumPy 2.0; for a 1-D array the two are equivalent.
sorted_array = np.sort(c)
middle = len(c) // 2  # floor division keeps the index an int on Python 3
if len(c) % 2 == 0:
    # Even length: average the two central elements.
    test_median = (sorted_array[middle] + sorted_array[middle - 1]) / 2
else:
    test_median = sorted_array[middle]
print(test_median)  # should equal np.median(c) printed above

print(np.var(c))  # variance
# Cross-check: variance is the mean squared deviation from the mean.
print(np.mean((c - np.mean(c)) ** 2))
print(np.std(c))  # standard deviation = square root of the variance
print(np.diff(c))  # each element minus the one before it
print(np.log([1, np.e, np.e ** 2]))  # natural logarithm: [0. 1. 2.]
print(np.where(np.diff(c) > 0))  # positions where the difference is positive
print(np.sqrt(225))  # square root: 15.0

加载文件并处理时间格式:

import numpy as np
import datetime
def datestr2num(s):
    """Convert a "DD-MM-YYYY" date string to its weekday number.

    Intended for use as an np.loadtxt converter.

    Args:
        s: Date text such as "28-01-2011". Bytes are accepted as well,
            because np.loadtxt may hand converters bytes on Python 3
            (depending on the NumPy version and the `encoding` argument).

    Returns:
        int: 0 for Monday through 6 for Sunday.

    Raises:
        ValueError: If the text does not match the "%d-%m-%Y" format.
    """
    if isinstance(s, bytes):
        # strptime requires text, not bytes, on Python 3.
        s = s.decode("ascii")
    return datetime.datetime.strptime(s, "%d-%m-%Y").date().weekday()
# Average closing price per weekday.
# `converters` maps a column index to a function that turns that column's
# raw text into a number; column 1 (the date) becomes a weekday number.
dates, close = np.loadtxt("data.csv", delimiter=',', usecols=(1, 6),
                          converters={1: datestr2num}, unpack=True)
averages = np.zeros(5)
# dates now looks like [4. 0. 1. 2. 3. 4. 0. 1. 2. 3. 4. 0. 1. 2. 3. 4. ...]
for i in range(5):
    # np.where returns the positions at which the condition holds.
    indices = np.where(dates == i)
    # np.take gathers the values found at those positions.
    prices = np.take(close, indices)
    avg = np.mean(prices)
    # %-formatting yields the same space-separated text as the Python 2
    # print statement and works on Python 3 too.
    print("Day %s prices %s Average %s" % (i, prices, avg))
    averages[i] = avg
top = np.max(averages)
low = np.min(averages)
print(top)
print(low)
print(np.argmax(averages))  # position of the largest average
print(np.argmin(averages))  # position of the smallest average
比较完整的加载文件、处理文件、保存文件

import numpy as np
from datetime import datetime
def datestr2num(s):
    """Convert a "DD-MM-YYYY" date string to its weekday number.

    Intended for use as an np.loadtxt converter.

    Args:
        s: Date text such as "28-01-2011". Bytes are accepted as well,
            because np.loadtxt may hand converters bytes on Python 3
            (depending on the NumPy version and the `encoding` argument).

    Returns:
        int: 0 for Monday through 6 for Sunday.

    Raises:
        ValueError: If the text does not match the "%d-%m-%Y" format.
    """
    if isinstance(s, bytes):
        # strptime requires text, not bytes, on Python 3.
        s = s.decode("ascii")
    return datetime.strptime(s, "%d-%m-%Y").date().weekday()
# A data.csv row looks like: AAPL,28-01-2011, ,344.17,344.4,333.53,336.1,21144800
# unpack=True yields one array per selected column; with unpack=False
# loadtxt would return a single 2-D array instead.
# NOTE: `open` shadows the builtin from here on; the name is kept because
# later lines of this script refer to it.
dates, open, high, low, close = np.loadtxt(
    'data.csv',
    delimiter=',',
    usecols=(1, 3, 4, 5, 6),
    converters={1: datestr2num},
    unpack=True,
)
# Only the first 16 rows are examined.
close, dates = close[:16], dates[:16]
# np.where returns a tuple of index arrays; ravel() flattens it to 1-D so a
# single position can be picked out.
monday_positions = np.ravel(np.where(dates == 0))
friday_positions = np.ravel(np.where(dates == 4))
first_monday = monday_positions[0]  # position of the first Monday
last_friday = friday_positions[-1]  # position of the last Friday
# Row positions from the first Monday through the last Friday, cut into
# three equal pieces (1-D -> list of three 1-D arrays).
weeks_indices = np.split(np.arange(first_monday, last_friday + 1), 3)
def summarize(a, o, h, l, c):
    """Summarize one week of price data.

    Args:
        a: 1-D sequence of row positions belonging to the week, in order
            (first entry is the first trading day, last entry the last).
        o: Array of opening prices, indexed by row position.
        h: Array of daily highs, indexed by row position.
        l: Array of daily lows, indexed by row position.
        c: Array of closing prices, indexed by row position.

    Returns:
        tuple: (ticker, open on the first day, highest high of the week,
        lowest low of the week, close on the last day).
    """
    # Show which rows this call covers.
    print(a)
    monday_open = o[a[0]]
    week_high = np.max(np.take(h, a))
    week_low = np.min(np.take(l, a))
    friday_close = c[a[-1]]
    # Ticker spelled as in the data rows ("AAPL").
    return ("AAPL", monday_open, week_high, week_low, friday_close)
# apply_along_axis calls summarize once per row of weeks_indices (axis 1)
# and forwards the extra arrays unchanged; see the official NumPy
# documentation for details.
price_columns = (open, high, low, close)
weeksummary = np.apply_along_axis(summarize, 1, weeks_indices, *price_columns)
# fmt="%s" writes every field as plain text, one week per line.
np.savetxt("weeksummay.csv", weeksummary, fmt="%s", delimiter=",")

参考《NumPy Beginner's Guide, 2nd Edition》