[python]沪深龙虎榜数据进一步处理,计算日后5日的涨跌幅

时间:2023-03-08 20:49:25

沪深龙虎榜数据进一步处理,计算日后5日的涨跌幅

事前数据:

前面处理得到的csv文件

文件名前加入“[wait]”等待程序处理

python代码从雅虎股票历史数据api获取数据,计算后面5日的涨跌幅

雅虎数据api格式:

priceUrl = 'http://table.finance.yahoo.com/table.csv?s={%1}&d={%2}&e={%3}&f={%4}&g=d&a={%5}&b={%6}&c={%7}&ignore=.csv'
# %1:000001.sz
# END: %2:月-1 %3:日 %4:年
# START: %5:月-1 %6:日 %7:年

事前数据截图:

[python]沪深龙虎榜数据进一步处理,计算日后5日的涨跌幅

计算后,再用excel的条件格式并另存为excel文件后截图:

[python]沪深龙虎榜数据进一步处理,计算日后5日的涨跌幅

代码:

 #coding=utf-8

 #读取'[wait]'开头的csv文件
#copyright @ WangXinsheng
#http://www.cnblogs.com/wangxinsheng/
import os
import gzip
import re
import http.cookiejar
import urllib.request
import urllib.parse
import time
import datetime


def getOpener(head):
    """Build a urllib opener that carries cookies and the given headers.

    head: dict mapping header name -> header value; installed on the
    opener as its default request headers.
    """
    cj = http.cookiejar.CookieJar()
    pro = urllib.request.HTTPCookieProcessor(cj)
    opener = urllib.request.build_opener(pro)
    # addheaders expects a list of (name, value) tuples.
    opener.addheaders = list(head.items())
    return opener


def ungzip(data):
    """Gunzip *data* if it is gzip-compressed; return it unchanged otherwise.

    The server may answer with or without gzip encoding, so failure to
    decompress is treated as "already plain" (best effort, as before).
    """
    try:
        print('正在解压.....')
        data = gzip.decompress(data)
        print('解压完毕!')
    except (OSError, EOFError):
        # gzip.decompress raises OSError (BadGzipFile) on non-gzip input
        # and EOFError on truncated streams; anything else should surface.
        print('未经压缩, 无需解压')
    return data

# Constants
# Request headers for the Yahoo quote download.
# NOTE(review): 'Host'/'Referer' name www.yahoo.com while the data URL is
# table.finance.yahoo.com — kept byte-identical to preserve behavior.
header = {
    #'Connection': 'Keep-Alive',
    'Accept': '*/*',
    'Accept-Language': 'zh-CN,zh;q=0.8',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.111 Safari/537.36',
    'Accept-Encoding': 'gzip, deflate',
    'Host': 'yahoo.com',
    'Referer' : 'http://www.yahoo.com'
}

# Yahoo historical-price CSV endpoint.
#   {%1}          stock code, e.g. '000001.sz'
#   {%2}..{%4}    END   month-1 / day / year  (d/e/f parameters)
#   {%5}..{%7}    START month-1 / day / year  (a/b/c parameters)
priceUrl = ('http://table.finance.yahoo.com/table.csv?'
            's={%1}&d={%2}&e={%3}'
            '&f={%4}&g=d&a={%5}&b={%6}&c={%7}&ignore=.csv')

# Directory scanned for '[wait]*.csv' input files.
# (In the original paste this assignment was fused onto a comment line
# and therefore never executed — restored here as a real statement.)
path = r'.'
# Main pass: for every '[wait]*.csv' file, look up each stock's prices on
# Yahoo and append the close-to-close percentage change for the next five
# trading days, then write the result to a '[处理完了]*.csv' sibling file.
files = os.listdir(path)
files.sort()
out = []
for fname in files:
    # Only CSV files explicitly marked '[wait]' by the previous step.
    if not (fname.startswith('[wait]') and fname.endswith('.csv')):
        continue
    print('读取文件:' + path + '/' + fname)
    # NOTE(review): file is read with the platform default encoding, as
    # the original did — confirm the upstream CSVs match that encoding.
    fobj = open(path + '/' + fname, 'rt')
    infos = fobj.readlines()
    fobj.close()
    i = 0
    add = False
    for info in infos:
        if i == 0:
            # Header row: append the five new column titles.
            i = i + 1
            info = info.replace('\n', '') + ',"一天后","二天后","三天后","四天后","五天后"\n'
            out.append(info)
            continue
        elif len(info.split(',')) > 9:
            # Row already carries the extra columns — pass through untouched.
            out.append(info)
            continue
        else:
            # Column 0 is the list date, column 1 the stock code.
            tmp = info.split(',')
            # Accept both dashed and slashed date formats.
            try:
                timeArray = time.strptime(tmp[0], "%Y-%m-%d")
            except ValueError:
                timeArray = time.strptime(tmp[0], "%Y/%m/%d")
            timeStamp = int(time.mktime(timeArray))
            # NOTE(review): mktime (local time) fed to utcfromtimestamp
            # mixes timezones; preserved because only the date is used.
            fromDay = datetime.datetime.utcfromtimestamp(timeStamp)
            fromDay = fromDay + datetime.timedelta(days=1)
            # 15 calendar days comfortably covers 5 trading days.
            endDay = fromDay + datetime.timedelta(days=15)
            code = tmp[1].replace('"', '').replace("'", "")
            # BUG FIX: the original tested startswith('') — always true —
            # so every code got the Shanghai '.ss' suffix.  Shanghai
            # A-share codes start with '6'; the rest trade in Shenzhen.
            if code.startswith('6'):
                code = code + '.ss'
            else:
                code = code + '.sz'
            # Fill in the URL template (months are 0-based in this API).
            url = priceUrl.replace('{%1}', code).replace('{%2}', str(endDay.month - 1))
            url = url.replace('{%3}', str(endDay.day)).replace('{%4}', str(endDay.year))
            url = url.replace('{%5}', str(fromDay.month - 1)).replace('{%6}', str(fromDay.day))
            url = url.replace('{%7}', str(fromDay.year))
            print('抓取URL: ' + url)
            # Fetch the price CSV from Yahoo.
            dd = ''
            try:
                opener = getOpener(header)
                op = opener.open(url)
                data = op.read()
                data = ungzip(data)
                dd = data.decode()
            except Exception:
                # Best effort: keep the row unmodified on any fetch failure.
                print('网络抓取失败')
                out.append(info)
                continue
            # Compute the percentage change for each of the next 5 days.
            if dd != '':
                dataInfo = dd.split('\n')
                dayCount = 0
                startPrice = 0
                # Yahoo rows are newest-first; walk backwards (oldest
                # first), skipping the header at index 0 and blank lines.
                for x in range(len(dataInfo) - 1, 0, -1):
                    if dataInfo[x] == '':
                        continue
                    if dayCount > 5:
                        break
                    di = dataInfo[x]
                    if dayCount == 0:
                        # Oldest row is the event day itself: base price
                        # (column 4 = close).
                        startPrice = float(di.split(',')[4])
                    elif int(di.split(',')[5]) != 0:
                        # Non-zero volume (column 5) marks a real trading day.
                        add = True
                        closeP = float(di.split(',')[4])
                        info = (info.replace('\n', '') + ',"'
                                + str(round((closeP - startPrice) / startPrice * 100, 2))
                                + '%[' + str(closeP) + ']"')
                    if dayCount == 0 or int(di.split(',')[5]) != 0:
                        dayCount = dayCount + 1
                if add:
                    out.append(info + '\n')
                continue
    # Write the augmented rows, renaming the '[wait]' marker.
    # (The original rebound the loop variable to the file object and used
    # f.name here; a distinct name avoids that shadowing.)
    outName = path + '/' + fname.replace('[wait]', '[处理完了]')
    ff = open(outName, 'w')
    for o in out:
        ff.write(o)
    ff.close()
    print('处理完了\n文件地址:' + outName)