第一次爬虫————爬取地震数据

时间:2021-07-22 16:42:51
import urllib.request
import requests
import turtle
import matplotlib.pyplot as plt
from bs4 import BeautifulSoup
def readhtml(url):
    """Download *url* and return its body decoded as UTF-8.

    The request is a plain GET carrying a desktop-browser ``User-Agent``
    header.  This is a best-effort helper: a malformed URL, a network or
    HTTP error, or a body that is not valid UTF-8 all produce an empty
    string rather than an exception.

    Args:
        url: Address of the page to fetch.

    Returns:
        The page text, or ``""`` when it could not be fetched or decoded.
    """
    # Imported locally so the block does not depend on extra file-level imports.
    from http.client import HTTPException
    import urllib.request

    head = {
        'User-Agent': "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36",
    }
    try:
        # No ``data`` argument: supplying one (even an empty dict) makes
        # urllib send a POST instead of a GET.
        req = urllib.request.Request(url, headers=head)
        # The context manager closes the connection even if read() fails.
        with urllib.request.urlopen(req) as response:
            return response.read().decode('utf-8')
    except (OSError, ValueError, HTTPException):
        # OSError covers URLError/HTTPError and socket failures; ValueError
        # covers malformed URLs and UnicodeDecodeError.  KeyboardInterrupt
        # and SystemExit are deliberately allowed to propagate.
        return ""
def analyze(html,alist,datalinks):
    """Extract the cell texts of every data row of the tables in *html*.

    Args:
        html: Markup to parse.
        alist: Output list; one list of cell texts is appended for every
            ``<tr>`` that contains at least one ``<td>``.
        datalinks: Output list; every ``<tr>`` element found is appended
            to it.  Anything that is not a list is ignored.

    Returns:
        None.  Results are delivered through *alist* and *datalinks*.
    """
    soup = BeautifulSoup(html, 'html.parser')
    rows = soup.find_all('tr')
    # Fill the caller's list in place; rebinding the parameter name would
    # leave the caller's list untouched.
    if isinstance(datalinks, list):
        datalinks.extend(rows)
    for row in rows:
        cells = row.find_all('td')
        if not cells:
            # Rows without <td> cells (e.g. header rows) carry no data.
            continue
        texts = []
        for cell in cells:
            text = cell.string
            if text is None:
                # .string is None when a cell has several children; fall
                # back to the concatenated text so callers never see None.
                text = cell.get_text()
            texts.append(text)
        alist.append(texts)
def datacollect(alist,ilist):
    """Append the integer value of column 4 of the first rows to *ilist*.

    At most the first 30 rows of *alist* are used; fewer rows (including
    none at all) are handled without error.

    Args:
        alist: Table rows; element 4 of each row must be convertible by
            ``int``.
        ilist: Output list that receives one int per processed row.

    Raises:
        ValueError: If a value in column 4 is not an integer literal.
    """
    # Slicing instead of range(30) avoids an IndexError on short tables.
    for row in alist[:30]:
        ilist.append(int(row[4]))
def filedeal(alist,path=r'C:\Users\Administrator\Desktop\earthquake.xls',encoding=None):
    """Write the table as tab-separated text, preceded by a header line.

    The output is plain tab-separated text even though the default path
    ends in ``.xls``.  At most the first 30 rows and the first six cells
    of each row are written.

    Args:
        alist: Table rows; every written cell must be a string.
        path: Destination file.  Defaults to the original hard-coded
            desktop location.
        encoding: Text encoding passed to ``open``; ``None`` keeps the
            platform default, which is what the original code used.
    """
    header = ["震级大小","时间","经度","纬度","震源深度","地点"]
    lines = ['\t'.join(header) + '\n']
    # Slicing instead of range(30) avoids an IndexError on short tables.
    for row in alist[:30]:
        lines.append('\t'.join(row[:6]) + '\n')
    # The context manager closes the file even if writing fails.
    with open(path, 'w', encoding=encoding) as file:
        file.writelines(lines)
def collecttime(alist,plist,olist):
    """Collect the time column and derive a numeric identifier from it.

    Column 1 of at most the first 30 rows is appended to *plist* as a
    string.  Then, for every entry of *plist* (including any that were
    already present), spaces, hyphens and colons are removed, the result
    is read as an integer, incremented by one and appended to *olist* as
    a string.

    Args:
        alist: Table rows; element 1 is the time text.
        plist: Output list of the raw time strings.
        olist: Output list of the derived identifiers.

    Raises:
        ValueError: If a stripped time string is not an integer literal.
    """
    # Slicing instead of range(30) avoids an IndexError on short tables.
    for row in alist[:30]:
        plist.append(str(row[1]))
    for stamp in plist:
        digits = stamp.replace(' ', '').replace('-', '').replace(':', '')
        # NOTE(review): the +1 presumably matches the numbering of the
        # linked news pages - confirm against the site.
        olist.append(str(int(digits) + 1))
def filetohtml(alist,olist,path=r'C:\Users\Administrator\Desktop\earthquake.html',encoding=None):
    """Render the table as an HTML page and write it to *path*.

    The page holds a heading, a six-column table of at most the first 30
    rows, and three links to earthquake web sites.  The last cell of
    every row links to ``http://news.ceic.ac.cn/CD<id>.html``, where
    ``<id>`` is the entry of *olist* at the same index.

    Args:
        alist: Table rows with at least six cells each.
        olist: One identifier per row, in row order.
        path: Destination file.  Defaults to the original hard-coded
            desktop location.
        encoding: Text encoding passed to ``open``; ``None`` keeps the
            platform default, which is what the original code used.

    Raises:
        IndexError: If a row has fewer than six cells or *olist* has
            fewer entries than there are rows to write.
    """
    # Imported locally so the block does not depend on extra file-level imports.
    from html import escape

    titles = ["震级大小","时间","经度","纬度","震源深度","地点"]
    # The heading goes before the table: an <h1> is not valid inside <table>.
    parts = [
        '<h1 style="text-align:center">' + "最新地震信息" + '</h1>',
        '<table border="1">',
        '<tr>',
    ]
    parts.extend("<td>" + title + "</td>" for title in titles)
    parts.append('</tr>')
    # Slicing instead of range(30) avoids an IndexError on short tables.
    for i, row in enumerate(alist[:30]):
        parts.append('<tr>')
        # Cell text comes from a scraped page, so escape it before
        # embedding it in markup.
        for cell in row[:5]:
            parts.append("<td>" + escape(str(cell)) + "</td>")
        link = 'http://news.ceic.ac.cn/CD' + escape(str(olist[i])) + '.html'
        # The href value is quoted explicitly; writing "<a href="'...'">"
        # concatenates adjacent literals and silently drops the quotes.
        parts.append('<td><a href="' + link + '">' + escape(str(row[5])) + '</a></td>')
        parts.append('</tr>')
    parts.append('</table>')
    parts.append('<h2 style="text-align:center">' + "地震网站链接" + '</h2>')
    parts.append('<a href="http://www.ceic.ac.cn/">' + "中国地震台网" + '</a>')
    parts.append('<a href="https://earthquake.usgs.gov/earthquakes/map/">美国地质勘探局</a>')
    parts.append('<a href="https://www.emsc-csem.org/#2">欧洲地中海地震观测中心</a>')
    # The context manager closes the file even if writing fails.
    with open(path, 'w', encoding=encoding) as fileh:
        fileh.write('\n'.join(parts))
def draw(ilist):
    """Draw a turtle line chart of the first seven values of *ilist*.

    Two axes are drawn from the origin.  Consecutive values are joined by
    blue dotted segments, spaced 40 units apart horizontally and scaled
    by 15 vertically.  Tick marks and text labels are then added by
    ``drawTickMarks`` and ``displayText``, which receive the full list.

    Args:
        ilist: Numeric values to plot.  Fewer than seven values are
            plotted as far as they go; an empty list draws only the axes.
    """
    yValues = ilist
    t = turtle.Turtle()
    t.hideturtle()
    drawLine(t,0,0,300,0)  # horizontal axis
    drawLine(t,0,0,0,175)  # vertical axis
    if not yValues:
        # Nothing to plot, and the tick/label helpers need max()/min()
        # of a non-empty sequence.
        return
    # Only the first seven values fit the chart; taking a slice avoids an
    # IndexError when fewer are available.
    points = yValues[:7]
    for i in range(len(points) - 1):
        drawLineWithDots(t,40 + (40 * i),15 * points[i],40 + (40 * (i+1)),15 * points[i+1],"blue")
    drawTickMarks(t,yValues)
    displayText(t,yValues)
def drawLine(t,x1,y1,x2,y2,colorP="black"):
    """Draw a straight segment from (x1, y1) to (x2, y2) with pen *t*.

    The pen is left down and set to *colorP* when the function returns.
    """
    start = (x1, y1)
    end = (x2, y2)
    # Travel to the start with the pen lifted so the move leaves no trace.
    t.up()
    t.goto(*start)
    # Lower the pen, select the colour, then trace the segment.
    t.down()
    t.pencolor(colorP)
    t.goto(*end)
def drawLineWithDots(t,x1,y1,x2,y2,colorP="black"):
    """Draw a segment from (x1, y1) to (x2, y2) with a dot at each end.

    The colour *colorP* is applied first, so it is used for both dots and
    for the segment.  Each dot is drawn with size 5.
    """
    start = (x1, y1)
    end = (x2, y2)
    dot_size = 5
    t.pencolor(colorP)
    # Reach the first end point without drawing, then mark it.
    t.up()
    t.goto(*start)
    t.dot(dot_size)
    # Trace the segment to the second end point and mark that one too.
    t.down()
    t.goto(*end)
    t.dot(dot_size)
def drawTickMarks(t,yValues):
    """Draw the axis tick marks of the chart.

    Seven vertical ticks of height 10 are placed on the horizontal axis
    at x = 40, 80, ..., 280.  Two horizontal ticks of length 10 are then
    placed on the vertical axis at 15 times the largest and 15 times the
    smallest value of *yValues*.
    """
    for tick in range(1, 8):
        x_pos = 40 * tick
        drawLine(t, x_pos, 0, x_pos, 10)
    # The maximum tick is drawn before the minimum tick.
    top = 15 * max(yValues)
    drawLine(t, 0, top, 10, top)
    bottom = 15 * min(yValues)
    drawLine(t, 0, bottom, 10, bottom)
def displayText(t,yValues):
    """Write the chart labels and caption in blue with pen *t*.

    The largest and the smallest value of *yValues* are written left of
    the vertical axis at their scaled heights.  The even numbers 0 to 18
    are written below the horizontal axis, 40 units apart starting at
    x = 40.  The caption is written last, at (0, -50).
    """
    t.pencolor("blue")
    t.up()
    # Largest value first, then the smallest; each is evaluated right
    # before it is written.
    for pick in (max, min):
        extreme = pick(yValues)
        t.goto(-10, (15 * extreme) - 8)
        t.write(extreme, align="center")
    # Labels under the horizontal axis.
    for slot, label in enumerate(range(0, 20, 2)):
        t.goto(40 + 40 * slot, -20)
        t.write(str(label), align="center")
    # Chart caption.
    t.goto(0, -50)
    t.write("地震震源深度分析表",font=("Arial",16,"normal"))
def getxy(alist,xlist,ylist):
    """Append columns 2 and 3 of the first rows as truncated integers.

    Each value is parsed as a float and truncated toward zero by ``int``.
    At most the first 30 rows of *alist* are used; fewer rows (including
    none at all) are handled without error.

    Args:
        alist: Table rows; elements 2 and 3 must be parseable by ``float``.
        xlist: Output list receiving the truncated value of column 2.
        ylist: Output list receiving the truncated value of column 3.

    Raises:
        ValueError: If a coordinate cannot be parsed as a float.
    """
    # Slicing instead of range(30) avoids an IndexError on short tables.
    for row in alist[:30]:
        xlist.append(int(float(row[2])))
        ylist.append(int(float(row[3])))
def drawaddress(xlist,ylist):
    """Show a scatter plot of *ylist* against *xlist* with black triangles.

    The axes span -180..180 horizontally and -90..90 vertically, the full
    range of longitude and latitude in degrees, so the points use the
    whole plot area.  The call blocks until the plot window is closed.

    Args:
        xlist: Horizontal coordinates (the caller passes longitudes).
        ylist: Vertical coordinates (the caller passes latitudes).
    """
    plt.subplot()
    plt.title("地震信号分析表")
    # Limits match the valid degree ranges; limits of +-1800 / +-900
    # squeeze every point into the central tenth of each axis.
    plt.xlim(-180, 180)
    plt.ylim(-90, 90)
    plt.xlabel("x")
    plt.ylabel("y")
    plt.plot(xlist,ylist,'k^')  # 'k^' = black triangle markers, no line
    plt.show()
def main():
    """Run the whole pipeline: download, parse, export and plot.

    The page at ``http://www.ceic.ac.cn/`` is downloaded and its table
    rows are parsed.  The rows are then written to the tab-separated and
    HTML output files, and the depth chart and the location plot are
    displayed.  If no rows could be obtained, a message is printed and
    the remaining steps are skipped.
    """
    print("地震信息收集系统")
    alist=[]      # parsed table rows
    ilist=[]      # values of column 4 for the turtle chart
    datalinks=[]  # out-parameter handed to analyze()
    xlist=[]      # horizontal coordinates for the scatter plot
    ylist=[]      # vertical coordinates for the scatter plot
    olist=[]      # identifiers used for the per-row links
    plist=[]      # raw time strings
    url="http://www.ceic.ac.cn/"
    html=readhtml(url)
    analyze(html,alist,datalinks)
    if not alist:
        # readhtml() returns "" on failure, which yields no rows; every
        # later step needs data, so stop here with a clear message
        # instead of failing further down.
        print("未能获取地震数据，请检查网络连接后重试。")
        return
    collecttime(alist,plist,olist)
    filedeal(alist)
    filetohtml(alist,olist)
    datacollect(alist,ilist)
    getxy(alist,xlist,ylist)
    draw(ilist)
    drawaddress(xlist,ylist)
# Run the pipeline only when the file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()