之前博主通过 Python 爬取了电影信息;今天,博主要分享的是一个疫情监控可视化项目。
编程语言:python
框架:flask
数据库:MySQL
开发环境: python3.7 pycharm
涉及知识:爬虫 + Ajax + ECharts + Flask
首先给大家看一下最终效果:
我们来梳理一下项目的流程:
爬取各个网站平台的数据
将所得数据进行处理并插入数据库中
从数据库中读取数据并使用 ECharts 图表展示
爬取数据
# 爬取并处理腾讯疫情数据
import requests
import json
import time
def _iso_date(year, month_day):
    """Convert a feed date such as ``"01.13"`` into ``"YYYY-MM-DD"`` for *year*."""
    parsed = time.strptime(f"{year}.{month_day}", "%Y.%m.%d")
    # Re-format with dashes: the original author notes the database column
    # rejects the dotted form.
    return time.strftime("%Y-%m-%d", parsed)


def get_tencent_data(year=2020, timeout=10):
    """Fetch Tencent's epidemic feeds and return ``(history, details)``.

    Args:
        year: Year prepended to the feed's month/day dates (the feed entries
            read here carry no year of their own). Defaults to 2020.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A tuple ``(history, details)``:

        * ``history`` maps ``"YYYY-MM-DD"`` to a dict with the cumulative
          ``confirm``/``suspect``/``heal``/``dead`` values and, where the feed
          provides them, the daily ``confirm_add``/``suspect_add``/
          ``heal_add``/``dead_add`` values.
        * ``details`` is a list of
          ``[update_time, province, city, confirm, confirm_add, heal, dead]``.

    Raises:
        requests.RequestException: on network failure, timeout or HTTP error.
    """
    url1 = "https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5"
    url2 = "https://view.inews.qq.com/g2/getOnsInfo?name=disease_other"
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.70 Safari/537.36'
    }
    # ``headers`` must be passed by keyword: the second positional parameter
    # of ``requests.get`` is ``params``, which would put the user-agent into
    # the query string instead of the request headers.
    r1 = requests.get(url1, headers=headers, timeout=timeout)
    r2 = requests.get(url2, headers=headers, timeout=timeout)
    # Fail with a clear HTTP error rather than a confusing JSON decode error.
    r1.raise_for_status()
    r2.raise_for_status()

    # Each response is a JSON envelope whose "data" field is itself a JSON string.
    data_all1 = json.loads(json.loads(r1.text)["data"])
    data_all2 = json.loads(json.loads(r2.text)["data"])

    # Historical data: cumulative figures first, then the daily increments.
    history = {}
    for day in data_all2["chinaDayList"]:
        ds = _iso_date(year, day["date"])
        history[ds] = {
            "confirm": day["confirm"],
            "suspect": day["suspect"],
            "heal": day["heal"],
            "dead": day["dead"],
        }
    for day in data_all2["chinaDayAddList"]:
        ds = _iso_date(year, day["date"])
        # setdefault: a date present only in the increment list must not
        # abort the whole scrape with a KeyError.
        history.setdefault(ds, {}).update({
            "confirm_add": day["confirm"],
            "suspect_add": day["suspect"],
            "heal_add": day["heal"],
            "dead_add": day["dead"],
        })

    # Today's per-city details.
    details = []
    update_time = data_all1["lastUpdateTime"]
    data_country = data_all1["areaTree"]  # list of countries
    # The first entry is taken to be China; its children are the provinces.
    data_province = data_country[0]["children"]
    for pro_infos in data_province:
        province = pro_infos["name"]
        for city_infos in pro_infos["children"]:
            total = city_infos["total"]
            details.append([
                update_time,
                province,
                city_infos["name"],
                total["confirm"],
                city_infos["today"]["confirm"],
                total["heal"],
                total["dead"],
            ])
    return history, details
# Demo: run the scraper once and dump both result sets, one per line.
his, de = get_tencent_data()
print(his, de, sep="\n")
这段代码用于爬取数据并且将数据进行处理并输出
处理后的数据
接下来我们需要将数据插入到数据库中
数据库操作
数据基本配置:连接与关闭
def get_conn(host="127.0.0.1", user="root", password="px980305", db="cov", charset="utf8"):
    """Open a MySQL connection and return ``(conn, cursor)``.

    All settings default to the values this project has always used, so
    existing ``get_conn()`` callers are unaffected; pass keyword arguments to
    target another server or schema.

    Args:
        host: MySQL server address.
        user: Login user name.
        password: Login password.
        db: Name of the database (schema) to use.
        charset: Connection character set.

    Returns:
        A tuple ``(conn, cursor)``; release both with ``close_conn``.
    """
    # NOTE(review): the default password is kept in source only for backward
    # compatibility -- prefer passing it in from configuration.
    conn = pymysql.connect(
        host=host,
        user=user,
        password=password,
        database=db,  # ``db=`` is a deprecated alias of ``database=`` in PyMySQL
        charset=charset,
    )
    try:
        cursor = conn.cursor()
    except Exception:
        # Do not leak the connection if the cursor cannot be created.
        conn.close()
        raise
    return conn, cursor
def close_conn(conn, cursor):
    """Close *cursor* and then *conn*, skipping whichever is ``None``.

    The connection is closed even when closing the cursor raises, so a
    failing cursor cannot leak the underlying connection.

    Args:
        conn: Connection object exposing ``close()``, or ``None``.
        cursor: Cursor object exposing ``close()``, or ``None``.
    """
    try:
        if cursor is not None:
            cursor.close()
    finally:
        if conn is not None:
            conn.close()
插入数据
def update_details():
    """Insert the latest per-city rows into the ``details`` table.

    The rows come from ``get_tencent_data()[1]``. They are inserted only when
    their ``update_time`` differs from that of the most recently inserted row
    (highest ``id``), so repeated runs do not duplicate a snapshot. Errors
    are printed as a traceback instead of propagating, and the connection is
    always closed.
    """
    cursor = None
    conn = None
    try:
        li = get_tencent_data()[1]  # index 0 is history, index 1 is today's details
        if not li:
            # Nothing scraped: avoid the IndexError on li[0] below.
            print(f"{time.asctime()}未获取到详细数据,跳过更新")
            return
        conn, cursor = get_conn()
        sql = "insert into details(update_time,province,city,confirm,confirm_add,heal,dead) values(%s,%s,%s,%s,%s,%s,%s)"
        # Evaluates to 1 when the scraped timestamp equals the newest stored
        # one, 0 when it differs, and NULL (None) when the table is empty.
        sql_query = "select %s=(select update_time from details order by id desc limit 1)"
        cursor.execute(sql_query, (li[0][0],))
        if not cursor.fetchone()[0]:
            print(f"{time.asctime()}开始更新数据")
            # One batched call instead of one round trip per row.
            cursor.executemany(sql, li)
            conn.commit()
            print(f"{time.asctime()}更新到最新数据")
        else:
            print(f"{time.asctime()}已是最新数据!")
    except Exception:
        # Best-effort job: report the failure but let KeyboardInterrupt and
        # SystemExit through, which a bare ``except:`` would have swallowed.
        traceback.print_exc()
    finally:
        close_conn(conn, cursor)
插入结果如图所示
数据库中的数据分为三类,分别是疫情统计、疫情详情和当前疫情热度。
完成数据库模块后,接下来使用 Flask 框架设置路由,并引入 ECharts 插件。
Flask整合Web
# Flask application object; the @app.route decorators in this file register their handlers on it.
app = Flask(__name__)
@app.route('/')
def hello_world():
    """Render the ``main.html`` template for the site root."""
    page = render_template("main.html")
    return page
@app.route('/c1')
def get_c1_data():
    """Return the first four values of ``utils.get_c1_data()`` as JSON.

    The values are exposed, in order, under the keys ``confirm``,
    ``suspect``, ``heal`` and ``dead``.
    """
    totals = utils.get_c1_data()
    return jsonify(
        confirm=totals[0],
        suspect=totals[1],
        heal=totals[2],
        dead=totals[3],
    )
@app.route('/c2')
def get_c2_data():
    """Return ``{"data": [{"name": ..., "value": int}, ...]}`` as JSON.

    Each row from ``utils.get_c2_data()`` contributes one entry: its first
    field becomes ``name`` and its second field, cast to ``int``, ``value``.
    """
    points = [
        {"name": row[0], "value": int(row[1])}
        for row in utils.get_c2_data()
    ]
    return jsonify(data=points)
@app.route("/l1")
def get_l1_data():
    """Return the cumulative trend series as JSON, one list per metric.

    Rows come from ``utils.get_l1_data()`` as
    ``(date, confirm, suspect, heal, dead)``; the date is formatted as
    ``MM-DD``.
    """
    rows = utils.get_l1_data()
    series = {"day": [], "confirm": [], "suspect": [], "heal": [], "dead": []}
    # The first 7 rows are dropped: per the original author, many health
    # commission sites have no data for those days.
    for stamp, confirmed, suspected, healed, died in rows[7:]:
        series["day"].append(stamp.strftime("%m-%d"))  # stamp is a datetime
        series["confirm"].append(confirmed)
        series["suspect"].append(suspected)
        series["heal"].append(healed)
        series["dead"].append(died)
    return jsonify(series)
@app.route("/l2")
def get_l2_data():
    """Return the daily-increase series as JSON, one list per metric.

    Rows come from ``utils.get_l2_data()`` as
    ``(date, confirm_add, suspect_add)``; the first 7 rows are skipped and
    the date is formatted as ``MM-DD``.
    """
    rows = utils.get_l2_data()
    series = {"day": [], "confirm_add": [], "suspect_add": []}
    for stamp, new_confirmed, new_suspected in rows[7:]:
        series["day"].append(stamp.strftime("%m-%d"))  # stamp is a datetime
        series["confirm_add"].append(new_confirmed)
        series["suspect_add"].append(new_suspected)
    return jsonify(series)
@app.route("/r1")
def get_r1_data():
    """Return parallel ``city`` and ``confirm`` lists as JSON.

    Each ``(city, count)`` pair from ``utils.get_r1_data()`` contributes one
    element to each list; the count is cast to ``int``.
    """
    # Single pass over the source rows, converting the count as we go.
    pairs = [(name, int(count)) for name, count in utils.get_r1_data()]
    names = [name for name, _ in pairs]
    counts = [count for _, count in pairs]
    return jsonify(city=names, confirm=counts)
@app.route("/r2")
def get_r2_data():
    """Return keyword/heat pairs extracted from the hot-search rows as JSON.

    Each row from ``utils.get_r2_data()`` is a 1-tuple whose string is a
    headline with its heat number appended, e.g.
    ``('民警抗疫一线奋战16天牺牲1037364',)``. The trailing digits are split off as
    the value, and every non-numeric keyword found in the headline is emitted
    with that value.
    """
    keywords = []
    for row in utils.get_r2_data():
        entry = row[0]
        headline = entry.rstrip(string.digits)  # drop the trailing heat number
        heat = entry[len(headline):]  # the digits that were stripped off
        keywords.extend(
            {"name": word, "value": heat}
            for word in extract_tags(headline)  # jieba keyword extraction
            if not word.isdigit()
        )
    return jsonify({"kws": keywords})
完成这些后,整个项目便完成了。