#之前写的代码
from requests_html import HTMLSession
import requests
import time
import re
import datetime
import json
# HTTP session shared by every scraping function in this file.
session = HTMLSession()

# NOTE(review): the module name on this import line and the call targets in
# the statements below were missing from the copy this was restored from.
# `mysql.connector` is inferred from the connect() keyword arguments and the
# %s placeholders used by the INSERT statements - confirm before running.
import mysql.connector

mydb = mysql.connector.connect(
    host="localhost",
    user="****",
    passwd="****",
    database="***"
)
mycursor = mydb.cursor()

# Empty the target table before scraping. This is destructive and runs as
# soon as the module is loaded.
sql = "TRUNCATE TABLE city_data"
mycursor.execute(sql)
mydb.commit()
# Fetch provinces and municipalities.
def get_Province_list():
    """Scrape the top-level index page and store every province found.

    For each link in the province table this prints the code and the name,
    calls get_City_list() for that code, then inserts the province into
    city_data and commits.

    NOTE(review): the method/attribute names, the host part of the URL and
    the CSS selector were missing from the copy this was restored from.
    They are rebuilt from how each result is used; the host and the selector
    are assumptions - confirm against the live page.
    """
    # Assumed host for the /tjsj/tjbz/tjyqhdmhcxhfdm/ path - TODO confirm.
    base_url = 'http://www.stats.gov.cn'
    response = session.get(base_url + '/tjsj/tjbz/tjyqhdmhcxhfdm/2017/')
    # Assumed selector for the province table - TODO confirm.
    content = response.html.find('table.provincetable', first=True)
    if content is None:
        # Same message get_County_list() uses when its table is absent.
        print("表格不存在!")
        return
    for li in content.find('a'):
        url = li.attrs['href']
        # The first run of digits in the href is used as the province code.
        code = re.findall(r"\d+", url)[0]
        cname = li.text
        print(code)
        print('省级', cname)
        get_City_list(code)
        # SQL insert statement.
        sql = "INSERT INTO city_data (name, code) VALUES (%s, %s)"
        val = (cname, code)
        mycursor.execute(sql, val)
        # The table contents changed, so the commit is required.
        mydb.commit()
# Fetch cities.
def get_City_list(pid):
    """Scrape the city page of one province and store every city found.

    Args:
        pid: Province code; it names the page to fetch and is stored as the
            pId of every city row.

    For each row this prints the city code and name, inserts the city into
    city_data, commits, and then calls get_County_list() for that city.

    NOTE(review): the method/attribute names, the host part of the URL and
    the CSS selectors were missing from the copy this was restored from.
    They are rebuilt from how each result is used; the host and the
    selectors are assumptions - confirm against the live page.
    """
    # Assumed host for the /tjsj/tjbz/tjyqhdmhcxhfdm/ path - TODO confirm.
    base_url = 'http://www.stats.gov.cn'
    response = session.get(
        base_url + '/tjsj/tjbz/tjyqhdmhcxhfdm/2017/' + pid + '.html')
    # Assumed selector for the city table - TODO confirm.
    content = response.html.find('table.citytable', first=True)
    if content is None:
        # Same message get_County_list() uses when its table is absent.
        print("表格不存在!")
        return
    # Assumed selector for the city rows - TODO confirm.
    citys = content.find('tr.citytr')
    for city in citys:
        td_list = city.find('td')
        ycode = td_list[0].find('a')[0]
        # Only the first four characters of the linked code are kept.
        ccode = ycode.text[0:4]
        cname = td_list[1].find('a')[0].text
        print('ccode', ccode)
        print('城市名', cname)
        # SQL insert statement.
        sql = "INSERT INTO city_data (name, code,pId) VALUES (%s, %s, %s)"
        val = (cname, ccode, pid)
        mycursor.execute(sql, val)
        # The table contents changed, so the commit is required.
        mydb.commit()
        get_County_list(pid, ccode)
# Fetch county-level cities.
def get_County_list(pid, cid):
    """Scrape the county page of one city and store every county found.

    Args:
        pid: Province code; used as the directory part of the page URL.
        cid: City code; names the page to fetch and is stored as the pId of
            every county row.

    Prints "表格不存在!" when the page has no county table and
    "没有循环数据!" when the table has no rows. The original attached the
    second message to a for/else, which runs after every loop that does not
    break, so it was printed even when rows had been stored.

    NOTE(review): the method/attribute names, the host part of the URL and
    the CSS selectors were missing from the copy this was restored from.
    They are rebuilt from how each result is used; the host and the
    selectors are assumptions - confirm against the live page.
    """
    # Assumed host for the /tjsj/tjbz/tjyqhdmhcxhfdm/ path - TODO confirm.
    base_url = 'http://www.stats.gov.cn'
    response = session.get(
        base_url + '/tjsj/tjbz/tjyqhdmhcxhfdm/2017/' + pid + '/' + cid + '.html')
    # Assumed selector for the county table - TODO confirm.
    content = response.html.find('table.countytable', first=True)
    if not content:
        print("表格不存在!")
        return
    # Assumed selector for the county rows - TODO confirm.
    citys = content.find('tr.countytr')
    if not citys:
        print("没有循环数据!")
        return
    for city in citys:
        td_list = city.find('td')
        ycode = td_list[0].find('a')
        if len(ycode) == 0:
            # Row without a link: take the plain cell text.
            # NOTE(review): this stores the whole cell text while the linked
            # branch keeps only six characters - confirm that is intended.
            ccode = td_list[0].text
            cname = td_list[1].text
        else:
            ccode = ycode[0].text[0:6]
            cname = td_list[1].find('a')[0].text
        print('ccode', ccode)
        print('县级市', cname)
        sql = "INSERT INTO city_data (name, code,pId) VALUES (%s, %s, %s)"
        val = (cname, ccode, cid)
        mycursor.execute(sql, val)
        # The table contents changed, so the commit is required.
        mydb.commit()
# Script entry point: start the crawl from the province level.
if __name__ == '__main__':
    get_Province_list()
# Reposted from: /ytyjm/blog/3049563