Python 爬虫:爬取全国省、市、区(县)数据

时间:2025-03-19 07:01:39

#之前写的代码

from requests_html import HTMLSession
import requests
import time
import re
import  datetime
import json
# NOTE(review): in the text this was recovered from, the module name after
# `import` and every dotted call target in this section were missing, which
# left the script syntactically invalid.  They are restored here as the
# MySQL Connector/Python API, inferred from the connect() keyword arguments
# and the mydb/mycursor names -- confirm against the original script.
import mysql.connector

# One HTTP session shared by all crawl functions below.
session = HTMLSession()

# Connection settings are placeholders; fill in real credentials before use.
mydb = mysql.connector.connect(
    host="localhost",
    user="****",
    passwd="****",
    database="***"
)
mycursor = mydb.cursor()

# Empty the target table so each run stores a fresh copy of the data.
sql = "TRUNCATE TABLE city_data"
mycursor.execute(sql)

mydb.commit()
 
# 获取省与直辖市
def get_Province_list():
    """Crawl the province index page, then store each province and its cities.

    For every link found in the province table this prints the province code
    and name, calls ``get_City_list(code)`` to crawl the next level, and then
    inserts the province into ``city_data`` (name, code) and commits.

    Uses the module-level ``session``, ``mycursor`` and ``mydb`` objects.
    Returns ``None``.
    """
    # NOTE(review): the call targets, the URL scheme/host and the CSS selector
    # in this function were missing from the recovered text (only the path
    # '/tjsj/tjbz/tjyqhdmhcxhfdm/2017/' survived).  They are reconstructed
    # here -- confirm the host and selector against the live site.
    response = session.get('http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2017/')

    content = response.html.find('table.provincetable', first=True)
    if content is None:
        # Same handling as get_County_list uses when the table is missing.
        print("表格不存在!")
        return

    sql = "INSERT INTO city_data (name, code) VALUES (%s, %s)"

    for li in content.find('a'):
        url = li.attrs['href']
        # The first run of digits in the link target is the province code.
        code = re.findall(r"\d+", url)[0]
        cname = li.text
        print(code)
        print('省级', cname)

        # Crawl the cities of this province before storing the province row
        # (order kept from the original).
        get_City_list(code)

        val = (cname, code)
        mycursor.execute(sql, val)

        # The insert is not persisted until the connection commits.
        mydb.commit()
       
#获取市
def get_City_list(pid):
    """Crawl the city page of one province and store each city, then its counties.

    Args:
        pid: Province code as a string; it is used both in the page URL
            (``.../2017/<pid>.html``) and as the ``pId`` of every stored city.

    For every city row this prints the code and name, inserts
    (name, code, pId) into ``city_data``, commits, and calls
    ``get_County_list(pid, ccode)`` to crawl the next level.

    Uses the module-level ``session``, ``mycursor`` and ``mydb`` objects.
    Returns ``None``.
    """
    # NOTE(review): the call targets, the URL scheme/host and the CSS
    # selectors in this function were missing from the recovered text and are
    # reconstructed here -- confirm the host and selectors against the live
    # site.
    response = session.get(
        'http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2017/' + pid + '.html'
    )

    content = response.html.find('table.citytable', first=True)
    if content is None:
        # Same handling as get_County_list uses when the table is missing.
        print("表格不存在!")
        return

    citys = content.find('tr.citytr')

    sql = "INSERT INTO city_data (name, code,pId) VALUES (%s, %s, %s)"

    for city in citys:
        td_list = city.find('td')
        # First cell holds the linked full code, second cell the linked name.
        ycode = td_list[0].find('a')[0]
        # Only the first four characters of the code are kept for a city.
        ccode = ycode.text[0:4]
        cname = td_list[1].find('a')[0].text
        print('ccode', ccode)
        print('城市名', cname)

        val = (cname, ccode, pid)
        mycursor.execute(sql, val)

        # The insert is not persisted until the connection commits.
        mydb.commit()

        get_County_list(pid, ccode)
#获取县级市
def get_County_list(pid, cid):
    """Crawl the county page of one city and store each linked county.

    Args:
        pid: Province code as a string (first URL path segment).
        cid: City code as a string; used in the page URL
            (``.../2017/<pid>/<cid>.html``) and as the ``pId`` of every
            stored county.

    Prints "表格不存在!" when the county table is not on the page and
    "没有循环数据!" when the table has no county rows.  Otherwise, for every
    row whose code cell contains a link, prints the code and name, inserts
    (name, code, pId) into ``city_data`` and commits.

    Uses the module-level ``session``, ``mycursor`` and ``mydb`` objects.
    Returns ``None``.
    """
    # NOTE(review): the call targets, the URL scheme/host and the CSS
    # selectors in this function were missing from the recovered text and are
    # reconstructed here -- confirm the host and selectors against the live
    # site.
    response = session.get(
        'http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2017/'
        + pid + '/' + cid + '.html'
    )

    content = response.html.find('table.countytable', first=True)
    if content is None:
        print("表格不存在!")
        return

    citys = content.find('tr.countytr')
    if not citys:
        # The original attached this message to a for/else, which runs after
        # every loop that finishes without `break`, so it was printed even
        # when rows had been processed.  Report it only when there are no rows.
        print("没有循环数据!")
        return

    sql = "INSERT INTO city_data (name, code,pId) VALUES (%s, %s, %s)"

    for city in citys:
        td_list = city.find('td')
        ycode = td_list[0].find('a')

        if not ycode:
            # NOTE(review): the original read the plain-text code and name for
            # rows without a link but never printed or stored them; that
            # behaviour is kept by skipping the row.  Confirm whether unlinked
            # rows should be stored as well.
            continue

        # Only the first six characters of the code are kept for a county.
        ccode = ycode[0].text[0:6]
        cname = td_list[1].find('a')[0].text

        print('ccode', ccode)
        print('县级市', cname)

        val = (cname, ccode, cid)
        mycursor.execute(sql, val)

        # The insert is not persisted until the connection commits.
        mydb.commit()
 
# Script entry point: start the crawl at the province level only when this
# file is executed directly.
if "__main__" == __name__:
    get_Province_list()
 

转载于:/ytyjm/blog/3049563