利用 Python 语言,结合 requests、BeautifulSoup 等类库,爬取 https://api.8684.cn/v3/api.php?do=citys&act=province 对应接口中所有城市的公交路线信息以及公交站点信息。
import time
import requests
import json, re
from bs4 import BeautifulSoup
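# Crawl bus-line names from 8684.cn, then search each name through the AMap
# (Gaode) bus-line API to obtain every line's stop names and coordinates.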
def get_city(target_cities=('广州',),
             amap_key='“替换高德创作平台个人Key”',
             res_dir='E:\\全国公交站点信息数据\\'):
    """Crawl bus lines from 8684.cn and save AMap line/stop data per city.

    The city list is fetched from the 8684 province API. For every city whose
    name is in ``target_cities`` the pages ``line1`` .. ``line199`` of that
    city's 8684 sub-site are scraped for bus-line names, each name is searched
    through the AMap bus-line API, and the returned lines and stops are
    appended to two ``\\u0001``-separated text files per city.

    :param target_cities: collection of city names (as spelled in the ``c``
        field of the 8684 city list) to crawl. Pass a tuple/list/set, not a
        bare string, otherwise ``in`` degrades to a substring test.
    :param amap_key: AMap Web-service key sent as the ``key`` query parameter.
        The default is only a placeholder and must be replaced.
    :param res_dir: path prefix the output file names are appended to; the
        directory it points at is expected to exist already.
    :rtype: None
    """
    city_url = 'https://api.8684.cn/v3/api.php?do=citys&act=province'
    city_data = requests.get(city_url, timeout=10).text
    print(city_data)
    city_res = json.loads(city_data)
    for province in city_res['stations']:
        for city in province['childs']:
            city_py = city['e']
            city_name = city['c']
            # Membership in a collection: exact name match, never a substring.
            if city_name not in target_cities:
                continue
            # line1, line2, ... are the per-category listing pages of a city.
            for k in range(1, 200):
                for line in _iter_line_names(city_py, k):
                    rt = _fetch_amap_buslines(city_name, line, amap_key)
                    if rt is None:
                        continue
                    _save_buslines(rt.get('buslines') or [], line,
                                   city_name, res_dir)


def _iter_line_names(city_py, k):
    """Yield the bus-line names listed on one 8684 ``line<k>`` page."""
    url = 'https://{}.8684.cn/line{}'.format(city_py, k)
    # Browser-like User-Agent so the site serves the regular HTML page.
    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36"
    }
    try:
        res = requests.get(url=url, headers=headers, timeout=10)
    except requests.RequestException as err:
        # One unreachable listing page must not abort the whole crawl.
        print(url, err)
        return
    soup = BeautifulSoup(res.text, "lxml")
    div = soup.find('div', class_='list clearfix')
    if div is None:
        # Page has no line list (e.g. this category does not exist).
        return
    for item in div.find_all('a'):
        lines = item.text
        # Keep only the part before the first space or '(' as search keyword.
        line = re.split(r' |\(', lines)[0]
        print(lines, "++++++++++++++++++++++++++++++++++", line)
        yield line


def _fetch_amap_buslines(city_name, line, amap_key, retries=5, delay=2):
    """Query the AMap bus-line search API; return the decoded dict or None.

    Each attempt issues exactly one request. Attempts that raise or answer
    with a non-200 status are retried up to ``retries`` times, and ``delay``
    seconds are slept after every attempt to throttle the crawl.
    """
    api = 'https://restapi.amap.com/v3/bus/linename'
    # Passing the query as ``params`` lets requests URL-encode the values.
    params = {
        's': 'rsv3',
        'extensions': 'all',
        'key': amap_key,
        'output': 'json',
        'city': city_name,
        'offset': 2,
        'keywords': line,
        'platform': 'JS',
    }
    for _ in range(retries):
        try:
            resp = requests.get(api, params=params, timeout=10)
        except requests.RequestException as err:
            print(api, err)
            resp = None
        time.sleep(delay)
        if resp is None or resp.status_code != 200:
            continue
        print(resp.url)
        try:
            data = json.loads(resp.text)
        except ValueError as err:
            print(resp.url, err)
            return None
        return data if isinstance(data, dict) else None
    return None


def _save_buslines(buslines, line, city_name, res_dir):
    """Format every bus line and stop as a row and append them to the files."""
    sep = '\u0001'
    line_keys = ('id', 'type', 'name', 'polyline', 'citycode', 'start_stop',
                 'end_stop', 'start_time', 'end_time', 'status', 'company')
    line_rows = []
    stop_rows = []
    for busline in buslines:
        info = sep.join(str(busline[key]) for key in line_keys)
        print(info)
        line_rows.append(info)
        for stop in busline['busstops']:
            # The second column is the 8684 search keyword, not the AMap name.
            info_ = sep.join(str(value) for value in (
                stop['id'], line, stop['name'],
                stop['location'], stop['sequence']))
            print(info_)
            stop_rows.append(info_)
    _append_rows(res_dir + str(city_name) + '公交导航信息数据.txt', line_rows)
    _append_rows(res_dir + str(city_name) + '公交站点信息数据.txt', stop_rows)


def _append_rows(path, rows):
    """Append ``rows`` (one per line) to ``path``; no-op when rows is empty."""
    if not rows:
        return
    with open(path, 'a', encoding="utf-8") as file:
        file.writelines(row + "\n" for row in rows)
if __name__ == '__main__':
    # get_city() works only through side effects (HTTP requests, console
    # output and file appends) and has no return statement, so there is
    # nothing worth binding to a name here.
    get_city()
再结合文件写入等操作,将采集到的站点信息以及导航信息保存至对应城市的文件中。
数据样例展示,分隔符为 \u0001(即不可见的 ASCII 控制字符 SOH)。