如何获取高德全量POI

时间:2024-04-15 16:10:47

通过高德API获取POI数据存在两个问题:一是接口调用次数有限制,二是单次查询最多只能返回900条数据。

推荐做法:先将城市区域切分成网格,再按网格逐个调取POI数据,最后合并得到整个城市的POI数据。

一个git解决全部问题:POI获取

运行app.py

修改参数 :1、app.py内部切分城市区域网格颗粒度pology_split_distance ,推荐0.05

                 2、app.py内部城市编码:city_code;

                 3、app.py内部POI类型编码type_code。

                特别提醒:app.py脚本依赖 area_boundary.py、city_grid.py、transCoordinateSystem.py 这三个模块文件,都需要下载或者复制到本地(与app.py放在同一目录)。

 感谢大神:git地址:https://github.com/liujiao111/poi

 

如果不需要城市全量数据,只需要部分数据做分析,下面这段脚本足以解决日常需要!

# 高德地图:http://ditu.amap.com/
# 高德地图poi:http://lbs.amap.com/api/webservice/guide/api/search/#text
# 首先得申请一个下载poi数据的key,然后在对应位置填入你的key

import json
import xlwt
from datetime import datetime
from urllib import request
from urllib.parse import quote
import time
import os


# 获取数据
def get_data(pageindex, url_amap):
    """Fetch one page of POI records from the AMap place-search API.

    Args:
        pageindex: Page number to request; it replaces the literal
            ``pageindex`` placeholder inside ``url_amap``.
        url_amap: Request URL template containing the ``pageindex``
            placeholder.

    Returns:
        The value stored under ``'pois'`` in the decoded JSON response.

    Raises:
        RuntimeError: If the response carries no ``'pois'`` entry (for
            example an error reply); the message includes the response's
            ``'info'`` field when present.

    Side effects:
        Sleeps 0.5 s before every request. While the module-level
        ``total_record`` is still 0 it is set to the response's ``'count'``.
    """
    global total_record
    # Pause 500 ms so consecutive requests are not sent too quickly.
    time.sleep(0.5)
    print('解析页码: ' + str(pageindex) + ' ... ...')
    url = url_amap.replace('pageindex', str(pageindex))
    # Percent-encode the non-ASCII (Chinese) parts, keeping URL delimiters.
    url = quote(url, safe='/:?&=')

    with request.urlopen(url) as f:
        rr = json.loads(f.read())

    # Fail with a readable message instead of an opaque KeyError when the
    # service answers with something other than a result page.
    if 'pois' not in rr:
        raise RuntimeError('高德API返回异常: ' + str(rr.get('info', rr)))

    if total_record == 0:
        total_record = rr['count']
    return rr['pois']


def getPOIdata(page_size, json_name, url_amap):
    """Download every result page of one query and save it as a JSON array.

    Args:
        page_size: Number of records per page; used to derive how many
            pages must be requested from the total record count.
        json_name: Path of the JSON file to write.
        url_amap: Request URL template passed through to ``get_data``.

    The first call to ``get_data`` populates the module-level
    ``total_record``; the remaining pages are then fetched in order. All
    records are collected in memory and written in a single ``json.dump``
    call, so the file is always valid JSON and is only written once every
    page has been fetched successfully.
    """
    global total_record
    print('获取POI数据开始')
    pois = list(get_data(1, url_amap))

    # Ceiling division: a partially filled last page still needs a request.
    page_count = -(-int(total_record) // page_size)
    for each_page in range(2, page_count + 1):
        pois.extend(get_data(each_page, url_amap))

    # json.dump escapes non-ASCII by default, so the file content is ASCII.
    with open(json_name, 'w', encoding='utf-8') as f:
        json.dump(pois, f)
    print('已保存到json文件:' + json_name)
    print('获取POI数据结束')

# 写入数据到excel
def write_data_to_excel(json_name, hkeys, bkeys, name):
    """Export the records of a JSON file to an ``.xls`` workbook.

    Args:
        json_name: Path of the JSON file holding a list of record dicts.
        hkeys: Column titles written to row 0.
        bkeys: Record keys, one per column, in the same order as ``hkeys``.
            A key missing from a record yields an empty cell.
        name: Output path prefix; today's date and ``.xls`` are appended.
    """
    today_date = datetime.today().date()
    out_path = name + str(today_date) + '.xls'

    # Read the records with a context manager so the handle is closed.
    with open(json_name, 'r', encoding='utf-8') as fp:
        result = json.load(fp)

    wbk = xlwt.Workbook()
    # cell_overwrite_ok=True allows writing to the same cell more than once.
    sheet = wbk.add_sheet('Sheet1', cell_overwrite_ok=True)

    # Header row.
    for index, hkey in enumerate(hkeys):
        sheet.write(0, index, hkey)

    # One row per record, starting below the header.
    for row, values in enumerate(result, start=1):
        for index, key in enumerate(bkeys):
            val = values.get(key, "")
            # NOTE(review): the API appears to return [] for empty fields and
            # xlwt is expected to reject list/dict cell values - confirm.
            # Empty containers become "", others are serialised as JSON text.
            if isinstance(val, (list, dict)):
                val = json.dumps(val, ensure_ascii=False) if val else ""
            sheet.write(row, index, val)

    wbk.save(out_path)
    print('保存到excel文件: ' + out_path + '!')


if __name__ == '__main__':
    # Script entry point: for every (city, keyword) pair, download the POI
    # pages to a JSON file and export them to a dated .xls workbook.
    json_name = 'data_amap.json'
    output_dir = 'data_index'
    # AMap POI search API: http://lbs.amap.com/api/webservice/guide/api/search/#text

    # Cities to query. Extend this list (or load it from a JSON file) to
    # cover more cities or provinces.
    city = ["上海市"]
    # Search keywords, e.g. "加油站", "汽车销售", "美食", "购物", "医疗保健", ...
    keyword = ["住宅"]
    # POI type codes (see the API page above for the code table).
    # keyword[j] is paired with type_codes[j], so this list must be at least
    # as long as `keyword`.
    type_codes = ["120300", '120301', '120302', '120303', '120304']

    # Fill in the key you applied for on the AMap open platform.
    api_key = '你申请的key'
    # Records per page. The same value is sent as `offset` in the URL and
    # used for the page count, so the two can no longer disagree.
    # The original comment advised keeping this at or below 25.
    page_size = 20

    # Excel column titles.
    hkeys = ['id', '行业类型', '名称', '类型', '地址', '联系电话', 'location', '省份代码', '省份名称', '城市代码', '城市名称', '区域代码', '区域名称',
             '所在商圈']
    # Record fields, one per column, in the same order as hkeys.
    bkeys = ['id', 'biz_type', 'name', 'type', 'address', 'tel', 'location', 'pcode', 'pname', 'citycode',
             'cityname', 'adcode', 'adname', 'business_area']

    # The workbook is saved inside this directory, so make sure it exists.
    os.makedirs(output_dir, exist_ok=True)

    for city_name in city:
        for j, kw in enumerate(keyword):
            url_amap = ('http://restapi.amap.com/v3/place/text?key=' + api_key
                        + '&keywords=' + kw
                        + '&types=' + type_codes[j]
                        + '&city=' + city_name
                        + '&citylimit=true&children=1&offset=' + str(page_size)
                        + '&page=pageindex&extensions=all')
            # Reset the counter so get_data re-reads the total for this query.
            total_record = 0
            # Download to JSON; can be commented out on a second run.
            getPOIdata(page_size, json_name, url_amap)
            # Export the JSON file to Excel, named after the current city.
            write_data_to_excel(json_name, hkeys, bkeys,
                                os.path.join(output_dir, city_name + kw + "-高德地图"))
            # Wait between queries to stay within the API rate limits.
            time.sleep(15)