json API通用爬取模块

时间:2022-05-26 02:08:24

标签:python 爬虫

# Generic helpers for calling JSON HTTP APIs and extracting fields from the
# decoded responses.  The code below is written to run unchanged on both
# Python 2 and Python 3 (single-argument ``print(...)`` calls only).
#
# ``requests`` is imported inside the functions that perform HTTP calls so
# that the pure data helpers (makePostData, getTableField, extractionData)
# remain usable when the HTTP dependency is not installed.

# Placeholder stored by getTableField when a field path cannot be resolved.
# It is the *string* "None", not the None object.
_MISSING_FIELD = "None"

# Errors that mean "this step of the path does not exist in this item":
# missing dict key, list index out of range, indexing a non-container
# (including the placeholder string itself), or a non-numeric "[index]".
_PATH_ERRORS = (KeyError, IndexError, TypeError, ValueError)


def jsonRequest(url, postdata, timeout=None):
    """POST ``postdata`` to ``url`` and return the decoded JSON body.

    :param url: address of the API endpoint.
    :param postdata: form data passed to ``requests.post`` as ``data``.
    :param timeout: optional timeout forwarded to ``requests.post``;
        ``None`` (the default) keeps the original wait-forever behaviour.
    :return: the decoded JSON body when the status code is 200, otherwise
        the string ``"requests faild"``.
    """
    import requests

    response = requests.post(url, data=postdata, timeout=timeout)
    if response.status_code == 200:
        return response.json()
    return "requests faild"


def extractionData(jsondata, keynote):
    """Print and return the value stored under ``keynote`` in ``jsondata``.

    A dict value is printed one ``key value`` pair per line.  For a list
    value, every dict element is printed the same way and non-dict elements
    are skipped.  Any other value is printed as-is.

    :param jsondata: decoded JSON object (a mapping).
    :param keynote: key to look up in ``jsondata``.
    :return: ``jsondata[keynote]``.
    :raises KeyError: if ``keynote`` is not present in a dict ``jsondata``.
    """
    extractionValue = jsondata[keynote]
    if isinstance(extractionValue, dict):
        _printPairs(extractionValue)
    elif isinstance(extractionValue, list):
        for element in extractionValue:
            if isinstance(element, dict):
                _printPairs(element)
    else:
        print(extractionValue)
    return extractionValue


def _printPairs(mapping):
    """Print each ``key value`` pair of ``mapping`` on its own line."""
    for key, value in mapping.items():
        # One pre-formatted string prints the same text on Python 2 and 3.
        print("%s %s" % (key, value))


def _statusEnvelope(response):
    """Build the ``status``/``msg``/``result`` dict for ``response``.

    The body is decoded only when the status code is 200; otherwise
    ``result`` is an empty string.
    """
    if response.status_code == 200:
        return {
            'status': 0,
            'msg': "请求url告成",
            'result': response.json(),
        }
    return {
        'status': 400,
        'msg': "请求url掉败",
        'result': '',
    }


def jsonRequestPost(url, postdata, timeout=None):
    """POST ``postdata`` to ``url`` and return a status envelope.

    :param url: address of the API endpoint.
    :param postdata: form data passed to ``requests.post`` as ``data``.
    :param timeout: optional timeout forwarded to ``requests.post``;
        ``None`` (the default) keeps the original behaviour.
    :return: dict with keys ``status`` (0 on HTTP 200, else 400), ``msg``
        and ``result`` (decoded JSON on HTTP 200, else ``''``).
    """
    import requests

    return _statusEnvelope(requests.post(url, data=postdata, timeout=timeout))


def jsonRequestGet(url, timeout=None):
    """GET ``url`` with a browser-like User-Agent and return a status envelope.

    :param url: address of the API endpoint.
    :param timeout: optional timeout forwarded to ``requests.get``;
        ``None`` (the default) keeps the original behaviour.
    :return: dict with keys ``status`` (0 on HTTP 200, else 400), ``msg``
        and ``result`` (decoded JSON on HTTP 200, else ``''``).
    """
    import requests

    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 UBrowser/6.2.3964.2 Safari/537.36",
    }
    return _statusEnvelope(requests.get(url, headers=headers, timeout=timeout))


def makePostData(params):
    """Convert a list of ``{'key': ..., 'value': ...}`` dicts into one dict.

    :param params: iterable of dicts, each having a ``'key'`` and a
        ``'value'`` entry.  Later entries overwrite earlier ones that use
        the same key.
    :return: dict mapping each ``'key'`` to its ``'value'``.
    """
    return dict((item['key'], item['value']) for item in params)


def _followKey(item, key):
    """Resolve one path segment of ``key`` against ``item``.

    ``key`` is either a plain key (``"ip"``) or a key followed by a list
    index (``"networks[1]"``).  Returns the placeholder string when the
    segment cannot be resolved.
    """
    try:
        if '[' in key:
            pieces = key.split('[')
            name = pieces[0]
            index = pieces[1].split(']')[0]
            return item[name][int(index)]
        return item[key]
    except _PATH_ERRORS:
        return _MISSING_FIELD


def getTableField(jsonrequestdata, responseListKey, sourceField):
    """Collect one field from every record of a list nested in a response.

    :param jsonrequestdata: decoded JSON response data.
    :param responseListKey: ``|``-separated key path leading to the list of
        records, e.g. ``"data|list"``.
    :param sourceField: ``|``-separated path to read inside each record; a
        segment may carry a list index, e.g. ``"networks[1]|ip"`` or
        ``"serverInfo|hostname"``.
    :return: list with one entry per record: the resolved value, or the
        string ``"None"`` when the path does not exist in that record.
    :raises KeyError: if ``responseListKey`` names a key that is missing
        from a dict along the way (only the per-record lookup is tolerant).
    """
    records = jsonrequestdata
    for key in responseListKey.split('|'):
        records = records[key]

    fieldPath = sourceField.split('|')
    itemlist = []
    for record in records:
        value = record
        for key in fieldPath:
            # Once a step fails, value is the placeholder string and every
            # later step fails too, so the placeholder carries through.
            value = _followKey(value, key)
        itemlist.append(value)
    return itemlist


if __name__ == "__main__":
    url = "https://box.maoyan.com/promovie/api/box/second.json"
    params = [{"value": "1", "key": "flag"},
              {"value": "12", "key": "userid"},
              {"value": "1524234956", "key": "expiretime"},
              {"value": "1d5df8ff087815336a8e0f299c9811fe", "key": "token"},
              {"value": "7", "key": "roleid"},
              {"value": "190", "key": "projectid"}
              ]
    # Built only to demonstrate makePostData; the request below is a GET
    # and does not send it.
    postdata = makePostData(params)
    jsonrequestdata = jsonRequestGet(url)['result']
    responseListKey = "data|list"
    sourceField = "movieName"
    print(getTableField(jsonrequestdata, responseListKey, sourceField))


&

json API通用爬取模块

标签:python 爬虫