数据采集---json格式数据

时间:2024-10-06 07:49:23
def parse(self, response):
    """Parse a listing page whose job data is embedded as JSON in a script tag.

    The page carries a ``<script>`` whose text contains
    ``__INITIAL_STATE__=`` followed by a JSON document.  Each entry of that
    document's ``positionList`` is turned into a ``ZhilianItem`` and handed
    to a follow-up request for the position's own URL via ``meta["item"]``.

    NOTE(review): this method was reconstructed from a copy that had been
    collapsed onto a single line and had lost its call receivers (it read
    ``js = ('...').extract_first()``, ``d = (r)``, ``yield (url=...,
    callback=, ...)``).  The receivers restored here (``response.xpath``,
    ``json`` decoding, ``dict.get``) are the evident reading, but confirm
    them against the original spider.

    Args:
        response: The downloaded listing page.  It must provide ``xpath()``,
            ``follow()`` and ``url`` (presumably a Scrapy text response).

    Yields:
        One request per ``positionList`` entry, built with
        ``response.follow`` and carrying the partially filled item in
        ``meta["item"]``.  Nothing is yielded when the script tag or the
        position list is missing.

    Raises:
        json.JSONDecodeError: If the text after the marker is not valid JSON.
    """
    import json
    import logging

    marker = "__INITIAL_STATE__="
    script_text = response.xpath(
        '//script[contains(.,"__INITIAL_STATE__=")]/text()'
    ).extract_first()

    # partition() copes with both "no script found" (None/empty) and
    # "marker not in the first text node" without raising.
    _, found, payload = (script_text or "").partition(marker)
    if not found:
        logging.getLogger(__name__).warning(
            "No %s script found on %s", marker, response.url
        )
        return

    # raw_decode() reads one JSON document from the start of the string and
    # ignores whatever follows it (e.g. a trailing ";" or more script code).
    # It does not skip leading whitespace, hence the lstrip().
    state, _ = json.JSONDecoder().raw_decode(payload.lstrip())

    for company in state.get("positionList") or []:
        itemdata = ZhilianItem()
        itemdata["title"] = company.get("name")
        itemdata["company"] = company.get("companyName")
        itemdata["salary"] = company.get("salary60")
        # Missing parts are treated as empty strings so one incomplete entry
        # does not abort the whole page with a TypeError.
        itemdata["address"] = (
            (company.get("workCity") or "") + (company.get("cityDistrict") or "")
        )

        # Collect each welfare label as a whole string; extending a list with
        # "+=" and a str would split the label into single characters.
        welfare = [
            label.get("value") for label in company.get("welfareLabel") or []
        ]
        itemdata["post"] = str(welfare)

        itemdata["experience"] = (
            (company.get("workType") or "")
            + ",工作经验"
            + (company.get("workingExp") or "")
        )

        article_url = company.get("positionURL")
        print(itemdata["title"])

        # NOTE(review): the callback name was lost in the source
        # ("callback=,"); `parse_detail` is a placeholder -- replace it with
        # the spider's actual detail-page handler.
        # response.follow() builds the request without relying on a
        # module-level import that is not visible here; absolute URLs are
        # passed through unchanged.
        yield response.follow(
            url=article_url,
            meta={"item": itemdata},
            callback=self.parse_detail,
            dont_filter=True,
        )