Python 导入包:import requests。下面以爬取腾讯招聘网的招聘信息为例(完整代码):
import requests
from lxml import etree
import pymysql

# Scrape the first page of the Tencent careers listing and collect each
# column of the result table into its own list.
headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36'}

# A timeout keeps the script from hanging forever on a stalled connection,
# and raise_for_status() stops here on an HTTP error instead of parsing an
# error page as if it were the listing.
response = requests.get(
    'https://hr.tencent.com/position.php?lid=&tid=&keywords=',
    headers=headers,
    timeout=10,
)
response.raise_for_status()
html = etree.HTML(response.content.decode())


def _column_texts(index):
    """Return the text nodes of the index-th (1-based) cell of every listing row.

    Listing rows carry class "even" or "odd"; the union of both XPath
    expressions is evaluated exactly as in the individual queries it replaces.
    """
    return html.xpath(
        '//tr[@class="even"]/td[{0}]/text()| //tr[@class="odd"]/td[{0}]/text()'.format(index)
    )


# Job title
list_zhiwei = html.xpath('//td[@class="l square"]/a/text()')
print(list_zhiwei)

# Job category
list_style = _column_texts(2)
print(list_style)

# Number of openings
list_num = _column_texts(3)
print(list_num)

# Location
list_place = _column_texts(4)
print(list_place)

# Publish date
list_time = _column_texts(5)
print(list_time)

# Next is the database insert step -- this is where I made a mistake in real
# use today; it is written up here as a reminder not to repeat it.
# Store the scraped rows in MySQL.
#
# Lesson recorded by this post: the cursor close, the commit and the
# connection close must sit at the same level as the statements that created
# those objects, i.e. OUTSIDE the for loop. In the mistaken version they were
# indented into the loop body, so the cursor and connection were closed after
# the first iteration: the script raised an error and only a single row
# reached the table.
conn = pymysql.connect(
    host='127.0.0.1',
    port=3306,
    database='python01',
    user='root',
    password='123456',
    charset='utf8',
)
try:
    # The first column is passed as 0 exactly as before; the remaining five
    # values are bound as parameters so that quotes or other special
    # characters in the scraped text cannot break (or inject into) the SQL.
    insert_sql = "insert into txZp values(0, %s, %s, %s, %s, %s)"
    with conn.cursor() as cursor:
        # zip() walks the five columns in lockstep and stops at the shortest
        # one, so a page with fewer than 10 rows no longer raises IndexError.
        for name_z, caregory, num_people, place, add_time in zip(
            list_zhiwei, list_style, list_num, list_place, list_time
        ):
            cursor.execute(
                insert_sql,
                (name_z, caregory, num_people, place, add_time),
            )
    # Commit once, after every row has been inserted successfully.
    conn.commit()
finally:
    # Always release the connection, even if an insert failed.
    conn.close()
按 Shift+Tab 取消缩进后,错误消失,数据正常添加到 MySQL 表格中,效果如下:
PS:缩进问题实在是不应该出现,写一篇博客提醒自己,以后切记!