import math
import re

# Scrape the paginated new-home ("loupan") listing and write one record per
# line to the output file.
#
# NOTE(review): the output path below is an empty string and the page URLs
# have no scheme/host. open() rejects an empty path and requests needs an
# absolute URL, so both must be filled in with the real values before this
# can run. They are left untouched here because the intended values are not
# visible in this file.
with open("", "w", encoding='utf-8') as f:
    # Fall back to a single page when the pager cannot be parsed.
    total_page = 1
    loupan_list = []
    page = '/loupan/'
    # Build the request headers.
    headers = create_headers()
    # Fetch the first listing page and parse it.
    response = requests.get(page, timeout=10, headers=headers)
    html = response.content
    soup = BeautifulSoup(html, "lxml")

    # Work out the number of pages from the pager's data-total-count attribute.
    try:
        page_box = soup.find_all('div', class_='page-box')[0]
        matches = re.search(r'.*data-total-count="(\d+)".*', str(page_box))
        # Round up: the division by 10 treats a page as 10 listings, and a
        # trailing partial page still has to be visited.
        total_page = math.ceil(int(matches.group(1)) / 10)
    except (IndexError, AttributeError) as e:
        # IndexError: no page-box div; AttributeError: attribute not found
        # (re.search returned None). Keep the single-page default.
        print(e)
    print('总页数:' + str(total_page))

    # Build the request headers again for the per-page requests.
    headers = create_headers()
    # Walk every page, starting from page 1.
    for i in range(1, total_page + 1):
        page = '/loupan/pg{0}'.format(i)
        print(page)
        response = requests.get(page, timeout=10, headers=headers)
        html = response.content
        soup = BeautifulSoup(html, "lxml")

        # Each listing is an <li class="resblock-list">.
        house_elements = soup.find_all('li', class_="resblock-list")
        for house_elem in house_elements:
            price_tag = house_elem.find('span', class_="number")
            desc_tag = house_elem.find('span', class_="desc")
            total_tag = house_elem.find('div', class_="second")
            name_tag = house_elem.find('a', class_='name')

            # A listing without a name anchor cannot form a usable record;
            # skip it instead of aborting the scrape before anything has
            # been written.
            if name_tag is None:
                continue
            # NOTE(review): the receiver of .replace() was missing in the
            # original; the tag's text is assumed, by analogy with the
            # price handling below -- confirm.
            name = name_tag.text.replace("\n", "")

            # Unit price plus its description; '0' when either tag is absent
            # (find() returned None).
            try:
                price = price_tag.text.strip() + desc_tag.text.strip()
            except AttributeError:
                price = '0'

            # Total price with the surrounding label text stripped; '0' when
            # the tag is absent.
            # NOTE(review): the expression before .replace() was missing in
            # the original; the stripped tag text is assumed -- confirm.
            try:
                total = total_tag.text.strip().replace('总价', '')
                total = total.replace('/套起', '')
            except AttributeError:
                total = '0'

            # Wrap the cleaned fields in a record object.
            house = NewHouse(name, price, total)
            # NOTE(review): the serializer call was missing in the original
            # (only "()" survived); NewHouse.text() is assumed to return the
            # record as a single line of text -- confirm against NewHouse.
            print(house.text())
            loupan_list.append(house)

    # Write every collected record, one per line.
    f.writelines(house.text() + "\n" for house in loupan_list)