# coding=utf-8
from selenium import webdriver
import json
import time
import pymongo
class Douyu:
def __init__(self):
self.driver = webdriver.Chrome()
# 发送首页请求
self.driver.get("https://www.douyu.com/directory/all")
self.host = '127.0.0.1'
self.port = 27017
self.DBname = 'douyu'
def get_content(self):
time.sleep(3)
li_list = self.driver.find_elements_by_xpath('//ul[@id="live-list-contentbox"]/li')
# print(li_list)
contents = []
# 遍历房间列表
for i in li_list:
item = {}
# 获取房间图片
item['img'] = i.find_element_by_xpath('./a//img').get_attribute("src")
# 获取房间名字
item['title'] = i.find_element_by_xpath('./a').get_attribute("title")
# 获取房间分类
item['category'] = i.find_element_by_xpath('./a/div[@class="mes"]/div/span').text
# 获取主播名字
item['name'] = i.find_element_by_xpath("./a/div[@class='mes']/p/span[1]").text
# 观看人数
item['watch_num'] = i.find_element_by_xpath("./a/div[@class='mes']/p/span[2]").text
# print(item)
contents.append(item)
return contents
# 保存到MongoDB
def save_content(self, contents):
# 创建MongoDB连接
client = pymongo.MongoClient(host=self.host, port=self.port)
# 指向指定的数据库
mdb = client[self.DBname]
self.post = mdb[self.DBname]
self.post.insert(contents)
# 保存到本地
# def save_content(self, contents):
# with open("douyu.json", "a") as f:
# for content in contents:
# json.dump(content, f, ensure_ascii=False, indent=2)
# f.write(',\n')
def run(self):
# 1.发送首页请求
# 2.获取首页信息
contents = self.get_content()
# 3.保存内容
self.save_content(contents)
# 4.循环 点击下一页按钮,直到下一页对应的class名字不再是"shark-pager-next"
# 判断有没有下一页
while self.driver.find_element_by_class_name("shark-pager-next"):
# 5.点击下一页按钮
self.driver.find_element_by_class_name("shark-pager-next").click()
# 6.获取下一页的内容
contents = self.get_content()
# 7.保存内容
self.save_content(contents)
if __name__ == '__main__':
douyu = Douyu()
douyu.run()