一、安装基础的软件包:
1、准备好火狐浏览器,并下载geckodriver,将geckodriver所在的目录加入到PATH环境变量:
下载geckodriver的地址:https://pan.baidu.com/s/1NDo04Zj8NxmlRe90_CDRow
2、下载selenium:
pip install selenium(或:python -m pip install selenium)
二、脚本说明:
"""Automatic Qzone (QQ space) login driven by Selenium and Firefox.

Other scripts import this module as ``qzone_login`` and use
``login_qzone``, ``qzone_url``, ``username`` and ``password``.
"""
from time import sleep

from selenium import webdriver
from selenium.webdriver.common.by import By

# Placeholder credentials: replace with a real account before running.
username = 'xxxxxxxxx'
password = 'xxxxxxxxxx'
qzone_url = 'https://qzone.qq.com/'


def login_qzone(url, username, password, browser_type='Firefox'):
    """Open ``url`` in Firefox and log in with the password form.

    Args:
        url: Page to open. When it does not contain ``'qzone'`` the
            element with class ``qzone`` is clicked first and the driver
            switches to the second browser window.
        username: Text typed into the element with id ``u``.
        password: Text typed into the element named ``p``.
        browser_type: ``'Firefox'`` for a visible window or
            ``'Firefox_headless'`` for headless mode.

    Returns:
        The WebDriver instance, switched back to the top-level document.

    Raises:
        ValueError: If ``browser_type`` is not one of the supported values
            (previously this surfaced as an ``UnboundLocalError``).
    """
    if browser_type == 'Firefox':
        driver = webdriver.Firefox()
    elif browser_type == 'Firefox_headless':
        # ``set_headless()`` and the ``firefox_options=`` keyword no longer
        # exist in Selenium 4; the ``-headless`` argument plus ``options=``
        # works on both Selenium 3.8+ and 4.x.
        firefox_options = webdriver.FirefoxOptions()
        firefox_options.add_argument('-headless')
        driver = webdriver.Firefox(options=firefox_options)
    else:
        raise ValueError(
            "unsupported browser_type %r; expected 'Firefox' or "
            "'Firefox_headless'" % (browser_type,)
        )

    try:
        driver.get(url)
        if 'qzone' not in url:
            driver.find_element(By.CLASS_NAME, 'qzone').click()
            sleep(5)
            # The click opens a second window; continue there.
            driver.switch_to.window(driver.window_handles[1])

        # The login form lives in a nested page, so enter its frame first.
        driver.switch_to.frame('login_frame')
        # Switch from the default login method to account/password login.
        driver.find_element(By.ID, 'switcher_plogin').click()
        driver.find_element(By.ID, 'u').send_keys(username)
        driver.find_element(By.NAME, 'p').send_keys(password)
        driver.find_element(By.ID, 'login_button').click()
        driver.switch_to.default_content()
        # Fixed pause after submitting the form, presumably to let the
        # login complete -- the result itself is not checked here.
        sleep(5)
    except Exception:
        # Do not leave an orphaned Firefox process behind on failure.
        driver.quit()
        raise
    return driver


if __name__ == '__main__':
    login_qzone(qzone_url, username, password)
"""Save the Qzone friend list (QQ number -> displayed name) to a JSON file."""
import json
import time

from selenium import webdriver
from selenium.common.exceptions import StaleElementReferenceException, NoSuchElementException
from selenium.webdriver.common.by import By

import qzone_login as login

# NOTE(review): the QQ number in this URL is hard-coded; presumably it must
# be the number of the account that logs in -- confirm before running.
FRIENDS_URL = 'https://user.qzone.qq.com/949885111/myhome/friends/index'
CACHE_FILE = 'qq_friends.json'
CACHE_MAX_AGE_SECONDS = 86400  # refresh the saved list once it is a day old
MAX_PAGES = 60  # upper bound on how many friend-list pages are visited

qq_number_dict = {}


def save_qq_number(tag_list):
    """Record ``{qq_number: text}`` in ``qq_number_dict`` for each element.

    The QQ number is the last ``/``-separated segment of the element's
    ``href`` attribute. Elements without an ``href`` are skipped instead of
    crashing on ``None.split``.
    """
    for tag in tag_list:
        href = tag.get_attribute('href')
        if not href:
            continue
        qq_number_dict[href.split('/')[-1]] = tag.text


def save_in_file():
    """Collect every friend from the friends page and write ``CACHE_FILE``.

    Logs in with a headless Firefox, walks up to ``MAX_PAGES`` pages of the
    friend list and dumps ``qq_number_dict`` (including a ``save_time``
    timestamp) as JSON. The browser is always closed afterwards.
    """
    qq_number_dict.update({'save_time': time.time()})
    # Use browser_type='Firefox' (the default) to watch the run in a window.
    browser = login.login_qzone(
        login.qzone_url, login.username, login.password,
        browser_type='Firefox_headless',
    )
    try:
        browser.get(FRIENDS_URL)
        browser.switch_to.default_content()
        frame = browser.find_element(By.TAG_NAME, 'iframe')
        browser.switch_to.frame(frame)
        for _ in range(MAX_PAGES):
            try:
                next_button = browser.find_element(
                    By.CLASS_NAME, 'qz-button.btn-pager-next')
            except NoSuchElementException:
                # No "next page" button: harvest this page and stop paging
                # rather than aborting before anything is saved.
                save_qq_number(
                    browser.find_elements(By.CLASS_NAME, 'textoverflow'))
                break
            # Bring the pager button into view before clicking it.
            browser.execute_script(
                "arguments[0].scrollIntoView();", next_button)
            save_qq_number(
                browser.find_elements(By.CLASS_NAME, 'textoverflow'))
            next_button.click()
            time.sleep(1)
    finally:
        browser.quit()

    with open(CACHE_FILE, 'w', encoding='utf-8') as f:
        json.dump(qq_number_dict, f)


def _load_saved():
    """Return the dict stored in ``CACHE_FILE``, or ``None`` if unusable.

    A missing, empty or malformed file yields ``None`` so that the very
    first run can create the file instead of failing on open().
    """
    try:
        with open(CACHE_FILE, 'r', encoding='utf-8') as f:
            saved = json.load(f)
    except (FileNotFoundError, json.JSONDecodeError):
        return None
    return saved if isinstance(saved, dict) else None


if __name__ == "__main__":
    saved = _load_saved()
    if saved is None or time.time() - saved.get('save_time', 0) > CACHE_MAX_AGE_SECONDS:
        save_in_file()
    else:
        # Number of stored entries (this count includes the 'save_time' key).
        print(len(saved))
"""Read a friend's Qzone feed (posts, time, phone model) and like posts."""
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import ElementNotInteractableException, NoSuchFrameException, NoSuchElementException

import qzone_login as login

friend_qzone_url = 'https://user.qzone.qq.com/2453294057'


def judge_login():
    """Check whether the login succeeded (not implemented yet)."""
    pass


def switch_frame(broeser, frame, timeout=60):
    """Switch into ``frame``, retrying every 2 seconds until it exists.

    Args:
        broeser: WebDriver to operate on.
        frame: Frame reference passed to ``switch_to.frame``.
        timeout: Seconds to keep retrying before giving up. The original
            loop retried forever and could hang the script.

    Raises:
        NoSuchFrameException: If the frame is still missing after
            ``timeout`` seconds.
    """
    deadline = time.monotonic() + timeout
    while True:
        try:
            broeser.switch_to.frame(frame)
        except NoSuchFrameException:
            if time.monotonic() >= deadline:
                raise
        else:
            break
        # Wait a little before retrying; the frame may not be loaded yet.
        time.sleep(2)


def get_erroy(browser, tag, return_sign=False):
    """Look up one element by class name, tolerating its absence.

    Args:
        browser: WebDriver or WebElement to search in.
        tag: Class name passed to the locator.
        return_sign: When true, return the element itself; otherwise
            return its text with spaces and newlines removed.

    Returns:
        The element or its cleaned text, or ``''`` when nothing matches.
    """
    try:
        element = browser.find_element(By.CLASS_NAME, tag)
    except NoSuchElementException:
        return ''
    if return_sign:
        return element
    return element.text.replace(' ', '').replace('\n', '')


def analyse_html(broswser):
    """Extract every loaded post from the feed page.

    Returns:
        ``{post_time: [phone_model, post_text]}``. Entries are keyed by the
        displayed time text, so posts showing the same time overwrite each
        other.
    """
    data_dict = {}
    for speak in broswser.find_elements(By.CLASS_NAME, 'f-single.f-s-s'):
        qzone_talk = get_erroy(speak, 'f-info')
        deta_time = get_erroy(speak, 'ui-mr8.state')
        phone_model_tag = get_erroy(speak, 'item', return_sign=True)
        phone_model = ''
        if phone_model_tag != '':
            try:
                phone_model = phone_model_tag.find_element(By.TAG_NAME, 'a').text
            except NoSuchElementException:
                # No link inside the item: leave the phone model empty.
                pass
        print(qzone_talk, phone_model, deta_time)
        data_dict.update({deta_time: [phone_model, qzone_talk]})
    return data_dict


def judge_pop_up(browser):
    """Dismiss the intimacy pop-up if present by clicking its confirm button."""
    confirm_buttons = browser.find_elements(By.CLASS_NAME, 'btn-fs-sure')
    if confirm_buttons:
        confirm_buttons[0].click()


def _try_click(driver, class_name):
    """Find and click one element; return ``(element_or_None, clicked)``."""
    try:
        element = driver.find_element(By.CLASS_NAME, class_name)
    except NoSuchElementException:
        return None, False
    try:
        element.click()
    except ElementNotInteractableException:
        return element, False
    return element, True


def get_all_data(friend_url):
    """Log in, open ``friend_url`` and load the friend's whole feed.

    Keeps clicking the "more" button inside the feed frame until no element
    with class ``b-inline.data_no_more.none`` is left.

    Returns:
        The WebDriver, positioned inside the feed frame, for use with
        ``analyse_html`` or ``give_like``. The caller must quit it.
    """
    # Use browser_type='Firefox' (the default) to watch the run in a window.
    driver = login.login_qzone(
        login.qzone_url, login.username, login.password,
        browser_type='Firefox_headless',
    )
    try:
        driver.get(friend_url)
        driver.switch_to.default_content()
        time.sleep(4)  # give the intimacy pop-up time to appear
        judge_pop_up(driver)
        driver.find_element(By.ID, "aOwnerFeeds").click()
        driver.switch_to.default_content()
        switch_frame(driver, 'app_canvas_frame')  # first (outer) frame
        switch_frame(driver, 'frameFeedList')  # frame holding the feed

        while True:
            target, clicked = _try_click(driver, 'b-inline.data_btn_more')
            if not clicked:
                # "More" button missing or not clickable: try the loading
                # indicator instead, then give the page time to load.
                fallback, _ = _try_click(driver, 'b-inline.data_is_loading')
                if fallback is not None:
                    target = fallback
                time.sleep(3)
            if target is not None:
                # Scroll to the last element we touched so the page keeps
                # loading; skipped when neither element was found (this
                # used to fail on an unbound variable).
                driver.execute_script(
                    "arguments[0].scrollIntoView(false);", target)
            # Stop once this marker can no longer be found.
            if not driver.find_elements(
                    By.CLASS_NAME, 'b-inline.data_no_more.none'):
                break
            time.sleep(2)
    except Exception:
        # Do not leave an orphaned Firefox process behind on failure.
        driver.quit()
        raise
    return driver


def give_like(browser, interval=60):
    """Click every like button on the loaded feed.

    Args:
        browser: WebDriver positioned inside the feed frame.
        interval: Seconds to wait after each like. Liking too quickly gets
            the account frozen, hence the slow default.
    """
    for like in browser.find_elements(By.CLASS_NAME, 'item.qz_like_btn_v3'):
        browser.execute_script("arguments[0].scrollIntoView(false);", like)
        like.click()
        time.sleep(interval)


if __name__ == '__main__':
    driver = get_all_data(friend_qzone_url)
    try:
        data = analyse_html(driver)
        print(data)
    finally:
        driver.quit()
上面三个脚本包括:
1、第一个是登录的脚本(需保存为qzone_login.py,供后两个脚本导入),可以选择使用火狐的无界面(headless)模式。
2、第二个脚本使用第一个脚本登录后,获取空间里的QQ好友信息(QQ号:好友备注),并保存到qq_friends.json文件里。
3、第三个使用第一个登录后,有两个功能:
1.得到该好友的所有动态的内容(只包括说说内容、发表的时间和使用的手机型号)
2.可以给好友点赞。注:点太快了会被冻结
注:以上只是学习selenium所写的小脚本,可别用于非法用途。