知识点一:Selenium库详解及其基本使用
什么是Selenium
selenium 是一套完整的web应用程序测试系统,包含了测试的录制(selenium IDE),编写及运行(Selenium Remote Control)和测试的并行处理(Selenium Grid)。
selenium的核心Selenium Core基于JsUnit,完全由JavaScript编写,因此可以用于任何支持JavaScript的浏览器上。
selenium可以模拟真实浏览器,自动化测试工具,支持多种浏览器,爬虫中主要用来解决JavaScript渲染问题。
基本使用
用python写爬虫的时候,主要用的是selenium的Webdriver,我们可以通过下面的方式先看看Selenium.Webdriver支持哪些浏览器
-
基本用法:
#打开google浏览器,再打开百度,输入Python然后按回车
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait chromedriver = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver"
browser = webdriver.Chrome(chromedriver)
try:
browser.get('https://www.baidu.com')
input = browser.find_element_by_id('kw') # 找到id为kw的元素
input.send_keys('Python') # 敲入Python
input.send_keys(Keys.ENTER) # 敲入回车
wait = WebDriverWait(browser, 10)
wait.until(EC.presence_of_element_located((By.ID, 'content_left'))) # 等待某个元素加载出来
print(browser.current_url)
print(browser.get_cookies())
print(browser.page_source) # 网页源代码
finally:
browser.close()https://www.baidu.com/s?ie=utf-8&f=8&rsv_bp=0&rsv_idx=1&tn=baidu&wd=Python&rsv_pq=aa95af1b0000f45e&rsv_t=53e2qpjWA%2FivTq6GgdT4FAhWM%2FWfkPZhHYkLoaS7MVPQUZCHF%2FkLQV2%2Brnc&rqlang=cn&rsv_enter=1&rsv_sug3=6&rsv_sug2=0&inputT=111&rsv_sug4=112
[{'domain': '.baidu.com', 'httpOnly': False, 'name': 'H_PS_PSSID', 'path': '/', 'secure': False, 'value': '1464_21121_26922_22159'}, {'domain': '.baidu.com', 'expiry': 3681286522.89887, 'httpOnly': False, 'name': 'BAIDUID', 'path': '/', 'secure': False, 'value': '410FC9CBBC1B798C0BEF149D5C0BD4E4:FG=1'}, {'domain': '.baidu.com', 'expiry': 3681286522.899111, 'httpOnly': False, 'name': 'BIDUPSID', 'path': '/', 'secure': False, 'value': '410FC9CBBC1B798C0BEF149D5C0BD4E4'}, {'domain': '.baidu.com', 'expiry': 3681286522.899241, 'httpOnly': False, 'name': 'PSTM', 'path': '/', 'secure': False, 'value': ''}, {'domain': '.baidu.com', 'httpOnly': False, 'name': 'PSINO', 'path': '/', 'secure': False, 'value': ''}, {'domain': 'www.baidu.com', 'expiry': 2479882880.172246, 'httpOnly': False, 'name': 'delPer', 'path': '/', 'secure': False, 'value': ''}, {'domain': 'www.baidu.com', 'httpOnly': False, 'name': 'BD_HOME', 'path': '/', 'secure': False, 'value': ''}, {'domain': 'www.baidu.com', 'expiry': 1534666877, 'httpOnly': False, 'name': 'BD_UPN', 'path': '/', 'secure': False, 'value': ''}, {'domain': 'www.baidu.com', 'httpOnly': False, 'name': 'BD_CK_SAM', 'path': '/', 'secure': False, 'value': ''}, {'domain': 'www.baidu.com', 'expiry': 1533805472, 'httpOnly': False, 'name': 'H_PS_645EC', 'path': '/', 'secure': False, 'value': 'c9083HSTzGdEsBVBx%2FDjhaEep8Lu5MHd8KusVOaaun2nj5W%2Bjur8%2BSHut%2BM'}]
<!DOCTYPE html><!--STATUS OK--><html xmlns="http://www.w3.org/1999/xhtml"><head><script charset="utf-8" async="" src="https://ss0.bdstatic.com/-0U0bnSm1A5BphGlnYG/tam-ogel/5d4e9b24-dcc5-483a-b6da-be1e9e621891.js"></script>获得的结果
-
声明浏览器对象(上面我们知道了selenium支持很多的浏览器,但是如果想要声明并调用浏览器则需要:)
from selenium import webdriver browser = webdriver.Chrome()
browser = webdriver.Firefox()
browser = webdriver.Edge()
browser = webdriver.PhantomJS()
browser = webdriver.Safari() -
访问页面
from selenium import webdriver chromedriver = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver"
browser = webdriver.Chrome(chromedriver)
browser.get('https://www.taobao.com')
print(browser.page_source)
browser.close()<!DOCTYPE html><html xmlns="http://www.w3.org/1999/xhtml" lang="zh-CN" class="ks-webkit537 ks-webkit ks-chrome68 ks-chrome"><head><script>/*! 2018-08-07 14:58:00 v8.5.7 */
!function(e){function i(n){if(o[n])return o[n].exports;var r=o[n]={exports:{},id:n,loaded:!1};return e[n].call(r.exports,r,r.exports,i),r.loaded=!0,r.exports}var o={};return i.m=e,i.c=o,i.p="",i(0)}([function(e,i){"use strict";var o=window,n=document;!function(){var e=2,r="ali_analytics";if(o[r]&&o[r].ua&&e<=o[r].ua.version)return void(i.info=o[r].ua);var t,a,d,s,c,u,h,l,m,b,f,v,p,w,g,x,z,O=o.navigator,k=O.appVersion,T=O&&O.userAgent||"",y=function(e){var i=0;return parseFloat(e.replace(/\./g,function(){return 0===i++?".":""}))},_=function(e,i){var o,n;i[o="trident"]=.1,(n=e.match(/Trident\/([\d.]*)/))&&n[1]&&(i[o]=y(n[1])),i.core=o},N=function(e){var i,o;return(i=e.match(/MSIE ([^;]*)|Trident.*; rv(?:\s|:)?([0-9.]+)/))&&(o=i[1]||i[2])?y(o):0},P=function(e){return e||"other"},M=function(e){function i(){for(var i=[["Windows NT 5.1","winXP"],["Windows NT 6.1","win7"],["Windows NT 6.0","winVista"],["Windows NT 6.2","win8"],["Windows NT 10.0","win10"],["iPad","ios"],["iPhone;","ios"],["iPod","ios"],["Macintosh","mac"],["Android","android"],["Ubuntu","ubuntu"],["Linux","linux"],["Windows NT 5.2","win2003"],["Windows NT 5.0","win2000"],["Windows","winOther"],["rhino","rhino"]],o=0,n=i.length;o<n;++o)if(e.indexOf(i[o][0])!==-1)return i[o][1];return"other"}function r(e,i,n,r){var t,a=o.navigator.mimeTypes;try{for(t in a)if(a.hasOwnProperty(t)&&a[t][e]==i){if(void 0!==n&&r.test(a[t][n]))return!0;if(void 0===n)return!0}return!1}catch(e){return!1}}var t,a,d,s,c,u,h,l="",m=l,b=l,f=[6,9],v="{{version}}",p="<!--[if IE "+v+"]><s></s><![endif]-->",w=n&&n.createElement("div"),g=[],x={webkit:void 0,edge:void 0,trident:void 0,gecko:void 0,presto:void 0,chrome:void 0,safari:void 0,firefox:void 0,ie:void 0,ieMode:void 0,opera:void 0,mobile:void 0,core:void 0,shell:void 0,phantomjs:void 0,os:void 0,ipad:void 0,iphone:void 0,ipod:void 0,ios:void 0,android:void 0,nodejs:void 0,extraName:void 0,extraVersion:void 0};if(w&&w.getElementsByTagName&&(w.innerHTML=p.replace(v,""),g=w.getElementsByTagName("s")),g.length>0){for(_(e,x),s=f[0],c=f[1];s<=c;s++)if(w.innerHTML=p.replace(v,s),g.length>0){x[b="ie"]=s;break}!x.ie&&(d=N(e))&&(x[b="ie"]=d)}else((a=e.match(/AppleWebKit\/*\s*([\d.]*)/i))||(a=e.match(/Safari\/([\d.]*)/)))&&a[1]?(x[m="webkit"]=y(a[1]),(a=e.match(/OPR\/(\d+\.\d+)/))&&a[1]?x[b="opera"]=y(a[1]):(a=e.match(/Chrome\/([\d.]*)/))&&a[1]?x[b="chrome"]=y(a[1]):(a=e.match(/\/([\d.]*) Safari/))&&a[1]?x[b="safari"]=y(a[1]):x.safari=x.webkit,(a=e.match(/Edge\/([\d.]*)/))&&a[1]&&(m=b="edge",x[m]=y(a[1])),/ Mobile\//.test(e)&&e.match(/iPad|iPod|iPhone/)?(x.mobile="apple",a=e.match(/OS ([^\s]*)/),a&&a[1]&&(x.ios=y(a[1].replace("_","."))),t="ios"。。。。。。。。。。。获得的结果
-
查找元素
-
单个元素
#element
from selenium import webdriver chromedriver = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver"
browser = webdriver.Chrome(chromedriver)
browser.get('https://www.taobao.com')
input_first = browser.find_element_by_id('q') # 寻找 id='q'的元素
input_second = browser.find_element_by_css_selector('#q') # 通过css选择器选择
input_third = browser.find_element_by_xpath('//*[@id="q"]') # 同上
print(input_first, input_second, input_third)
browser.close()
#这里我们通过三种不同的方式去获取响应的元素,第一种是通过id
的方式,第二个中是CSS
选择器,第三种是xpath
选择器,结果都是相同的。<selenium.webdriver.remote.webelement.WebElement (session="49795084aea9c702e52e3464a2e602bf", element="0.06918141330676386-1")> <selenium.webdriver.remote.webelement.WebElement (session="49795084aea9c702e52e3464a2e602bf", element="0.06918141330676386-1")> <selenium.webdriver.remote.webelement.WebElement (session="49795084aea9c702e52e3464a2e602bf", element="0.06918141330676386-1")>
获得的结果
另一种方式:
from selenium import webdriver
from selenium.webdriver.common.by import By chromedriver = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver"
browser = webdriver.Chrome(chromedriver)
browser.get('https://www.taobao.com')
input_first = browser.find_element(By.ID, 'q')
print(input_first)
browser.close()另一种方式
<selenium.webdriver.remote.webelement.WebElement (session="2706eb775a80e5eb5af01293caaf84e1", element="0.30861434960698597-1")>
--->获得的结果
查找单个元素的方法
* find_element_by_name
* find_element_by_xpath
* find_element_by_link_text
* find_element_by_partial_link_text
* find_element_by_tag_name
* find_element_by_class_name
* find_element_by_css_selector -
查找多个元素
#elements
from selenium import webdriver chromedriver = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver"
browser = webdriver.Chrome(chromedriver)
browser.get('https://www.taobao.com')
lis = browser.find_elements_by_css_selector('.service-bd li')
print(lis)
browser.close()[<selenium.webdriver.remote.webelement.WebElement (session="1fd716bf2cf93a91caa11907d2ad67c7", element="0.7320173413952247-1")>, <selenium.webdriver.remote.webelement.WebElement (session="1fd716bf2cf93a91caa11907d2ad67c7", element="0.7320173413952247-2")>, <selenium.webdriver.remote.webelement.WebElement (session="1fd716bf2cf93a91caa11907d2ad67c7", element="0.7320173413952247-3")>, <selenium.webdriver.remote.webelement.WebElement (session="1fd716bf2cf93a91caa11907d2ad67c7", element="0.7320173413952247-4")>, <selenium.webdriver.remote.webelement.WebElement (session="1fd716bf2cf93a91caa11907d2ad67c7", element="0.7320173413952247-5")>, <selenium.webdriver.remote.webelement.WebElement (session="1fd716bf2cf93a91caa11907d2ad67c7", element="0.7320173413952247-6")>, <selenium.webdriver.remote.webelement.WebElement (session="1fd716bf2cf93a91caa11907d2ad67c7", element="0.7320173413952247-7")>, <selenium.webdriver.remote.webelement.WebElement (session="1fd716bf2cf93a91caa11907d2ad67c7", element="0.7320173413952247-8")>, <selenium.webdriver.remote.webelement.WebElement (session="1fd716bf2cf93a91caa11907d2ad67c7", element="0.7320173413952247-9")>, <selenium.webdriver.remote.webelement.WebElement (session="1fd716bf2cf93a91caa11907d2ad67c7", element="0.7320173413952247-10")>, <selenium.webdriver.remote.webelement.WebElement (session="1fd716bf2cf93a91caa11907d2ad67c7", element="0.7320173413952247-11")>, <selenium.webdriver.remote.webelement.WebElement (session="1fd716bf2cf93a91caa11907d2ad67c7", element="0.7320173413952247-12")>, <selenium.webdriver.remote.webelement.WebElement (session="1fd716bf2cf93a91caa11907d2ad67c7", element="0.7320173413952247-13")>, <selenium.webdriver.remote.webelement.WebElement (session="1fd716bf2cf93a91caa11907d2ad67c7", element="0.7320173413952247-14")>, <selenium.webdriver.remote.webelement.WebElement (session="1fd716bf2cf93a91caa11907d2ad67c7", element="0.7320173413952247-15")>, <selenium.webdriver.remote.webelement.WebElement (session="1fd716bf2cf93a91caa11907d2ad67c7", element="0.7320173413952247-16")>]
获得的结果
另一种方式:
from selenium import webdriver
from selenium.webdriver.common.by import By chromedriver = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver"
browser = webdriver.Chrome(chromedriver)
browser.get('https://www.taobao.com')
lis = browser.find_elements(By.CSS_SELECTOR, '.service-bd li')
print(lis)
browser.close()另一种方式
[<selenium.webdriver.remote.webelement.WebElement (session="f8d8a0f755dde2eecb7de8970c44d9f5", element="0.5842041920354057-1")>, <selenium.webdriver.remote.webelement.WebElement (session="f8d8a0f755dde2eecb7de8970c44d9f5", element="0.5842041920354057-2")>, <selenium.webdriver.remote.webelement.WebElement (session="f8d8a0f755dde2eecb7de8970c44d9f5", element="0.5842041920354057-3")>, <selenium.webdriver.remote.webelement.WebElement (session="f8d8a0f755dde2eecb7de8970c44d9f5", element="0.5842041920354057-4")>, <selenium.webdriver.remote.webelement.WebElement (session="f8d8a0f755dde2eecb7de8970c44d9f5", element="0.5842041920354057-5")>, <selenium.webdriver.remote.webelement.WebElement (session="f8d8a0f755dde2eecb7de8970c44d9f5", element="0.5842041920354057-6")>, <selenium.webdriver.remote.webelement.WebElement (session="f8d8a0f755dde2eecb7de8970c44d9f5", element="0.5842041920354057-7")>, <selenium.webdriver.remote.webelement.WebElement (session="f8d8a0f755dde2eecb7de8970c44d9f5", element="0.5842041920354057-8")>, <selenium.webdriver.remote.webelement.WebElement (session="f8d8a0f755dde2eecb7de8970c44d9f5", element="0.5842041920354057-9")>, <selenium.webdriver.remote.webelement.WebElement (session="f8d8a0f755dde2eecb7de8970c44d9f5", element="0.5842041920354057-10")>, <selenium.webdriver.remote.webelement.WebElement (session="f8d8a0f755dde2eecb7de8970c44d9f5", element="0.5842041920354057-11")>, <selenium.webdriver.remote.webelement.WebElement (session="f8d8a0f755dde2eecb7de8970c44d9f5", element="0.5842041920354057-12")>, <selenium.webdriver.remote.webelement.WebElement (session="f8d8a0f755dde2eecb7de8970c44d9f5", element="0.5842041920354057-13")>, <selenium.webdriver.remote.webelement.WebElement (session="f8d8a0f755dde2eecb7de8970c44d9f5", element="0.5842041920354057-14")>, <selenium.webdriver.remote.webelement.WebElement (session="f8d8a0f755dde2eecb7de8970c44d9f5", element="0.5842041920354057-15")>, <selenium.webdriver.remote.webelement.WebElement (session="f8d8a0f755dde2eecb7de8970c44d9f5", element="0.5842041920354057-16")>]
--->获得的结果
一些其他的方式:
find_elements_by_name
find_elements_by_xpath
find_elements_by_link_text
find_elements_by_partial_link_text
find_elements_by_tag_name
find_elements_by_class_name
find_elements_by_css_selector
-
-
元素交互操作
先获取元素。
对获取的元素调用交互方法from selenium import webdriver
import time chromedriver = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver"
browser = webdriver.Chrome(chromedriver)
browser.get('https://www.taobao.com')
input = browser.find_element_by_id('q') # 找到搜索框
input.send_keys('iPhone') # 输入 iPhone
time.sleep(1) # 等待一秒
input.clear() # 清空文本框
input.send_keys('iPad') # 输入ipad
button = browser.find_element(By.CLASS_NAME,'btn-search') # 找到搜索按钮
button.click() # 点击
browser.close()更多操作: http://selenium-python.readthedocs.io/api.html#module-selenium.webdriver.remote.webelement
-
交互动作(将动作附加到动作链中串行执行)
from selenium import webdriver
from selenium.webdriver import ActionChains chromedriver = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver"
browser = webdriver.Chrome(chromedriver) # 声明浏览器对象
url = 'http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable'
browser.get(url)
browser.switch_to.frame('iframeResult') #切换到‘iframeResult’框架
source = browser.find_element_by_css_selector('#draggable') # 选择元素
target = browser.find_element_by_css_selector('#droppable') # 选择元素
actions = ActionChains(browser) # 声明一个动作链对象
actions.drag_and_drop(source, target) # 拖拽方法
actions.perform() # 执行动作更多操作: http://selenium-python.readthedocs.io/api.html#module-selenium.webdriver.common.action_chains
-
执行JavaScript(执行一些交互动作时,可能一些动作没有提供API。)
#把网页拉到最下面,并提示
from selenium import webdriver chromedriver = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver"
browser = webdriver.Chrome(chromedriver) # 声明浏览器对象 browser.get('https://www.zhihu.com/explore')
browser.execute_script('window.scrollTo(0, document.body.scrollHeight)')
browser.execute_script('alert("To Bottom")') -
获取元素信息
-
获取属性:*.get_attribute(“class”) *
from selenium import webdriver
from selenium.webdriver import ActionChains chromedriver = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver"
browser = webdriver.Chrome(chromedriver) # 声明浏览器对象
url = 'https://www.zhihu.com/explore'
browser.get(url)
logo = browser.find_element_by_id('zh-top-link-logo')
print(logo)
print(logo.get_attribute('class'))
browser.close()<selenium.webdriver.remote.webelement.WebElement (session="d5564ba9ec58a9a53015648f61a56bd7", element="0.46068206240103504-1")>
zu-top-link-logo获得的结果
-
获取文本值:*text*
from selenium import webdriver chromedriver = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver"
browser = webdriver.Chrome(chromedriver) # 声明浏览器对象
url = 'https://www.zhihu.com/explore'
browser.get(url)
input = browser.find_element_by_class_name('zu-top-add-question')
print(input.text)
#提问 -
获取ID、位置、标签名、大小
from selenium import webdriver chromedriver = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver"
browser = webdriver.Chrome(chromedriver) # 声明浏览器对象
url = 'https://www.zhihu.com/explore'
browser.get(url)
input = browser.find_element_by_class_name('zu-top-add-question')
print(input.id)
print(input.location)
print(input.tag_name)
print(input.size)0.0866191825002236-1
{'x': 759, 'y': 7}
button
{'height': 32, 'width': 66}获得的结果
-
Frame
在很多网页中都是有Frame标签,所以我们爬取数据的时候就涉及到切入到frame中以及切出来的问题,通过下面的例子演示
这里常用的是switch_to.from()和switch_to.parent_frame()
父级的frame要查找子级的frame必须要切换到子frame,否则不能查找。
同理子frame也不能查找父frame的元素。import time
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException chromedriver = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver"
browser = webdriver.Chrome(chromedriver)
url = 'http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable' # 声明网址
browser.get(url) # get传入网址
browser.switch_to.frame('iframeResult') # 切换到子frame
source = browser.find_element_by_css_selector('#draggable') # 查找子frame中的元素
print(source)
try:
logo = browser.find_element_by_class_name('logo')
except NoSuchElementException:
print('NO LOGO')
browser.switch_to.parent_frame() # 切换到父frame
logo = browser.find_element_by_class_name('logo')
print(logo)
print(logo.text)<selenium.webdriver.remote.webelement.WebElement (session="7d3651ff77a4266e974a48387a562fe1", element="0.7541880746473664-1")>
NO LOGO
<selenium.webdriver.remote.webelement.WebElement (session="7d3651ff77a4266e974a48387a562fe1", element="0.04296955550550363-2")>
RUNOOB.COM获得的结果
-
-
等待
当使用了隐式等待执行测试的时候,如果WebDriver没有在 DOM中找到元素,将继续等待,超出设定时间后则抛出找不到元素的异常, 换句话说,当查找元素或元素并没有立即出现的时候,
隐式等待将等待一段时间再查找DOM,默认的时间是0-
隐式等待
到了一定的时间发现元素还没有加载,则继续等待我们指定的时间,如果超过了我们指定的时间还没有加载就会抛出异常,如果没有需要等待的时候就已经加载完毕就会立即执行
from selenium import webdriver chromedriver = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver"
browser = webdriver.Chrome(chromedriver)
browser.implicitly_wait(10) # 传入隐式等待的时间
browser.get('https://www.zhihu.com/explore')
input = browser.find_element_by_class_name('zu-top-add-question')
print(input)<selenium.webdriver.remote.webelement.WebElement (session="7c232ca91150077a6d9ded5a653ebf6d", element="0.019205089543862464-1")>
获得的结果
-
显式等待:* .WebDriverWait(对象,最长等待时间)*
指定一个等待条件,并且指定一个最长等待时间,会在这个时间内进行判断是否满足等待条件,如果成立就会立即返回,如果不成立,就会一直等待,直到等待你指定的最长等待时间,
如果还是不满足,就会抛出异常,如果满足了就会正常返回from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC chromedriver = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver"
browser = webdriver.Chrome(chromedriver)
browser.get('https://www.taobao.com/')
wait = WebDriverWait(browser, 10) # 声明一个等待对象
# 判断元素是否加载
input = wait.until(EC.presence_of_element_located((By.ID, 'q'))) # 传入等待条件
# # 判断是否可点击的,一般用来判断是否为按钮
button = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '.btn-search')))
print(input, button)<selenium.webdriver.remote.webelement.WebElement (session="ac4d3c5db3d4f3aae7b828ad648e87df", element="0.49320541507860316-1")> <selenium.webdriver.remote.webelement.WebElement (session="ac4d3c5db3d4f3aae7b828ad648e87df", element="0.49320541507860316-2")>
获得的结果
上述的例子中的条件:
EC.presence_of_element_located()
是确认元素是否已经出现了EC.element_to_be_clickable()
是确认元素是否是可点击的常用的判断条件:
title_is
标题是某内容title_contains
标题包含某内容presence_of_element_located
元素加载出,传入定位元组,如(By.ID, 'p')
visibility_of_element_located
元素可见,传入定位元组visibility_of
可见,传入元素对象presence_of_all_elements_located
所有元素加载出text_to_be_present_in_element
某个元素文本包含某文字text_to_be_present_in_element_value
某个元素值包含某文字frame_to_be_available_and_switch_to_it frame
加载并切换invisibility_of_element_located
元素不可见element_to_be_clickable
元素可点击staleness_of
判断一个元素是否仍在DOM
,可判断页面是否已经刷新element_to_be_selected
元素可选择,传元素对象element_located_to_be_selected
元素可选择,传入定位元组element_selection_state_to_be
传入元素对象以及状态,相等返回True
,否则返回False
element_located_selection_state_to_be
传入定位元组以及状态,相等返回True
,否则返回False
alert_is_present
是否出现Alert
更多操作参考:http://selenium-python.readthedocs.io/api.html#module-selenium.webdriver.support.expected_conditions
-
-
前进后退:*back();forward()*
#打开谷歌浏览器,然后打开白队首页,在打开淘宝,知乎首页,返回淘宝页面等待一秒在回到知乎界面
import time
from selenium import webdriver chromedriver = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver"
browser = webdriver.Chrome(chromedriver)
browser.get('https://www.baidu.com/')
browser.get('https://www.taobao.com/')
browser.get('https://www.zhihu.com/')
browser.back()
time.sleep(1)
browser.forward()
browser.close() -
Cookies:*get_cookies();delete_all_cookes();add_cookie()*
from selenium import webdriver chromedriver = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver"
browser = webdriver.Chrome(chromedriver)
browser.get('https://www.zhihu.com/explore')
print(browser.get_cookies())
browser.add_cookie({'name': 'name', 'domain': 'www.zhihu.com', 'value': 'germey'})
print(browser.get_cookies())
browser.delete_all_cookies()
print(browser.get_cookies())[{'domain': '.zhihu.com', 'httpOnly': False, 'name': 'l_n_c', 'path': '/', 'secure': False, 'value': ''}, {'domain': 'www.zhihu.com', 'expiry': 1533802053.671843, 'httpOnly': False, 'name': 'tgw_l7_route', 'path': '/', 'secure': False, 'value': '156dfd931a77f9586c0da07030f2df36'}, {'domain': '.zhihu.com', 'expiry': 1533802958, 'httpOnly': False, 'name': '__utmb', 'path': '/', 'secure': False, 'value': '51854390.0.10.1533801158'}, {'domain': '.zhihu.com', 'expiry': 1628409153.672149, 'httpOnly': False, 'name': 'q_c1', 'path': '/', 'secure': False, 'value': '5fdfbadd6fc347398cc287ebb19a383c|1533801155000|1533801155000'}, {'domain': 'www.zhihu.com', 'httpOnly': False, 'name': '_xsrf', 'path': '/', 'secure': False, 'value': 'e58e3a1c1f41931b44a67c2b426c2f1e'}, {'domain': '.zhihu.com', 'expiry': 1536393153.672288, 'httpOnly': False, 'name': 'r_cap_id', 'path': '/', 'secure': False, 'value': '"MjI4MWJhNjU3YTRiNGFkMmE0OTkxNjk5MDMxZjc4NTU=|1533801155|61d044a51d2c7f0c8a99b9efcdec2f929a626003"'}, {'domain': '.zhihu.com', 'expiry': 1536393153.672375, 'httpOnly': False, 'name': 'cap_id', 'path': '/', 'secure': False, 'value': '"OTQ3Mjc1NmI0ZWEyNDFiOTkwYjAxYmY5NmU5Y2YyZGY=|1533801155|e2f66995a8136044730315e95ab3230932d3e85a"'}, {'domain': '.zhihu.com', 'expiry': 1536393153.672453, 'httpOnly': False, 'name': 'l_cap_id', 'path': '/', 'secure': False, 'value': '"NzRiMmNiMGQ2ZjYwNGIwM2I2Nzg0OTExNGUxZTJmZTA=|1533801155|9ad2a89322a8cfaf04770121c359a1de82636336"'}, {'domain': '.zhihu.com', 'httpOnly': False, 'name': 'n_c', 'path': '/', 'secure': False, 'value': ''}, {'domain': '.zhihu.com', 'expiry': 1628409157.390474, 'httpOnly': False, 'name': 'd_c0', 'path': '/', 'secure': False, 'value': '"ALDleMF-Bw6PTmu-31M1eULrY7jjkGvu9tA=|1533801158"'}, {'domain': '.zhihu.com', 'expiry': 1596873157, 'httpOnly': False, 'name': '_zap', 'path': '/', 'secure': False, 'value': '084e48e3-598e-4d8a-85b4-4def087fc321'}, {'domain': '.zhihu.com', 'expiry': 1596873158, 'httpOnly': False, 'name': '__utma', 'path': '/', 'secure': False, 'value': '51854390.543124770.1533801158.1533801158.1533801158.1'}, {'domain': '.zhihu.com', 'httpOnly': False, 'name': '__utmc', 'path': '/', 'secure': False, 'value': ''}, {'domain': '.zhihu.com', 'expiry': 1549569158, 'httpOnly': False, 'name': '__utmz', 'path': '/', 'secure': False, 'value': '51854390.1533801158.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)'}, {'domain': '.zhihu.com', 'expiry': 1596873158, 'httpOnly': False, 'name': '__utmv', 'path': '/', 'secure': False, 'value': '51854390.000--|3=entry_date=20180809=1'}]
[{'domain': '.zhihu.com', 'httpOnly': False, 'name': 'l_n_c', 'path': '/', 'secure': False, 'value': ''}, {'domain': 'www.zhihu.com', 'expiry': 1533802053.671843, 'httpOnly': False, 'name': 'tgw_l7_route', 'path': '/', 'secure': False, 'value': '156dfd931a77f9586c0da07030f2df36'}, {'domain': '.zhihu.com', 'expiry': 1533802958, 'httpOnly': False, 'name': '__utmb', 'path': '/', 'secure': False, 'value': '51854390.0.10.1533801158'}, {'domain': '.zhihu.com', 'expiry': 1628409153.672149, 'httpOnly': False, 'name': 'q_c1', 'path': '/', 'secure': False, 'value': '5fdfbadd6fc347398cc287ebb19a383c|1533801155000|1533801155000'}, {'domain': 'www.zhihu.com', 'httpOnly': False, 'name': '_xsrf', 'path': '/', 'secure': False, 'value': 'e58e3a1c1f41931b44a67c2b426c2f1e'}, {'domain': '.zhihu.com', 'expiry': 1536393153.672288, 'httpOnly': False, 'name': 'r_cap_id', 'path': '/', 'secure': False, 'value': '"MjI4MWJhNjU3YTRiNGFkMmE0OTkxNjk5MDMxZjc4NTU=|1533801155|61d044a51d2c7f0c8a99b9efcdec2f929a626003"'}, {'domain': '.zhihu.com', 'expiry': 1536393153.672375, 'httpOnly': False, 'name': 'cap_id', 'path': '/', 'secure': False, 'value': '"OTQ3Mjc1NmI0ZWEyNDFiOTkwYjAxYmY5NmU5Y2YyZGY=|1533801155|e2f66995a8136044730315e95ab3230932d3e85a"'}, {'domain': '.zhihu.com', 'expiry': 1536393153.672453, 'httpOnly': False, 'name': 'l_cap_id', 'path': '/', 'secure': False, 'value': '"NzRiMmNiMGQ2ZjYwNGIwM2I2Nzg0OTExNGUxZTJmZTA=|1533801155|9ad2a89322a8cfaf04770121c359a1de82636336"'}, {'domain': '.zhihu.com', 'httpOnly': False, 'name': 'n_c', 'path': '/', 'secure': False, 'value': ''}, {'domain': '.zhihu.com', 'expiry': 1628409157.390474, 'httpOnly': False, 'name': 'd_c0', 'path': '/', 'secure': False, 'value': '"ALDleMF-Bw6PTmu-31M1eULrY7jjkGvu9tA=|1533801158"'}, {'domain': '.zhihu.com', 'expiry': 1596873157, 'httpOnly': False, 'name': '_zap', 'path': '/', 'secure': False, 'value': '084e48e3-598e-4d8a-85b4-4def087fc321'}, {'domain': '.zhihu.com', 'expiry': 1596873158, 'httpOnly': False, 'name': '__utma', 'path': '/', 'secure': False, 'value': '51854390.543124770.1533801158.1533801158.1533801158.1'}, {'domain': '.zhihu.com', 'httpOnly': False, 'name': '__utmc', 'path': '/', 'secure': False, 'value': ''}, {'domain': '.zhihu.com', 'expiry': 1549569158, 'httpOnly': False, 'name': '__utmz', 'path': '/', 'secure': False, 'value': '51854390.1533801158.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)'}, {'domain': '.zhihu.com', 'expiry': 1596873158, 'httpOnly': False, 'name': '__utmv', 'path': '/', 'secure': False, 'value': '51854390.000--|3=entry_date=20180809=1'}, {'domain': 'www.zhihu.com', 'expiry': 2164521158, 'httpOnly': False, 'name': 'name', 'path': '/', 'secure': True, 'value': 'germey'}]
[]获得的结果
-
选项卡管理
通过执行js命令实现新开选项卡window.open()
不同的选项卡是存在列表里browser.window_handles
通过browser.window_handles[0]就可以操作第一个选项卡import time
from selenium import webdriver chromedriver = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver"
browser = webdriver.Chrome(chromedriver)
browser.get('https://www.baidu.com')
browser.execute_script('window.open()') # 通过执行一个js语句来新建选项卡
print(browser.window_handles) # 返回所有的选项卡的引用
browser.switch_to_window(browser.window_handles[1])
browser.get('https://www.taobao.com')
time.sleep(1)
browser.switch_to_window(browser.window_handles[0])
browser.get('https://python.org')['CDwindow-B1E2119962846BA7AED735D128600D3E', 'CDwindow-B231BF356905D63DE59F86F644F77A8F']
获得的结果
-
异常处理
这里的异常比较复杂,官网的参考地址:
http://selenium-python.readthedocs.io/api.html#module-selenium.common.exceptions
这里只进行简单的演示,查找一个不存在的元素-
实例,会报错
from selenium import webdriver chromedriver = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver"
browser = webdriver.Chrome(chromedriver)
browser.get('https://www.baidu.com')
browser.find_element_by_id('hello')NoSuchElementException Traceback (most recent call last)
<ipython-input-15-b8bcc6bfca0b> in <module>()
4 browser = webdriver.Chrome(chromedriver)
5 browser.get('https://www.baidu.com')
----> 6 browser.find_element_by_id('hello') D:\Anaconda3\lib\site-packages\selenium\webdriver\remote\webdriver.py in find_element_by_id(self, id_)
357 element = driver.find_element_by_id('foo')
358 """
--> 359 return self.find_element(by=By.ID, value=id_)
360
361 def find_elements_by_id(self, id_): D:\Anaconda3\lib\site-packages\selenium\webdriver\remote\webdriver.py in find_element(self, by, value)
964 return self.execute(Command.FIND_ELEMENT, {
965 'using': by,
--> 966 'value': value})['value']
967
968 def find_elements(self, by=By.ID, value=None): D:\Anaconda3\lib\site-packages\selenium\webdriver\remote\webdriver.py in execute(self, driver_command, params)
318 response = self.command_executor.execute(driver_command, params)
319 if response:
--> 320 self.error_handler.check_response(response)
321 response['value'] = self._unwrap_value(
322 response.get('value', None)) D:\Anaconda3\lib\site-packages\selenium\webdriver\remote\errorhandler.py in check_response(self, response)
240 alert_text = value['alert'].get('text')
241 raise exception_class(message, screen, stacktrace, alert_text)
--> 242 raise exception_class(message, screen, stacktrace)
243
244 def _value_or_default(self, obj, key, default): NoSuchElementException: Message: no such element: Unable to locate element: {"method":"id","selector":"hello"}
(Session info: chrome=68.0.3440.75)
(Driver info: chromedriver=2.41.578737 (49da6702b16031c40d63e5618de03a32ff6c197e),platform=Windows NT 10.0.17134 x86_64)获得的结果
- 抛出异常
from selenium import webdriver
from selenium.common.exceptions import TimeoutException, NoSuchElementException chromedriver = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver"
browser = webdriver.Chrome(chromedriver)
try:
browser.get('https://www.baidu.com')
except TimeoutException:
print('Time Out')
try:
browser.find_element_by_id('hello')
except NoSuchElementException:
print('No Element')
finally:
browser.close()No Element
获得的结果
-