有时候我们在做抓取项目时,需要先登录到某个网站上,才能看见某些内容,所以模拟登录功能就必不可少了。散仙这次写的文章主要有2个例子:一个是用普通写法写的,另一个是基于面向对象写的。
模拟登录的重点在于找到表单真实的提交地址,然后携带 cookie、以 POST 方式提交表单数据即可。只要登录成功,我们就可以带着同一份 cookie 访问其他任意网页,从而获取网页内容。
方式一:
1
|
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
|
import urllib.request
import urllib.parse
import http.cookiejar
# Form fields sent in the login POST.
values = {
    'logon.x': 'linke',
    'password': 'xxxx',
    'username': 'xxxxx',
}
# Address the login form submits to.
logUrl = " http://192.168.32.112:8080/templates/index/hrlogon.do "
# Cookie jar shared by every request made through the opener below.
cook = http.cookiejar.CookieJar()
# Opener that routes requests through the cookie jar.
openner = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cook))
# Send a browser-like User-agent header with every request.
openner.addheaders = [('User-agent', 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36')]
# Log in by POSTing the URL-encoded form. The response body is not needed;
# the with-block only makes sure the connection is closed.
with openner.open(logUrl, urllib.parse.urlencode(values).encode()) as r:
    pass
# Fetch a page through the same opener and print it decoded as GBK.
# NOTE(review): this host differs from the login host above; the addresses
# look like anonymised placeholders -- confirm both point at the same server.
with openner.open(" http://192.168.132.62:8080/kq/kqself/card/carddata.do?b_query=link ") as r:
    print(r.read().decode('gbk'))
|
方式二:
1
|
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
|
import urllib
import urllib.request
import urllib.parse
import http.cookiejar
import re
class loginRLKQ:
    """Log in with a form POST and fetch further pages with the same cookies.

    Creating an instance installs a cookie-aware opener as the global urllib
    opener, so every later ``urllib.request.urlopen`` call in the process
    goes through the same cookie jar.
    """

    # URL-encoded form body sent by login(); assign it before calling login().
    post_data: bytes = b""

    def __init__(self) -> None:
        """Build an opener backed by a cookie jar and install it globally."""
        cj = http.cookiejar.CookieJar()
        opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj))
        opener.addheaders = [('User-Agent', 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)')]
        # Make the opener the process-wide default used by urlopen().
        urllib.request.install_opener(opener)

    def _fetch(self, req: urllib.request.Request, encode: str) -> str:
        """Send *req* through the global opener and return the decoded body."""
        # The with-block closes the response even if read() or decode() raises.
        with urllib.request.urlopen(req) as rep:
            return rep.read().decode(encode)

    def login(self, loginurl: str, encode: str) -> str:
        """Submit ``self.post_data`` to *loginurl* and return the response text.

        Args:
            loginurl: Address the login form submits to.
            encode: Name of the codec used to decode the response body.

        Returns:
            The response body decoded with *encode*.
        """
        return self._fetch(urllib.request.Request(loginurl, self.post_data), encode)

    def getUrlContent(self, url: str, encode: str) -> str:
        """Fetch *url* (typically after login()) and return the response text.

        Args:
            url: Address of the page to fetch.
            encode: Name of the codec used to decode the response body.

        Returns:
            The response body decoded with *encode*.
        """
        return self._fetch(urllib.request.Request(url), encode)
if __name__ == "__main__":
    # Create the client; its constructor installs the cookie-aware opener.
    x = loginRLKQ()
    # Login form fields, URL-encoded and converted to bytes for the POST body.
    x.post_data = urllib.parse.urlencode(
        {'username': "xxdd", 'password': 'xxdd', 'logon.x': 'linke'}
    ).encode(encoding="gbk")
    # Log in; the returned page text is kept in y but not used further.
    y = x.login(" http://192.168.132.61:8080/templates/index/hrlogon.do ", "gbk")
    # Fetch a page with the cookies from the login and print it.
    # NOTE(review): this host differs from the login host above; the addresses
    # look like anonymised placeholders -- confirm both point at the same server.
    print(x.getUrlContent(" http://192.124.32.16:8080/kq/kqself/card/carddata.do?b_query=link ", "gbk"))
|
以上就是 Python 模拟登录的实现方法,如有疑问请留言或者到本站社区交流讨论。感谢阅读,希望能帮助到大家,谢谢大家对本站的支持!
原文链接:http://qindongliang.iteye.com/blog/2142774