python爬虫学习(3):使用User-Agent和代理ip

时间:2022-09-04 16:56:40
使用User-Agent
方法一:先建立head字典,再把它作为参数传给urllib.request.Request

import urllib.request
import urllib.parse  # imported explicitly: urlencode() lives here, not in urllib.request
import json

# Method 1: build the headers dict first and hand it to Request().

# Text to translate, read interactively from the user.
content=input("请输入需要翻译的内容:")
url='http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule'

# Form fields for the request body. Key order is kept identical to the
# original snippet so the encoded payload is unchanged.
# NOTE(review): 'salt' and 'sign' are hard-coded values, presumably captured
# from a browser session -- confirm they are still accepted by the service.
data={
    'i':content,
    'from':'AUTO',
    'to':'AUTO',
    'smartresult':'dict',
    'client':'fanyideskweb',
    'salt':'1520575049536',
    'sign':'4514c46c320493ba8c034eaa8d9decaf',
    'doctype':'json',
    'version':'2.1',
    'keyfrom':'fanyi.web',
    'action':'FY_BY_CLICKBUTTION',
    'typoResult':'false',
    'ue':'utf-8',
}
# urlopen() needs the body as bytes, so URL-encode and then encode to UTF-8.
data=urllib.parse.urlencode(data).encode('utf-8')

head={
    'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3355.4 Safari/537.36',
}
# urllib.request.Request accepts the headers (which must be a dict) as its
# third positional argument.
req=urllib.request.Request(url,data,head)
# The context manager closes the HTTP response even if read()/decode() raises.
with urllib.request.urlopen(req) as response:
    html=response.read().decode('utf-8')
target=json.loads(html)
print("翻译结果:%s"%target['translateResult'][0][0]['tgt'])

 
 

################################################################################

方法二:先建立Request对象,再使用add_header添加User-Agent


import urllib.request
import urllib.parse  # imported explicitly: urlencode() lives here, not in urllib.request
import json

# Method 2: create the Request first, then attach the header with add_header().

# Text to translate, read interactively from the user.
content=input("请输入需要翻译的内容:")
url='http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule'

# Form fields for the request body. Key order is kept identical to the
# original snippet so the encoded payload is unchanged.
# NOTE(review): 'salt' and 'sign' are hard-coded values, presumably captured
# from a browser session -- confirm they are still accepted by the service.
data={
    'i':content,
    'from':'AUTO',
    'to':'AUTO',
    'smartresult':'dict',
    'client':'fanyideskweb',
    'salt':'1520575049536',
    'sign':'4514c46c320493ba8c034eaa8d9decaf',
    'doctype':'json',
    'version':'2.1',
    'keyfrom':'fanyi.web',
    'action':'FY_BY_CLICKBUTTION',
    'typoResult':'false',
    'ue':'utf-8',
}
# urlopen() needs the body as bytes, so URL-encode and then encode to UTF-8.
data=urllib.parse.urlencode(data).encode('utf-8')

req=urllib.request.Request(url,data)
# Set the User-Agent on the already-built request object.
req.add_header('User-Agent','Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3355.4 Safari/537.36')
# The context manager closes the HTTP response even if read()/decode() raises.
with urllib.request.urlopen(req) as response:
    html=response.read().decode('utf-8')
target=json.loads(html)
print("翻译结果:%s"%target['translateResult'][0][0]['tgt'])

 
 

###########################################################################################

#使用代理ip

 

import urllib
import urllib.request  # `import urllib` alone does not load the request submodule


def main():
    """Fetch a page through an HTTP proxy with a custom User-Agent and print it.

    Builds an opener around a ProxyHandler, installs it as the process-wide
    default used by urllib.request.urlopen(), requests the target URL and
    prints the response body decoded as UTF-8.
    """
    # URL to visit
    url='http://www.whatismyip.com.tw/'
    # Proxy address, keyed by URL scheme
    proxy={'http':'106.46.136.112:808'}
    # Create the ProxyHandler
    proxy_support=urllib.request.ProxyHandler(proxy)
    # Build an opener that routes requests through the proxy handler
    opener=urllib.request.build_opener(proxy_support)
    # Add the User-Agent header sent with every request made by this opener
    opener.addheaders=[('User-Agent','Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36')]
    # Install the opener so that plain urlopen() calls use it
    urllib.request.install_opener(opener)
    # Use the installed opener; the context manager closes the response
    # even if read()/decode() raises.
    with urllib.request.urlopen(url) as response:
        # Read the response body and decode it
        html=response.read().decode('utf-8')
    print(html)


# The guard must compare against '__main__'; the original compared against
# the literal '__name__', which is never true, so main() was never called.
if __name__=='__main__':
    main()