# Fetch pages through urllib2 with an explicit timeout.
# Returns the HTML source of the requested page.
def getHtml(url, i):
    """Download the raw body of ``url``, retrying on socket-level errors.

    Args:
        url: Address of the page to fetch.
        i: Number of attempts already used. Callers normally pass 0; no
            request is made once ``i`` exceeds 2, so a fresh call gets at
            most three attempts.

    Returns:
        The response body as returned by ``read()``, or ``None`` when the
        attempt budget is exhausted or a non-socket error occurs.
    """
    attempt = i
    while attempt <= 2:
        try:
            # Pause before every request so repeated calls do not hammer
            # the remote server.
            time.sleep(1)
            req = urllib2.Request(url)
            # Send a browser-like User-Agent header with the request.
            req.add_header(
                'User-Agent',
                'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
                '(KHTML, like Gecko) Chrome/49.0.2623.87 Safari/537.36')
            response = urllib2.urlopen(req, timeout=3)
            try:
                return response.read()
            finally:
                # Release the connection even if read() fails part-way.
                response.close()
        except socket.error:
            # Socket-level failure (socket.timeout is a subclass): retry.
            # The previous recursive version dropped the retry's result and
            # then hit an unbound ``page`` variable.
            attempt += 1
        except Exception:
            # Best-effort fetch: any other failure yields None. Catching
            # Exception rather than using a bare ``except`` lets
            # KeyboardInterrupt and SystemExit propagate.
            return None
    return None