依葫芦画瓢
用字符串查找图片地址下载
图片放在当前目录
GIF下载下来不会动.....
import time
import urllib.request
def open_url(url):
    """Fetch *url* and return the raw response body.

    The request is sent with a desktop-Chrome ``User-Agent`` header instead
    of urllib's default one.

    Args:
        url: Absolute URL to fetch.

    Returns:
        bytes: The undecoded response body; callers decode it themselves.

    Raises:
        ValueError: If *url* has no scheme (raised by ``Request``).
        urllib.error.URLError: If the request fails.
    """
    print(url)
    req = urllib.request.Request(url)
    req.add_header("User-Agent","Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36")
    # Close the response deterministically once the body has been read.
    with urllib.request.urlopen(req) as response:
        return response.read()
def getInitialpage():
    """Return the current comment-page number shown on the landing page.

    The page is fetched with ``open_url`` and decoded as UTF-8. The result
    is the text between the first ``[`` and the following ``]`` that appear
    after the ``span class="current-comment-page"`` marker.

    Returns:
        str: The page number as text (the caller converts it with ``int``).

    Raises:
        ValueError: If the marker or the bracketed number is not found.
    """
    # NOTE(review): this URL has no scheme/host, so urllib will reject it;
    # the site prefix appears to be missing -- restore it before running.
    url = "/ooxx"
    html = open_url(url).decode("utf-8")

    index = html.find("span class=\"current-comment-page\"")
    if index == -1:
        raise ValueError("current-comment-page marker not found in page")

    beginindex = html.find("[", index)
    # Search for the closing bracket after the opening one so a stray "]"
    # between the marker and "[" cannot produce an empty/garbled slice.
    endindex = html.find("]", beginindex + 1)
    if beginindex == -1 or endindex == -1:
        raise ValueError("bracketed page number not found after marker")

    return html[beginindex + 1:endindex]
def getpiclist(pageurl):
    """Collect the image addresses found on one listing page.

    The page is fetched with ``open_url`` and decoded as UTF-8. For every
    occurrence of the ``[查看原图]</a><br /><img`` marker, the text between
    the next pair of double quotes is taken as the picture address.

    Args:
        pageurl: URL of the listing page to scan.

    Returns:
        list[str]: Picture addresses in page order (empty if none found).
    """
    marker = "[查看原图]</a><br /><img"
    html = open_url(pageurl).decode("utf-8")
    piclist = []

    # Walk the page by position instead of re-slicing the string after each
    # hit; this avoids copying the remainder of the page for every picture.
    pos = html.find(marker)
    while pos != -1:
        beginindex = html.find("\"", pos)
        endindex = html.find("\"", beginindex + 1)
        if beginindex == -1 or endindex == -1:
            # Truncated markup: no complete quoted value follows the marker.
            break
        piclist.append(html[beginindex + 1:endindex])
        pos = html.find(marker, endindex)

    return piclist
def savepic(piclist):
    """Download every picture in *piclist* into the current directory.

    Each file is named after the last ``/``-separated segment of its
    address and written in binary mode. The function pauses one second
    after each download.

    Args:
        piclist: Iterable of picture addresses; each is prefixed with
            ``http:`` before fetching.
            # presumably scheme-relative ("//host/path") -- verify

    Returns:
        None
    """
    for picurl in piclist:
        data = open_url("http:{}".format(picurl))
        filename = picurl.split("/")[-1]
        print(filename)
        with open(filename , "wb") as f:
            f.write(data)
        # Throttle between requests.
        time.sleep(1)
def test(page):
    """Download the pictures from the most recent listing pages.

    Reads the current page number via ``getInitialpage`` and then processes
    page numbers ``current - page`` through ``current`` inclusive, i.e.
    ``page + 1`` pages, in ascending order.

    Args:
        page: How many pages before the current one to include.
    """
    newest = int(getInitialpage())
    oldest = newest - page
    for number in range(oldest, newest + 1):
        # NOTE(review): the URL has no scheme/host here -- confirm the
        # intended site prefix before running.
        listing_url = "/ooxx/page-{}#comments".format(number)
        savepic(getpiclist(listing_url))
if __name__ == "__main__":
    # Script entry point: process the current page and the one before it.
    test(1)
补充:
urllib.request 模块提供了 urlretrieve() 函数用于下载,可以替换上述 savepic() 中的代码,动图可正常显示