Python学习之爬虫(小甲鱼)

时间:2025-01-28 17:17:31

依葫芦画瓢 

用字符串查找图片地址下载 

图片放在当前目录 

GIF下载下来不会动.....

 

 

import time
import urllib.request

def open_url(url):
    """Fetch *url* and return the raw response body.

    The URL is echoed to stdout before the request is made so progress is
    visible while crawling.

    Args:
        url: Absolute URL to request.

    Returns:
        The undecoded response body as ``bytes``; callers decode it
        themselves when they need text.

    Raises:
        urllib.error.URLError: If the request cannot be completed.
        ValueError: If *url* has no scheme (e.g. a bare path).
    """
    print(url)
    req = urllib.request.Request(url)
    # Send a desktop-browser User-Agent instead of urllib's default one
    # (presumably so the site does not reject the request as a bot).
    req.add_header("User-Agent","Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36")
    # The context manager closes the connection even if read() fails.
    with urllib.request.urlopen(req) as response:
        return response.read()

def getInitialpage():
    """Return the page number shown on the gallery landing page.

    The landing page is downloaded, decoded as UTF-8, and searched for the
    ``current-comment-page`` span; the text between the first ``[`` and the
    following ``]`` after that span is returned. The caller uses this value
    as the number of pages available.

    Returns:
        The page number as a string (callers convert it with ``int``).

    Raises:
        ValueError: If the span or its bracketed number is not present.
    """
    # NOTE(review): the original value was the bare path "/ooxx", which
    # urllib cannot open. The host below is assumed from the page markup
    # this scraper looks for -- confirm before relying on it.
    url = "http://jandan.net/ooxx"
    html = open_url(url).decode("utf-8")

    index = html.find("span class=\"current-comment-page\"")
    if index == -1:
        raise ValueError("current-comment-page marker not found in page")

    beginindex = html.find("[", index)
    if beginindex == -1:
        raise ValueError("opening '[' of the page number not found")

    # Search for the closing bracket only after the opening one so the
    # slice below can never be inverted.
    endindex = html.find("]", beginindex + 1)
    if endindex == -1:
        raise ValueError("closing ']' of the page number not found")

    return html[beginindex + 1:endindex]

def getpiclist(pageurl):
    """Collect the image addresses found on one gallery page.

    The page is downloaded and decoded as UTF-8. Every occurrence of the
    "view original" link marker is located with plain string searching, and
    the text between the next pair of double quotes after the marker is
    taken as the image address.

    Args:
        pageurl: Absolute URL of the gallery page to scan.

    Returns:
        A list of address strings in page order; empty when the marker does
        not occur on the page.
    """
    html = open_url(pageurl).decode("utf-8")
    marker = "[查看原图]</a><br /><img"
    piclist = []

    pos = html.find(marker)
    while pos != -1:
        # The marker itself contains no double quote, so the first quote at
        # or after it opens the attribute value that follows the <img tag.
        beginindex = html.find("\"", pos)
        if beginindex == -1:
            break
        endindex = html.find("\"", beginindex + 1)
        if endindex == -1:
            # Truncated markup: stop rather than slice with a bogus index.
            break
        piclist.append(html[beginindex + 1:endindex])
        # Resume scanning after the value just consumed.
        pos = html.find(marker, endindex)

    return piclist

def savepic(piclist):
    """Download every image in *piclist* into the current directory.

    Each entry is prefixed with ``http:`` before fetching, so entries are
    expected to be scheme-relative addresses (``//host/path``). The file is
    named after the last path segment of the address, and the name is
    printed before the file is written.

    Args:
        piclist: Iterable of image address strings.
    """
    for picurl in piclist:
        html = open_url("http:{}".format(picurl))
        # Use the last path segment of the address as the local file name.
        filename = picurl.split("/")[-1]
        print(filename)
        with open(filename , "wb") as f:
            f.write(html)
        # Pause between downloads to avoid hammering the server.
        time.sleep(1)

def test(page):
    """Download the images from the most recent gallery pages.

    The current page number is read with ``getInitialpage()``; every page
    from ``current - page`` up to and including ``current`` is then scanned
    with ``getpiclist()`` and its images are saved with ``savepic()``.

    Args:
        page: How many pages before the current one to include, so
            ``page + 1`` pages are processed in total.
    """
    initialpage = int(getInitialpage())
    for i in range(initialpage - page, initialpage + 1):
        # NOTE(review): the original URL was the bare path
        # "/ooxx/page-N#comments", which urllib cannot open. The host is
        # assumed from the page markup this scraper looks for -- confirm.
        pageurl = "http://jandan.net/ooxx/page-{}#comments".format(i)
        piclist = getpiclist(pageurl)
        savepic(piclist)
# Script entry point: when the file is executed directly, call test(1).
if __name__ == "__main__":
    test(1)

补充:

urllib.request 库提供了 urlretrieve 方法用于下载（urllib.request.urlretrieve(url, filename)），可以替换上述 savepic() 中的下载与写文件代码，动图可正常显示