python3下爬取网页上的图片的爬虫程序

 import urllib.request

 import re

 #py抓取页面图片并保存到本地

 #获取页面信息

 def getHtml(url):
     """Fetch the resource at ``url`` and return its raw body.

     Args:
         url: Address handed to ``urllib.request.urlopen``.

     Returns:
         The response body as undecoded ``bytes``; callers decode it
         themselves when they need text.

     Raises:
         urllib.error.URLError: Propagated from ``urlopen`` when the
             request cannot be completed.
     """
     # The context manager closes the underlying connection even when
     # read() raises, so the response handle is never leaked.
     with urllib.request.urlopen(url) as response:
         return response.read()

 #通过正则获取图片

 def getImg(html):
     """Extract ``.jpg`` image addresses from page markup.

     Only ``src="..."`` attributes that end in ``.jpg`` and are directly
     followed by a ``pic_ext`` attribute are matched.

     Args:
         html: Page markup as ``str``. The pattern is a ``str`` pattern, so
             ``bytes`` input must be decoded by the caller first.

     Returns:
         A list of the captured ``src`` values in document order; empty when
         nothing matches.
     """
     # Raw string so that "\." reaches the regex engine verbatim instead of
     # being treated as an (invalid) string escape sequence.
     img_pattern = re.compile(r'src="(.+?\.jpg)" pic_ext')
     return img_pattern.findall(html)

 # Download the page, collect its image addresses and save each one locally.
 html = getHtml("http://*****")

 # getImg matches with a str pattern, so decode the response bytes first.
 # Named img_urls rather than "list" to avoid shadowing the builtin.
 img_urls = getImg(html.decode())

 # Save every image to the local disk, numbered in the order it was found.
 # NOTE(review): the counter's initial value and step were missing from the
 # source ("x =" / "x+="); a zero-based counter incremented by one is assumed.
 for x, imgurl in enumerate(img_urls):
     print(x)
     urllib.request.urlretrieve(imgurl, 'd:\\%s.jpg' % x)

 print("done")

从指定网页获取图片并保存到 AWS S3

 import boto3

 import urllib.request

 def lambda_handler(request, context):
     """Copy one hard-coded sample image from the web into an S3 bucket.

     Downloads ``download_url``, stores the bytes in the
     ``testforcustomerservice`` bucket under the URL's last path segment,
     prints the object returned by ``put_object`` and returns the address
     built for the uploaded object.

     Args:
         request: Not read by this function.
         context: Not read by this function.

     Returns:
         A dict with a single ``"url"`` key holding the S3 address of the
         uploaded object.
     """
     # Earlier sample source, kept for reference:
     # https://s3.amazonaws.com/testforcustomerservice/192x192.png
     download_url = "https://gss2.bdstatic.com/-fo3dSag_xI4khGkpoWK1HF6hhy/baike/s%3D220/sign=3707d191fa03738dda4a0b20831bb073/279759ee3d6d55fb3cfdd81761224f4a20a4ddcc.jpg"

     # The object key is the last path segment of the URL (the file name).
     # NOTE(review): the index literal was missing from the source
     # ("list[len(list)-]"); the last segment is assumed.
     upload_key = download_url.split('/')[-1]

     # Close the HTTP response once the body has been read, and keep the
     # bytes in their own name instead of overwriting the context parameter.
     with urllib.request.urlopen(url=download_url) as response:
         image_bytes = response.read()

     bucket = "testforcustomerservice"
     s3 = boto3.resource("s3")
     file_obj = s3.Bucket(bucket).put_object(Key=upload_key, Body=image_bytes)
     print(file_obj)

     return {
         "url": "https://s3.amazonaws.com/testforcustomerservice/" + upload_key
     }

秒客网

python3下爬取网页上的图片的爬虫程序

相关文章