本文实例讲述了python实现下载指定网址所有图片的方法。分享给大家供大家参考。具体实现方法如下:
|
#coding=utf-8
#download pictures of the url
#usage: python downpicture.py www.baidu.com
import os
import sys
from html.parser import HTMLParser
from urllib.request import urlopen
from urllib.parse import urlparse
def getpicname(path):
    """Return the file name component of an image URL.

    Parameters:
        path: the image URL (e.g. 'http://host/img/a.png').

    Returns:
        The base file name ('a.png'), or None when the URL path has no
        file extension — callers should skip such URLs.
    """
    pr = urlparse(path)
    # Inspect only the parsed path so query strings ('?v=1') and
    # fragments do not pollute the extension check.
    if os.path.splitext(pr.path)[1] == '':
        return None
    return os.path.split(pr.path)[1]
def saveimgto(path, urls):
    """Download every image URL in *urls* into the directory *path*.

    Parameters:
        path: destination directory; the process exits if it does not exist.
        urls: iterable of absolute image URLs.

    URLs whose file name cannot be determined are skipped (the original
    code crashed on os.path.join(path, None) in that case).
    """
    if not os.path.isdir(path):
        print('path is invalid')
        sys.exit()
    for url in urls:
        name = getpicname(url)
        if name is None:
            # no usable file name (e.g. directory-style URL) — skip it
            continue
        # 'with' guarantees both the HTTP response and the local file
        # are closed even when a download fails mid-way.
        with urlopen(url) as q, open(os.path.join(path, name), 'w+b') as of:
            of.write(q.read())
class myhtmlparser(HTMLParser):
    """Collect the src attribute of every <img> tag.

    Attributes:
        urls: list of src values found, in document order.
        num:  count of <img> tags seen (even those without a src).
    """
    def __init__(self):
        HTMLParser.__init__(self)
        self.urls = list()
        self.num = 0

    def handle_starttag(self, tag, attr):
        # attr is a list of (name, value) pairs supplied by HTMLParser.
        if tag.lower() == 'img':
            srcs = [u[1] for u in attr if u[0].lower() == 'src']
            self.urls.extend(srcs)
            self.num = self.num + 1
if __name__ == '__main__':
    # usage: python downpicture.py <site> [savedir]
    url = sys.argv[1]
    if not url.startswith('http://'):
        url = 'http://' + url
    parseresult = urlparse(url)
    # scheme + netloc, used to absolutize root-relative image links
    domain = 'http://' + parseresult[1]
    # close the connection as soon as the page body is read
    with urlopen(url) as q:
        content = q.read().decode('utf-8', 'ignore')
    myparser = myhtmlparser()
    myparser.feed(content)
    # enumerate gives the correct slot even when the same src appears
    # twice (urls.index(u) always found the FIRST occurrence).
    for i, u in enumerate(myparser.urls):
        if u.startswith('//'):
            # protocol-relative link
            myparser.urls[i] = 'http:' + u
        elif u.startswith('/'):
            # root-relative link
            myparser.urls[i] = domain + u
    # optional second argument overrides the historical default directory
    savedir = sys.argv[2] if len(sys.argv) > 2 else r'D:\python\song'
    saveimgto(savedir, myparser.urls)
    print('num of download pictures is {}'.format(myparser.num))
|
运行结果如下:
num of download pictures is 19
希望本文所述对大家的Python程序设计有所帮助。