本文实例讲述了python实现下载指定网址所有图片的方法。分享给大家供大家参考。具体实现方法如下:
|
#coding=utf-8
#download pictures of the url
#usage: python downpicture.py www.baidu.com
import os
import sys
from html.parser import HTMLParser
from urllib.request import urlopen
from urllib.parse import urlparse
def getpicname(path):
    """Return the file name component of an image URL.

    Parameters:
        path: the image URL (e.g. 'http://host/img/a.png').

    Returns:
        The base file name ('a.png'), or None when the URL path has no
        file extension — callers should skip such URLs.
    """
    pr = urlparse(path)
    # Inspect only the parsed path so query strings ('?v=1') and
    # fragments do not pollute the extension check.
    if os.path.splitext(pr.path)[1] == '':
        return None
    return os.path.split(pr.path)[1]
def saveimgto(path, urls):
    """Download every image URL in *urls* into the directory *path*.

    Parameters:
        path: destination directory; the process exits if it does not exist.
        urls: iterable of absolute image URLs.

    URLs whose file name cannot be determined are skipped (the original
    code crashed on os.path.join(path, None) in that case).
    """
    if not os.path.isdir(path):
        print('path is invalid')
        sys.exit()
    for url in urls:
        name = getpicname(url)
        if name is None:
            # no usable file name (e.g. directory-style URL) — skip it
            continue
        # 'with' guarantees both the HTTP response and the local file
        # are closed even when a download fails mid-way.
        with urlopen(url) as q, open(os.path.join(path, name), 'w+b') as of:
            of.write(q.read())
class myhtmlparser(HTMLParser):
    """Collect the src attribute of every <img> tag.

    Attributes:
        urls: list of src values found, in document order.
        num:  count of <img> tags seen (even those without a src).
    """
    def __init__(self):
        HTMLParser.__init__(self)
        self.urls = list()
        self.num = 0

    def handle_starttag(self, tag, attr):
        # attr is a list of (name, value) pairs supplied by HTMLParser.
        if tag.lower() == 'img':
            srcs = [u[1] for u in attr if u[0].lower() == 'src']
            self.urls.extend(srcs)
            self.num = self.num + 1
if __name__ == '__main__':
    # usage: python downpicture.py <site> [savedir]
    url = sys.argv[1]
    if not url.startswith('http://'):
        url = 'http://' + url
    parseresult = urlparse(url)
    # scheme + netloc, used to absolutize root-relative image links
    domain = 'http://' + parseresult[1]
    # close the connection as soon as the page body is read
    with urlopen(url) as q:
        content = q.read().decode('utf-8', 'ignore')
    myparser = myhtmlparser()
    myparser.feed(content)
    # enumerate gives the correct slot even when the same src appears
    # twice (urls.index(u) always found the FIRST occurrence).
    for i, u in enumerate(myparser.urls):
        if u.startswith('//'):
            # protocol-relative link
            myparser.urls[i] = 'http:' + u
        elif u.startswith('/'):
            # root-relative link
            myparser.urls[i] = domain + u
    # optional second argument overrides the historical default directory
    savedir = sys.argv[2] if len(sys.argv) > 2 else r'D:\python\song'
    saveimgto(savedir, myparser.urls)
    print('num of download pictures is {}'.format(myparser.num))
|
运行结果如下:
num of download pictures is 19
希望本文所述对大家的Python程序设计有所帮助。