#coding=utf-8
'''
Created on 2013-7-17
@author: zinan.zhang
'''
import re
import time
import httplib2
import urllib
from bs4 import BeautifulSoup
# Destination directory prefix for downloaded files; it is passed to gDownload
# and ends up concatenated directly with the file name.
savePath = 'F://TDDOWNLOAD//aNO.4//'
# Build the URL of a single entry
def url_xunhuan(url,list):
    """Return ``url`` with ``list`` appended to it.

    The two arguments are simply combined with ``+``; callers in this file
    pass a base URL and a file name such as ``'3.jpg'``.
    """
    full_url = url + list
    return full_url
# When downloading images
# NOTE(review): the file has lost its indentation; if this call really sits at
# module level it pauses every import/run of this file for 0.5 s — confirm it
# was not meant to live inside one of the download functions.
time.sleep(0.5)  # sleep first, then read the data
# The bare string below is a no-op expression statement placed before
# gDownload; it reads: "download a file by url, file name derived
# automatically from the url".
"""根据url下载文件,文件名自动从url获取"""
def gDownload(url,savePath):
    """Download ``url`` into ``savePath``, naming the file after the URL.

    The file name is whatever ``gGetFileName`` extracts from ``url``; the
    transfer itself is delegated to ``gDownloadWithFilename``.
    No argument validation is performed (skipped for now).
    """
    # A generated name, gRandFilename('jpg'), was once tried here instead of
    # the URL-derived one; that call remains disabled.
    gDownloadWithFilename(url, savePath, gGetFileName(url))
"""根据url获取文件名"""
def gGetFileName(url):
    """Return the file-name part of ``url`` (the text after the last ``/``).

    Args:
        url: URL string, or ``None``.

    Returns:
        ``None`` when ``url`` is ``None``; ``""`` when ``url`` is empty or
        ends with ``/``; otherwise the last ``/``-separated component. A
        value containing no ``/`` at all is returned unchanged.
    """
    # Identity test: ``== None`` would go through a custom ``__eq__`` if the
    # argument defined one.
    if url is None:
        return None
    if url == "":
        return ""
    # Negative indexing replaces the manual ``arr[len(arr) - 1]``.
    return url.split("/")[-1]
"""根据url下载文件,文件名参数指定"""
def gDownloadWithFilename(url,savePath,file):
    """Download ``url`` and write the bytes to ``savePath + file``.

    Args:
        url: Address to fetch.
        savePath: Destination directory prefix; it is concatenated with
            ``file`` as-is, so it must already end with a path separator.
        file: Name of the file to create under ``savePath``.

    Returns:
        None. The outcome is reported on stdout; an ``IOError`` raised while
        fetching or while writing the file is caught here, not propagated.
    """
    # No argument validation is performed (skipped for now).
    try:
        opener = urllib.URLopener()
        response = opener.open(url)
        try:
            data = response.read()
        finally:
            # Release the connection even when the read fails midway.
            response.close()
        # Write-only binary mode: the read access of the former 'w+b' was
        # never used. ``with`` closes the file even if write() raises.
        with open(savePath + file, 'wb') as target:
            target.write(data)
        # Parenthesised single-argument print works as a statement on
        # Python 2 and as a function call on Python 3.
        print("下载成功:" + url)
    except IOError:
        print("下载失败:" + url)
# Fetch a page and return its raw content
def getPage(url):
    """Issue a GET for ``url`` with browser-like headers and return the body.

    The request is made through an ``httplib2.Http`` object constructed with
    ``".cache"``. ``request`` yields a two-item result; only the second item
    (the content) is returned, the first is discarded.
    """
    request_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko)',
        'Accept-Language': 'zh-CN,zh;q=0.8',
        'Accept': 'text/css,*/*;q=0.1',
    }
    client = httplib2.Http(".cache")
    response, body = client.request(url, 'GET', headers=request_headers)
    return body
# Batch download for sites whose files are numbered sequentially
# (wallpaper, enterdesk and similar sites)
def xuanhuan_down_list(url='http://tupian.enterdesk.com/2013/mxy/0311/4/', count=10):
    """Download ``0.jpg`` .. ``<count - 1>.jpg`` from under ``url``.

    Each image URL is built with ``url_xunhuan`` and fetched with
    ``gDownload`` into the module-level ``savePath``.

    Args:
        url: Base URL the numbered file names are appended to. Defaults to
            the gallery address this function was originally hard-wired to.
        count: Number of consecutive images to fetch, starting at ``0.jpg``.
            Defaults to the original fixed value of 10.
    """
    # One pass builds and downloads each name; the former intermediate list
    # (a local that shadowed the builtin ``list``) is no longer needed.
    for index in range(count):
        image_url = url_xunhuan(url, str(index) + '.jpg')
        gDownload(image_url, savePath)
        # Pause 0.2 s after every download.
        time.sleep(0.2)
# Scrape a page for an image URL
def spider_url(url):
    """Return the ``src`` of the first ``<img>`` on the page at ``url``.

    Args:
        url: Address of the HTML page; it is fetched with ``getPage`` and
            parsed with ``BeautifulSoup``.

    Returns:
        The ``src`` attribute value of the first ``<img>`` tag that has one.

    Raises:
        IndexError: If the page has no ``<img>`` carrying a ``src``
            attribute. This is the exception type the former ``srcs[0]``
            raised for an image-less page, now with a descriptive message.
    """
    dom = BeautifulSoup(getPage(url))
    # src=True only matches tags that actually carry the attribute, so an
    # <img> without src no longer aborts the lookup with a KeyError, and
    # find() stops at the first hit instead of collecting every image.
    first_img = dom.find('img', src=True)
    if first_img is None:
        raise IndexError("no <img> with a src attribute found at %s" % (url,))
    return first_img['src']
# Batch download where the .jpg path is hidden inside a page
# (ZOL desktop wallpaper downloads)
def xuanhuan_down_suiji():
    """Walk a fixed range of ZOL ``showpic`` pages and download each image.

    Page numbers 88 through 113 (inclusive) are spliced into the page URL;
    ``spider_url`` extracts the image address from each page and ``gDownload``
    stores it under ``savePath``, with a 0.1 s pause after each one. An
    ``IOError`` anywhere in the loop ends the whole run and prints a failure
    message.
    """
    first_page = 88
    extra_pages = 25
    try:
        # Inclusive upper bound: first_page .. first_page + extra_pages.
        for page_no in range(first_page, first_page + extra_pages + 1):
            # Example page: http://desk.zol.com.cn/showpic/1920x1200_30688_33.html
            page_url = 'http://desk.zol.com.cn/showpic/1920x1200_12' + str(page_no) + '_37.html'
            gDownload(spider_url(page_url), savePath)
            time.sleep(0.1)
    except IOError:
        print("url获取失败!")
if __name__ == "__main__":
    # Nothing is executed by default; every entry point below is disabled.
    # Uncomment the one you want to run.

    # gDownload(url, savePath)

    # Batch-download images whose file names are fixed sequence numbers:
    # xuanhuan_down_list()

    # Batch-download files whose .jpg path is hidden inside a page:
    # xuanhuan_down_suiji()

    # Batch-download files
    pass
# Reposted from: http://www.cnblogs.com/dyllove98/archive/2013/07/19/3201162.html