#!/usr/bin/python
#coding=gbk
import re
import os
import sys
import time
import glob
import string
import socket
import getopt
import urllib
import urllib2
import threading
from sgmllib import SGMLParser
from optparse import OptionParser
# #############################################################################
# #
# # self-defined exception classes
# #
# #############################################################################
class ConnectionError(Exception):
    """Module-specific connection error.

    Presumably reserved for connection failures; nothing in the visible code
    raises it.
    """
class URLUnreachable(Exception):
    """Raised by MyHttpGet when the size of the remote file comes back as 0."""
class CanotDownload(Exception):
    """Presumably signals that a download could not be completed.

    The misspelled name is kept so that existing references keep working.
    """
# #############################################################################
# #
# # multiple threads download module starts here
# #
# #############################################################################
class HttpGetThread(threading.Thread):
    """Worker thread that downloads one inclusive byte range of ``url``.

    The bytes are appended to ``filename``.  Whatever that file already
    contains is treated as the leading bytes of the range, so an interrupted
    download resumes where it stopped.

    Attributes a caller may read while the thread runs:
        totalLength -- number of bytes in the assigned range
        downloaded  -- bytes of the range present in the part file so far
        percent     -- ``downloaded`` as a percentage of ``totalLength``
        headerrange -- (first, last) offsets sent in the HTTP Range header
        time        -- whole seconds spent on the current connection
        error       -- last exception that caused a retry, or None
    """

    # Connection attempts made before the thread gives up.
    maxRetries = 10

    def __init__(self, name, url, filename, range=0):
        """Prepare the worker.

        name     -- thread name
        url      -- URL to download from
        filename -- part file the bytes are appended to
        range    -- (first_byte, last_byte) pair, both inclusive.  The
                    default of 0 is kept for signature compatibility only;
                    it is not a usable range and raises TypeError.
        """
        threading.Thread.__init__(self, name=name)
        self.name = name
        self.url = url
        self.filename = filename
        try:
            first, last = range[0], range[1]
        except (TypeError, IndexError):
            raise TypeError(
                "range must be a (first_byte, last_byte) pair, got %r"
                % (range,))
        self.range = range
        self.totalLength = last - first + 1
        self.bufferSize = 8192
        self.time = 0
        self.error = None
        self._syncWithPartFile()
        self.headerrange = (first + self.downloaded, last)

    def _syncWithPartFile(self):
        """Set ``downloaded``/``percent`` from the part file's current size."""
        try:
            self.downloaded = os.path.getsize(self.filename)
        except OSError:
            # No part file yet: nothing has been downloaded.
            self.downloaded = 0
        self._updatePercent()

    def _updatePercent(self):
        """Recompute ``percent``; an empty range counts as fully done."""
        if self.totalLength > 0:
            self.percent = self.downloaded / float(self.totalLength) * 100
        else:
            self.percent = 100.0

    def run(self):
        """Download the missing part of the range, retrying on failure.

        Returns immediately when the part file already holds the whole
        range.  Gives up after ``maxRetries`` connection attempts; the last
        failure is then available in ``self.error``.
        """
        self._syncWithPartFile()
        attempts = 0
        while (self.downloaded < self.totalLength
               and attempts < self.maxRetries):
            attempts += 1
            try:
                self._fetchRemaining()
            except Exception as err:
                # Deliberately broad: any network or I/O failure is retried
                # from the current offset.  Keep the error so that a caller
                # can find out why the thread gave up.
                self.error = err
                time.sleep(1)

    def _fetchRemaining(self):
        """Open one connection and append bytes until the range is complete
        or the stream ends."""
        self.headerrange = (self.range[0] + self.downloaded, self.range[1])
        request = urllib2.Request(self.url)
        request.add_header('Range', 'bytes=%d-%d' % self.headerrange)
        # NOTE(review): the response status is not checked; a server that
        # ignores the Range header would send bytes starting at offset 0.
        conn = urllib2.urlopen(request)
        try:
            startTime = time.time()
            while self.downloaded < self.totalLength:
                # Never read past the end of the assigned range, even if the
                # server sends more than was asked for.
                remaining = self.totalLength - self.downloaded
                data = conn.read(min(self.bufferSize, remaining))
                if not data:
                    break
                # Append and close per chunk, as the original code did: the
                # on-disk size (which resuming relies on) stays in step with
                # self.downloaded and the part file is not held open.
                with open(self.filename, 'ab') as part:
                    part.write(data)
                self.downloaded += len(data)
                self.time = int(time.time() - startTime)
                self._updatePercent()
        finally:
            conn.close()
# Split the file into byte ranges so that several threads can download it.
def Split(size, blocks):
    """Divide ``size`` bytes into ``blocks`` contiguous inclusive ranges.

    size   -- total number of bytes
    blocks -- number of ranges to produce; must be at least 1

    Returns a list of exactly ``blocks`` ``(first, last)`` tuples.  Every
    range except the last spans ``size // blocks`` bytes; the last range
    also takes the remainder and always ends at ``size - 1``.  When
    ``size < blocks`` the leading ranges are empty, i.e. ``(0, -1)``.

    Raises ValueError if ``blocks`` is smaller than 1.
    """
    if blocks < 1:
        raise ValueError("blocks must be at least 1, got %r" % (blocks,))
    # Explicit floor division: the offsets must stay integers even where
    # "/" means true division.
    blocksize = size // blocks
    ranges = [(i * blocksize, (i + 1) * blocksize - 1)
              for i in range(blocks - 1)]
    ranges.append((blocksize * (blocks - 1), size - 1))
    return ranges
# Get the size of the remote file.
def GetHttpFileSize(url):
    """Return the Content-Length reported for ``url``, or 0 if unavailable.

    Opens ``url`` with ``urllib.urlopen``, reads the ``Content-Length``
    response header and closes the connection again.  On success the length
    is also printed.  0 is returned when the URL cannot be opened or when
    the header is missing or not a number.
    """
    try:
        conn = urllib.urlopen(url)
    except Exception:
        # Deliberately broad, as in the original: any failure to open the
        # URL is reported as "size unknown" (0) and handled by the caller.
        return 0
    try:
        rawLength = conn.info().getheader("Content-Length")
    except Exception:
        return 0
    finally:
        # The original never closed the connection.
        conn.close()
    try:
        length = int(rawLength)
    except (TypeError, ValueError):
        # Header missing (None) or not a number.
        return 0
    print("Get File Length: %d" % length)
    return length
def hasLive(ts):
    """Return True if at least one thread in ``ts`` is still alive.

    Stops at the first live thread; an empty ``ts`` gives False.
    """
    return any(worker.isAlive() for worker in ts)
#
def _removeParts(paths):
    """Delete every file in ``paths``, ignoring those that cannot be removed."""
    for path in paths:
        try:
            os.remove(path)
        except OSError:
            pass


def _mergeParts(filename, names, bufferSize=64 * 1024):
    """Concatenate the part files ``names`` (in order) into ``filename``,
    deleting each part once it has been copied."""
    with open(filename, 'wb') as merged:
        for partName in names:
            # Copy in chunks so that a large part is never held in memory
            # as a whole.
            with open(partName, 'rb') as part:
                chunk = part.read(bufferSize)
                while chunk:
                    merged.write(chunk)
                    chunk = part.read(bufferSize)
            _removeParts([partName])


def MyHttpGet(url, output=None, connections=4):
    """
    Download ``url`` over several parallel range requests and merge the
    parts into one file.  Progress is written to stdout while downloading.

    arguments:
        url, in GBK encoding
        output, name of the target file, used as given (no conversion);
                defaults to the last path component of ``url``
        connections, integer, number of download threads (at least 1)

    raises:
        ValueError      no file name can be derived, or connections < 1
        URLUnreachable  the size of the remote file could not be determined
        CanotDownload   a worker gave up before its part was complete; the
                        part files are kept so that a rerun can resume

    Pressing Ctrl-C deletes the part files and exits with status 1.
    """
    # The original never assigned ``filename`` when ``output`` was omitted.
    filename = output or url.split('/')[-1]
    if not filename:
        raise ValueError(
            "cannot derive a file name from %r; pass output" % (url,))
    blocks = connections
    if blocks < 1:
        raise ValueError("connections must be at least 1, got %r" % (blocks,))

    length = GetHttpFileSize(url)
    if length == 0:
        raise URLUnreachable(url)
    startTime = time.time()  # start of the download
    mb = length / (1024.0 * 1024.0)

    ranges = Split(length, blocks)
    names = ["%s_%d" % (filename, i) for i in range(blocks)]
    ts = []
    for i in range(blocks):
        t = HttpGetThread(" 下载线程 " + str(i), url, names[i], ranges[i])
        t.setDaemon(True)
        ts.append(t)
    # Bytes left over from an earlier run.  Sampled before any thread
    # starts so that this session's progress is not counted into it.
    startSize = sum([t.downloaded for t in ts])
    for t in ts:
        t.start()

    rate = 0.0  # average download speed of this session, KB/s
    try:
        while True:
            live = hasLive(ts)
            etime = time.time() - startTime
            done = sum([t.downloaded for t in ts])
            d = done / float(length) * 100
            if etime > 0:
                rate = (done - startSize) / float(etime) / 1024
            else:
                rate = 0.0
            # '\r' returns to the start of the line so that the progress
            # text is redrawn in place (the original had a literal '/r').
            progressStr = u'\rFilesize: %d(%.2fM) Downloaded: %.2f%% Avg rate: %.1fKB/s' % (length, mb, d, rate)
            sys.stdout.write(progressStr)
            sys.stdout.flush()
            if not live:
                # Checked after reporting so the final state is shown too.
                break
            time.sleep(0.8)
    except KeyboardInterrupt:
        print("")
        print("Exit...")
        _removeParts(names)
        sys.exit(1)
    print("")
    etime = time.time() - startTime

    # A worker stops silently after its retries are used up; merging then
    # would produce a corrupt file.
    incomplete = [n for (n, t) in zip(names, ts)
                  if t.downloaded != t.totalLength]
    if incomplete:
        raise CanotDownload(
            "%d of %d parts are incomplete: %s"
            % (len(incomplete), len(names), ", ".join(incomplete)))

    # Merge before printing the summary: printing non-ASCII text can fail
    # when stdout has no usable encoding, and that must not cost the file.
    _mergeParts(filename, names)
    print(u'耗时: %d:%d, 平均速度:%.2fKB/s' % (int(etime) // 60, int(etime) % 60, rate))
if __name__ == "__main__":
    # Command-line entry point: -f/--file gives the URL to download,
    # -o/--output the target file (default: last path component of the URL).
    parser = OptionParser()
    parser.add_option("-f", "--file", action="store", type="string",
                      dest="url", help="URL of the file to download")
    parser.add_option("-o", "--output", action="store", type="string",
                      dest="output", help="name of the file to write")
    # parse_args() reads sys.argv[1:] itself; passing sys.argv would treat
    # the program name as a positional argument.
    (options, args) = parser.parse_args()
    if not options.url:
        # The original went on with ``url`` undefined and died with NameError.
        parser.error("a URL is required (use -f/--file)")
    connections = 5
    url = options.url
    output = options.output or os.path.basename(url)
    if not output:
        parser.error("cannot derive a file name from the URL (use -o/--output)")
    MyHttpGet(url, output, connections)