# Sequential page fetcher: requests one blog article repeatedly through a
# mechanize browser session, logging a timestamp after every request.

try:
    import cookielib  # Python 2 module name
except ImportError:  # Python 3 renamed the module to http.cookiejar
    import http.cookiejar as cookielib
from time import ctime, sleep

import mechanize

# Article requested by browse() when no URL is supplied.
TARGET_URL = 'http://blog.csdn.net/zhou_yujia/article/details/70115839'

# User-agent header sent with every request.
USER_AGENT = ('Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) '
              'Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')


def run(times=100, delay=1):
    """Call browse() repeatedly, printing a timestamped line after each call.

    times -- number of fetches to perform (default 100).
    delay -- seconds to sleep after each fetch (default 1).

    Any exception raised by browse() propagates and ends the loop.
    """
    print('start!')
    for i in range(times):
        browse()
        # A single pre-formatted argument prints the same text whether
        # `print` is the Python 2 statement or the Python 3 function.
        print('run %s times  %s' % (i, ctime()))
        sleep(delay)


def browse(url=TARGET_URL):
    """Fetch `url` once with a freshly configured browser and return the body.

    A new mechanize.Browser with its own LWPCookieJar is created on every
    call, so no cookies are carried over from one fetch to the next.

    url -- address to open (defaults to TARGET_URL).

    Returns whatever the response's read() returns. The response is closed
    before returning, including when read() raises.
    """
    br = mechanize.Browser()
    br.set_cookiejar(cookielib.LWPCookieJar())

    # Handler switches: everything on except robots.txt processing.
    br.set_handle_equiv(True)
    br.set_handle_gzip(True)
    br.set_handle_redirect(True)
    br.set_handle_referer(True)
    br.set_handle_robots(False)
    br.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)

    br.addheaders = [('User-agent', USER_AGENT)]

    response = br.open(url)
    try:
        # The whole body is read so the page is fully downloaded.
        return response.read()
    finally:
        # Release the connection now instead of waiting for garbage
        # collection; run() opens a new one on every iteration.
        response.close()


if __name__ == '__main__':
    run()
# Multi-threaded version:
# Threaded page fetcher: starts many worker threads, each of which requests
# one blog article repeatedly through its own mechanize browser sessions.

try:
    import cookielib  # Python 2 module name
except ImportError:  # Python 3 renamed the module to http.cookiejar
    import http.cookiejar as cookielib
import threading
from time import ctime, sleep

import mechanize

# Article requested by browse() when no URL is supplied.
TARGET_URL = 'http://blog.csdn.net/zhou_yujia/article/details/52319009'

# User-agent header sent with every request.
USER_AGENT = ('Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) '
              'Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')


def run(times=100, delay=1):
    """Call browse() repeatedly, printing a timestamped line after each call.

    times -- number of fetches to perform (default 100).
    delay -- seconds to sleep after each fetch (default 1).

    Any exception raised by browse() propagates and ends the loop (and, when
    run() is a thread target, that thread).
    """
    print('start!')
    for i in range(times):
        browse()
        # A single pre-formatted argument prints the same text whether
        # `print` is the Python 2 statement or the Python 3 function.
        print('run %s times  %s' % (i, ctime()))
        sleep(delay)


def browse(url=TARGET_URL):
    """Fetch `url` once with a freshly configured browser and return the body.

    A new mechanize.Browser with its own LWPCookieJar is created on every
    call, so no browser or cookie state is shared between calls or threads.

    url -- address to open (defaults to TARGET_URL).

    Returns whatever the response's read() returns. The response is closed
    before returning, including when read() raises.
    """
    br = mechanize.Browser()
    br.set_cookiejar(cookielib.LWPCookieJar())

    # Handler switches: everything on except robots.txt processing.
    br.set_handle_equiv(True)
    br.set_handle_gzip(True)
    br.set_handle_redirect(True)
    br.set_handle_referer(True)
    br.set_handle_robots(False)
    br.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)

    br.addheaders = [('User-agent', USER_AGENT)]

    response = br.open(url)
    try:
        # The whole body is read so the page is fully downloaded.
        return response.read()
    finally:
        # Release the connection now instead of waiting for garbage
        # collection; every worker opens a new one on each iteration.
        response.close()


def main(thread_count=500):
    """Run run() in `thread_count` threads and wait until all have finished.

    thread_count -- number of worker threads to start (default 500).
    """
    workers = [threading.Thread(target=run) for _ in range(thread_count)]
    for worker in workers:
        # Daemon threads let the process exit if the main thread is
        # interrupted (e.g. Ctrl-C) while workers are still running.
        worker.daemon = True
        worker.start()

    # Without joining, the main thread would fall off the end of the script
    # immediately and the interpreter would kill the daemon workers before
    # they finish. Joining with a timeout in a loop keeps the main thread
    # responsive to KeyboardInterrupt.
    for worker in workers:
        while worker.is_alive():
            worker.join(1)

    print('all over %s' % ctime())


if __name__ == '__main__':
    main()