在写程序的时候,总会重复编写一些相同的函数,因此有必要把它们收集起来,
在之后的使用中可以直接使用。
1.自定义log打印函数:
# Custom log-printing function; the log level is selected by number.
def my_log(log_leavel, msg):
    """Print ``msg`` to stdout, prefixed with a level name and a timestamp.

    Args:
        log_leavel: Numeric level selector: 0 prints the fixed text
            ``log none``, 1 -> ``main``, 2 -> ``error``, 3 -> ``warning``.
        msg: The message to print.

    Returns:
        None.

    Raises:
        KeyError: If ``log_leavel`` is not one of 0, 1, 2, 3.
    """
    # Imported locally so the snippet works when pasted into a module that
    # has not imported ``time`` itself.
    import time

    # One table instead of four near-identical nested functions.
    prefixes = {0: None, 1: u'main', 2: u'error', 3: u'warning'}
    prefix = prefixes[log_leavel]  # unknown level -> KeyError

    if prefix is None:
        # NOTE(review): the original comment describes level 0 as "do not
        # print", yet the code prints this marker; the output is kept as-is.
        print('log none')
        return None

    # Parenthesised single-argument print works on both Python 2 and 3.
    print(u'%s: %s: %s' % (prefix, time.strftime('%Y-%m-%d_%H-%M-%S'), msg))
    return None
2.获取网页内容
# Fetch the content of a web page.
def get_html(url):
    """Download ``url`` and return its body as HTML-unescaped text.

    The request is sent with a Firefox ``User-Agent`` header. The body is
    decoded as UTF-8 and HTML character references (e.g. ``&quot;``) are
    converted back to the characters they stand for.

    Args:
        url: The address to fetch.

    Returns:
        The decoded, unescaped page text, or None when the request fails
        with an HTTPError or URLError (a message is printed in that case).

    Raises:
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    from contextlib import closing

    # Imported locally so the snippet is self-contained; the fallback keeps
    # it working on Python 2, where these modules have different names.
    try:
        from urllib.request import Request, urlopen
        from urllib.error import HTTPError, URLError
        from html import unescape
    except ImportError:
        from urllib2 import Request, urlopen, HTTPError, URLError
        from HTMLParser import HTMLParser
        unescape = HTMLParser().unescape

    print(u'start crawl %s ...' % url)
    headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.2; rv:16.0) Gecko/20100101 Firefox/16.0'}
    req = Request(url=url, headers=headers)
    try:
        # closing() releases the connection even if read()/decode() raises.
        with closing(urlopen(req)) as response:
            page = response.read().decode('utf-8')
    except HTTPError as e:
        # HTTPError is a URLError subclass, so it must be handled first.
        print(u"连接失败,错误原因:%s " % e.code)
        return None
    except URLError as e:
        # Always return None here so no code path reaches the final return
        # with ``page`` unbound.
        print(u"连接失败,错误原因:%s " % getattr(e, 'reason', e))
        return None
    # Turn HTML entities such as &quot; back into plain characters.
    return unescape(page)
3.打印时间函数
将其作为装饰器,计算函数运行时间
def print_run_time(func):
    """Decorator that prints how long each call to ``func`` takes.

    After the wrapped function returns, one line with the function's name
    and the elapsed wall-clock time in seconds (two decimals) is printed.
    Nothing is printed if ``func`` raises.

    Args:
        func: The callable to time.

    Returns:
        A wrapper that forwards all arguments to ``func`` and returns
        whatever ``func`` returns.
    """
    # Imported locally so the snippet works when pasted into a module that
    # has not imported these itself.
    import functools
    import time

    @functools.wraps(func)  # keep func's __name__ and __doc__ on the wrapper
    def wrapper(*args, **kw):
        started = time.time()
        # Keep the result: the original wrapper dropped it, so every
        # decorated function silently returned None.
        result = func(*args, **kw)
        print('current Function [%s] run time is %.2f' % (func.__name__, time.time() - started))
        return result

    return wrapper
后续不定时补充。。。。