Python检测URL状态,并追加保存200的URL:
1.Requests
#! /usr/bin/env python
#coding=utf-8
# Check every URL listed in 1.txt and append those that answer with
# HTTP 200 to 200.txt.
import sys

import requests


def getHttpStatusCode(url, timeout=10):
    """Return the HTTP status code of a GET request to *url*.

    Args:
        url: The URL to request.
        timeout: Seconds to wait for the server before giving up, so a
            non-responding host cannot stall the whole run.

    Returns:
        The integer status code of the response, or the caught
        ``requests.exceptions.HTTPError`` instance if one is raised.

    Raises:
        Any other exception raised by ``requests.get`` (connection
        failure, timeout, invalid URL) propagates to the caller.
    """
    try:
        response = requests.get(url, timeout=timeout)
        return response.status_code
    except requests.exceptions.HTTPError as e:
        return e


if __name__ == "__main__":
    with open('1.txt', 'r') as url_file:
        for line in url_file:
            # Strip the line terminator (and stray whitespace) once, and
            # use the cleaned value for the request and for the output.
            url = line.strip()
            if not url:
                # Blank lines are not URLs; skip them.
                continue
            try:
                status = getHttpStatusCode(url)
                if status == 200:
                    # Use a separate name for the output handle so the
                    # input file being iterated is not shadowed.
                    with open('200.txt', 'a') as ok_file:
                        ok_file.write(url + '\n')
                    print(url)
                else:
                    print('no 200 code')
            except Exception as e:
                # Top-level boundary: report the failure and move on to
                # the next URL.
                print(e)
#! /usr/bin/env python
# -*- coding: utf-8 -*-
# Check every URL listed in /1.txt and append those that answer with
# HTTP 200 to url_200.txt.

import requests


def request_status(line, timeout=10):
    """Request *line* and record it in url_200.txt when it answers 200.

    Args:
        line: The URL to request.
        timeout: Seconds to wait for the server before giving up, so a
            non-responding host cannot stall the whole run.

    Returns:
        *line* if the response status code is 200, otherwise ``None``.

    Raises:
        Any exception raised by ``requests.get`` or by writing the
        output file propagates to the caller.
    """
    conn = requests.get(line, timeout=timeout)
    if conn.status_code != 200:
        return None
    with open('url_200.txt', 'a') as f:
        f.write(line + '\n')
    return line


if __name__ == '__main__':
    # Text mode plus a full strip() removes '\r\n' as well as '\n', and
    # yields str rather than bytes for the request.
    with open('/1.txt', 'r') as f:
        for line in f:
            url = line.strip()
            if not url:
                # Blank lines are not URLs; skip them.
                continue
            try:
                request_status(url)
            except Exception as e:
                # Report the failure instead of silently discarding it,
                # then carry on with the next URL.
                print(e)
2.Urllib
#! /usr/bin/env python
#coding:utf-8
# Check every URL listed in 1.txt with urllib and append those that
# answer with HTTP 200 to 200urllib.txt.
import linecache
import os
import sys
import urllib

try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    from urllib import urlopen  # Python 2

# Read the URL list directly with open() rather than through linecache,
# which is intended for looking up source lines.
with open('1.txt', 'r') as url_file:
    for x in url_file:
        # Remove the line terminator ('\n', not '/n') and surrounding
        # whitespace before using the value as a URL.
        url = x.strip()
        if not url:
            # Blank lines are not URLs; skip them.
            continue
        try:
            response = urlopen(url)
            try:
                status = response.getcode()
            finally:
                response.close()
        except Exception as e:
            print(e)
            # Skip the status check: there is no status for this URL, and
            # reusing the previous URL's status would record it wrongly.
            continue
        if status == 200:
            # Modes: 'r' read-only, 'w' write (truncate), 'a' append.
            with open('200urllib.txt', 'a') as f:
                f.write(url + '\n')
        else:
            print('error')