no2.crossdomain.xml批量读取(待完善)

时间:2023-03-08 17:24:22

已知问题:当 xmlsource.txt 中的 URL 较多时,批量读取会出问题。

#coding=utf-8
import urllib
import requests
import sys
import re
import time

try:  # Python 3
    from urllib.request import urlopen
    from urllib.error import HTTPError
except ImportError:  # Python 2
    from urllib2 import urlopen, HTTPError

# Lazily captures everything from a literal "domain=" up to (but not
# including) the next "/>" on the same line. Compiled once for all sites.
_DOMAIN_RE = re.compile(r'(?=domain=)(.*?)(?=/>)')


def getxml(url, timeout=10):
    """Fetch ``<url>/crossdomain.xml`` and return its raw ``domain=`` matches.

    Args:
        url: Base URL of the site; trailing slashes are ignored so that
            ``http://host/`` does not turn into ``http://host//crossdomain.xml``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of strings, one per regex match, each still carrying the
        ``domain=`` prefix and quotes (for example ``'domain="*.example.com"'``).
        An HTTP error status (for example 404) yields an empty list.

    Raises:
        IOError: if the host cannot be reached or the request times out.
        ValueError: if ``url`` is not something ``urlopen`` can open.
    """
    target = url.rstrip('/') + '/crossdomain.xml'
    try:
        response = urlopen(target, timeout=timeout)
    except HTTPError:
        # The server answered, but with an error status: no policy to report.
        return []
    try:
        payload = response.read()
    finally:
        response.close()
    # Python 3 returns bytes; the pattern is text, so decode before matching.
    if not isinstance(payload, str):
        payload = payload.decode('utf-8', 'replace')
    return _DOMAIN_RE.findall(payload)


def main(source_path='xmlsource.txt', result_path='reslut.txt', delay=1):
    """Report the crossdomain.xml domains of every site listed in a file.

    Each site is fetched exactly once. A site that cannot be fetched is
    reported on its own line and the batch carries on with the next one.

    Args:
        source_path: Text file with one base URL per line; blank lines are
            skipped.
        result_path: File that receives a copy of everything printed.
        delay: Seconds to sleep after each site.
    """
    with open(source_path, 'r') as source:
        sites = [line.strip() for line in source if line.strip()]

    with open(result_path, 'w') as report:

        def emit(text):
            # Mirror every line to stdout and to the report file.
            print(text)
            report.write(text + '\n')

        for site in sites:
            try:
                domains = getxml(site)
            except (IOError, ValueError) as err:
                emit('website:' + site + ' failed: ' + str(err))
            else:
                emit('website:' + site + ' have ' + str(len(domains)) + ' domain:')
                for entry in domains:
                    emit(entry.replace('"', '').replace('domain=', ''))
            emit('\n')
            time.sleep(delay)
        emit('Over')


if __name__ == '__main__':
    main()

xmlsource.txt 示例内容:

http://www.sina.com.cn/
http://www.discuz.net/
http://www.rising.com.cn/
http://www.ifeng.com/
http://www.sdo.com/
http://www.sogou.com/
http://www.163.com/