
import re
from common_p3 import download def crawl_sitemap(url):
sitemap = download(url)
links = re.findall('<loc>(.*?)</loc>',sitemap)
print('links=',links)
for link in links:
print('link=',link)
html = download(link)
return crawl_sitemap('http://example.webscraping.com/sitemap.xml') TypeError: cannot use a string pattern on a bytes-like object (主要是版本问题)
对于python3x
'sitemap = download(url)'应改为‘sitemap = download(url).decode('utf-8')’