文件名称:上课学的python,关于爬虫技术
文件大小:927B
文件格式:PY
更新时间:2022-08-09 10:08:27
Python
# -*- coding:utf-8 -*-
import scrapy

from scrapy_test2.items import ScrapyTest2Item


class Test2spider(scrapy.Spider):
    """Crawl quotes.toscrape.com and yield one item per quote on every page."""

    name = 'test2'
    allowed_domains = ['quotes.toscrape.com']
    start_urls = ['http://quotes.toscrape.com/']

    def parse(self, response):
        """Extract the quotes on a listing page, then follow the "next" link.

        Args:
            response: The response for a quotes listing page.

        Yields:
            ScrapyTest2Item: One item per ``.quote`` element, with its
                ``text``, ``author`` and ``tags`` fields filled in.
            scrapy.Request: A request for the next listing page, handled by
                this same callback, when the page has a "next" link.
        """
        self.logger.debug("Parsing %s", response.url)

        for quote in response.css('.quote'):
            item = ScrapyTest2Item()
            item['text'] = quote.css('.text::text').extract_first()
            item['author'] = quote.css('.author::text').extract_first()
            item['tags'] = quote.css('.tags .tag::text').extract()
            yield item

        # extract_first() returns None when no "next" link matches (the last
        # page). urljoin(None) would give back the current URL, so only
        # schedule a follow-up request when a link was actually found.
        next_page = response.css('.pager .next a::attr(href)').extract_first()
        if next_page is not None:
            next_url = response.urljoin(next_page)
            yield scrapy.Request(url=next_url, callback=self.parse)