1. 在 settings.py 中打开管道(取消 ITEM_PIPELINES 的注释)
# Enable the MongoDB pipeline. The number (0-1000) is the pipeline's order:
# pipelines with lower values run first.
ITEM_PIPELINES = {
    'tianmao.pipelines.TianmaoPipeline': 300,
}
2. 在 settings.py 中写入 MongoDB 配置
# MongoDB connection settings (read by the pipeline through crawler.settings).
HOST = "127.0.0.1"  # server address
PORT = 27017  # MongoDB default port
USER = "用户名"  # placeholder: replace with the MongoDB user name
PWD = "密码"  # placeholder: replace with the MongoDB password
DB = "数据库名"  # placeholder: replace with the database name
TABLE = "表名"  # placeholder: replace with the collection name
3. 在 pipelines.py 文件中导入 pymongo,将数据写入数据库
from pymongo import MongoClient
class TianmaoPipeline(object):
    """Scrapy item pipeline that writes every scraped item to MongoDB.

    The connection parameters are taken from the project settings
    ``HOST``, ``PORT``, ``USER``, ``PWD``, ``DB`` and ``TABLE`` by
    :meth:`from_crawler`.
    """

    def __init__(self, host, port, user, pwd, db, table):
        """Store the connection parameters; no connection is opened here.

        Args:
            host: MongoDB server address.
            port: MongoDB server port.
            user: User name, or a falsy value to connect without credentials.
            pwd: Password belonging to ``user``.
            db: Name of the database to write to.
            table: Name of the collection to write to.
        """
        self.host = host
        self.port = port
        self.user = user
        self.pwd = pwd
        self.db = db
        self.table = table

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline from the settings exposed by ``crawler``."""
        settings = crawler.settings
        return cls(
            settings.get('HOST'),
            settings.get('PORT'),
            settings.get('USER'),
            settings.get('PWD'),
            settings.get('DB'),
            settings.get('TABLE'),
        )

    def _mongo_uri(self):
        """Return the ``mongodb://`` connection URI for the stored parameters."""
        # Local import keeps this class self-contained.
        from urllib.parse import quote_plus

        if not self.user:
            # No credentials configured: connect anonymously instead of
            # sending the literal text "None" as user name and password.
            return 'mongodb://%s:%s' % (self.host, self.port)
        # Reserved characters such as '@', ':' or '/' in the credentials must
        # be percent-escaped, otherwise the URI is parsed incorrectly.
        return 'mongodb://%s:%s@%s:%s' % (
            quote_plus(str(self.user)),
            quote_plus(str(self.pwd)),
            self.host,
            self.port,
        )

    def open_spider(self, spider):
        """Create the MongoDB client when the spider is opened."""
        self.client = MongoClient(self._mongo_uri())

    def close_spider(self, spider):
        """Close the MongoDB client when the spider is closed."""
        self.client.close()

    def process_item(self, item, spider):
        """Insert ``item`` into the configured collection and return it.

        ``Collection.save`` was removed in PyMongo 4, so ``insert_one`` is
        used. The item is returned so that pipelines running after this one
        still receive it.
        """
        self.client[self.db][self.table].insert_one(dict(item))
        return item