1
2
3
4
5
6
7
8
9
10
11
12
13
14
|
{ 'scrapy.downloadermiddlewares.ajaxcrawl.AjaxCrawlMiddleware' : 560 ,
'scrapy.downloadermiddlewares.cookies.CookiesMiddleware' : 700 ,
'scrapy.downloadermiddlewares.defaultheaders.DefaultHeadersMiddleware' : 400 ,
'scrapy.downloadermiddlewares.downloadtimeout.DownloadTimeoutMiddleware' : 350 ,
'scrapy.downloadermiddlewares.httpauth.HttpAuthMiddleware' : 300 ,
'scrapy.downloadermiddlewares.httpcache.HttpCacheMiddleware' : 900 ,
'scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware' : 590 ,
'scrapy.downloadermiddlewares.httpproxy.HttpProxyMiddleware' : 750 ,
'scrapy.downloadermiddlewares.redirect.MetaRefreshMiddleware' : 580 ,
'scrapy.downloadermiddlewares.redirect.RedirectMiddleware' : 600 ,
'scrapy.downloadermiddlewares.retry.RetryMiddleware' : 550 ,
'scrapy.downloadermiddlewares.robotstxt.RobotsTxtMiddleware' : 100 ,
'scrapy.downloadermiddlewares.stats.DownloaderStats' : 850 ,
'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware' : 500 }
|
2. 内置爬虫中间件顺序
1
2
3
4
5
|
{ 'scrapy.spidermiddlewares.depth.DepthMiddleware' : 900 ,
'scrapy.spidermiddlewares.httperror.HttpErrorMiddleware' : 50 ,
'scrapy.spidermiddlewares.offsite.OffsiteMiddleware' : 500 ,
'scrapy.spidermiddlewares.referer.RefererMiddleware' : 700 ,
'scrapy.spidermiddlewares.urllength.UrlLengthMiddleware' : 800 }
|
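3. Scrapy 内置的 settings(注:以下结果看起来是在 scrapy shell 中导出的,因此 DUPEFILTER_CLASS、LOGSTATS_INTERVAL、KEEP_ALIVE 三项是 shell 命令覆盖后的值;Scrapy 1.6 的全局默认值中 DUPEFILTER_CLASS 为 'scrapy.dupefilters.RFPDupeFilter',LOGSTATS_INTERVAL 为 60.0,KEEP_ALIVE 不属于全局默认设置)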
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
|
{ 'AJAXCRAWL_ENABLED' : False ,
'AUTOTHROTTLE_DEBUG' : False ,
'AUTOTHROTTLE_ENABLED' : False ,
'AUTOTHROTTLE_MAX_DELAY' : 60.0 ,
'AUTOTHROTTLE_START_DELAY' : 5.0 ,
'AUTOTHROTTLE_TARGET_CONCURRENCY' : 1.0 ,
'BOT_NAME' : 'scrapybot' ,
'CLOSESPIDER_ERRORCOUNT' : 0 ,
'CLOSESPIDER_ITEMCOUNT' : 0 ,
'CLOSESPIDER_PAGECOUNT' : 0 ,
'CLOSESPIDER_TIMEOUT' : 0 ,
'COMMANDS_MODULE' : '',
'COMPRESSION_ENABLED' : True ,
'CONCURRENT_ITEMS' : 100 ,
'CONCURRENT_REQUESTS' : 16 ,
'CONCURRENT_REQUESTS_PER_DOMAIN' : 8 ,
'CONCURRENT_REQUESTS_PER_IP' : 0 ,
'COOKIES_DEBUG' : False ,
'COOKIES_ENABLED' : True ,
'DEFAULT_ITEM_CLASS' : 'scrapy.item.Item' ,
'DEFAULT_REQUEST_HEADERS' : { 'Accept' : 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8' ,
'Accept-Language' : 'en' },
'DEPTH_LIMIT' : 0 ,
'DEPTH_PRIORITY' : 0 ,
'DEPTH_STATS_VERBOSE' : False ,
'DNSCACHE_ENABLED' : True ,
'DNSCACHE_SIZE' : 10000 ,
'DNS_TIMEOUT' : 60 ,
'DOWNLOADER' : 'scrapy.core.downloader.Downloader' ,
'DOWNLOADER_CLIENTCONTEXTFACTORY' : 'scrapy.core.downloader.contextfactory.ScrapyClientContextFactory' ,
'DOWNLOADER_CLIENT_TLS_METHOD' : 'TLS' ,
'DOWNLOADER_HTTPCLIENTFACTORY' : 'scrapy.core.downloader.webclient.ScrapyHTTPClientFactory' ,
'DOWNLOADER_MIDDLEWARES' : {},
'DOWNLOADER_MIDDLEWARES_BASE' : { 'scrapy.downloadermiddlewares.ajaxcrawl.AjaxCrawlMiddleware' : 560 ,
'scrapy.downloadermiddlewares.cookies.CookiesMiddleware' : 700 ,
'scrapy.downloadermiddlewares.defaultheaders.DefaultHeadersMiddleware' : 400 ,
'scrapy.downloadermiddlewares.downloadtimeout.DownloadTimeoutMiddleware' : 350 ,
'scrapy.downloadermiddlewares.httpauth.HttpAuthMiddleware' : 300 ,
'scrapy.downloadermiddlewares.httpcache.HttpCacheMiddleware' : 900 ,
'scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware' : 590 ,
'scrapy.downloadermiddlewares.httpproxy.HttpProxyMiddleware' : 750 ,
'scrapy.downloadermiddlewares.redirect.MetaRefreshMiddleware' : 580 ,
'scrapy.downloadermiddlewares.redirect.RedirectMiddleware' : 600 ,
'scrapy.downloadermiddlewares.retry.RetryMiddleware' : 550 ,
'scrapy.downloadermiddlewares.robotstxt.RobotsTxtMiddleware' : 100 ,
'scrapy.downloadermiddlewares.stats.DownloaderStats' : 850 ,
'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware' : 500 },
'DOWNLOADER_STATS' : True ,
'DOWNLOAD_DELAY' : 0 ,
'DOWNLOAD_FAIL_ON_DATALOSS' : True ,
'DOWNLOAD_HANDLERS' : {},
'DOWNLOAD_HANDLERS_BASE' : { 'data' : 'scrapy.core.downloader.handlers.datauri.DataURIDownloadHandler' ,
'file' : 'scrapy.core.downloader.handlers.file.FileDownloadHandler' ,
'ftp' : 'scrapy.core.downloader.handlers.ftp.FTPDownloadHandler' ,
'http' : 'scrapy.core.downloader.handlers.http.HTTPDownloadHandler' ,
'https' : 'scrapy.core.downloader.handlers.http.HTTPDownloadHandler' ,
's3' : 'scrapy.core.downloader.handlers.s3.S3DownloadHandler' },
'DOWNLOAD_MAXSIZE' : 1073741824 ,
'DOWNLOAD_TIMEOUT' : 180 ,
'DOWNLOAD_WARNSIZE' : 33554432 ,
'DUPEFILTER_CLASS' : 'scrapy.dupefilters.BaseDupeFilter' ,
'EDITOR' : 'D:\\Program Files (x86)\\Notepad++\\notepad++.exe' ,
'EXTENSIONS' : {},
'EXTENSIONS_BASE' : { 'scrapy.extensions.closespider.CloseSpider' : 0 ,
'scrapy.extensions.corestats.CoreStats' : 0 ,
'scrapy.extensions.feedexport.FeedExporter' : 0 ,
'scrapy.extensions.logstats.LogStats' : 0 ,
'scrapy.extensions.memdebug.MemoryDebugger' : 0 ,
'scrapy.extensions.memusage.MemoryUsage' : 0 ,
'scrapy.extensions.spiderstate.SpiderState' : 0 ,
'scrapy.extensions.telnet.TelnetConsole' : 0 ,
'scrapy.extensions.throttle.AutoThrottle' : 0 },
'FEED_EXPORTERS' : {},
'FEED_EXPORTERS_BASE' : { 'csv' : 'scrapy.exporters.CsvItemExporter' ,
'jl' : 'scrapy.exporters.JsonLinesItemExporter' ,
'json' : 'scrapy.exporters.JsonItemExporter' ,
'jsonlines' : 'scrapy.exporters.JsonLinesItemExporter' ,
'marshal' : 'scrapy.exporters.MarshalItemExporter' ,
'pickle' : 'scrapy.exporters.PickleItemExporter' ,
'xml' : 'scrapy.exporters.XmlItemExporter' },
'FEED_EXPORT_ENCODING' : None ,
'FEED_EXPORT_FIELDS' : None ,
'FEED_EXPORT_INDENT' : 0 ,
'FEED_FORMAT' : 'jsonlines' ,
'FEED_STORAGES' : {},
'FEED_STORAGES_BASE' : { '' : 'scrapy.extensions.feedexport.FileFeedStorage' ,
'file' : 'scrapy.extensions.feedexport.FileFeedStorage' ,
'ftp' : 'scrapy.extensions.feedexport.FTPFeedStorage' ,
's3' : 'scrapy.extensions.feedexport.S3FeedStorage' ,
'stdout' : 'scrapy.extensions.feedexport.StdoutFeedStorage' },
'FEED_STORE_EMPTY' : False ,
'FEED_TEMPDIR' : None ,
'FEED_URI' : None ,
'FEED_URI_PARAMS' : None ,
'FILES_STORE_GCS_ACL' : '',
'FILES_STORE_S3_ACL' : 'private' ,
'FTP_PASSIVE_MODE' : True ,
'FTP_PASSWORD' : 'guest' ,
'FTP_USER' : 'anonymous' ,
'HTTPCACHE_ALWAYS_STORE' : False ,
'HTTPCACHE_DBM_MODULE' : 'dbm' ,
'HTTPCACHE_DIR' : 'httpcache' ,
'HTTPCACHE_ENABLED' : False ,
'HTTPCACHE_EXPIRATION_SECS' : 0 ,
'HTTPCACHE_GZIP' : False ,
'HTTPCACHE_IGNORE_HTTP_CODES' : [],
'HTTPCACHE_IGNORE_MISSING' : False ,
'HTTPCACHE_IGNORE_RESPONSE_CACHE_CONTROLS' : [],
'HTTPCACHE_IGNORE_SCHEMES' : [ 'file' ],
'HTTPCACHE_POLICY' : 'scrapy.extensions.httpcache.DummyPolicy' ,
'HTTPCACHE_STORAGE' : 'scrapy.extensions.httpcache.FilesystemCacheStorage' ,
'HTTPPROXY_AUTH_ENCODING' : 'latin-1' ,
'HTTPPROXY_ENABLED' : True ,
'IMAGES_STORE_GCS_ACL' : '',
'IMAGES_STORE_S3_ACL' : 'private' ,
'ITEM_PIPELINES' : {},
'ITEM_PIPELINES_BASE' : {},
'ITEM_PROCESSOR' : 'scrapy.pipelines.ItemPipelineManager' ,
'LOGSTATS_INTERVAL' : 0 ,
'LOG_DATEFORMAT' : '%Y-%m-%d %H:%M:%S' ,
'LOG_ENABLED' : True ,
'LOG_ENCODING' : 'utf-8' ,
'LOG_FILE' : None ,
'LOG_FORMAT' : '%(asctime)s [%(name)s] %(levelname)s: %(message)s' ,
'LOG_FORMATTER' : 'scrapy.logformatter.LogFormatter' ,
'LOG_LEVEL' : 'DEBUG' ,
'LOG_SHORT_NAMES' : False ,
'LOG_STDOUT' : False ,
'MAIL_FROM' : 'scrapy@localhost' ,
'MAIL_HOST' : 'localhost' ,
'MAIL_PASS' : None ,
'MAIL_PORT' : 25 ,
'MAIL_USER' : None ,
'MEMDEBUG_ENABLED' : False ,
'MEMDEBUG_NOTIFY' : [],
'MEMUSAGE_CHECK_INTERVAL_SECONDS' : 60.0 ,
'MEMUSAGE_ENABLED' : True ,
'MEMUSAGE_LIMIT_MB' : 0 ,
'MEMUSAGE_NOTIFY_MAIL' : [],
'MEMUSAGE_WARNING_MB' : 0 ,
'METAREFRESH_ENABLED' : True ,
'METAREFRESH_MAXDELAY' : 100 ,
'NEWSPIDER_MODULE' : '',
'RANDOMIZE_DOWNLOAD_DELAY' : True ,
'REACTOR_THREADPOOL_MAXSIZE' : 10 ,
'REDIRECT_ENABLED' : True ,
'REDIRECT_MAX_TIMES' : 20 ,
'REDIRECT_PRIORITY_ADJUST' : 2 ,
'REFERER_ENABLED' : True ,
'REFERRER_POLICY' : 'scrapy.spidermiddlewares.referer.DefaultReferrerPolicy' ,
'RETRY_ENABLED' : True ,
'RETRY_HTTP_CODES' : [ 500 , 502 , 503 , 504 , 522 , 524 , 408 ],
'RETRY_PRIORITY_ADJUST' : - 1 ,
'RETRY_TIMES' : 2 ,
'ROBOTSTXT_OBEY' : False ,
'SCHEDULER' : 'scrapy.core.scheduler.Scheduler' ,
'SCHEDULER_DEBUG' : False ,
'SCHEDULER_DISK_QUEUE' : 'scrapy.squeues.PickleLifoDiskQueue' ,
'SCHEDULER_MEMORY_QUEUE' : 'scrapy.squeues.LifoMemoryQueue' ,
'SCHEDULER_PRIORITY_QUEUE' : 'queuelib.PriorityQueue' ,
'SPIDER_CONTRACTS' : {},
'SPIDER_CONTRACTS_BASE' : { 'scrapy.contracts.default.ReturnsContract' : 2 ,
'scrapy.contracts.default.ScrapesContract' : 3 ,
'scrapy.contracts.default.UrlContract' : 1 },
'SPIDER_LOADER_CLASS' : 'scrapy.spiderloader.SpiderLoader' ,
'SPIDER_LOADER_WARN_ONLY' : False ,
'SPIDER_MIDDLEWARES' : {},
'SPIDER_MIDDLEWARES_BASE' : { 'scrapy.spidermiddlewares.depth.DepthMiddleware' : 900 ,
'scrapy.spidermiddlewares.httperror.HttpErrorMiddleware' : 50 ,
'scrapy.spidermiddlewares.offsite.OffsiteMiddleware' : 500 ,
'scrapy.spidermiddlewares.referer.RefererMiddleware' : 700 ,
'scrapy.spidermiddlewares.urllength.UrlLengthMiddleware' : 800 },
'SPIDER_MODULES' : [],
'STATSMAILER_RCPTS' : [],
'STATS_CLASS' : 'scrapy.statscollectors.MemoryStatsCollector' ,
'STATS_DUMP' : True ,
'TELNETCONSOLE_ENABLED' : 1 ,
'TELNETCONSOLE_HOST' : '127.0.0.1' ,
'TELNETCONSOLE_PASSWORD' : None ,
'TELNETCONSOLE_PORT' : [ 6023 , 6073 ],
'TELNETCONSOLE_USERNAME' : 'scrapy' ,
'TEMPLATES_DIR' : 'd:\\python36\\lib\\site-packages\\scrapy\\templates' ,
'URLLENGTH_LIMIT' : 2083 ,
'USER_AGENT' : 'Scrapy/1.6.0 (+https://scrapy.org)' ,
'KEEP_ALIVE' : True }
|
到此,这篇关于详解 scrapy 内置中间件顺序的文章就介绍到这了。更多与 scrapy 中间件顺序相关的内容,请搜索服务器之家以前的文章,或继续浏览下面的相关文章。希望大家以后多多支持服务器之家!
原文链接:https://blog.csdn.net/weixin_44087733/article/details/100561331