1、爬取 CSDN 博客:http://blog.csdn.net/zhengyong15984285623/article/details/52865229 ,爬取网址为 http://blog.csdn.net/zhengyong15984285623
2、全注解爬取代理网站:https://my.oschina.net/anxiaole/blog/755965 ,爬取网址为 http://www.kuaidaili.com/free/
三、注意事项
WebMagic 使用 log4j 打印日志,log4j.xml 配置示例如下:
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE log4j:configuration SYSTEM "log4j.dtd">
<!--
  log4j 1.x configuration: the root logger runs at INFO and writes every
  event to both the console and a log file, using the same line pattern.
-->
<log4j:configuration xmlns:log4j="http://jakarta.apache.org/log4j/">

  <!-- Console output. -->
  <appender name="CONSOLE" class="org.apache.log4j.ConsoleAppender">
    <layout class="org.apache.log4j.PatternLayout">
      <!-- date - logger -elapsed ms [thread] level NDC - message, newline -->
      <param name="ConversionPattern" value="%d - %c -%-4r [%t] %-5p %x - %m%n"/>
    </layout>
    <!--
      Restrict the output level: only events between TRACE and ERROR
      (inclusive) pass this filter, so FATAL events are not printed on the
      console. They still reach the FILE appender, which has no filter.
    -->
    <filter class="org.apache.log4j.varia.LevelRangeFilter">
      <param name="LevelMax" value="ERROR"/>
      <param name="LevelMin" value="TRACE"/>
    </filter>
  </appender>

  <!-- File output. NOTE: the path is machine-specific; adjust it per environment. -->
  <appender name="FILE" class="org.apache.log4j.FileAppender">
    <param name="File" value="/Users/zhengyong/log/crawl.log"/>
    <layout class="org.apache.log4j.PatternLayout">
      <param name="ConversionPattern" value="%d - %c -%-4r [%t] %-5p %x - %m%n"/>
    </layout>
  </appender>

  <!-- Root logger: <level> replaces the deprecated <priority> element. -->
  <root>
    <level value="info"/>
    <appender-ref ref="CONSOLE"/>
    <appender-ref ref="FILE"/>
  </root>

</log4j:configuration>
在 Java 的 main 方法中加载该配置文件:
import org.apache.log4j.xml.DOMConfigurator;
DOMConfigurator.configure("/Users/zhengyong/crawl/log4j.xml");//加载.xml文件