目标站:http://www.xicidaili.com/
代码:
<?php
// Crawls the proxy listing pages at http://www.xicidaili.com/nn/<page> and
// stores one record per table row in the `ip` table of the `spider` database.

require 'lib/phpQuery.php';
require 'lib/QueryList.php';
require "db/shared/ez_sql_core.php";
require "db/mysql/ez_sql_mysql.php";
require "public/function.php";

use QL\QueryList;

// NOTE(review): database credentials are hard-coded here; consider moving
// them to configuration before using this outside a local sandbox.
$db = new ezSQL_mysql('root', 'root', 'spider', 'localhost');

for ($j = 1; $j <= 100; $j++) {
    $gurl = "http://www.xicidaili.com/nn/" . $j;
    // curl_request() is not defined in this block (presumably it comes from
    // public/function.php and returns the page HTML - confirm).
    $html = curl_request($gurl);

    // Row indexes start at 1, presumably to skip the table's header row.
    for ($i = 1; $i <= 100; $i++) {
        getIpInfo($html, $i, $db);
    }

    echo "第" . $j . "页完成" . PHP_EOL;
}

/**
 * Extracts row $t of the `#ip_list` table from $html and inserts it into the
 * `ip` database table.
 *
 * Rows that cannot be found in the page (or that yield no IP address) are
 * skipped instead of being inserted as empty records.
 *
 * @param string $html Markup of one listing page.
 * @param int    $t    Row index used in the `tr:eq($t)` selector.
 * @param object $db   Database handle exposing escape() and query()
 *                     (an ezSQL_mysql instance in this script).
 *
 * @return void
 */
function getIpInfo($html, $t, $db)
{
    $row = "#ip_list tr:eq($t)";

    $rules = array(
        'ip'        => array("$row td:eq(1)", 'text'), // IP address
        'port'      => array("$row td:eq(2)", 'text'), // port
        'area'      => array("$row td:eq(3)", 'text'), // location
        'anonymous' => array("$row td:eq(4)", 'text'), // anonymity
        'type'      => array("$row td:eq(5)", 'text'), // type
        // Speed: the cell's HTML is expected to contain "...:<number>%...";
        // the number is mapped to a coarse label.
        'speed'     => array("$row td:eq(6)", 'html', '', function ($content) {
            $parts = explode(':', $content);
            // Guard the index and force a numeric comparison so that missing
            // or non-numeric markup falls through to the slowest bucket.
            $num = isset($parts[1]) ? (float) explode('%', $parts[1])[0] : 0;

            if ($num >= 80) {
                return "很快";
            }
            if ($num >= 60) {
                return "一般";
            }
            return "较慢";
        }),
        'chtime'    => array("$row td:eq(8)", 'text'), // time alive
        'yztime'    => array("$row td:eq(9)", 'text'), // time of last verification
    );

    $data = QueryList::Query($html, $rules)->data;
    print_r($data);

    // No such row on this page (or no IP in it): nothing worth storing.
    if (empty($data[0]) || empty($data[0]['ip'])) {
        return;
    }

    // Scraped text is untrusted: escape every value before it reaches SQL.
    $columns = array('ip', 'port', 'area', 'anonymous', 'type', 'speed', 'chtime', 'yztime');
    $values = array();
    foreach ($columns as $column) {
        $value = isset($data[0][$column]) ? $data[0][$column] : '';
        $values[] = "'" . $db->escape($value) . "'";
    }

    $db->query(
        "INSERT INTO ip (ip, port,area,anonymous,type,speed,chtime,yztime) VALUES ("
        . implode(',', $values)
        . ")"
    );
}
结果:
完整项目下载:https://files.cnblogs.com/files/wordblog/spider2.rar