curl模拟ip和来源进行网站采集的实现方法

时间:2022-06-10 15:10:24

对于限制了ip和来源的网站,使用正常的采集方式是不行的。这里说我的一种方法吧,使用php的curl类实现模拟ip和来源,可以实现采集限制ip和来源的网站。

1.设置页面限制ip和来源访问
比如服务端的server.php

<?php

$client_ip = getip();
$referer = getreferer(); $allow_ip = '192.168.0.100';
$allow_referer = 'http://www.xxx.cn'; if($client_ip==$allow_ip && strpos($referer, $allow_referer)===0){
echo 'allow access';
}else{
echo 'deny access';
} // 获取访问者ip
function getip(){
if(!empty($_SERVER['HTTP_CLIENT_IP'])){
$cip = $_SERVER['HTTP_CLIENT_IP'];
}elseif(!empty($_SERVER['HTTP_X_FORWARDED_FOR'])){
$cip = $_SERVER['HTTP_X_FORWARDED_FOR'];
}elseif(!empty($_SERVER['REMOTE_ADDR'])){
$cip = $_SERVER['REMOTE_ADDR'];
}else{
$cip = '';
}
return $cip;
} // 获取访问者来源
function getreferer(){
if(isset($_SERVER['HTTP_REFERER'])){
return $_SERVER['HTTP_REFERER'];
}
return '';
} ?>

使用curl正常访问

<?php
function doCurl($url, $data=array(), $header=array(), $timeout=30){ $ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
curl_setopt($ch, CURLOPT_POST, true);
curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($data));
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_TIMEOUT, $timeout); $response = curl_exec($ch); if($error=curl_error($ch)){
die($error);
} curl_close($ch); return $response; } // 调用
$url = 'http://www.xxx.cn/server.php';
$response = doCurl($url); echo $response;
?>

使用curl模拟ip和来源进行访问

模拟来源

curl_setopt($ch, CURLOPT_REFERER, '来源');

模拟ip

curl_setopt($ch, CURLOPT_HTTPHEADER, array('CLIENT-IP: 模拟ip','X-FORWARDED-FOR: 模拟ip'));

完整代码如下:

<?php
function doCurl($url, $data=array(), $header=array(), $referer='', $timeout=30){ $ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
curl_setopt($ch, CURLOPT_POST, true);
curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($data));
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_TIMEOUT, $timeout); // 模拟来源
curl_setopt($ch, CURLOPT_REFERER, $referer); $response = curl_exec($ch); if($error=curl_error($ch)){
die($error);
} curl_close($ch); return $response; } // 调用
$url = 'http://www.example.cn/server.php';//外部采集网站
$data = array(); // 设置IP
$header = array(
'CLIENT-IP: 192.168.0.100',
'X-FORWARDED-FOR: 192.168.0.100'
); // 设置来源
$referer = 'http://www.xxx.cn/'; $response = doCurl($url, $data, $header, $referer, 5); echo $response;
?>