PHP 检测机器人,屏蔽内页

时间:2024-12-10 08:34:44

PHP 检测机器人,屏蔽内页

<?php
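// Usage examples (call before any output is sent):
//   SpiderHelper::rewrite301();    // 301-redirect crawlers to "/"
//   SpiderHelper::showRobotTxt();  // answer crawlers with a disallow-all robots.txt body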
/**
 * Helpers for detecting search-engine crawlers by User-Agent and keeping
 * them away from inner pages (redirecting them or serving a disallow-all
 * robots.txt body).
 */
class SpiderHelper
{
    /**
     * If the current request comes from a crawler, send a permanent (301)
     * redirect to the given location and terminate the script.
     *
     * Requests that isSpider() does not classify as a crawler are left untouched.
     *
     * @param string $location Value for the Location header.
     * @return void
     */
    public static function rewrite301($location = '/')
    {
        if (self::isSpider()) {
            // Passing the status code to header() lets PHP emit the correct
            // status line for whatever protocol/SAPI is in use, instead of
            // hard-coding "HTTP/1.1".
            header('Location: ' . $location, true, 301);
            exit();
        }
    }

    /**
     * If the current request comes from a crawler, output robots.txt rules
     * that disallow everything and terminate the script.
     *
     * @return void
     */
    public static function showRobotTxt()
    {
        if (self::isSpider()) {
            // robots.txt content is plain text; without this PHP would
            // label the response with its default content type.
            if (!headers_sent()) {
                header('Content-Type: text/plain; charset=utf-8');
            }
            echo "User-agent: *\r\n" .
                "Disallow: /";
            exit();
        }
    }

    /**
     * Check whether a User-Agent string looks like a crawler.
     *
     * Heuristic: a User-Agent that contains a browser keyword and no
     * http:// or https:// URL is treated as a regular browser; otherwise
     * it is a crawler when it contains one of the crawler keywords.
     *
     * @param string $userAgent User-Agent to inspect; when empty, the
     *                          current request's HTTP_USER_AGENT is used
     *                          (an absent header is treated as an empty string).
     * @return boolean True when the User-Agent is classified as a crawler.
     */
    public static function isSpider($userAgent = '')
    {
        static $kwSpiders = array('bot', 'crawl', 'spider', 'slurp', 'sohu-search', 'lycos', 'robozilla');
        static $kwBrowsers = array('msie', 'netscape', 'opera', 'konqueror', 'mozilla');

        if (empty($userAgent)) {
            // The header may be missing entirely (CLI, some HTTP clients).
            $userAgent = isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : '';
        }
        $userAgent = strtolower((string) $userAgent);

        // Crawlers that imitate a browser usually advertise an info URL;
        // accept both http:// and https:// so such crawlers are not
        // mistaken for browsers.
        $hasUrl = strpos($userAgent, 'http://') !== false
            || strpos($userAgent, 'https://') !== false;

        if (!$hasUrl && self::checkStrpos($userAgent, $kwBrowsers)) {
            // Browser keyword and no URL: regular browser.
            return false;
        }

        // Crawler keyword present: crawler.
        return self::checkStrpos($userAgent, $kwSpiders) !== false;
    }

    /**
     * Check whether a string contains any keyword from a list.
     *
     * Note the parameter names are inverted relative to strpos(): $needle
     * is the text being scanned and $haystack holds the keywords.
     *
     * @param string       $needle      Text to scan.
     * @param array|string $haystack    Keyword(s) to look for (cast to array).
     * @param boolean      $returnValue When true, return the first matching
     *                                  keyword instead of true.
     * @return boolean|string False when $needle is empty or nothing matches;
     *                        otherwise true, or the matched keyword when
     *                        $returnValue is true.
     */
    private static function checkStrpos($needle, $haystack, $returnValue = false)
    {
        if (empty($needle)) {
            return false;
        }
        foreach ((array) $haystack as $keyword) {
            if (strpos($needle, $keyword) !== false) {
                return $returnValue ? $keyword : true;
            }
        }
        return false;
    }
}