require('pscws4.class.php');
require('../class/sjk.php');
/**
 * Extract the plain text from an HTML fragment.
 *
 * Processing steps, in order:
 *   1. remove <style>...</style>, <script>...</script> and <!-- ... --> spans;
 *   2. drop every remaining tag, emitting a single space where each tag opens;
 *   3. discard bytes with a value of 31 or below (control characters, newlines, tabs);
 *   4. turn full-width (ideographic, U+3000) spaces into ordinary spaces;
 *   5. delete entity-like runs that start with "&" and end at the next ";" or "&";
 *   6. collapse consecutive spaces into one.
 *
 * The scan works on bytes. Bytes above 31 are copied through unchanged, so
 * multi-byte sequences are preserved as long as they are not split by a tag.
 *
 * @param string $str HTML markup to convert.
 * @return string The text content; leading/trailing single spaces are not trimmed.
 */
function SpHtml2Text($str){
    $str = preg_replace("/<sty(.*)\\/style>|<scr(.*)\\/script>|<!--(.*)-->/isU", "", $str);

    $alltext = "";
    $inText = true;            // false while the scanner is between "<" and ">"
    $length = strlen($str);    // hoisted: the string is not modified inside the loop
    for ($i = 0; $i < $length; $i++) {
        $ch = $str[$i];
        if (!$inText) {
            // Inside a tag: skip everything up to and including the closing ">".
            if ($ch == ">") {
                $inText = true;
            }
            continue;
        }
        if ($ch == "<") {
            // A tag starts here; leave a space so adjacent words do not fuse.
            $inText = false;
            $alltext .= " ";
        } elseif (ord($ch) > 31) {
            $alltext .= $ch;
        }
    }

    // The original call replaced " " with " " (a no-op as written). The evident
    // intent is to normalise the full-width space, spelled out here as UTF-8
    // byte escapes so the literal cannot be silently flattened again.
    $alltext = str_replace("\xE3\x80\x80", " ", $alltext);
    $alltext = preg_replace("/&([^;&]*)(;|&)/", "", $alltext);
    $alltext = preg_replace("/[ ]+/s", " ", $alltext);
    return $alltext;
}
/**
 * Extract keywords from a piece of text using the PSCWS4 word segmenter.
 *
 * Any HTML markup is stripped with SpHtml2Text() before the text is handed
 * to the segmenter, which is configured for UTF-8 input and loads its
 * dictionary and rules from the relative paths etc/dict.xdb and etc/rules.ini.
 *
 * @param string $title Text to analyse (may contain HTML).
 * @return string The top words (at most 4 are requested), each followed by a
 *                comma, e.g. "foo,bar,". An empty string when no word was
 *                returned. The trailing comma is kept because existing
 *                callers strip it themselves.
 */
function get_tags_arr($title){
    $pscws = new PSCWS4("utf8");
    $pscws->set_dict('etc/dict.xdb');
    $pscws->set_rule('etc/rules.ini');
    $pscws->set_ignore(true);
    $pscws->send_text(SpHtml2Text($title));
    $words = $pscws->get_tops(4); // ask for the 4 top-ranked words

    // Release the segmenter before returning. The original call was placed
    // after the return statement and therefore never executed.
    $pscws->close();

    // Start from an empty string so that "no words" yields "" instead of an
    // undefined variable.
    $param1 = "";
    if (is_array($words)) {
        foreach ($words as $val) {
            $param1 .= $val['word'] . ",";
        }
    }
    return $param1;
}
// Load the single row whose keywords are to be generated (the id is hard-coded).
// query() comes from the required project file; it is assumed to return a
// mysql result resource -- TODO confirm against ../class/sjk.php.
$sql = "SELECT * from ganxiang where id = 2";
$tt = query($sql);
$r = mysql_fetch_array($tt);
// while ($r = shuzu($sql)) {
if (!$r) {
    // No matching row (or the query failed): there is nothing to segment.
    echo '未找到需要更新的记录<br>';
} else {
    $id = $r["id"];
    $title = $r["gxbt"];
    $smalltext = $r["gxnr"];

    // Keywords are extracted from the title and the body text together.
    $titles = $title." ".$smalltext;
    $keywords1 = get_tags_arr($titles);
    if (empty($keywords1)) {
        // The segmenter produced nothing: fall back to the raw title.
        $keywords1 = $title;
    } else {
        // Drop the trailing comma that get_tags_arr() appends.
        $keywords1 = rtrim($keywords1, ',');
    }

    // $tbname is not defined anywhere in the visible code; use it when an
    // included file provides it, otherwise name the table that was queried.
    $tableLabel = isset($tbname) ? $tbname : 'ganxiang';

    // NOTE(review): database values are echoed without HTML escaping.
    echo '正在更新'.$tableLabel.'表中ID='.$id.'的值,更新的关键字为:'.$keywords1.'<br>';
    // $sql2 = "UPDATE {$dbtbpre}ecms_$tbname SET keyboard='$keywords1' WHERE id = $id";
    // $update = $empire->query("$sql2");
}
// }
运行网址如下:http://www.zhidaxue.com/guan/23.php
1 个解决方案
#1
PSCWS4 不会产生乱码数据,因为没有匹配到就不会有结果,
所以是你原始数据的问题。
#1
PSCWS4 不会产生乱码数据,因为没有匹配到就不会有结果,
所以是你原始数据的问题。