#
# LinuxTone full index search configure file
#
# Layout:
#   source/index lt_posts - main index over the first post of every thread
#   source/index delta    - delta index over recently created posts only
# The delta source and index inherit from the main ones and override only
# the settings that differ, so the two definitions cannot drift apart.
#

source lt_posts
{
    type                = mysql
    sql_host            = 127.0.0.1
    sql_user            = root
    sql_pass            =
    sql_db              = lt_bbs
    # optional, default is 3306
    sql_port            = 3306

    sql_query_pre       = SET NAMES utf8

    # The index is built from the posts table so that the values of the
    # subject, message and author columns can all be searched at once.
    # The first selected column (pid) is the document id.
    sql_query           = SELECT pid,tid,fid,dateline,subject,message,author FROM cdb_posts where first=1

    sql_attr_uint       = fid
    sql_attr_timestamp  = dateline

    sql_query_info      = SELECT * FROM cdb_posts WHERE pid=$id
}

index lt_posts
{
    source              = lt_posts
    path                = /data/sphinx/data/lt_posts
    docinfo             = extern
    mlock               = 0
    morphology          = none
    min_word_len        = 2
    html_strip          = 1
    charset_dictpath    = /usr/local/mmseg-3.2.13/etc/
    charset_type        = zh_cn.utf-8
    ngram_len           = 0
}

########## Delta index ##################

# Inherits every setting of lt_posts (connection, sql_query_pre, attributes,
# sql_query_info); only the main fetch query is replaced.
source delta : lt_posts
{
    # The delta index covers rows whose dateline is newer than the current
    # timestamp minus the chosen interval (here 3600*10 seconds = 10 hours).
    sql_query           = SELECT pid,tid,fid,dateline,subject,message,author FROM cdb_posts where first=1 and dateline > unix_timestamp()-3600*10
}

# Inherits every setting of index lt_posts; only the source and the on-disk
# path differ.
index delta : lt_posts
{
    source              = delta
    path                = /data/sphinx/data/lt_delta
}

indexer
{
    mem_limit           = 32M
}

searchd
{
    port                = 9312
    log                 = /data/sphinx/var/log/searchd.log
    query_log           = /data/sphinx/var/log/query.log
    read_timeout        = 5
    max_children        = 30
    pid_file            = /data/sphinx/var/log/searchd.pid
    max_matches         = 10000
    seamless_rotate     = 1
    preopen_indexes     = 0
    unlink_old          = 1
}
Sphinx 最核心的部分就是上面这个配置文件。至于增量索引,可以把重建增量索引的命令写成一个脚本,放到 crontab 里定时执行。
下面介绍 Sphinx 的 PHP 调用部分。Sphinx 的 PHP 接口以 PHP 扩展的形式提供,可以通过 pecl 命令安装(pecl install sphinx),也可以从 http://pecl.php.net/package/sphinx 下载源码后编译安装。
<?php
/**
 * LinuxTone full-text search service.
 *
 * Reads the search phrase (`q`) and the page number (`page`) from the query
 * string, asks searchd for the matching post ids, loads those posts from
 * cdb_posts, builds excerpts for the subject and message of each post and
 * finally renders the "lt_search" template.
 *
 * Variables left in scope for the template: $q, $page, $data, $title,
 * $content, $query_totals, $query_time and (when there are results)
 * $url and $multipage.
 */
define('IN_DISCUZ', TRUE);
require_once './include/common.inc.php';

// Search phrase: strip tags, then drop angle brackets, single quotes and commas.
// NOTE(review): the third search/replace pair maps a space to a space, which
// is a no-op as written - possibly a different whitespace character was
// intended; confirm against the deployed file.
$q = isset($_GET['q']) && !empty($_GET['q']) ? $_GET['q'] : '';
$q = str_replace(array('<','>',' ','\'',','),array('','',' ','',''),strip_tags($q));

// Pagination: pages are 1-based; anything that is not a positive integer means page 1.
$page   = isset($_GET['page']) && intval($_GET['page'])>0 ? intval($_GET['page']) : 1;
$perNum = 20;
$offset = ($page - 1) * $perNum;

// Sphinx client configuration.
$search = new SphinxClient();
$search->setServer('127.0.0.1',9312);
$search->setConnectTimeout(2);
$search->setArrayResult(true);
$search->setMatchMode(SPH_MATCH_ANY);
$search->setRankingMode(SPH_RANK_PROXIMITY_BM25);
$search->setSortMode(SPH_SORT_EXTENDED,'@relevance desc,@weight desc');
$search->setLimits($offset,$perNum);
$search->setFieldWeights(array('subject'=>2000,'message'=>0));

$rs = array();
$pages = 0;
$query_totals = $query_time = 0;
if(!empty($q)){
    $rs = $search->Query($q,"*");
    if($rs === false){
        // Query() reports failure with false; treat that as "no results"
        // instead of indexing into a boolean below.
        $rs = array();
    }else{
        $pages        = ceil($rs['total']/$perNum);
        $query_totals = $rs['total_found'];
        $query_time   = $rs['time'];
    }
}

$data = $title = $content = array();
if(!empty($rs) && !empty($rs['matches']) && $page <= $pages){
    // Collect the matched document ids (post pids) in ranking order.
    $pids = array();
    foreach($rs['matches'] as $v){
        // Cast to int: the ids are interpolated into the SQL text below.
        $pids[] = intval($v['id']);
    }
    $pid = implode(',',$pids);

    // ORDER BY FIELD() keeps the rows in the order searchd ranked them;
    // a bare IN() list would return them in whatever order MySQL chooses.
    $sql = "select pid,tid,author,authorid,subject,message,dateline from cdb_posts where pid IN($pid) and status ='0' and invisible='0' order by field(pid,$pid)";
    $query = $db->query($sql);
    while($row = $db->fetch_array($query)){
        $data[]    = $row;
        $title[]   = $row['subject'];
        // Strip BBCode tags one at a time. [^\]]* stops at the tag's own
        // closing bracket, so the text between two tags is preserved.
        $content[] = preg_replace('/\[[\/]?(b|img|url|color|s|hr|p|list|i|align|email|u|font|code|hide|table|tr|td|th|attach|list|indent|float)[^\]]*\]/','',strip_tags($row['message']));
    }

    if(!empty($data)){
        // Keyword highlighting.
        // NOTE(review): both markers are empty strings, so matched words are
        // not wrapped in anything - the markup may have been lost; confirm.
        $opts = array();
        $opts['before_match'] = '';
        $opts['after_match'] = '';

        // BuildExcerpts() returns false on failure; in that case keep the
        // plain subject / cleaned message rather than blanking them out.
        $titleExcerpts = $search->BuildExcerpts($title,'lt_posts',$q,$opts);
        if(is_array($titleExcerpts)){
            $title = $titleExcerpts;
        }
        $contentExcerpts = $search->BuildExcerpts($content,'lt_posts',$q,$opts);
        if(is_array($contentExcerpts)){
            $content = $contentExcerpts;
        }

        foreach($data as $k=>$v){
            $data[$k]['subject'] = $title[$k];
            $data[$k]['message'] = $content[$k];
        }
    }

    $url = "s.php?q=".urlencode($q);
    $multipage = multi($rs['total'], $perNum, $page, $url);
}

include template("lt_search");
?>
跑主索引的shell脚本search-index.sh
#!/bin/bash
#
# The BBS search exec full index
#
# Rebuilds the full lt_posts index with --rotate and appends the indexer
# output to an hourly log file under /data/sphinx/var/.
# stderr is redirected as well so that indexer errors end up in the log
# instead of being lost when the script runs unattended.
#
/usr/local/csft-3.2.13/bin/indexer -c /usr/local/csft-3.2.13/etc/lt_posts.conf --rotate lt_posts >> "/data/sphinx/var/$(date '+%Y-%m-%d-%H').log" 2>&1
跑增量索引的shell脚本search-delta.sh
#!/bin/bash
#
# The BBS search exec delta index
#
# Rebuilds the delta index with --rotate. Output (stdout and stderr) is
# appended to an hourly log file under /data/sphinx/var/ so that unattended
# runs leave a trace.
#
# Build the delta index
/usr/local/csft-3.2.13/bin/indexer -c /usr/local/csft-3.2.13/etc/lt_posts.conf --rotate delta >> "/data/sphinx/var/$(date '+%Y-%m-%d-%H').log" 2>&1
# Merge the main index and the delta index (disabled; uncomment to enable)
#/usr/local/csft-3.2.13/bin/indexer --config /usr/local/csft-3.2.13/etc/lt_posts.conf --rotate --merge lt_posts delta