目录
http://blog.csdn.net/e421083458/article/details/21529969(安装教程)
http://www.coreseek.cn/docs/coreseek_4.1-sphinx_2.0.1-beta.html#installing-windows(Coreseek 4.1 参考手册) http://www.coreseek.cn/products-install/mysql/ (数据源配置:mysql数据源)
http://www.sphinxsearch.org/archives/80(Sphinx中文入门指南) http://www.csdn123.com/html/mycsdn20140110/78/78fff2cc1160f6dac1a461ec0d583727.html(错误)
http://www.oschina.net/question/876307_118989(增量索引)
http://www.osyunwei.com/archives/7496.html (初级配置)
http://wenku.baidu.com/link?url=9Nqf3Rz2VnKU1Omjhc9BhCA5dTH5UB6VoS7HtV3-CWCsWEHKMvqHcuH67n4ZmbN8mWa5q_nJGltOkwewdOzJzscanEZgulyLhE81Whi0KtO(扩展插件使用函数)
http://www.coreseek.cn/products-install/step_by_step/(多个字段搜索)
http://www.douban.com/note/332427344/ (配置文件)
一.什么是CoreSeek/Sphinx
Sphinx是一个在GPLv2下分发的全文检索引擎;Coreseek 是一个可供企业使用的、基于Sphinx(可独立于Sphinx原始版本运行)的中文全文检索引擎,按照GPLv2协议发行。商业使用(例如, 嵌入到其他程序中)需要联系我们以获得商业授权。
一般而言,Sphinx是一个独立的全文搜索引擎;而Coreseek是一个支持中文的全文搜索引擎,意图为其他应用提供高速、低空间占用、高相关度结果的中文全文搜索能力。CoreSeek/Sphinx可以非常容易的与SQL数据库和脚本语言集成。
应用程序可以通过三种不同的接口方式来与Sphinx搜索服务(searchd)通信: a) 通过原生的搜索 API (SphinxAPI), b) 通过Sphinx自身支持的MySQL网络协议 (使用命名为SphinxQL的SQL精简子集), 或者 c) 通过MySQL 服务端的存储插件引擎(SphinxSE)。当然, 还可以通过可以使用 a)、b)、c) 的应用程序来构建webservice来为其他应用程序提供通信
在Sphinx发行版本中提供的原生搜索API支持PHP、Python、Perl、Ruby和Java。搜索API非常轻量化,可以在几个小时之内移植到新的语言上。第三方API接口和插件提供了对Perl、C#、Haskell、Ruby-on-Rails支持,以及对其他可能的语言或者框架的支持。
从版本1.10-beta开始,Sphinx支持两种不同的索引后端:“磁盘(disk)”索引后端和“实时索引(realtime)”(RT)索引后端。磁盘索引支持在线全文索引重建,但是仅支持非文本(属性)数据的在线更新。RT实时索引在此基础上,又增加了在线的全文索引更新。在此之前的版本仅支持磁盘索引。
使用命名为数据源的接口,数据可以被加载到磁盘索引。当前系统内置MySQL和PostgreSQL以及ODBC兼容(MS SQL、Oracle等) 数据库数据源的支持,也支持从管道标准输入读取特定格式的XML数据。通过适当修改源代码,用户可以自行增加新的数据源驱动(例如:对其他类型的DBMS的原生支持)。在Coreseek发行的版本中,用户还可以使用Python脚本作为数据源来获取任何已知世界和未知世界的数据,这极大的扩展了数据源的来源。从1.10-beta版本开始的RT实时索引,只能使用MySQL接口通过SphinxQL来操作。
Sphinx 是SQL Phrase Index的缩写,但不幸的和CMU的Sphinx项目重名。
Coreseek (http://www.coreseek.cn/) 为Sphinx在中国地区的用户提供支持服务,如果您不希望纠缠于琐碎的技术细节,请直接联系我们。
本参考手册基于Sphinx 2.0.1-beta最新文档,可能存在潜在的翻译错误,如果您发现本文的翻译错误,请联系我们。
# MySQL data source configuration; details: http://www.coreseek.cn/products-install/mysql/
# Import var/test/documents.sql into the database first, then set the MySQL user, password and database below.
# Source definition: pulls rows of lms_goods from MySQL for the "goods" index.
source goods
{
    type = mysql
    sql_host = 192.168.199.130
    sql_user = testbao
    sql_pass = 123456
    sql_db = zt_lms
    sql_port = 3306

    # Pre-queries, issued in the order written before the main sql_query.
    sql_query_pre = SET NAMES utf8
    # Store the current highest goods id in the counter table; the main query below only reads rows up to it.
    sql_query_pre = REPLACE INTO lms_search_gcount SELECT 1,MAX(id) FROM lms_goods

    # NOTE(review): the later revision of this config in these notes filters on count_id rather than
    # counter_id - confirm the real column name of lms_search_gcount.
    sql_query = SELECT id,name,store_id,is_bonded,price,total_stock,surplus_stock,addtime,photo,status,sort FROM lms_goods WHERE id<=( SELECT max_goods_id FROM lms_search_gcount WHERE counter_id=1 )

    # The first column of sql_query (id) is the document ID and must be an integer.
    # It is therefore not declared as an attribute; the later revision of this config omits it as well.
    #sql_attr_uint = id
    sql_attr_uint = store_id # value read from SQL must be an integer
    sql_attr_uint = is_bonded
    sql_attr_uint = sort
    sql_attr_uint = status
    sql_attr_uint = total_stock
    sql_attr_uint = surplus_stock
    # Was "sql_attr_float = sql_attr_float", which names no column returned by sql_query;
    # price is the only column of the query left without a declaration.
    sql_attr_float = price
    sql_attr_timestamp = addtime # value read from SQL must be an integer; used as a time attribute

    # Columns declared with sql_field_string in this source.
    sql_field_string = name
    sql_field_string = photo

    sql_query_info_pre = SET NAMES utf8 # set the correct character set for command-line queries
    sql_query_info = SELECT * FROM lms_goods WHERE id=$id # command-line queries read the original row from the database
}
# Index definition
index goods
{
    # Name of the source block that feeds this index.
    source           = goods
    # Change to the absolute path actually in use, e.g. /usr/local/coreseek/var/...
    path             = /usr/local/coreseek/var/data/goods

    # Chinese word segmentation; details: http://www.coreseek.cn/products-install/coreseek_mmseg/
    charset_type     = zh_cn.utf-8
    # BSD/Linux setting; the path must end with "/".
    charset_dictpath = /usr/local/mmseg3/etc/
    # Windows setting; must end with "/", preferably an absolute path, e.g. C:/usr/local/coreseek/etc/...
    #charset_dictpath = etc/

    docinfo          = extern
    mlock            = 0
    morphology       = none
    min_word_len     = 1
    html_strip       = 0
}
# Source definition: pulls rows of lms_share from MySQL for the "share" index.
source share
{
    type = mysql
    sql_host = 192.168.199.130
    sql_user = testbao
    sql_pass = 123456
    sql_db = zt_lms
    sql_port = 3306

    # Pre-queries, issued in the order written before the main sql_query.
    # (The original listed SET NAMES utf8 twice; once is enough.)
    sql_query_pre = SET NAMES utf8
    # Store the current highest share_id in the counter table. The original used MAX(id), but the document
    # key of this source is share_id (first column of sql_query, and the key used by sql_query_info).
    sql_query_pre = REPLACE INTO lms_search_scount SELECT 1,MAX(share_id) FROM lms_share

    # Range filter now uses share_id to match the pre-query above.
    # NOTE(review): the later revision of this config in these notes reads max_share_id / count_id from
    # lms_search_scount instead of max_goods_id / counter_id - confirm the real column names.
    sql_query = SELECT share_id,user_id,class_id,share_title,cover_content,detail_content,status,picked_time,recommend_time,add_time,love_num,favorite_num FROM lms_share WHERE share_id<=( SELECT max_goods_id FROM lms_search_scount WHERE counter_id=1 )

    # share_id is the first column of sql_query, i.e. the document ID, so it is not declared as an
    # attribute; the later revision of this config comments it out as well.
    #sql_attr_uint = share_id
    sql_attr_uint = user_id
    sql_attr_uint = class_id
    sql_attr_uint = status
    sql_attr_uint = love_num
    sql_attr_uint = favorite_num
    sql_attr_timestamp = add_time # value read from SQL must be an integer; used as a time attribute
    sql_attr_timestamp = picked_time
    # NOTE(review): recommend_time is selected but has no declaration here - confirm whether it should be
    # a sql_attr_timestamp like the other *_time columns.

    # Columns declared with sql_field_string in this source.
    sql_field_string = share_title
    sql_field_string = cover_content
    sql_field_string = detail_content

    sql_query_info = SELECT * FROM lms_share WHERE share_id=$id
}
# Index definition for the "share" source.
index share
{
    # Name of the source block that feeds this index.
    source           = share
    path             = /usr/local/coreseek/var/data/share

    # Chinese word segmentation; details: http://www.coreseek.cn/products-install/coreseek_mmseg/
    charset_type     = zh_cn.utf-8
    # BSD/Linux setting; the path must end with "/".
    charset_dictpath = /usr/local/mmseg3/etc/
    # Windows setting; must end with "/", preferably an absolute path, e.g. C:/usr/local/coreseek/etc/...
    #charset_dictpath = etc/

    docinfo          = extern
    mlock            = 0
    morphology       = none
    min_word_len     = 1
    html_strip       = 0
}
# Global indexer settings
indexer
{
    # Memory limit given to the indexer.
    mem_limit = 128M
}
# searchd service definition
searchd
{
    # Files written by the daemon; change to the absolute paths actually in use, e.g. /usr/local/coreseek/var/...
    pid_file        = /usr/local/coreseek/var/log/searchd_mysql.pid
    log             = /usr/local/coreseek/var/log/searchd_mysql.log
    query_log       = /usr/local/coreseek/var/log/query_mysql.log
    binlog_path     = # empty value turns the binlog off

    # Listener and per-request limits
    listen          = 9312
    read_timeout    = 5
    max_children    = 30
    max_matches     = 1000

    # Index file handling
    seamless_rotate = 0
    preopen_indexes = 0
    unlink_old      = 1
}
增量索引
http://www.oschina.net/question/876307_118989(增量索引)
sphinx 联合查询 实例 (http://blog.51yip.com/mysql/1658.html)
记得以前sphinx是不支持联合查询的,第一次接触sphinx,好像是2016年初的时候,当时写过一篇关于sphinx安装的文章。
sphinx mmseg mysql 中文分词,下面举例说明,sphinx的联合查询用法。
一,添加二张测试表和数据
1,users表和数据
- mysql> desc users;
- +----------+-------------+------+-----+---------+----------------+
- | Field | Type | Null | Key | Default | Extra |
- +----------+-------------+------+-----+---------+----------------+
- | user_id | int(11) | NO | PRI | NULL | auto_increment |
- | username | varchar(20) | NO | | NULL | |
- +----------+-------------+------+-----+---------+----------------+
- 2 rows in set (0.00 sec)
- mysql> select * from users;
- +------------+------------+
- | user_id | username |
- +------------+------------+
- | 1311895262 | 张三 |
- | 1311895263 | tank张二 |
- | 1311895264 | tank张一 |
- | 1311895265 | tank张 |
- +------------+------------+
- 4 rows in set (0.00 sec)
2,orders表和数据
- mysql> desc orders;
- +--------------+-------------+------+-----+---------+----------------+
- | Field | Type | Null | Key | Default | Extra |
- +--------------+-------------+------+-----+---------+----------------+
- | id | int(11) | NO | PRI | NULL | auto_increment |
- | user_id | int(11) | NO | | NULL | |
- | create_time | datetime | NO | | NULL | |
- | product_name | varchar(20) | NO | | NULL | |
- | summary | text | NO | | NULL | |
- +--------------+-------------+------+-----+---------+----------------+
- 5 rows in set (0.00 sec)
- mysql> select * from orders;
- +----+------------+---------------------+----------------+--------------+
- | id | user_id | create_time | product_name | summary |
- +----+------------+---------------------+----------------+--------------+
- | 9 | 1311895262 | 2014-08-01 00:24:54 | tank is 坦克 | 技术总监 |
- | 10 | 1311895263 | 2014-08-01 00:24:54 | tank is 坦克 | 技术经理 |
- | 11 | 1311895264 | 2014-08-01 00:24:54 | tank is 坦克 | DNB经理 |
- | 12 | 1311895265 | 2014-08-01 00:24:54 | tank is 坦克 | 运维总监 |
- +----+------------+---------------------+----------------+--------------+
- 4 rows in set (0.00 sec)
二,配置sphinx.conf
- source order
- {
- type = mysql
- sql_host = localhost
- sql_user = root
- sql_pass =
- sql_db = test
- sql_query_pre = SET NAMES utf8
- sql_query = \
- SELECT a.id, a.user_id,b.username, UNIX_TIMESTAMP(a.create_time) AS create_time, a.product_name, a.summary \
- FROM orders a left join users b on a.user_id = b.user_id
- sql_attr_uint = user_id
- sql_field_string = username
- sql_field_string = product_name
- sql_attr_timestamp = create_time
- sql_ranged_throttle = 0
- sql_query_info = SELECT * FROM orders WHERE id=$id
- }
- index myorder
- {
- source = order
- path = /usr/local/sphinx/var/data/myorder
- docinfo = extern
- mlock = 0
- morphology = none
- min_word_len = 1
- charset_dictpath = /usr/local/mmseg3/etc/
- charset_type = zh_cn.utf-8
- ngram_len = 0
- html_strip = 0
- }
注意:在这里a.user_id = b.user_id,等号二边一定要有空格,不然就会报错。
三,重启sphinx
- # pkill searchd
- # /usr/local/sphinx/bin/indexer --config /usr/local/sphinx/etc/sphinx.conf --all
- # /usr/local/sphinx/bin/searchd --config /usr/local/sphinx/etc/sphinx.conf
四,测试sphinx
- [root@localhost etc]# mysql -h 127.0.0.1 -P 9306 //登录sphinx,9306端口,不是真实的mysql
- Welcome to the MySQL monitor. Commands end with ; or \g.
- Your MySQL connection id is 1
- Server version: 1.11-id64-dev (r2540)
- Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
- Oracle is a registered trademark of Oracle Corporation and/or its
- affiliates. Other names may be trademarks of their respective
- owners.
- Type 'help;' or '\h' for help. Type '\c' to clear the current input statement.
- mysql> select * from myorder where match('张');
- +------+--------+------------+------------+-------------+----------------+
- | id | weight | user_id | username | create_time | product_name |
- +------+--------+------------+------------+-------------+----------------+
- | 9 | 1304 | 1311895262 | 张三 | 1406823894 | tank is 坦克 |
- | 10 | 1304 | 1311895263 | tank张二 | 1406823894 | tank is 坦克 |
- | 11 | 1304 | 1311895264 | tank张一 | 1406823894 | tank is 坦克 |
- | 12 | 1304 | 1311895265 | tank张 | 1406823894 | tank is 坦克 |
- +------+--------+------------+------------+-------------+----------------+
- 4 rows in set (0.01 sec)
- mysql> select * from myorder where match('张三');
- +------+--------+------------+----------+-------------+----------------+
- | id | weight | user_id | username | create_time | product_name |
- +------+--------+------------+----------+-------------+----------------+
- | 9 | 2500 | 1311895262 | 张三 | 1406823894 | tank is 坦克 |
- +------+--------+------------+----------+-------------+----------------+
- 1 row in set (0.00 sec)
php 使用
// Search the "share" Sphinx index for $keyword and collect the attributes of every match.
// Reads $keyword, $sort, $offset and $page_size from the enclosing scope and uses the
// Sphinx client held in $this->sphinx. Returns an empty array for a blank keyword, otherwise
// array('total' => ..., 'list' => array of attribute rows, each with share_id added).
empty($keyword) && newAppReturn('',0,'请输入商品随笔关键字~',0);
// Fall back to the default ordering when the caller supplied none.
if (empty($sort)) {
    $sort = "picked_time DESC";
}
$shareList = array();
$keyword = trim($keyword);
if (!$keyword) return $shareList;
$indexName = 'share'; // index name
$this->sphinx->SetConnectTimeout(3); // connect timeout
$this->sphinx->SetMaxQueryTime(2000); // maximum query time, in milliseconds
$this->sphinx->SetSortMode(SPH_SORT_EXTENDED, $sort); // ordering
$this->sphinx->SetFilter('status', array(1)); // filter: status must be 1
$this->sphinx->setMatchMode(SPH_MATCH_PHRASE); // matching mode
$this->sphinx->SetLimits($offset,$page_size, 1000); // paging window and match cap
// An error recorded while configuring the client above.
$err = $this->sphinx->GetLastError();
if($err){
    newAppReturn('',0,'服务器配置出错~',0);
}
$tmpList = $this->sphinx->Query ($keyword, $indexName ); // e.g. Query('@content (测试) & @toid =1', '*');
// Query() yields false when the search itself failed. Previously this case fell through to the
// "no results" branch below and was reported with a success status.
if ($tmpList === false)
{
    newAppReturn('',0,'服务器配置出错~',0);
    return $shareList; // only reached if newAppReturn() does not end the request
}
if ($tmpList['total'] <= 0)
{
    newAppReturn('',1,'暂时没找到您想要的内容 换个搜索词试试~',0);
}
$searchList = array(
    'total' => $tmpList['total'],
    'list'  => array(),
);
// 'matches' may be missing (for example a page past the last result), so default to an empty list.
$matches = isset($tmpList['matches']) && is_array($tmpList['matches']) ? $tmpList['matches'] : array();
foreach ( $matches as $docId => $match )
{
    // The match key is the document id, which is the share_id of the row.
    $match['attrs']['share_id'] = $docId;
    $searchList['list'][] = $match['attrs'];
}
unset($tmpList, $matches);
return $searchList;
Sphinx使用手册(PHP版)
http://wenku.baidu.com/view/9107450e4a7302768e9939f1.html?from=search
我的配置
#MySQL数据源配置,详情请查看:http://www.coreseek.cn/products-install/mysql/
#请先将var/test/documents.sql导入数据库,并配置好以下的MySQL用户密码数据库
# Source definition: pulls rows of lms_goods from MySQL for the "goods" index.
source goods
{
type = mysql
# NOTE(review): host, user and password are written here in plain text - if these are live
# credentials, rotate them and keep them out of shared notes.
sql_host = 120.24.240.237
sql_user = yt518
sql_pass = Yt@518
sql_db = zt_lms
sql_port = 3306
# Pre-queries, issued in the order written before the main sql_query.
sql_query_pre = SET NAMES utf8
# Store the current highest goods id in the counter table; the main query only reads rows up to it.
sql_query_pre = REPLACE INTO lms_search_gcount SELECT 1,MAX(id) FROM lms_goods
sql_query = SELECT id,name,store_id,is_bonded,price,total_stock,surplus_stock,addtime,photo,status,sort FROM lms_goods WHERE id<=( SELECT max_goods_id FROM lms_search_gcount WHERE count_id=1 )
# The first column of sql_query (id) must be an integer.
# Disabled: price is declared as sql_field_string further down instead of as a float attribute.
#sql_attr_float = price
sql_attr_uint = store_id # value read from SQL must be an integer
sql_attr_uint = is_bonded
sql_attr_uint = sort
sql_attr_uint = status
sql_attr_uint = total_stock
sql_attr_uint = surplus_stock
#sql_attr_float = price
sql_attr_timestamp = addtime # value read from SQL must be an integer; used as a time attribute
# Columns declared with sql_field_string in this source.
sql_field_string = name
sql_field_string = photo
sql_field_string = price
sql_query_info_pre = SET NAMES utf8 # set the correct character set for command-line queries
sql_query_info = SELECT * FROM lms_goods WHERE id=$id # command-line queries read the original row from the database
}
# Index definition
index goods
{
    # Name of the source block that feeds this index.
    source           = goods
    # Change to the absolute path actually in use, e.g. /usr/local/coreseek/var/...
    path             = /usr/local/coreseek/var/data/goods

    # Chinese word segmentation; details: http://www.coreseek.cn/products-install/coreseek_mmseg/
    charset_type     = zh_cn.utf-8
    # BSD/Linux setting; the path must end with "/".
    charset_dictpath = /usr/local/mmseg3/etc/
    # Windows setting; must end with "/", preferably an absolute path, e.g. C:/usr/local/coreseek/etc/...
    #charset_dictpath = etc/

    # Keyword length, infix and wildcard settings
    min_word_len     = 1
    min_infix_len    = 1
    enable_star      = 1
    expand_keywords  = 1

    docinfo          = extern
    mlock            = 0
    morphology       = none
    html_strip       = 0
}
# Source definition: pulls rows of lms_share from MySQL for the "share" index.
source share
{
type = mysql
# NOTE(review): host, user and password are written here in plain text - if these are live
# credentials, rotate them and keep them out of shared notes.
sql_host = 120.24.240.237
sql_user = yt518
sql_pass = Yt@518
sql_db = zt_lms
sql_port = 3306
# Pre-queries, issued in the order written before the main sql_query.
# NOTE(review): SET NAMES utf8 appears twice; the second line looks redundant.
sql_query_pre = SET NAMES utf8
sql_query_pre = SET NAMES utf8
# Store the current highest share_id in the counter table; the main query only reads rows up to it.
sql_query_pre = REPLACE INTO lms_search_scount SELECT 1,MAX(share_id) FROM lms_share
sql_query = SELECT share_id,user_id,class_id,share_title,cover_content,detail_content,status,picked_time,recommend_time,add_time,love_num,favorite_num,comment_num FROM lms_share WHERE share_id<=( SELECT max_share_id FROM lms_search_scount WHERE count_id=1 )
# Disabled: share_id is the first column of sql_query and is left without an attribute declaration.
#sql_attr_uint = share_id
sql_attr_uint = user_id
sql_attr_uint = class_id
sql_attr_uint = status
sql_attr_uint = love_num
# Disabled: picked_time is declared as sql_attr_timestamp below instead.
#sql_attr_uint = picked_time
sql_attr_uint = comment_num
# NOTE(review): favorite_num and recommend_time are selected above but have no declaration in this
# block - confirm whether they should be attributes (the earlier revision declared favorite_num as uint).
sql_attr_timestamp = add_time # value read from SQL must be an integer; used as a time attribute
sql_attr_timestamp = picked_time
# Columns declared with sql_field_string in this source.
sql_field_string = share_title
sql_field_string = cover_content
sql_field_string = detail_content
sql_query_info = SELECT * FROM lms_share WHERE share_id=$id
}
# Index definition for the "share" source.
index share
{
    # Name of the source block that feeds this index.
    source           = share
    path             = /usr/local/coreseek/var/data/share

    # Chinese word segmentation; details: http://www.coreseek.cn/products-install/coreseek_mmseg/
    charset_type     = zh_cn.utf-8
    # BSD/Linux setting; the path must end with "/".
    charset_dictpath = /usr/local/mmseg3/etc/
    # Windows setting; must end with "/", preferably an absolute path, e.g. C:/usr/local/coreseek/etc/...
    #charset_dictpath = etc/

    # Keyword length and infix settings
    min_word_len     = 1
    min_infix_len    = 1

    docinfo          = extern
    mlock            = 0
    morphology       = none
    html_strip       = 0
}
# Global indexer settings
indexer
{
    # Memory limit given to the indexer.
    mem_limit = 128M
}
# searchd service definition
searchd
{
    # Files written by the daemon; change to the absolute paths actually in use, e.g. /usr/local/coreseek/var/...
    pid_file        = /usr/local/coreseek/var/log/searchd_mysql.pid
    log             = /usr/local/coreseek/var/log/searchd_mysql.log
    query_log       = /usr/local/coreseek/var/log/query_mysql.log
    binlog_path     = # empty value turns the binlog off

    # Listener and per-request limits
    listen          = 9312
    read_timeout    = 5
    max_children    = 30
    max_matches     = 1000

    # Index file handling
    seamless_rotate = 0
    preopen_indexes = 0
    unlink_old      = 1
}