sphinx的coreseek4.0中文分词的安装

时间:2022-07-25 15:55:13

远程访问的php代码,一定要保存文本为utf-8,不然查询不了中文

文件-》另存为 格式 utf-8 

1.yum install glibc make gcc g++ gcc-c++ libtool autoconf automake imake mysql-devel libxml2-devel expat-devel

 

2.
运行locale保证如下设置
LANG=en_US.UTF-8
LC_ALL="en_US.UTF-8"

 

3.
先清除以往autoconf
yum erase autoconf 
 
 
卸载后会发现libtool也被卸载了,会出现如下错误
configure.in:26: error: possibly undefined macro: AM_PROG_LIBTOOL
     If this token and others are legitimate, please use m4_pattern_allow.
     See the Autoconf documentation.
解决办法:     
wget http://mirror.bjtu.edu.cn/gnu/libtool/libtool-2.4.2.tar.gz

tar -xvzf libtool-2.4.2.tar.gz
cd libtool-2.4.2
./configure
make
make install


4.

wget http://ftp.gnu.org/gnu/autoconf/autoconf-2.64.tar.gz
tar xzvf autoconf-2.64.tar.gz
cd autoconf-2.64
./configure
make
make install

wget http://ftp.gnu.org/gnu/automake/automake-1.11.2.tar.gz

tar xzvf automake-1.11.2.tar.gz
cd automake-1.11.2
./configure
make
make install

 建议查看官网教程http://www.coreseek.cn/products-install/install_on_bsd_linux/

5.
下载coreseek
wget http://www.coreseek.cn/uploads/csft/3.2/coreseek-3.2.14.tar.gz
tar zxvf coreseek-3.2.14.tar.gz
cd coreseek-3.2.14

 

6.
安装mmseg
cd mmseg-3.2.14
./bootstrap   #输出的warning信息可以忽略,如果出现error则需要解决
./configure --prefix=/usr/local/mmseg
make && make install

 

7.
安装coreseek
cd ../csft-3.2.14
sh buildconf.sh
./configure --prefix=/usr/local/coreseek --without-unixodbc --with-mmseg --with-mmseg-includes=/usr/local/mmseg/include/mmseg/ --with-mmseg-libs=/usr/local/mmseg/lib/ --with-mysql

此时先修改文件
vim src/Makefile

LIBS = -ldl -lm -lz -lexpat  -L/usr/local/lib -lrt  -lpthread
变成
LIBS = -ldl -lm -lz -lexpat -liconv -L/usr/local/lib -lrt  -lpthread

make && make install

 

8.
##测试mmseg分词,coreseek搜索(需要预先设置好字符集为zh_CN.UTF-8,确保正确显示中文)
cd ../testpack
cat var/test/test.xml   #此时应该正确显示中文
/usr/local/mmseg/bin/mmseg -d /usr/local/mmseg/etc/ /home/software/server/coreseek-4.1-beta/mmseg-3.2.14/src/t1.txt
/usr/local/mmseg/bin/mmseg -d /usr/local/mmseg/etc/ /home/software/server/coreseek-4.1-beta/testpack/var/test/test.xml
/usr/local/coreseek/bin/indexer -c /usr/local/coreseek/etc/sphinx-min.conf.dist
csft-4.0 版显示:ERROR: nothing to do.

若发现错误
 error while loading shared libraries: libiconv.so.2: cannot open shared object file: No such file or directory
解决方式
cd /etc
ln -s /usr/local/lib/libiconv.so.2 /usr/lib/libiconv.so.2
ldconfig

 

9.
配置mysql,导入测试数据
create database test;
mysql -uroot -proot test </home/software/server/coreseek-4.1-beta/testpack/var/test/documents.sql
quit

cp /home/software/server/coreseek-4.1-beta/testpack/etc/csft_mysql.conf /usr/local/coreseek/etc/

 

10.
mkdir /usr/local/coreseek/var/data/
配置csft_mysql.conf文件
vim /usr/local/coreseek/etc/csft_mysql.conf

source mysql
{
    type                = mysql

    sql_host                = localhost
    sql_user                = root
    sql_pass                = 
    sql_db                = test
    sql_port                = 3306
    sql_query_pre            = SET NAMES utf8

    sql_query                = SELECT * FROM keyword
                                            #sql_query第一列id需为整数
                                            #title、content作为字符串/文本字段,被全文索引
    #sql_attr_uint            = group_id            #从SQL读取到的值必须为整数
    #sql_attr_timestamp        = date_added      #从SQL读取到的值必须为整数,作为时间属性

    #sql_query_info_pre      = SET NAMES utf8                                        #命令行查询时,设置正确的字符集,3.2.14开始支持
    #sql_query_info            = SELECT * FROM documents WHERE id=$id  #命令行查询时,从数据库读取原始数据信息
}

#index定义
index mysql
{
    source            = mysql             #对应的source名称
    path            = /usr/local/coreseek/var/data/mysql    #使用绝对路径,避免因当前工作目录不同而找不到索引文件
    docinfo            = extern
    mlock            = 0
    morphology        = none
    min_word_len        = 1
    html_strip                = 0
    charset_dictpath = /usr/local/mmseg/etc/    #BSD、Linux环境下设置,/符号结尾(与第6步mmseg的--prefix保持一致)
    #charset_dictpath = etc/                        #Windows环境下设置,/符号结尾
    charset_type        = zh_cn.utf-8
}
#############################################################################
## searchd settings
#############################################################################

searchd
{
	# hostname, port, or hostname:port, or /unix/socket/path to listen on
	# multi-value, multiple listen points are allowed
	# optional, default is 0.0.0.0:9312 (listen on all interfaces, port 9312)
	#
	# listen				= 127.0.0.1
	# listen				= 192.168.0.1:9312
	# listen				= 9312
	# listen				= /var/run/searchd.sock


	# log file, searchd run info is logged here
	# optional, default is 'searchd.log'
	log					= /usr/local/coreseek/var/log/searchd.log

	# query log file, all search queries are logged here
	# optional, default is empty (do not log queries)
	query_log			= /usr/local/coreseek/var/log/query.log

	# client read timeout, seconds
	# optional, default is 5
	read_timeout		= 5

	# request timeout, seconds
	# optional, default is 5 minutes
	client_timeout		= 300

	# maximum amount of children to fork (concurrent searches to run)
	# optional, default is 0 (unlimited)
	max_children		= 30

	# PID file, searchd process ID file name
	# mandatory
	pid_file			= /usr/local/coreseek/var/log/searchd.pid

	# max amount of matches the daemon ever keeps in RAM, per-index
	# WARNING, THERE'S ALSO PER-QUERY LIMIT, SEE SetLimits() API CALL
	# default is 1000 (just like Google)
	max_matches			= 1000

	# seamless rotate, prevents rotate stalls if precaching huge datasets
	# optional, default is 1
	seamless_rotate		= 1

	# whether to forcibly preopen all indexes on startup
	# optional, default is 0 (do not preopen)
	preopen_indexes		= 0

	# whether to unlink .old index copies on successful rotation.
	# optional, default is 1 (do unlink)
	unlink_old			= 1

	# attribute updates periodic flush timeout, seconds
	# updates will be automatically dumped to disk this frequently
	# optional, default is 0 (disable periodic flush)
	#
	# attr_flush_period	= 900


	# instance-wide ondisk_dict defaults (per-index value take precedence)
	# optional, default is 0 (precache all dictionaries in RAM)
	#
	# ondisk_dict_default	= 1


	# MVA updates pool size
	# shared between all instances of searchd, disables attr flushes!
	# optional, default size is 1M
	mva_updates_pool	= 1M

	# max allowed network packet size
	# limits both query packets from clients, and responses from agents
	# optional, default size is 8M
	max_packet_size		= 8M

	# crash log path
	# searchd will (try to) log crashed query to 'crash_log_path.PID' file
	# optional, default is empty (do not create crash logs)
	#
	# crash_log_path		= /usr/local/coreseek/var/log/crash


	# max allowed per-query filter count
	# optional, default is 256
	max_filters			= 256

	# max allowed per-filter values count
	# optional, default is 4096
	max_filter_values	= 4096


	# socket listen queue length
	# optional, default is 5
	#
	# listen_backlog		= 5


	# per-keyword read buffer size
	# optional, default is 256K
	#
	# read_buffer			= 256K


	# unhinted read size (currently used when reading hits)
	# optional, default is 32K
	#
	# read_unhinted		= 32K
}


   
pid_file = /usr/local/coreseek/var/log/searchd_mysql.pid
log = /usr/local/coreseek/var/log/searchd_mysql.log
query_log = /usr/local/coreseek/var/log/query_mysql.log

 

11.启动sphinx搜索
/usr/local/coreseek/bin/indexer -c /usr/local/coreseek/etc/csft_mysql.conf --all
/usr/local/coreseek/bin/searchd -c /usr/local/coreseek/etc/csft_mysql.conf

如果报错
WARNING: index 'mysql': preload: failed to open var/data/mysql.sph: No such file or directory; NOT SERVING
FATAL: no valid indexes to serve
解决
新建立索引
/usr/local/coreseek/bin/indexer -c /usr/local/coreseek/etc/csft_mysql.conf --all

更改
mysql的问题:sock文件路径问题,需要在my.cnf中将socket改为 /var/lib/mysql/mysql.sock
vim /etc/my.cnf
#socket        = /tmp/mysql.sock
socket         = /var/lib/mysql/mysql.sock


防火墙问题  关闭防火墙


12.
启动
/usr/local/coreseek/bin/indexer -c /usr/local/coreseek/etc/csft_mysql.conf --all
/usr/local/coreseek/bin/searchd -c /usr/local/coreseek/etc/csft_mysql.conf


13.保证跨机器访问,关闭防火墙
service iptables stop

----------------------------------------------------------------------

#启动coreseek
/usr/local/coreseek/bin/searchd -c /usr/local/coreseek/etc/csft.conf

#创建
/usr/local/coreseek/bin/indexer --rotate --all

/usr/local/coreseek/bin/indexer main --rotate >>/usr/local/coreseek/var/log/merge.log

/usr/local/coreseek/bin/indexer delta --rotate >>/usr/local/coreseek/var/log/delta.log


#停止coreseek
/usr/local/coreseek/bin/searchd -c /usr/local/coreseek/etc/csft.conf --stop