golang全文搜索--使用sphinx
不多废话,测试环境 `ubuntu 13.10`

## 安装

    sudo apt-get install sphinxsearch

## 配置

    nano /etc/sphinxsearch/sphinx.conf

    # 数据源配置
    source default {
        type = xmlpipe2
        xmlpipe_command = /path/xmlpipe2
        xmlpipe_fixup_utf8 = 1
    }

    # 索引配置
    index default {
        type = plain
        source = default
        # 索引文件路径
        path = /path/filename
        # 不存储文档信息
        docinfo = none
        #最小索引词长度
        min_word_len = 2
        charset_type = utf-8
        # 指定utf-8的编码表
        charset_table = 0..9, A..Z->a..z, _, a..z, U+410..U+42F->U+430..U+44F, U+430..U+44F
        # 简单分词,只支持0和1,如果要搜索中文,请指定为1
        ngram_len = 1
        # 需要分词的字符,如果要搜索中文,去掉前面的注释
        ngram_chars = U+3000..U+2FA1F
    }

## xmlpipe2的格式
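(原文示例中的 XML 标签在排版时丢失,以下按 xmlpipe2 的标准格式还原,其中文档 id 仅为示意)

    <?xml version="1.0" encoding="utf-8"?>
    <sphinx:docset>
        <sphinx:schema>
            <sphinx:field name="content"/>
        </sphinx:schema>
        <sphinx:document id="1">
            <content><![CDATA[文档内容,这是内容内容!11111111]]></content>
        </sphinx:document>
        ...
    </sphinx:docset>
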
只要配置文件中 xmlpipe_command 字段配置的可执行文件输出为相应的xml流即可,这样几乎适配了所有数据源

## 生成索引

    $ indexer default
    Sphinx 2.0.4-release (r3135)
    Copyright (c) 2001-2012, Andrew Aksyonoff
    Copyright (c) 2008-2012, Sphinx Technologies Inc (http://sphinxsearch.com)
    using config file '/etc/sphinxsearch/sphinx.conf'...
    indexing index 'default'...
    WARNING: Attribute count is 0: switching to none docinfo
    WARNING: collect_hits: mem_limit=0 kb too low, increasing to 12288 kb
    collected 4 docs, 0.0 MB
    sorted 0.0 Mhits, 100.0% done
    total 4 docs, 47 bytes
    total 0.000 sec, 54970 bytes/sec, 4678.36 docs/sec
    total 2 reads, 0.000 sec, 0.0 kb/call avg, 0.0 msec/call avg
    total 6 writes, 0.000 sec, 0.0 kb/call avg, 0.0 msec/call avg

## 查询

    $ search 55
    Sphinx 2.0.4-release (r3135)
    Copyright (c) 2001-2012, Andrew Aksyonoff
    Copyright (c) 2008-2012, Sphinx Technologies Inc (http://sphinxsearch.com)
    using config file '/etc/sphinxsearch/sphinx.conf'...
    index 'default': query '55 ': returned 1 matches of 1 total in 0.000 sec
    displaying matches:
    1. document=233221, weight=1695
    words:
    1. '55': 1 documents, 1 hits

## 配置searchd

编辑sphinx配置文件,添加:

    ## 监听地址
    searchd {
        # 监听地址(Unix socket)
        listen = /var/log/searchd.sock
        # 日志文件
        log = /var/log/searchd.log
        # 查询日志
        query_log = /var/log/query.log
        # 客户端读取超时时间
        read_timeout = 5
        # 客户端请求超时时间
        client_timeout = 3000
        # PID 文件
        pid_file = /var/log/searchd.pid
    }

## 运行searchd

    $ sudo searchd
    Sphinx 2.0.4-release (r3135)
    Copyright (c) 2001-2012, Andrew Aksyonoff
    Copyright (c) 2008-2012, Sphinx Technologies Inc (http://sphinxsearch.com)
    using config file '/etc/sphinxsearch/sphinx.conf'...
    WARNING: compat_sphinxql_magics=1 is deprecated; please update your application and config
    listening on UNIX socket /var/log/searchd.sock
    precaching index 'default'
    precached 1 indexes in 0.000 sec

验证一下状态

    $ searchd --status
    Sphinx 2.0.4-release (r3135)
    Copyright (c) 2001-2012, Andrew Aksyonoff
    Copyright (c) 2008-2012, Sphinx Technologies Inc (http://sphinxsearch.com)
    using config file '/etc/sphinxsearch/sphinx.conf'...
    searchd status
    --------------
    uptime: 7
    connections: 1
    maxed_out: 0

## golang客户端

    package main

    import (
        "github.com/yunge/sphinx"
        "log"
    )

    func main() {
        // 链接参数
        opts := &sphinx.Options{
            Socket: "/var/log/searchd.sock",
            Timeout: 5000,
        }

        // 创建客户端
        spClient := &sphinx.Client{Options: opts}
        if err := spClient.Error(); err != nil {
            log.Fatal(err)
        }

        // 打开链接
        if err := spClient.Open(); err != nil {
            log.Fatal(err)
        }

        // 获取实例信息
        status, err := spClient.Status()
        if err != nil {
            log.Fatal(err)
        }
        for _, row := range status {
            log.Printf("%20s:\t%s\n", row[0], row[1])
        }

        // 查询
        res, err := spClient.Query("33", "default", "Test Query()")
        if err != nil {
            log.Fatal(err)
        }
        log.Println(res)
    }

输出:

    2013/12/05 01:14:55 uptime: 148
    2013/12/05 01:14:55 connections: 2
    2013/12/05 01:14:55 maxed_out: 0
    2013/12/05 01:14:55 command_search: 0
    2013/12/05 01:14:55 command_excerpt: 0
    2013/12/05 01:14:55 command_update: 0
    2013/12/05 01:14:55 command_keywords: 0
    2013/12/05 01:14:55 command_persist: 2
    2013/12/05 01:14:55 command_status: 2
    2013/12/05 01:14:55 command_flushattrs: 0
    2013/12/05 01:14:55 agent_connect: 0
    2013/12/05 01:14:55 agent_retry: 0
    2013/12/05 01:14:55 queries: 0
    2013/12/05 01:14:55 dist_queries: 0
    2013/12/05 01:14:55 query_wall: 0.000
    2013/12/05 01:14:55 query_cpu: OFF
    2013/12/05 01:14:55 dist_wall: 0.000
    2013/12/05 01:14:55 dist_local: 0.000
    2013/12/05 01:14:55 dist_wait: 0.000
    2013/12/05 01:14:55 query_reads: OFF
    2013/12/05 01:14:55 query_readkb: OFF
    2013/12/05 01:14:55 query_readtime: OFF
    2013/12/05 01:14:55 avg_query_wall: 0.000
    2013/12/05 01:14:55 avg_query_cpu: OFF
    2013/12/05 01:14:55 avg_dist_wall: 0.000
    2013/12/05 01:14:55 avg_dist_local: 0.000
    2013/12/05 01:14:55 avg_dist_wait: 0.000
    2013/12/05 01:14:55 avg_query_reads: OFF
    2013/12/05 01:14:55 avg_query_readkb: OFF
    2013/12/05 01:14:55 avg_query_readtime: OFF
    2013/12/05 01:14:55 &{[content] [] [] [] 1 1 0.001 [{33 1 1}]
    0}

api参考 http://gowalker.org/github.com/yunge/sphinx

sphinx配置参考 http://www.coreseek.cn/docs/coreseek_4.1-sphinx_2.0.1-beta.html