- 引言
- 案例概要
- 思路
- 分词
- 索引的管理
// Applies every pending operation in IndexDataQueue (insert / update / delete)
// to the Lucene index stored under this.IndexDataDir, then commits and optimizes.
// The writer and the directory are closed in finally blocks so that a failure
// while processing an item does not leave the index write lock held.

// Open the directory that holds the index files.
FSDirectory directory = FSDirectory.Open(new DirectoryInfo(this.IndexDataDir), new NativeFSLockFactory());
try
{
    // Check whether an index already exists in that directory.
    bool isExist = IndexReader.IndexExists(directory);
    if (isExist && IndexWriter.IsLocked(directory))
    {
        // Force-release a leftover write lock.
        // NOTE(review): this is only safe if no other writer is using the index — confirm.
        IndexWriter.Unlock(directory);
    }

    // PanGu analyzer (word segmentation).
    PanGuAnalyzer analyzer = new PanGuAnalyzer();

    // Index writer; a new index is created only when none exists yet.
    IndexWriter writer = new IndexWriter(directory, analyzer, !isExist, IndexWriter.MaxFieldLength.UNLIMITED);
    try
    {
        // Drain the queue, applying one operation per item.
        while (IndexDataQueue.Count > 0)
        {
            // Custom model for index data: per the original author it carries the file's
            // path, name, content and the operation type (insert, update, delete).
            BaseDataMode mode = IndexDataQueue.Dequeue();
            switch (mode.Type)
            {
                case OperationType.Insert:
                    {
                        Document document = new Document();
                        foreach (KeyValuePair<string, string> kv in mode.Content)
                        {
                            // kv.Key is the index field name, kv.Value is the content stored in that field.
                            document.Add(new Field(kv.Key, kv.Value, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
                        }
                        writer.AddDocument(document);
                    }
                    break;
                case OperationType.Update:
                    {
                        // An update is a delete of the documents matching "id" followed by a re-add.
                        MultiFieldQueryParser parser = new MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_29, new string[] { "id" }, analyzer);
                        Query query = parser.Parse(mode.Content["id"]);
                        writer.DeleteDocuments(query);

                        Document document = new Document();
                        foreach (KeyValuePair<string, string> kv in mode.Content)
                        {
                            document.Add(new Field(kv.Key, kv.Value, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
                        }
                        writer.AddDocument(document);
                    }
                    break;
                case OperationType.Delete:
                    {
                        // Delete the documents whose "id" field matches the parsed query.
                        MultiFieldQueryParser parser = new MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_29, new string[] { "id" }, analyzer);
                        Query query = parser.Parse(mode.Content["id"]);
                        writer.DeleteDocuments(query);
                    }
                    break;
                default:
                    break;
            }
        }

        // Commit the pending changes.
        writer.Commit();
        // Optimize the index.
        writer.Optimize();
    }
    finally
    {
        // Always close the writer so the write lock is released, even on failure.
        writer.Close();
    }
}
finally
{
    // Always release the directory.
    directory.Close();
}
- 文件检索
// Searches the Lucene index under this.IndexDir for documents matching any word
// of `keyword` in any of the fields named by Mode.Content's keys.
// The searcher, reader and directory are closed in finally blocks so that file
// handles are released even when the search throws.

// Open the directory that holds the index files.
FSDirectory directory = FSDirectory.Open(new DirectoryInfo(this.IndexDir), new NativeFSLockFactory());
try
{
    // Open the index with the read-only flag set.
    IndexReader reader = IndexReader.Open(directory, true);
    try
    {
        IndexSearcher searcher = new IndexSearcher(reader);
        try
        {
            // Combine one term query per (word, field) pair with OR semantics.
            BooleanQuery queryOr = new BooleanQuery();
            // NOTE(review): SplitByPanGu presumably segments the keyword into words — confirm.
            foreach (string word in SplitContent.SplitByPanGu(keyword))
            {
                foreach (KeyValuePair<string, string> kv in Mode.Content)
                {
                    TermQuery query = new TermQuery(new Term(kv.Key, word));
                    // SHOULD makes each clause optional, i.e. an OR relationship.
                    queryOr.Add(query, BooleanClause.Occur.SHOULD);
                }
            }

            // Run the search.
            // 1000 is the maximum number of top hits returned; it bounds the result set
            // and can also be used to implement paging.
            TopDocs tds = searcher.Search(queryOr, null, 1000);
            ScoreDoc[] docs = tds.scoreDocs;
            for (int i = 0; i < docs.Length; i++)
            {
                int docId = docs[i].doc;
                Document doc = searcher.Doc(docId);
                // The literal below is a placeholder for the name of a stored index field;
                // replace it with a real field name to read that field's content.
                string content = doc.Get("索引内字段的名称");
            }
        }
        finally
        {
            searcher.Close();
        }
    }
    finally
    {
        // The reader was opened here and passed to the searcher, so it is closed here too.
        reader.Close();
    }
}
finally
{
    directory.Close();
}
- 资源
DLL与词库:http://download.csdn.net/detail/aaakingwin/7208679