Lucene 3.0 搜索结果排序 + 高亮展示

1.排序

package cn.xt.sort;

import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.Version;
import org.junit.Before;
import org.junit.Test;

import cn.xt.crud.LuncenUtil;
import cn.xt.pojo.Article;

/**
 * Demonstrates relevance scoring of Lucene search results: runs a query
 * against the shared index and prints each hit's score followed by the
 * matched articles.
 */
public class SortTest {
    /**
     * Sample article prepared by {@link #setUp()}. The search test does not
     * use it; results are rebuilt from the documents stored in the index.
     */
    Article article = null;

    /**
     * Prepares the sample article before each test.
     *
     * @throws Exception declared by the original fixture; nothing here is expected to throw
     */
    @Before
    public void setUp() throws Exception {
        article = new Article();
        article.setId(1);
        article.setTitle("lucene是一片文章");
        article.setContent("是一个非常好用的全文搜索技术，very good。");
    }

    /**
     * Searches the "title" and "content" fields for "lucene", prints the
     * relevance score of every hit, then prints the articles that matched.
     *
     * <p>Observations recorded by the original author:
     * <ol>
     *   <li>Same structure, same keyword: the score is the same.</li>
     *   <li>Same structure, different keywords: the scores differ; Chinese
     *       terms generally score higher.</li>
     *   <li>Different structure: the more often the keyword occurs, the
     *       higher the score.</li>
     *   <li>{@code document.setBoost(100)} can be used to raise the relevance
     *       score artificially.</li>
     * </ol>
     *
     * @throws Exception if the index cannot be opened, the query cannot be
     *                   parsed, or a stored document cannot be read
     */
    @Test
    public void testSearchIndex() throws Exception {
        IndexSearcher indexSearcher = new IndexSearcher(LuncenUtil.directory);
        try {
            QueryParser queryParser = new MultiFieldQueryParser(
                    Version.LUCENE_30, new String[]{"title", "content"}, LuncenUtil.analyzer);
            Query query = queryParser.parse("lucene");
            TopDocs topDocs = indexSearcher.search(query, 26);

            List<Article> articleList = new ArrayList<Article>();
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                // Relevance score of this hit.
                System.out.println(scoreDoc.score);
                Document document = indexSearcher.doc(scoreDoc.doc);
                // Build a fresh Article per hit; the original added the shared
                // setUp() fixture here, so the output never reflected the index.
                articleList.add(toArticle(document));
            }

            for (Article hit : articleList) {
                System.out.println(hit.getId());
                System.out.println(hit.getTitle());
                System.out.println(hit.getContent());
            }
        } finally {
            // Release the index reader held by the searcher.
            indexSearcher.close();
        }
    }

    /**
     * Converts a stored Lucene document into an {@link Article}.
     *
     * @param document the document loaded from the index
     * @return a new article carrying the document's stored title and content,
     *         and its id when one is stored
     */
    private static Article toArticle(Document document) {
        Article result = new Article();
        // NOTE(review): assumes the id is stored as decimal text in a field
        // named "id" - confirm against the indexing code. Skipped when absent.
        String idText = document.get("id");
        if (idText != null) {
            result.setId(Integer.parseInt(idText));
        }
        result.setTitle(document.get("title"));
        result.setContent(document.get("content"));
        return result;
    }
}

2.高亮显示

package cn.xt.highlighter;

import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.Scorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.util.Version;
import org.junit.Before;
import org.junit.Test;

import cn.xt.crud.LuncenUtil;
import cn.xt.pojo.Article;

/**
 * Highlighting demo:
 * <ul>
 *   <li>colour the matched keywords (configure the highlighter, then use it);</li>
 *   <li>control the size of the summary fragment.</li>
 * </ul>
 *
 * @author Administrator
 */
public class HighlighterTest {
    /**
     * Sample article prepared by {@link #setUp()}. The search test does not
     * use it; results are rebuilt from the documents stored in the index.
     */
    Article article = null;

    /**
     * Prepares the sample article before each test.
     *
     * @throws Exception declared by the original fixture; nothing here is expected to throw
     */
    @Before
    public void setUp() throws Exception {
        article = new Article();
        article.setId(1);
        article.setTitle("lucene是一片文章");
        article.setContent("是一个非常好用的全文搜索技术，very good。");
    }

    /**
     * Searches the "title" and "content" fields for "Lucene", prints each
     * hit's relevance score, then prints the matched articles with the
     * keyword wrapped in a red {@code <font>} tag.
     *
     * @throws Exception if the index cannot be opened, the query cannot be
     *                   parsed, a stored document cannot be read, or
     *                   highlighting fails
     */
    @Test
    public void testSearchIndex() throws Exception {
        IndexSearcher indexSearcher = new IndexSearcher(LuncenUtil.directory);
        try {
            QueryParser queryParser = new MultiFieldQueryParser(
                    Version.LUCENE_30, new String[]{"title", "content"}, LuncenUtil.analyzer);
            Query query = queryParser.parse("Lucene");
            TopDocs topDocs = indexSearcher.search(query, 25);

            // Prefix and suffix placed around every matched keyword.
            Formatter formatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");
            // The scorer wraps the query, i.e. the keywords to highlight.
            Scorer scorer = new QueryScorer(query);
            Highlighter highlighter = new Highlighter(formatter, scorer);
            // Controls the summary: fragments are built with a fragment size of 10.
            Fragmenter fragmenter = new SimpleFragmenter(10);
            highlighter.setTextFragmenter(fragmenter);

            List<Article> articleList = new ArrayList<Article>();
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                // Relevance score of this hit.
                System.out.println(scoreDoc.score);
                Document document = indexSearcher.doc(scoreDoc.doc);

                // Build a fresh Article per hit. The original mutated and re-added
                // the single setUp() fixture, so every list entry was the same
                // object and only the last hit's highlight survived.
                Article hit = new Article();
                // NOTE(review): assumes the id is stored as decimal text in a field
                // named "id" - confirm against the indexing code. Skipped when absent.
                String idText = document.get("id");
                if (idText != null) {
                    hit.setId(Integer.parseInt(idText));
                }
                hit.setTitle(highlightOrStored(highlighter, "title", document.get("title")));
                hit.setContent(highlightOrStored(highlighter, "content", document.get("content")));
                articleList.add(hit);
            }

            for (Article hit : articleList) {
                System.out.println(hit.getId());
                System.out.println(hit.getTitle());
                System.out.println(hit.getContent());
            }
        } finally {
            // Release the index reader held by the searcher.
            indexSearcher.close();
        }
    }

    /**
     * Highlights one field's text, falling back to the stored text when the
     * highlighter finds nothing to highlight in it.
     *
     * @param highlighter the configured highlighter
     * @param fieldName   the field being highlighted; the analyzer tokenises
     *                    the text for this field to locate the keywords
     * @param storedText  the field's stored content, possibly {@code null}
     * @return the best highlighted fragment, the unmodified stored text when
     *         no fragment was produced, or {@code null} when nothing is stored
     * @throws Exception if tokenising or highlighting the text fails
     */
    private static String highlightOrStored(Highlighter highlighter, String fieldName,
            String storedText) throws Exception {
        if (storedText == null) {
            // Nothing stored for this field, so there is nothing to highlight.
            return null;
        }
        String fragment = highlighter.getBestFragment(LuncenUtil.analyzer, fieldName, storedText);
        return fragment != null ? fragment : storedText;
    }
}

秒客网

Lucene 3.0 搜索结果排序 + 高亮展示

相关文章