搜索可分为如下几步:
创建Directory
创建IndexReader
根据IndexReader创建IndexSearch
创建搜索的Query
根据searcher搜索并且返回TopDocs
根据TopDocs获取ScoreDoc对象
根据searcher和ScoreDoc对象获取具体的Document对象
根据Document对象获取需要的值
下面是例子代码:
3.5版本:
3.5版本比较简单,只需要Lucene核心包lucene-core即可,
package com.darren.lucene35;
import java.io.File;
import java.io.FileReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
public class HelloLucene {
/**
* 搜索
*/
public void search() {
IndexReader indexReader = null;
try {
// 1、创建Directory
Directory directory = FSDirectory.open(new File("F:/test/lucene/index"));
// 2、创建IndexReader
indexReader = IndexReader.open(directory);
// 3、根据IndexReader创建IndexSearch
IndexSearcher indexSearcher = new IndexSearcher(indexReader);
// 4、创建搜索的Query
// 使用默认的标准分词器
Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_35);
// 在content中搜索Darren
// 创建parser来确定要搜索文件的内容,第二个参数为搜索的域
QueryParser queryParser = new QueryParser(Version.LUCENE_35, "content", analyzer);
// 创建Query表示搜索域为content包含Darren的文档
Query query = queryParser.parse("Darren");
// 5、根据searcher搜索并且返回TopDocs
TopDocs topDocs = indexSearcher.search(query, 10);
// 6、根据TopDocs获取ScoreDoc对象
ScoreDoc[] scoreDocs = topDocs.scoreDocs;
for (ScoreDoc scoreDoc : scoreDocs) {
// 7、根据searcher和ScoreDoc对象获取具体的Document对象
Document document = indexSearcher.doc(scoreDoc.doc);
// 8、根据Document对象获取需要的值
System.out.println(document.get("filename") + " " + document.get("filepath"));
}
} catch (Exception e) {
e.printStackTrace();
} finally {
try {
if (indexReader != null) {
indexReader.clone();
}
} catch (Exception e) {
e.printStackTrace();
}
}
}
}
4.5版本:
4.5版本需要Lucene核心包lucene-core和查询包lucene-queryparser,从4.0版本之后分词包从核心包分离。代码示例如下:
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.darren.lucene45.helloworld</groupId>
<artifactId>lucene45_helloworld</artifactId>
<version>0.0.1-SNAPSHOT</version>
<packaging>jar</packaging>
<name>lucene45_helloworld</name>
<url>http://maven.apache.org</url>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<lucene.version>4.5.1</lucene.version>
</properties>
<dependencies>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.12</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
<version>${lucene.version}</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers-common</artifactId>
<version>${lucene.version}</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-queryparser</artifactId>
<version>${lucene.version}</version>
</dependency>
</dependencies>
</project>
5.0版本:
5.0版本和4.5版本一样,代码示例如下:
package com.darren.lucene50;
import java.io.File;
import java.io.FileReader;
import java.nio.file.FileSystems;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
public class HelloLucene {
/**
* 搜索
*/
public void search() {
DirectoryReader directoryReader = null;
try {
// 1、创建Directory
Directory directory = FSDirectory.open(FileSystems.getDefault().getPath("F:/test/lucene/index"));
// 2、创建IndexReader
/**
* 注意Reader与3.5版本不同:
*
* 所以使用DirectoryReader
*
* @Deprecated public static DirectoryReader open(final Directory directory) throws IOException { return
* DirectoryReader.open(directory); }
*
* 但是和4.5版本相同
*/
// 如下方法过时
// IndexReader indexReader = IndexReader.open(directory);
directoryReader = DirectoryReader.open(directory);
// 3、根据IndexReader创建IndexSearch
IndexSearcher indexSearcher = new IndexSearcher(directoryReader);
// 4、创建搜索的Query
/**
* 注意StandardAnalyzer与3.5版本4.5版本不同:
*
* 不需要版本号
*/
Analyzer analyzer = new StandardAnalyzer();
// 创建parser来确定要搜索文件的内容,第一个参数为搜索的域
/**
* 注意QueryParser与3.5版本4.5版本不同:
*
* 不需要版本号
*/
QueryParser queryParser = new QueryParser("content", analyzer);
// 创建Query表示搜索域为content包含Darren的文档
Query query = queryParser.parse("Darren");
// 5、根据searcher搜索并且返回TopDocs
TopDocs topDocs = indexSearcher.search(query, 10);
// 6、根据TopDocs获取ScoreDoc对象
ScoreDoc[] scoreDocs = topDocs.scoreDocs;
for (ScoreDoc scoreDoc : scoreDocs) {
// 7、根据searcher和ScoreDoc对象获取具体的Document对象
Document document = indexSearcher.doc(scoreDoc.doc);
// 8、根据Document对象获取需要的值
System.out.println(document.get("filename") + " " + document.get("filepath"));
}
} catch (Exception e) {
e.printStackTrace();
} finally {
try {
if (directoryReader != null) {
directoryReader.close();
}
} catch (Exception e) {
e.printStackTrace();
}
}
}
}
测试代码:
package com.darren.lucene50;
import org.junit.Test;
public class HelloLuceneTest {
@Test
public void testSearch() {
HelloLucene helloLucene = new HelloLucene();
helloLucene.search();
}
}