Lucene(三)索引域选项

时间:2021-09-26 05:54:48

转载出处:http://blog.csdn.net/ayi_5788/article/details/52121434

通过上两篇的学习,想必已经入了门了,今天来看索引域选项中的几个值的设置 
先来看一个构造器:


/**
* Create a field by specifying its name, value and how it will
* be saved in the index. Term vectors will not be stored in the index.
*
* @param name The name of the field
* @param value The string to process
* @param store Whether <code>value</code> should be stored in the index
* @param index Whether the field should be indexed, and if so, if it should
* be tokenized before indexing
* @throws NullPointerException if name or value is <code>null</code>
* @throws IllegalArgumentException if the field is neither stored nor indexed
*/
public Field(String name, String value, Store store, Index index) {
// Delegates to the five-argument constructor with term vectors disabled.
this(name, value, store, index, TermVector.NO);
}
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16

我们向Document添加Field可以有更多的设置,那么都是什么意思呢? 
name:字段名,很容易理解 
value:字段值,也很容易理解 
store和index怎么解释,下面就来看一下这两个选项的可选值: 
Field.Store.YES或者NO(存储域选项) 
设置为YES表示把这个域中的内容完全存储到文件中,方便进行文本的还原 
设置为NO表示把这个域的内容不存储到文件中,但是可以被索引,此时内容无法完全还原 
Field.Index(索引选项) 
Index.ANALYZED:进行分词和索引,适用于标题、内容等 
Index.NOT_ANALYZED:进行索引,但是不进行分词,如果身份证号,姓名,ID等,适用于精确搜索 
Index.ANALYZED_NO_NORMS:进行分词但是不存储norms信息,norms中保存了索引期间的加权(boost)和域长度归一化等信息 
Index.NOT_ANALYZED_NO_NORMS:既不进行分词也不存储norms信息 
Index.NO:不进行索引 
写个例子看看,由于pom文件与之前的一样,就不贴出了,直接看例子代码: 
3.5版本:


package com.darren.lucene35;

import java.io.File;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public class IndexUtil {
// Sample data: one document per array index, fields aligned positionally.
private static final String[] ids = { "1", "2", "3" };
private static final String[] authors = { "Darren", "Tony", "Grylls" };
private static final String[] titles = { "Hello World", "Hello Lucene", "Hello Java" };
private static final String[] contents = { "Hello World, I am on my way", "Today is my first day to study Lucene",
"I like Java" };

/**
* Builds the index (Lucene 3.5): writes one Document per sample entry to
* F:/test/lucene/index. All exceptions are caught and printed.
*/

public static void index() {
IndexWriter indexWriter = null;
try {
// 1. Create the Directory (filesystem-based index location)
Directory directory = FSDirectory.open(new File("F:/test/lucene/index"));

// 2. Create the IndexWriter with the standard analyzer
Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_35);
IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_35, analyzer);
indexWriter = new IndexWriter(directory, config);

int size = ids.length;
for (int i = 0; i < size; i++) {
// 3. Create the Document
Document document = new Document();
// Javadoc of the four-argument Field constructor used below:
/**
* Create a field by specifying its name, value and how it will be saved in the index. Term vectors will
* not be stored in the index.
*
* @param name
* The name of the field
* @param value
* The string to process
* @param store
* Whether <code>value</code> should be stored in the index
* @param index
* Whether the field should be indexed, and if so, if it should be tokenized before indexing
*
* public Field(String name, String value, Store store, Index index) { this(name, value,
* store, index, TermVector.NO); }
*/


// 4. Add Fields to the Document

// id: stored, indexed as a single untokenized term, norms omitted
// (norms hold index-time boost and field-length normalization data)
document.add(new Field("id", ids[i], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
// author: stored, indexed as a single untokenized term; norms are
// kept here, unlike the id field above
document.add(new Field("author", authors[i], Field.Store.YES, Field.Index.NOT_ANALYZED));
// title: stored and tokenized by the analyzer
document.add(new Field("title", titles[i], Field.Store.YES, Field.Index.ANALYZED));
// content: tokenized (searchable) but NOT stored, so
// document.get("content") returns null at search time
/**
* Note: content added from a Reader (new Field(name, reader)) is never
* stored. To store file content, read the file into a String first and
* add it like any other stored string field.
*/

document.add(new Field("content", contents[i], Field.Store.NO, Field.Index.ANALYZED));

// 5. Add the document to the index via the IndexWriter
indexWriter.addDocument(document);
}
} catch (Exception e) {
e.printStackTrace();
} finally {
try {
if (indexWriter != null) {
indexWriter.close();
}
} catch (Exception e) {
e.printStackTrace();
}
}
}

/**
* Searches the index for documents whose content field contains "Lucene"
* and prints the stored fields of each hit.
*/

public static void search() {
IndexReader indexReader = null;
try {
// 1. Create the Directory (same location the index was written to)
Directory directory = FSDirectory.open(new File("F:/test/lucene/index"));
// 2. Create the IndexReader
indexReader = IndexReader.open(directory);
// 3. Create the IndexSearcher from the reader
// NOTE(review): the IndexSearcher is never closed; Lucene 3.5 still has
// IndexSearcher.close() — confirm whether it should be called here.
IndexSearcher indexSearcher = new IndexSearcher(indexReader);
// 4. Build the search Query
// Use the default standard analyzer (must match the indexing analyzer)
Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_35);

// Search for "Lucene" in the content field
// The second constructor argument is the default field to search
QueryParser queryParser = new QueryParser(Version.LUCENE_35, "content", analyzer);
// Query: documents whose content field contains "Lucene"
Query query = queryParser.parse("Lucene");

// 5. Run the search and get the top-10 hits as TopDocs
TopDocs topDocs = indexSearcher.search(query, 10);
// 6. Get the ScoreDoc array from TopDocs
ScoreDoc[] scoreDocs = topDocs.scoreDocs;
for (ScoreDoc scoreDoc : scoreDocs) {
// 7. Resolve the concrete Document via the searcher and ScoreDoc
Document document = indexSearcher.doc(scoreDoc.doc);
// 8. Read the stored values from the Document
System.out.println("id : " + document.get("id"));
System.out.println("author : " + document.get("author"));
System.out.println("title : " + document.get("title"));
/**
* content was indexed with Field.Store.NO, so this prints null.
*/

System.out.println("content : " + document.get("content"));
}

} catch (Exception e) {
e.printStackTrace();
} finally {
try {
if (indexReader != null) {
indexReader.close();
}
} catch (Exception e) {
e.printStackTrace();
}
}

}
}

  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
  • 37
  • 38
  • 39
  • 40
  • 41
  • 42
  • 43
  • 44
  • 45
  • 46
  • 47
  • 48
  • 49
  • 50
  • 51
  • 52
  • 53
  • 54
  • 55
  • 56
  • 57
  • 58
  • 59
  • 60
  • 61
  • 62
  • 63
  • 64
  • 65
  • 66
  • 67
  • 68
  • 69
  • 70
  • 71
  • 72
  • 73
  • 74
  • 75
  • 76
  • 77
  • 78
  • 79
  • 80
  • 81
  • 82
  • 83
  • 84
  • 85
  • 86
  • 87
  • 88
  • 89
  • 90
  • 91
  • 92
  • 93
  • 94
  • 95
  • 96
  • 97
  • 98
  • 99
  • 100
  • 101
  • 102
  • 103
  • 104
  • 105
  • 106
  • 107
  • 108
  • 109
  • 110
  • 111
  • 112
  • 113
  • 114
  • 115
  • 116
  • 117
  • 118
  • 119
  • 120
  • 121
  • 122
  • 123
  • 124
  • 125
  • 126
  • 127
  • 128
  • 129
  • 130
  • 131
  • 132
  • 133
  • 134
  • 135
  • 136
  • 137
  • 138
  • 139
  • 140
  • 141
  • 142
  • 143
  • 144
  • 145
  • 146
  • 147
  • 148
  • 149
  • 150
  • 151
  • 152
  • 153
  • 154
  • 155
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
  • 37
  • 38
  • 39
  • 40
  • 41
  • 42
  • 43
  • 44
  • 45
  • 46
  • 47
  • 48
  • 49
  • 50
  • 51
  • 52
  • 53
  • 54
  • 55
  • 56
  • 57
  • 58
  • 59
  • 60
  • 61
  • 62
  • 63
  • 64
  • 65
  • 66
  • 67
  • 68
  • 69
  • 70
  • 71
  • 72
  • 73
  • 74
  • 75
  • 76
  • 77
  • 78
  • 79
  • 80
  • 81
  • 82
  • 83
  • 84
  • 85
  • 86
  • 87
  • 88
  • 89
  • 90
  • 91
  • 92
  • 93
  • 94
  • 95
  • 96
  • 97
  • 98
  • 99
  • 100
  • 101
  • 102
  • 103
  • 104
  • 105
  • 106
  • 107
  • 108
  • 109
  • 110
  • 111
  • 112
  • 113
  • 114
  • 115
  • 116
  • 117
  • 118
  • 119
  • 120
  • 121
  • 122
  • 123
  • 124
  • 125
  • 126
  • 127
  • 128
  • 129
  • 130
  • 131
  • 132
  • 133
  • 134
  • 135
  • 136
  • 137
  • 138
  • 139
  • 140
  • 141
  • 142
  • 143
  • 144
  • 145
  • 146
  • 147
  • 148
  • 149
  • 150
  • 151
  • 152
  • 153
  • 154
  • 155

我在注释中留了问题,现在我们是这样添加content字段的:


document.add(new Field("content", contents[i], Field.Store.NO, Field.Index.ANALYZED));
  • 1
  • 2
  • 1
  • 2

测试代码如下:


package com.darren.lucene35;

import org.junit.Test;

/**
* Manual tests for the Lucene 3.5 IndexUtil: run testIndex() before
* testSearch(), since search() reads the index that index() writes.
*/
public class IndexUtilTest {
// Builds the on-disk index at F:/test/lucene/index.
@Test
public void testIndex() {
IndexUtil.index();
}

// Searches the previously built index and prints the hits.
@Test
public void testSearch() {
IndexUtil.search();
}
}

  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18

现在跑一下测试看看效果,结果如下:

id : 2
author : Tony
title : Hello Lucene
content : null
  • 1
  • 2
  • 3
  • 4
  • 1
  • 2
  • 3
  • 4

为什么content为null,就是因为没有存,那么我们存一下看看

document.add(new Field("content", contents[i], Field.Store.YES, Field.Index.ANALYZED));
 
 
  • 1
  • 1

再跑一下测试,注意,要先跑索引,再跑查询

id : 2  
author : Tony
title : Hello Lucene
content : Today is my first day to study Lucene
  • 1
  • 2
  • 3
  • 4
  • 1
  • 2
  • 3
  • 4

现在content有值了 
索引选项与此类同,不再赘述 
4.5版本: 
这里先要看看3.5版本的Store和Index到底设置了什么东西,其实在Field的构造器中是这样设置的:

this.isStored = store.isStored(); 

this.isIndexed = index.isIndexed();
this.isTokenized = index.isAnalyzed();
this.omitNorms = index.omitNorms();
  • 1
  • 2
  • 3
  • 4
  • 5
  • 1
  • 2
  • 3
  • 4
  • 5

是使用的这几个属性,那么这几个属性的值是什么呢:


/** Specifies whether the field's raw value is persisted in the index. */
public static enum Store {

/** Store the original field value in the index. This is useful for short texts
* like a document's title which should be displayed with the results. The
* value is stored in its original form, i.e. no analyzer is used before it is
* stored.
*/

YES {
@Override
public boolean isStored() { return true; }
},

/** Do not store the field value in the index. */
NO {
@Override
public boolean isStored() { return false; }
};

// Each constant reports whether the raw value is persisted.
public abstract boolean isStored();
}

/** Specifies whether and how a field should be indexed. */
public static enum Index {

/** Do not index the field value. This field can thus not be searched,
* but one can still access its contents provided it is
* {@link Field.Store stored}. */

NO {
@Override
public boolean isIndexed() { return false; }
@Override
public boolean isAnalyzed() { return false; }
@Override
public boolean omitNorms() { return true; }
},

/** Index the tokens produced by running the field's
* value through an Analyzer. This is useful for
* common text. */

ANALYZED {
@Override
public boolean isIndexed() { return true; }
@Override
public boolean isAnalyzed() { return true; }
@Override
public boolean omitNorms() { return false; }
},

/** Index the field's value without using an Analyzer, so it can be searched.
* As no analyzer is used the value will be stored as a single term. This is
* useful for unique Ids like product numbers.
*/

NOT_ANALYZED {
@Override
public boolean isIndexed() { return true; }
@Override
public boolean isAnalyzed() { return false; }
@Override
public boolean omitNorms() { return false; }
},

/** Expert: Index the field's value without an Analyzer,
* and also disable the indexing of norms. Note that you
* can also separately enable/disable norms by calling
* {@link Field#setOmitNorms}. No norms means that
* index-time field and document boosting and field
* length normalization are disabled. The benefit is
* less memory usage as norms take up one byte of RAM
* per indexed field for every document in the index,
* during searching. Note that once you index a given
* field <i>with</i> norms enabled, disabling norms will
* have no effect. In other words, for this to have the
* above described effect on a field, all instances of
* that field must be indexed with NOT_ANALYZED_NO_NORMS
* from the beginning. */

NOT_ANALYZED_NO_NORMS {
@Override
public boolean isIndexed() { return true; }
@Override
public boolean isAnalyzed() { return false; }
@Override
public boolean omitNorms() { return true; }
},

/** Expert: Index the tokens produced by running the
* field's value through an Analyzer, and also
* separately disable the storing of norms. See
* {@link #NOT_ANALYZED_NO_NORMS} for what norms are
* and why you may want to disable them. */

ANALYZED_NO_NORMS {
@Override
public boolean isIndexed() { return true; }
@Override
public boolean isAnalyzed() { return true; }
@Override
public boolean omitNorms() { return true; }
};
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
  • 37
  • 38
  • 39
  • 40
  • 41
  • 42
  • 43
  • 44
  • 45
  • 46
  • 47
  • 48
  • 49
  • 50
  • 51
  • 52
  • 53
  • 54
  • 55
  • 56
  • 57
  • 58
  • 59
  • 60
  • 61
  • 62
  • 63
  • 64
  • 65
  • 66
  • 67
  • 68
  • 69
  • 70
  • 71
  • 72
  • 73
  • 74
  • 75
  • 76
  • 77
  • 78
  • 79
  • 80
  • 81
  • 82
  • 83
  • 84
  • 85
  • 86
  • 87
  • 88
  • 89
  • 90
  • 91
  • 92
  • 93
  • 94
  • 95
  • 96
  • 97
  • 98
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
  • 37
  • 38
  • 39
  • 40
  • 41
  • 42
  • 43
  • 44
  • 45
  • 46
  • 47
  • 48
  • 49
  • 50
  • 51
  • 52
  • 53
  • 54
  • 55
  • 56
  • 57
  • 58
  • 59
  • 60
  • 61
  • 62
  • 63
  • 64
  • 65
  • 66
  • 67
  • 68
  • 69
  • 70
  • 71
  • 72
  • 73
  • 74
  • 75
  • 76
  • 77
  • 78
  • 79
  • 80
  • 81
  • 82
  • 83
  • 84
  • 85
  • 86
  • 87
  • 88
  • 89
  • 90
  • 91
  • 92
  • 93
  • 94
  • 95
  • 96
  • 97
  • 98

明明白白,清清楚楚吧,是预定义好的,对应关系非常清楚,那么我们来看看4.5版本是怎么做的:


package com.darren.lucene45;

import java.io.File;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public class IndexUtil {
// Sample data: one document per array index, fields aligned positionally.
private static final String[] ids = { "1", "2", "3" };
private static final String[] authors = { "Darren", "Tony", "Grylls" };
private static final String[] titles = { "Hello World", "Hello Lucene", "Hello Java" };
private static final String[] contents = { "Hello World, I am on my way", "Today is my first day to study Lucene",
"I like Java" };

/**
* Builds the index (Lucene 4.5): writes one Document per sample entry to
* F:/test/lucene/index.
*
* NOTE(review): as written, this method fails at runtime — see the frozen
* FieldType note below; the article demonstrates this failure on purpose.
*/

public static void index() {
IndexWriter indexWriter = null;
try {
// 1. Create the Directory
Directory directory = FSDirectory.open(new File("F:/test/lucene/index"));

// 2. Create the IndexWriter
Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_45);
IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_45, analyzer);
indexWriter = new IndexWriter(directory, config);

int size = ids.length;
for (int i = 0; i < size; i++) {
// 3. Create the Document
Document document = new Document();
// Javadoc of the three-argument Field constructor used below:

// 4. Add Fields to the Document
/**
* Create field with String value.
*
* @param name
* field name
* @param value
* string value
* @param type
* field type
* @throws IllegalArgumentException
* if either the name or value is null, or if the field's type is neither indexed() nor
* stored(), or if indexed() is false but storeTermVectors() is true.
* @throws NullPointerException
* if the type is null
*
* public Field(String name, String value, FieldType type)
*/


/**
* Note: this differs from 3.5 — the old (Store, Index) constructor is
* deprecated.
*/


/**
* Note: in 4.5, FieldType replaces the former Store and Index options;
* the concrete Field subclasses predefine some FieldType constants.
*/

// Intended: store the ID, untokenized, without norms.
// NOTE(review): TextField.TYPE_STORED is a frozen shared constant, so
// the setter calls below throw IllegalStateException at runtime —
// this is the error the article demonstrates next. Also,
// setOmitNorms(false) KEEPS norms, contradicting the stated intent.
FieldType idType = TextField.TYPE_STORED;
idType.setIndexed(false);
idType.setOmitNorms(false);
document.add(new Field("id", ids[i], idType));

// Intended: store the Author, untokenized.
// NOTE(review): same frozen-constant problem as above.
FieldType authorType = TextField.TYPE_STORED;
authorType.setIndexed(false);
document.add(new Field("author", authors[i], authorType));

// Intended: store and tokenize the Title.
// NOTE(review): StringField.TYPE_STORED is NOT tokenized — it indexes
// the whole title as one term; TextField.TYPE_STORED would tokenize.
document.add(new Field("title", titles[i], StringField.TYPE_STORED));

// content: tokenized but not stored
document.add(new Field("content", contents[i], TextField.TYPE_NOT_STORED));

// 5. Add the document to the index via the IndexWriter
indexWriter.addDocument(document);
}
} catch (Exception e) {
e.printStackTrace();
} finally {
try {
if (indexWriter != null) {
indexWriter.close();
}
} catch (Exception e) {
e.printStackTrace();
}
}
}

/**
* Searches the index for documents whose content field contains "Lucene"
* and prints the stored fields of each hit.
*/

public static void search() {
DirectoryReader indexReader = null;
try {
// 1. Create the Directory
Directory directory = FSDirectory.open(new File("F:/test/lucene/index"));
// 2. Create the IndexReader
/**
* Note: the Reader API differs from 3.5 — IndexReader.open(Directory)
* is deprecated and just delegates, so DirectoryReader is used directly:
*
* @Deprecated public static DirectoryReader open(final Directory directory) throws IOException { return
* DirectoryReader.open(directory); }
*/

indexReader = DirectoryReader.open(directory);
// 3. Create the IndexSearcher from the reader
IndexSearcher indexSearcher = new IndexSearcher(indexReader);
// 4. Build the search Query
// Use the default standard analyzer (must match the indexing analyzer)
Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_45);

// Search for "Lucene" in the content field
// The second constructor argument is the default field to search
QueryParser queryParser = new QueryParser(Version.LUCENE_45, "content", analyzer);
// Query: documents whose content field contains "Lucene"
Query query = queryParser.parse("Lucene");

// 5. Run the search and get the top-10 hits as TopDocs
TopDocs topDocs = indexSearcher.search(query, 10);
// 6. Get the ScoreDoc array from TopDocs
ScoreDoc[] scoreDocs = topDocs.scoreDocs;
for (ScoreDoc scoreDoc : scoreDocs) {
// 7. Resolve the concrete Document via the searcher and ScoreDoc
Document document = indexSearcher.doc(scoreDoc.doc);
// 8. Read the stored values from the Document
System.out.println("id : " + document.get("id"));
System.out.println("author : " + document.get("author"));
System.out.println("title : " + document.get("title"));
/**
* content was indexed with TextField.TYPE_NOT_STORED, so this prints
* null.
*/

System.out.println("content : " + document.get("content"));
}

} catch (Exception e) {
e.printStackTrace();
} finally {
try {
if (indexReader != null) {
indexReader.close();
}
} catch (Exception e) {
e.printStackTrace();
}
}

}
}
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
  • 37
  • 38
  • 39
  • 40
  • 41
  • 42
  • 43
  • 44
  • 45
  • 46
  • 47
  • 48
  • 49
  • 50
  • 51
  • 52
  • 53
  • 54
  • 55
  • 56
  • 57
  • 58
  • 59
  • 60
  • 61
  • 62
  • 63
  • 64
  • 65
  • 66
  • 67
  • 68
  • 69
  • 70
  • 71
  • 72
  • 73
  • 74
  • 75
  • 76
  • 77
  • 78
  • 79
  • 80
  • 81
  • 82
  • 83
  • 84
  • 85
  • 86
  • 87
  • 88
  • 89
  • 90
  • 91
  • 92
  • 93
  • 94
  • 95
  • 96
  • 97
  • 98
  • 99
  • 100
  • 101
  • 102
  • 103
  • 104
  • 105
  • 106
  • 107
  • 108
  • 109
  • 110
  • 111
  • 112
  • 113
  • 114
  • 115
  • 116
  • 117
  • 118
  • 119
  • 120
  • 121
  • 122
  • 123
  • 124
  • 125
  • 126
  • 127
  • 128
  • 129
  • 130
  • 131
  • 132
  • 133
  • 134
  • 135
  • 136
  • 137
  • 138
  • 139
  • 140
  • 141
  • 142
  • 143
  • 144
  • 145
  • 146
  • 147
  • 148
  • 149
  • 150
  • 151
  • 152
  • 153
  • 154
  • 155
  • 156
  • 157
  • 158
  • 159
  • 160
  • 161
  • 162
  • 163
  • 164
  • 165
  • 166
  • 167
  • 168
  • 169
  • 170
  • 171
  • 172
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
  • 37
  • 38
  • 39
  • 40
  • 41
  • 42
  • 43
  • 44
  • 45
  • 46
  • 47
  • 48
  • 49
  • 50
  • 51
  • 52
  • 53
  • 54
  • 55
  • 56
  • 57
  • 58
  • 59
  • 60
  • 61
  • 62
  • 63
  • 64
  • 65
  • 66
  • 67
  • 68
  • 69
  • 70
  • 71
  • 72
  • 73
  • 74
  • 75
  • 76
  • 77
  • 78
  • 79
  • 80
  • 81
  • 82
  • 83
  • 84
  • 85
  • 86
  • 87
  • 88
  • 89
  • 90
  • 91
  • 92
  • 93
  • 94
  • 95
  • 96
  • 97
  • 98
  • 99
  • 100
  • 101
  • 102
  • 103
  • 104
  • 105
  • 106
  • 107
  • 108
  • 109
  • 110
  • 111
  • 112
  • 113
  • 114
  • 115
  • 116
  • 117
  • 118
  • 119
  • 120
  • 121
  • 122
  • 123
  • 124
  • 125
  • 126
  • 127
  • 128
  • 129
  • 130
  • 131
  • 132
  • 133
  • 134
  • 135
  • 136
  • 137
  • 138
  • 139
  • 140
  • 141
  • 142
  • 143
  • 144
  • 145
  • 146
  • 147
  • 148
  • 149
  • 150
  • 151
  • 152
  • 153
  • 154
  • 155
  • 156
  • 157
  • 158
  • 159
  • 160
  • 161
  • 162
  • 163
  • 164
  • 165
  • 166
  • 167
  • 168
  • 169
  • 170
  • 171
  • 172

4.5版本使用了FieldType来代替Store和Index,其实去看看FieldType是什么东西,就是预定义了一些值,比如StringField


package org.apache.lucene.document;

import org.apache.lucene.index.FieldInfo.IndexOptions;

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/


/** A field that is indexed but not tokenized: the entire
* String value is indexed as a single token. For example
* this might be used for a 'country' field or an 'id'
* field, or any field that you intend to use for sorting
* or access through the field cache. */


public final class StringField extends Field {

/** Indexed, not tokenized, omits norms, indexes
* DOCS_ONLY, not stored. */

public static final FieldType TYPE_NOT_STORED = new FieldType();

/** Indexed, not tokenized, omits norms, indexes
* DOCS_ONLY, stored */

public static final FieldType TYPE_STORED = new FieldType();

static {
// Both constants index a single untokenized term with norms omitted and
// only document IDs recorded; they differ solely in setStored(true).
TYPE_NOT_STORED.setIndexed(true);
TYPE_NOT_STORED.setOmitNorms(true);
TYPE_NOT_STORED.setIndexOptions(IndexOptions.DOCS_ONLY);
TYPE_NOT_STORED.setTokenized(false);
// freeze() makes the shared constant immutable — any later setter call
// throws IllegalStateException.
TYPE_NOT_STORED.freeze();

TYPE_STORED.setIndexed(true);
TYPE_STORED.setOmitNorms(true);
TYPE_STORED.setIndexOptions(IndexOptions.DOCS_ONLY);
TYPE_STORED.setStored(true);
TYPE_STORED.setTokenized(false);
TYPE_STORED.freeze();
}

/** Creates a new StringField.
* @param name field name
* @param value String value
* @param stored Store.YES if the content should also be stored
* @throws IllegalArgumentException if the field name or value is null.
*/

public StringField(String name, String value, Store stored) {
super(name, value, stored == Store.YES ? TYPE_STORED : TYPE_NOT_STORED);
}
}

  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
  • 37
  • 38
  • 39
  • 40
  • 41
  • 42
  • 43
  • 44
  • 45
  • 46
  • 47
  • 48
  • 49
  • 50
  • 51
  • 52
  • 53
  • 54
  • 55
  • 56
  • 57
  • 58
  • 59
  • 60
  • 61
  • 62
  • 63
  • 64
  • 65
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
  • 37
  • 38
  • 39
  • 40
  • 41
  • 42
  • 43
  • 44
  • 45
  • 46
  • 47
  • 48
  • 49
  • 50
  • 51
  • 52
  • 53
  • 54
  • 55
  • 56
  • 57
  • 58
  • 59
  • 60
  • 61
  • 62
  • 63
  • 64
  • 65

整个类预定义了两种FieldType,分别是TYPE_NOT_STORED和TYPE_STORED,具体的值也是一目了然看看和3.5版本是不是基本一样的,当然还有一些其他的FieldType,比如TextField中预定义了另外两种,


/** Indexed, tokenized, not stored. */
public static final FieldType TYPE_NOT_STORED = new FieldType();

/** Indexed, tokenized, stored. */
public static final FieldType TYPE_STORED = new FieldType();

static {
// Both constants are indexed and analyzed (tokenized); norms are kept
// (no setOmitNorms call), and the two differ only in setStored(true).
TYPE_NOT_STORED.setIndexed(true);
TYPE_NOT_STORED.setTokenized(true);
TYPE_NOT_STORED.freeze();

TYPE_STORED.setIndexed(true);
TYPE_STORED.setTokenized(true);
TYPE_STORED.setStored(true);
// freeze() locks the shared constants against later modification.
TYPE_STORED.freeze();
}
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17

当然,还有其他的FieldType,不再一一列出,那么我们来试一下


package com.darren.lucene45;

import org.junit.Test;

/**
* Manual tests for the Lucene 4.5 IndexUtil: run testIndex() before
* testSearch(), since search() reads the index that index() writes.
*/
public class IndexUtilTest {
// Builds the on-disk index at F:/test/lucene/index.
@Test
public void testIndex() {
IndexUtil.index();
}

// Searches the previously built index and prints the hits.
@Test
public void testSearch() {
IndexUtil.search();
}
}

  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18

此时跑一下测试的testIndex()方法看看效果:


java.lang.IllegalStateException: this FieldType is already frozen and cannot be changed
at org.apache.lucene.document.FieldType.checkIfFrozen(FieldType.java:86)
at org.apache.lucene.document.FieldType.setIndexed(FieldType.java:118)
at com.darren.lucene45.IndexUtil.index(IndexUtil.java:80)
at com.darren.lucene45.IndexUtilTest.testIndex(IndexUtilTest.java:8)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:601)
at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:50)
at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:47)
at org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17)
at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:325)
at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:78)
at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:57)
at org.junit.runners.ParentRunner$3.run(ParentRunner.java:290)
at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:71)
at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:288)
at org.junit.runners.ParentRunner.access$000(ParentRunner.java:58)
at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:268)
at org.junit.runners.ParentRunner.run(ParentRunner.java:363)
at org.eclipse.jdt.internal.junit4.runner.JUnit4TestReference.run(JUnit4TestReference.java:50)
at org.eclipse.jdt.internal.junit.runner.TestExecution.run(TestExecution.java:38)
at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.runTests(RemoteTestRunner.java:467)
at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.runTests(RemoteTestRunner.java:683)
at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.run(RemoteTestRunner.java:390)
at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.main(RemoteTestRunner.java:197)
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29

竟然报错了,那么怎么办呢,原因是因为预定义的值都调用了freeze()方法,这个方法设置

/**
* Permanently locks this FieldType: after freeze() every setter goes
* through checkIfFrozen() and throws IllegalStateException.
*/
public void freeze() {
this.frozen = true;
}
  • 1
  • 2
  • 3
  • 1
  • 2
  • 3

frozen为true,而FieldType中有这样的方法:

/** Guard used by every setter: once frozen, this FieldType is immutable. */
private void checkIfFrozen() {
    if (!frozen) {
        return;
    }
    throw new IllegalStateException("this FieldType is already frozen and cannot be changed");
}
  • 1
  • 2
  • 3
  • 4
  • 5
  • 1
  • 2
  • 3
  • 4
  • 5

如果为true就抛异常,就是这些预定义的值不可修改,那没办法了,我们只好自己设置了,于是放索引方法改为这样:


/**
 * Builds the index (Lucene 4.5): writes one Document per sample entry to
 * F:/test/lucene/index. Custom FieldType instances are created fresh (the
 * predefined constants are frozen and cannot be modified).
 *
 * Field settings mirror the 3.5 version:
 *  - id:      stored, indexed as a single token, norms omitted
 *             (= Store.YES + Index.NOT_ANALYZED_NO_NORMS)
 *  - author:  stored, indexed as a single token, norms kept
 *             (= Store.YES + Index.NOT_ANALYZED)
 *  - title:   stored and tokenized (= Store.YES + Index.ANALYZED)
 *  - content: tokenized but not stored (= Store.NO + Index.ANALYZED)
 *
 * All exceptions are caught and printed; the writer is closed in finally.
 */
public static void index() {
    IndexWriter indexWriter = null;
    try {
        // 1. Create the Directory
        Directory directory = FSDirectory.open(new File("F:/test/lucene/index"));

        // 2. Create the IndexWriter
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_45);
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_45, analyzer);
        indexWriter = new IndexWriter(directory, config);

        int size = ids.length;
        for (int i = 0; i < size; i++) {
            // 3. Create the Document
            Document document = new Document();

            // 4. Add Fields. Build fresh FieldType instances for custom
            // combinations — the predefined constants are frozen.

            // id: stored, single untokenized term, no norms. The original
            // code set setIndexed(false) and setOmitNorms(false), which
            // contradicted both the comment and the 3.5 version (the id
            // would not be searchable and norms would be kept).
            FieldType idType = new FieldType();
            idType.setStored(true);
            idType.setIndexed(true);
            idType.setTokenized(false);
            idType.setOmitNorms(true);
            document.add(new Field("id", ids[i], idType));

            // author: stored, single untokenized term, norms kept
            FieldType authorType = new FieldType();
            authorType.setStored(true);
            authorType.setIndexed(true);
            authorType.setTokenized(false);
            document.add(new Field("author", authors[i], authorType));

            // title: stored AND tokenized — TextField.TYPE_STORED; the
            // original used StringField.TYPE_STORED, which is untokenized
            // and so contradicted the "analyze the title" intent.
            document.add(new Field("title", titles[i], TextField.TYPE_STORED));

            // content: tokenized but not stored
            document.add(new Field("content", contents[i], TextField.TYPE_NOT_STORED));

            // 5. Add the document to the index via the IndexWriter
            indexWriter.addDocument(document);
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        try {
            if (indexWriter != null) {
                indexWriter.close();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
  • 37
  • 38
  • 39
  • 40
  • 41
  • 42
  • 43
  • 44
  • 45
  • 46
  • 47
  • 48
  • 49
  • 50
  • 51
  • 52
  • 53
  • 54
  • 55
  • 56
  • 57
  • 58
  • 59
  • 60
  • 61
  • 62
  • 63
  • 64
  • 65
  • 66
  • 67
  • 68
  • 69
  • 70
  • 71
  • 72
  • 73
  • 74
  • 75
  • 76
  • 77
  • 78
  • 79
  • 80
  • 81
  • 82
  • 83
  • 84
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
  • 37
  • 38
  • 39
  • 40
  • 41
  • 42
  • 43
  • 44
  • 45
  • 46
  • 47
  • 48
  • 49
  • 50
  • 51
  • 52
  • 53
  • 54
  • 55
  • 56
  • 57
  • 58
  • 59
  • 60
  • 61
  • 62
  • 63
  • 64
  • 65
  • 66
  • 67
  • 68
  • 69
  • 70
  • 71
  • 72
  • 73
  • 74
  • 75
  • 76
  • 77
  • 78
  • 79
  • 80
  • 81
  • 82
  • 83
  • 84

再跑一下testIndex()方法,没有出错,得到正常的索引,然后跑一下testSearch()方法看看结果: 
id : 2 
author : Tony 
title : Hello Lucene 
content : null
 
此时的content也是null,那么改一改对content的设置,把这句改为: 
// 对Content存储,并且分词 
document.add(new Field("content", contents[i], TextField.TYPE_STORED));
 
再跑测试,记住要先跑索引,再跑查询,结果为:

id : 2  
author : Tony
title : Hello Lucene
content : Today is my first day to study Lucene
  • 1
  • 2
  • 3
  • 4
  • 1
  • 2
  • 3
  • 4

此时得到了和3.5版本一样的测试结果,4.5版本完成 
5.0版本: 
5.0版本与4.5版本相比没有太大改动,先看一下代码:


package com.darren.lucene50;

import java.nio.file.FileSystems;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Demonstrates Lucene 5.0 field/index options: how FieldType's stored,
 * tokenized, indexOptions and omitNorms flags control what is written to the
 * index and which field values can be restored at search time.
 */
public class IndexUtil {
    private static final String[] ids = { "1", "2", "3" };
    private static final String[] authors = { "Darren", "Tony", "Grylls" };
    private static final String[] titles = { "Hello World", "Hello Lucene", "Hello Java" };
    private static final String[] contents = { "Hello World, I am on my way", "Today is my first day to study Lucene",
            "I like Java" };

    /**
     * Builds the index at F:/test/lucene/index from the sample arrays above.
     */
    public static void index() {
        IndexWriter indexWriter = null;
        try {
            // 1. Create the Directory (filesystem based).
            Directory directory = FSDirectory.open(FileSystems.getDefault().getPath("F:/test/lucene/index"));

            // 2. Create the IndexWriter with a standard analyzer.
            Analyzer analyzer = new StandardAnalyzer();
            IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
            indexWriter = new IndexWriter(directory, indexWriterConfig);

            int size = ids.length;
            for (int i = 0; i < size; i++) {
                // 3. Create one Document per sample record.
                Document document = new Document();

                // 4. Add the fields. Note: since Lucene 4.x the old
                // Field(name, value, Store, Index) constructor is replaced by
                // FieldType; in 5.0 the indexing granularity is expressed via
                // IndexOptions instead of the old Index enum.

                // id: stored, NOT tokenized, norms omitted.
                // BUGFIX: the original called setOmitNorms(false) (norms were
                // kept) and never called setTokenized(false) (FieldType
                // defaults to tokenized), contradicting the stated intent of
                // "not tokenized and no norms".
                FieldType idType = new FieldType();
                idType.setStored(true);
                idType.setTokenized(false);
                idType.setIndexOptions(IndexOptions.DOCS);
                idType.setOmitNorms(true);
                document.add(new Field("id", ids[i], idType));

                // author: stored, NOT tokenized (exact-match field).
                FieldType authorType = new FieldType();
                authorType.setStored(true);
                authorType.setTokenized(false);
                authorType.setIndexOptions(IndexOptions.DOCS);
                document.add(new Field("author", authors[i], authorType));

                // title: stored AND tokenized. TextField.TYPE_STORED is the
                // predefined "indexed, tokenized, stored" type.
                // BUGFIX: the original used StringField.TYPE_STORED, which is
                // NOT tokenized and so contradicted the stated intent.
                document.add(new Field("title", titles[i], TextField.TYPE_STORED));

                // content: tokenized but not stored — searchable, yet
                // document.get("content") returns null at search time.
                document.add(new Field("content", contents[i], TextField.TYPE_NOT_STORED));

                // 5. Add the document to the index.
                indexWriter.addDocument(document);
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            try {
                if (indexWriter != null) {
                    indexWriter.close();
                }
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Searches the "content" field for "Lucene" and prints the stored fields
     * of every hit; fields indexed without storage print as null.
     */
    public static void search() {
        DirectoryReader indexReader = null;
        try {
            // 1. Open the same Directory the index was written to.
            Directory directory = FSDirectory.open(FileSystems.getDefault().getPath("F:/test/lucene/index"));
            // 2. Create the IndexReader.
            indexReader = DirectoryReader.open(directory);
            // 3. Create the IndexSearcher from the reader.
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);
            // 4. Build the query; use the same analyzer as at index time so
            // query terms are tokenized consistently with the indexed text.
            Analyzer analyzer = new StandardAnalyzer();

            // Parse against the "content" field (second argument of the old
            // API; here the default field of the parser).
            QueryParser queryParser = new QueryParser("content", analyzer);
            // Matches documents whose content contains "Lucene".
            Query query = queryParser.parse("Lucene");

            // 5. Run the search, keeping the top 10 hits.
            TopDocs topDocs = indexSearcher.search(query, 10);
            // 6. Walk the ScoreDoc array.
            ScoreDoc[] scoreDocs = topDocs.scoreDocs;
            for (ScoreDoc scoreDoc : scoreDocs) {
                // 7. Resolve each hit to its stored Document.
                Document document = indexSearcher.doc(scoreDoc.doc);
                // 8. Read the stored field values.
                System.out.println("id : " + document.get("id"));
                System.out.println("author : " + document.get("author"));
                System.out.println("title : " + document.get("title"));
                // content was added with TYPE_NOT_STORED, so this prints null.
                System.out.println("content : " + document.get("content"));
            }

        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            try {
                if (indexReader != null) {
                    indexReader.close();
                }
            } catch (Exception e) {
                e.printStackTrace();
            }
        }

    }
}

  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
  • 37
  • 38
  • 39
  • 40
  • 41
  • 42
  • 43
  • 44
  • 45
  • 46
  • 47
  • 48
  • 49
  • 50
  • 51
  • 52
  • 53
  • 54
  • 55
  • 56
  • 57
  • 58
  • 59
  • 60
  • 61
  • 62
  • 63
  • 64
  • 65
  • 66
  • 67
  • 68
  • 69
  • 70
  • 71
  • 72
  • 73
  • 74
  • 75
  • 76
  • 77
  • 78
  • 79
  • 80
  • 81
  • 82
  • 83
  • 84
  • 85
  • 86
  • 87
  • 88
  • 89
  • 90
  • 91
  • 92
  • 93
  • 94
  • 95
  • 96
  • 97
  • 98
  • 99
  • 100
  • 101
  • 102
  • 103
  • 104
  • 105
  • 106
  • 107
  • 108
  • 109
  • 110
  • 111
  • 112
  • 113
  • 114
  • 115
  • 116
  • 117
  • 118
  • 119
  • 120
  • 121
  • 122
  • 123
  • 124
  • 125
  • 126
  • 127
  • 128
  • 129
  • 130
  • 131
  • 132
  • 133
  • 134
  • 135
  • 136
  • 137
  • 138
  • 139
  • 140
  • 141
  • 142
  • 143
  • 144
  • 145
  • 146
  • 147
  • 148
  • 149
  • 150
  • 151
  • 152
  • 153
  • 154
  • 155
  • 156
  • 157
  • 158
  • 159
  • 160
  • 161
  • 162
  • 163
  • 164
  • 165
  • 166
  • 167
  • 168
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
  • 37
  • 38
  • 39
  • 40
  • 41
  • 42
  • 43
  • 44
  • 45
  • 46
  • 47
  • 48
  • 49
  • 50
  • 51
  • 52
  • 53
  • 54
  • 55
  • 56
  • 57
  • 58
  • 59
  • 60
  • 61
  • 62
  • 63
  • 64
  • 65
  • 66
  • 67
  • 68
  • 69
  • 70
  • 71
  • 72
  • 73
  • 74
  • 75
  • 76
  • 77
  • 78
  • 79
  • 80
  • 81
  • 82
  • 83
  • 84
  • 85
  • 86
  • 87
  • 88
  • 89
  • 90
  • 91
  • 92
  • 93
  • 94
  • 95
  • 96
  • 97
  • 98
  • 99
  • 100
  • 101
  • 102
  • 103
  • 104
  • 105
  • 106
  • 107
  • 108
  • 109
  • 110
  • 111
  • 112
  • 113
  • 114
  • 115
  • 116
  • 117
  • 118
  • 119
  • 120
  • 121
  • 122
  • 123
  • 124
  • 125
  • 126
  • 127
  • 128
  • 129
  • 130
  • 131
  • 132
  • 133
  • 134
  • 135
  • 136
  • 137
  • 138
  • 139
  • 140
  • 141
  • 142
  • 143
  • 144
  • 145
  • 146
  • 147
  • 148
  • 149
  • 150
  • 151
  • 152
  • 153
  • 154
  • 155
  • 156
  • 157
  • 158
  • 159
  • 160
  • 161
  • 162
  • 163
  • 164
  • 165
  • 166
  • 167
  • 168

FieldType的使用稍有不同,没有了Index而使用IndexOptions代替,现在TextField中预定义的值是这样的:


/** Indexed, tokenized, not stored. */
public static final FieldType TYPE_NOT_STORED = new FieldType();

/** Indexed, tokenized, stored. */
public static final FieldType TYPE_STORED = new FieldType();

static {
TYPE_NOT_STORED.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
TYPE_NOT_STORED.setTokenized(true);
TYPE_NOT_STORED.freeze();

TYPE_STORED.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
TYPE_STORED.setTokenized(true);
TYPE_STORED.setStored(true);
TYPE_STORED.freeze();
}
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17

我们来看看IndexOptions与Index有什么不同:


/**
* Controls how much information is stored in the postings lists.
* @lucene.experimental
*/


public enum IndexOptions {
// NOTE: order is important here; FieldInfo uses this
// order to merge two conflicting IndexOptions (always
// "downgrades" by picking the lowest).
/** Not indexed */
NONE,
/**
* Only documents are indexed: term frequencies and positions are omitted.
* Phrase and other positional queries on the field will throw an exception, and scoring
* will behave as if any term in the document appears only once.
*/

DOCS,
/**
* Only documents and term frequencies are indexed: positions are omitted.
* This enables normal scoring, except Phrase and other positional queries
* will throw an exception.
*/

DOCS_AND_FREQS,
/**
* Indexes documents, frequencies and positions.
* This is a typical default for full-text search: full scoring is enabled
* and positional queries are supported.
*/

DOCS_AND_FREQS_AND_POSITIONS,
/**
* Indexes documents, frequencies, positions and offsets.
* Character offsets are encoded alongside the positions.
*/

DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS,
}
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
Lucene(三)索引域选项
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36

从它的选项看似乎是多了几个功能,可以对词的频率索引、位置索引、甚至偏移量索引,这是之前版本所没有的。其他方面和4.5版本基本一样。