solr开发之拼写检查spellcheck

首先，修改schema.xml，增加需要检查的字段

<!-- Spellcheck source field: indexed but not stored, multi-valued, analysed with text_spell. -->
<field name="spell"
       type="text_spell"
       indexed="true"
       stored="false"
       multiValued="true"/>
<!-- Copy every productName value into the spell field. -->
<copyField source="productName"
           dest="spell"/>

<!-- Field type used by the "spell" field. Attribute names and values are kept exactly as
     originally written (including the spelling of minTermLenght / maxTermLenght);
     presumably they match the parameter names the pinyin filter factory reads. TODO confirm. -->
<fieldType name="text_spell" class="solr.TextField" positionIncrementGap="100">
    <!-- Index time: Ansj tokenizer, then the pinyin filter and the pinyin edge n-gram filter. -->
    <analyzer type="index">
        <tokenizer class="org.ansj.solr5.AnsjTokenizerFactory"
                   query="false"
                   pstemming="true"
                   stopwordsDir="stopwords/stopwords.dic"/>
        <filter class="org.apache.lucene.analysis.pinyin.solr5.PinyinTokenFilterFactory"
                pinyinAll="true"
                minTermLenght="2"
                maxTermLenght="15"/>
        <filter class="org.apache.lucene.analysis.pinyin.solr5.PinyinEdgeNGramTokenFilterFactory"/>
    </analyzer>
    <!-- Query time: split on whitespace only. -->
    <analyzer type="query">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    </analyzer>
    <!-- NOTE(review): this analyzer has no type attribute but sits next to explicit index and
         query analyzers. Confirm whether Solr uses it at all in this arrangement. -->
    <analyzer>
        <tokenizer class="solr.KeywordTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
    </analyzer>
</fieldType>

这里查询时不需要中文分词，所以 query 分析器只使用 WhitespaceTokenizerFactory，按空白字符切分。

然后是配置solrconfig.xml文件，添加组件和处理器

<!-- Spellcheck search component named "spellerror". It declares two spellcheckers:
     "default" (DirectSolrSpellChecker) and "file" (FileBasedSpellChecker). -->
<searchComponent name="spellerror" class="solr.SpellCheckComponent">
    <str name="queryAnalyzerFieldType">string</str>

    <!-- A spellchecker built from a field of the main index. -->
    <lst name="spellchecker">
        <str name="name">default</str>
        <!-- The indexed field whose terms are the basis for spell checking; here the "spell" field. -->
        <str name="field">spell</str>
        <str name="classname">solr.DirectSolrSpellChecker</str>
        <str name="distanceMeasure">internal</str>
        <float name="accuracy">0.5</float>
        <int name="maxEdits">2</int>
        <int name="minPrefix">1</int>
        <int name="maxInspections">5</int>
        <int name="minQueryLength">2</int>
        <float name="maxQueryFrequency">0.01</float>
    </lst>

    <!-- A spellchecker whose source is the UTF-8 file spellings.txt. -->
    <lst name="spellchecker">
        <str name="classname">solr.FileBasedSpellChecker</str>
        <str name="name">file</str>
        <str name="sourceLocation">spellings.txt</str>
        <str name="characterEncoding">UTF-8</str>
        <str name="spellcheckIndexDir">spellcheckerFile</str>
    </lst>
</searchComponent>
  <!-- Request handler /spell: a SearchHandler that runs the "spellerror" component last. -->
  <requestHandler name="/spell" class="solr.SearchHandler" startup="lazy">
    <!-- Default parameters. -->
    <lst name="defaults">
      <str name="df">spell</str>
      <str name="spellcheck.dictionary">default</str>
      <str name="spellcheck">on</str>
      <!-- Disabled:
      <str name="spellcheck.extendedResults">true</str>
      <str name="spellcheck.count">10</str>
      <str name="spellcheck.alternativeTermCount">5</str>
      <str name="spellcheck.maxResultsForSuggest">5</str>
      -->
      <str name="spellcheck.collate">true</str>
      <str name="spellcheck.collateExtendedResults">true</str>
      <!-- Disabled:
      <str name="spellcheck.maxCollationTries">10</str>
      <str name="spellcheck.maxCollations">5</str>
      -->
    </lst>
    <arr name="last-components">
      <str>spellerror</str>
    </arr>
  </requestHandler>

配置完后，需要重新加载 core（或重启 Solr）使配置生效，并重建索引，spell 字段才会有数据。

最后是solrJ里的代码：

/**
 * Spell-checks a whitespace-separated keyword phrase through the {@code /spell} request handler.
 *
 * <p>Each word is sent to Solr as its own {@code spellcheck.q}. A word that Solr reports as
 * misspelled is replaced by its first suggestion when one exists; every other word is kept
 * unchanged, so the returned phrase always has the same number of words as the input.
 *
 * @param keyword the phrase to check; {@code null} or blank yields an empty string
 * @return the phrase with misspelled words replaced, or an empty string when no word was
 *         reported as misspelled
 * @throws SolrWrapException if the request to Solr fails
 */
public String spellCheck(String keyword)
{
    if (keyword == null)
    {
        return "";
    }
    String phrase = keyword.trim();
    if (phrase.isEmpty())
    {
        return "";
    }

    StringBuilder corrected = new StringBuilder();
    boolean anyMisspelled = false;

    // Split on runs of whitespace so repeated spaces do not produce empty words
    // (each empty word would otherwise cost a pointless round trip to Solr).
    for (String word : phrase.split("\\s+"))
    {
        SolrQuery query = new SolrQuery();
        query.set("qt", "/spell");
        query.set("spellcheck", "true");
        query.set("spellcheck.q", word);
        query.set("spellcheck.count", 5);
        // spellcheck.build is intentionally NOT sent: it asks Solr to (re)build the
        // spellcheck dictionary and should not accompany every request.

        String replacement = word;
        try
        {
            QueryResponse response = solrClient.query(coreName, query);
            System.out.println("耗时：" + response.getQTime());

            SpellCheckResponse spellres = response.getSpellCheckResponse();
            if (spellres != null && !spellres.isCorrectlySpelled())
            {
                anyMisspelled = true;
                String spellWord = spellres.getFirstSuggestion(word);
                System.out.println("推荐词：" + spellWord);
                if (spellWord != null)
                {
                    replacement = spellWord;
                }
            }
        }
        catch (SolrServerException | IOException e)
        {
            throw new SolrWrapException(e);
        }

        // Always keep the word's position in the phrase, corrected or not.
        if (corrected.length() > 0)
        {
            corrected.append(' ');
        }
        corrected.append(replacement);
    }

    // Preserve the existing contract: an empty result means "nothing was flagged".
    return anyMisspelled ? corrected.toString() : "";
}

参考文献：

http://www.cnblogs.com/HD/p/3993424.html

秒客网

solr开发之拼写检查spellcheck

相关文章