一、创建索引时，自定义拼音分词和ik分词

# Create the index with two custom analyzers that chain an IK tokenizer with a
# pinyin token filter (the ik_* tokenizers and the "pinyin" filter type are
# provided by the IK and pinyin analysis plugins).
#
# Analyzers:
#   ik_pinyin_analyzer - custom analyzer name; ik_smart tokenizer, then my_pinyin and word_delimiter
#   pinyin_analyzer    - ik_max_word tokenizer, then my_pinyin and word_delimiter
#
# Options of the my_pinyin filter:
#   keep_separate_first_letter - when enabled, first letters are kept as separate terms,
#                                e.g. 刘德华 > l, d, h. Default: false. Note: query results
#                                may become too fuzzy because these terms are very frequent.
#   keep_full_pinyin           - when enabled, e.g. 刘德华 > [liu, de, hua]. Default: true.
#   keep_original              - when enabled, the original input is kept as well. Default: false.
#   limit_first_letter_length  - maximum length of the first_letter result. Default: 16.
#   lowercase                  - lowercase non-Chinese letters. Default: true.
#   remove_duplicated_term     - when enabled, duplicated terms are removed to save index space,
#                                e.g. de的 > de. Default: false. Note: position-related queries
#                                may be affected.
PUT /my_index
{
  "settings": {
    "index": {
      "analysis": {
        "analyzer": {
          "ik_pinyin_analyzer": {
            "type": "custom",
            "tokenizer": "ik_smart",
            "filter": ["my_pinyin", "word_delimiter"]
          },
          "pinyin_analyzer": {
            "type": "custom",
            "tokenizer": "ik_max_word",
            "filter": ["my_pinyin", "word_delimiter"]
          }
        },
        "filter": {
          "my_pinyin": {
            "type": "pinyin",
            "keep_separate_first_letter": false,
            "keep_full_pinyin": true,
            "keep_original": true,
            "limit_first_letter_length": 16,
            "lowercase": true,
            "remove_duplicated_term": true
          }
        }
      }
    }
  }
}

二、创建mapping时，设置字段分词（注：同一索引下创建不同 type 时，同名字段的映射属性必须保持一致）

# Define the mapping of type "user" in my_index.
#
#   analyzer   - "ik_pinyin_analyzer" is the name of the custom analyzer defined in the
#                index settings.
#   fields.raw - a "keyword" sub-field; keyword fields are not analyzed.
#   store      - when store is true, the field's value is stored separately. If a request
#                then asks for that stored field, Elasticsearch loads it from the field's
#                own stored block instead of from _source. Here it is disabled (false).
POST /my_index/user/_mapping
{
  "user": {
    "properties": {
      "id": {
        "type": "integer"
      },
      "userName": {
        "type": "text",
        "store": false,
        "term_vector": "with_positions_offsets",
        "analyzer": "ik_pinyin_analyzer",
        "boost": 10,
        "fielddata": true,
        "fields": {
          "raw": {
            "type": "keyword"
          }
        }
      },
      "reason": {
        "type": "text",
        "store": false,
        "term_vector": "with_positions_offsets",
        "analyzer": "ik_pinyin_analyzer",
        "boost": 10
      }
    }
  }
}

测试

# Index sample document with id 1.
PUT /my_index/user/1
{
  "id": 1,
  "userName": "刘德华",
  "reason": "大帅哥"
}

# Index sample document with id 2.
PUT /my_index/user/2
{
  "id": 2,
  "userName": "刘德华",
  "reason": "中华人民"
}

不分词查询

# Match query against the keyword sub-field userName.raw (whole value, not analyzed).
GET /my_index/user/_search
{
  "query": {
    "match": { "userName.raw": "刘德华" }
  }
}

{

  "took": 0,

  "timed_out": false,

  "_shards": {

    "total": 5,

    "successful": 5,

    "skipped": 0,

    "failed": 0

  },

  "hits": {

    "total": 2,

    "max_score": 0.2876821,

    "hits": [

      {

        "_index": "my_index",

        "_type": "user",

        "_id": "2",

        "_score": 0.2876821,

        "_source": {

          "id": 2,

          "userName": "刘德华",

          "reason": "中华人民"

        }

      },

      {

        "_index": "my_index",

        "_type": "user",

        "_id": "1",

        "_score": 0.2876821,

        "_source": {

          "id": 1,

          "userName": "刘德华",

          "reason": "大帅哥"

        }

      }

    ]

  }

}

分词查询

# Match query against the analyzed userName field using a single character.
GET /my_index/user/_search
{
  "query": {
    "match": { "userName": "刘" }
  }
}

{

  "took": 0,

  "timed_out": false,

  "_shards": {

    "total": 5,

    "successful": 5,

    "skipped": 0,

    "failed": 0

  },

  "hits": {

    "total": 2,

    "max_score": 0.31331712,

    "hits": [

      {

        "_index": "my_index",

        "_type": "user",

        "_id": "2",

        "_score": 0.31331712,

        "_source": {

          "id": 2,

          "userName": "刘德华",

          "reason": "中华人民"

        }

      },

      {

        "_index": "my_index",

        "_type": "user",

        "_id": "1",

        "_score": 0.31331712,

        "_source": {

          "id": 1,

          "userName": "刘德华",

          "reason": "大帅哥"

        }

      }

    ]

  }

}

拼音分词

# Match query against the reason field using a pinyin term.
GET /my_index/user/_search
{
  "query": {
    "match": { "reason": "shuai" }
  }
}

{

  "took": 0,

  "timed_out": false,

  "_shards": {

    "total": 5,

    "successful": 5,

    "skipped": 0,

    "failed": 0

  },

  "hits": {

    "total": 1,

    "max_score": 3.4884284,

    "hits": [

      {

        "_index": "my_index",

        "_type": "user",

        "_id": "1",

        "_score": 3.4884284,

        "_source": {

          "id": 1,

          "userName": "刘德华",

          "reason": "大帅哥"

        }

      }

    ]

  }

}

分组聚合

# Pinyin match on userName, returning at most 2 hits, with a terms aggregation
# that buckets the matching documents by the keyword sub-field userName.raw.
GET /my_index/user/_search
{
  "size": 2,
  "query": {
    "match": { "userName": "liu" }
  },
  "aggs": {
    "group_by_meetingType": {
      "terms": { "field": "userName.raw" }
    }
  }
}

{

  "took": 1,

  "timed_out": false,

  "_shards": {

    "total": 5,

    "successful": 5,

    "skipped": 0,

    "failed": 0

  },

  "hits": {

    "total": 2,

    "max_score": 3.133171,

    "hits": [

      {

        "_index": "my_index",

        "_type": "user",

        "_id": "2",

        "_score": 3.133171,

        "_source": {

          "id": 2,

          "userName": "刘德华",

          "reason": "中华人民"

        }

      },

      {

        "_index": "my_index",

        "_type": "user",

        "_id": "1",

        "_score": 3.133171,

        "_source": {

          "id": 1,

          "userName": "刘德华",

          "reason": "大帅哥"

        }

      }

    ]

  },

  "aggregations": {

    "group_by_meetingType": {

      "doc_count_error_upper_bound": 0,

      "sum_other_doc_count": 0,

      "buckets": [

        {

          "key": "刘德华",

          "doc_count": 2

        }

      ]

    }

  }

}

以上均为个人理解，各位大神如有不同的想法或建议，欢迎评论！

秒客网

Elasticsearch拼音和ik分词器的结合应用

以上均为个人理解，各位大神如有不同的想法或建议，欢迎评论！

相关文章