Elasticsearch 搜索使用详解

时间:2021-12-13 08:46:00
结构化搜索
//精确值查找
使用过滤器(filters),跳过评分阶段,容易缓存。请尽可能使用过滤式查询。
在query DSL查询表达式中,term查询类似where条件查询。
{
    "term" : {
        "price" : 20
    }
}
使用 constant_score(恒定分数) 查询以非评分模式来执行 term 查询并以一作为统一评分。
GET /my_store/products/_search
{
    "query" : {
        "constant_score" : {
            "filter" : {
                "term" : {
                    "price" : 20
                }
            }
        }
    }
}

GET /my_store/_analyze 该语句查看分词结果,避免精确值查询的词被拆分。需要在mapping中使用 "index" : "not_analyzed"避免该情况发生。
注意:过滤器会创建一个 bitset (一个包含 0 和 1 的数组),它描述了哪个文档会包含该 term 。匹配文档的标志位是 1 。

//组合过滤器
当我们需要多个过滤器时,只须将它们置入 bool 过滤器的不同部分即可。可以嵌套。
一个 bool 过滤器由三部分组成:

{
   "bool" : {
      "must" :     [],
      "should" :   [],
      "must_not" : []
   }
}

must
    所有的语句都 必须(must) 匹配,与 AND 等价。
must_not
    所有的语句都 不能(must not) 匹配,与 NOT 等价。
should
    至少有一个语句要匹配,与 OR 等价。
 
GET /my_store/products/_search
{
   "query" : {
      "filtered" : {
         "filter" : {
            "bool" : {
              "should" : [
                 { "term" : {"price" : 20}},
                 { "term" : {"productID" : "XHDK-A-1293-#fJ3"}}
              ],
              "must_not" : {
                 "term" : {"price" : 30}
              }
           }
         }
      }
   }
}

//查找多个值,从term到terms
{
    "terms" : {
        "price" : [20, 30]
    }
}
term 查询是如何工作的? Elasticsearch 会在倒排索引中查找包括某 term 的所有文档,然后构造一个 bitset 。
token

//范围查询
range 查询可同时提供包含(inclusive)和不包含(exclusive)这两种范围表达式,
gt: > 大于(greater than)
lt: < 小于(less than)
gte: >= 大于或等于(greater than or equal to)
lte: <= 小于或等于(less than or equal to)

"range" : {
    "price" : {
        "gte" : 20,
        "lte" : 40
    }
}

//处理 Null 值
//exists 查询
GET /my_index/posts/_search
{
    "query" : {
        "constant_score" : {
            "filter" : {
                "exists" : { "field" : "tags" }
            }
        }
    }
}

//missing 查询本质上与 exists 恰好相反: 它返回某个特定字段没有值的文档
GET /my_index/posts/_search
{
    "query" : {
        "constant_score" : {
            "filter": {
                "missing" : { "field" : "tags" }
            }
        }
    }
}

全文搜索
//match查询
GET /my_index/my_type/_search
{
    "query": {
        "match": {
            "title": "QUICK!"
        }
    }
}
控制精度
GET /my_index/my_type/_search
{
  "query": {
    "match": {
      "title": {
        "query":                "quick brown dog",
        "minimum_should_match": "75%"  //控制精度
      }
    }
  }
}
//bool组合查询
bool 查询会为每个文档计算相关度评分 _score
GET /my_index/my_type/_search
{
  "query": {
    "bool": {
      "must":     { "match": { "title": "quick" }},
      "must_not": { "match": { "title": "lazy"  }},
      "should": [
                  { "match": { "title": "brown" }},
                  { "match": { "title": "dog"   }}
      ]
    }
  }
}
控制精度
GET /my_index/my_type/_search
{
  "query": {
    "bool": {
      "should": [
        { "match": { "title": "brown" }},
        { "match": { "title": "fox"   }},
        { "match": { "title": "dog"   }}
      ],
      "minimum_should_match": 2
    }
  }
}
加权重查询
GET /_search
{
    "query": {
        "bool": {
            "must": {
                "match": {  
                    "content": {
                        "query":    "full text search",
                        "operator": "and"
                    }
                }
            },
            "should": [
                { "match": {
                    "content": {
                        "query": "Elasticsearch",
                        "boost": 3
                    }
                }},
                { "match": {
                    "content": {
                        "query": "Lucene",
                        "boost": 2
                    }
                }}
            ]
        }
    }
}

//分析查询_analyze和explain
GET /my_index/_analyze
{
  "field": "my_type.english_title",   
  "text": "Foxes"
}
GET /my_index/my_type/_validate/query?explain
{
    "query": {
        "bool": {
            "should": [
                { "match": { "title":         "Foxes"}},
                { "match": { "english_title": "Foxes"}}
            ]
        }
    }
}

//dis_max 即分离最大化查询(Disjunction Max Query)。分离(Disjunction)的意思是 或(or);与之相对,结合(conjunction)可以理解成 与(and)。
{
    "query": {
        "dis_max": {
            "queries": [
                { "match": { "title": "Brown fox" }},
                { "match": { "body":  "Brown fox" }}
            ]
        }
    }
}

//最佳字段查询调优
指定 tie_breaker 后,除最佳匹配语句外,其他匹配语句的评分会先与 tie_breaker 相乘,再计入最终评分;这样其他字段的匹配也会被考虑,匹配精度相应放宽。
{
    "query": {
        "dis_max": {
            "queries": [
                { "match": { "title": "Quick pets" }},
                { "match": { "body":  "Quick pets" }}
            ],
            "tie_breaker": 0.3
        }
    }
}

//multi_match查询
{
  "query": {
    "multi_match": {
      "query":       "Poland Street W1V",
      "type":        "most_fields",
      "fields":      [ "street", "city", "country", "postcode" ]
    }
  }
}

//短语匹配match_phrase
GET /my_index/my_type/_search
{
    "query": {
        "match_phrase": {
            "title": "quick brown fox"
        }
    }
}

//用 rescore 对排名靠前的结果重新评分:"window_size": 50 指定每个分片上参与重新评分的结果数,"slop": 50 用于邻近度匹配
GET /my_index/my_type/_search
{
    "query": {
        "match": {  
            "title": {
                "query":                "quick brown fox",
                "minimum_should_match": "30%"
            }
        }
    },
    "rescore": {
        "window_size": 50,
        "query": {         
            "rescore_query": {
                "match_phrase": {
                    "title": {
                        "query": "quick brown fox",
                        "slop":  50
                    }
                }
            }
        }
    }
}

//prefix前缀查询
GET /my_index/address/_search
{
    "query": {
        "prefix": {
            "postcode": "W1"
        }
    }
}

//通配符查询wildcard
GET /my_index/address/_search
{
    "query": {
        "wildcard": {
            "postcode": "W?F*HW"
        }
    }
}

//正则表达式查询regexp
GET /my_index/address/_search
{
    "query": {
        "regexp": {
            "postcode": "W[0-9].+"
        }
    }
}

相关文章