标题的要求可以用如下 SQL 表示
select sum(column2) from (select distinct(column1),column2 from table)t
要如何用 DSL 实现呢,先准备下索引和数据
PUT test_index
{
"mappings": {
"properties": {
"column1": {
"type": "keyword"
},
"column2": {
"type": "long"
}
}
}
}
PUT test_index/_doc/1
{
"column1": "1",
"column2": 2
}
PUT test_index/_doc/2
{
"column1": "1",
"column2": 2
}
PUT test_index/_doc/3
{
"column1": "2",
"column2": 1
}
PUT test_index/_doc/4
{
"column1": "2",
"column2": 1
}
我首先想到的是 collapse 搭配 cardinality,再sum,那效果如何呢
GET test_index/_search
{
"collapse": {
"field": "column1"
},
"aggs": {
"distinct_column": {
"cardinality": {
"field": "column1"
}
},
"distinct_sum":{
"sum": {
"field": "column2"
}
},
"all_sum":{
"sum": {
"field": "column2"
}
}
}
}
结果,count是去重了的,但sum没有
{
"took" : 5,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 4,
"relation" : "eq"
},
"max_score" : null,
"hits" : [
{
"_index" : "test_index",
"_type" : "_doc",
"_id" : "1",
"_score" : 1.0,
"_source" : {
"column1" : "1",
"column2" : 2
},
"fields" : {
"column1" : [
"1"
]
}
},
{
"_index" : "test_index",
"_type" : "_doc",
"_id" : "3",
"_score" : 1.0,
"_source" : {
"column1" : "2",
"column2" : 1
},
"fields" : {
"column1" : [
"2"
]
}
}
]
},
"aggregations" : {
"distinct_sum" : {
"value" : 6.0
},
"all_sum" : {
"value" : 6.0
},
"distinct_column" : {
"value" : 2
}
}
}
尝试多次未果后,找到了这个
Sum aggregation on Unique Data in ElasticSearch - Stack Overflow
那试下呗
GET test_index/_search
{
"size": 0,
"aggs": {
"column1_count": {
"terms": {
"field": "column1",
"size": 100
},
"aggs": {
"column2_avg": {
"avg": {
"field": "column2"
}
}
}
},
"unique_count": {
"cardinality": {
"field": "column1"
}
},
"unique_sum_column2":{
"sum_bucket": {
"buckets_path": "column1_count>column2_avg"
}
},
"sum_column2":{
"sum": {
"field": "column2"
}
}
}
}
非常好,达到期望,这个是先求平均值,再求和,为这思路叹服
{
"took" : 5,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 4,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"sum_column2" : {
"value" : 6.0
},
"column1_count" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "1",
"doc_count" : 2,
"column2_avg" : {
"value" : 2.0
}
},
{
"key" : "2",
"doc_count" : 2,
"column2_avg" : {
"value" : 1.0
}
}
]
},
"unique_count" : {
"value" : 2
},
"unique_sum_column2" : {
"value" : 3.0
}
}
}