【11】把 Elasticsearch 当数据库使：Filter 下钻

阅读量：6712 次

发布时间：2019-06-25

本文共 8837 字，大约阅读时间需要 29 分钟。

使用可以用 SQL 进行 elasticsearch 的查询。下钻未必一定是GROUP BY，每记过一层GROUP BY，桶的数量就会增加一层。还有一种下钻是用 filter 来下钻，这种下钻方式会使下一层的 match的文档数量变少，但是桶的数量不变。

SELECT INSIDE WHERE ipo_year=2000

SQL

$ cat << EOF | ./es_query.py http://127.0.0.1:9200    WITH all_symbols AS (SELECT MAX(market_cap) AS max_all_times FROM symbol);    WITH year_2000 AS (SELECT MAX(market_cap) AS max_at_2000 FROM all_symbols         WHERE ipo_year=2000)EOF

{"max_at_2000": 20310000000.0, "max_all_times": 522690000000.0}

可以看到桶的数量还是同一个，只是增加了一个max_at_2000结果字段。

Elasticsearch

{  "aggs": {    "year_2000": {      "filter": {        "term": {          "ipo_year": 2000        }      },       "aggs": {        "max_at_2000": {          "max": {            "field": "market_cap"          }        }      }    },     "max_all_times": {      "max": {        "field": "market_cap"      }    }  },   "size": 0}

{  "hits": {    "hits": [],     "total": 6714,     "max_score": 0.0  },   "_shards": {    "successful": 1,     "failed": 0,     "total": 1  },   "took": 3,   "aggregations": {    "year_2000": {      "max_at_2000": {        "value": 20310000000.0      },       "doc_count": 58    },     "max_all_times": {      "value": 522690000000.0    }  },   "timed_out": false}

Profile

[  {    "query": [      {        "query_type": "TermQuery",        "lucene": "ipo_year:`P",        "time": "0.3492430000ms",        "breakdown": {          "score": 0,          "create_weight": 220149,          "next_doc": 0,          "match": 0,          "build_scorer": 95037,          "advance": 34057        }      },      {        "query_type": "MatchAllDocsQuery",        "lucene": "*:*",        "time": "0.1883710000ms",        "breakdown": {          "score": 0,          "create_weight": 3980,          "next_doc": 169730,          "match": 0,          "build_scorer": 14661,          "advance": 0        }      }    ],    "rewrite_time": 3158,    "collector": [      {        "name": "MultiCollector",        "reason": "search_multi",        "time": "8.789594000ms",        "children": [          {            "name": "TotalHitCountCollector",            "reason": "search_count",            "time": "0.2768050000ms"          },          {            "name": "BucketCollector: [[year_2000, max_all_times]]",            "reason": "aggregation",            "time": "7.667765000ms"          }        ]      }    ]  }]

从Profile的结果来看，其实是一次性做了两个查询，一个是全部文档，一个是ipo_year=2000的文档，然后统一聚合。

SELECT INSIDE 往不同方向过滤下钻

SQL

$ cat << EOF | ./es_query.py http://127.0.0.1:9200    WITH all_symbols AS (SELECT MAX(market_cap) AS max_all_times FROM symbol);    WITH year_2000 AS (SELECT MAX(market_cap) AS max_at_2000 FROM all_symbols         WHERE ipo_year=2000);    WITH year_2001 AS (SELECT MAX(market_cap) AS max_at_2001 FROM all_symbols         WHERE ipo_year=2001)EOF

这个写法其实和 CASE WHEN 很类似，但是其表达能力更强大，更灵活。

$ cat << EOF | ./es_query.py http://127.0.0.1:9200    SELECT per_ipo_year, MAX(market_cap) AS max_all_times FROM symbol         GROUP BY CASE             WHEN ipo_year=2000 THEN 'year_2000'             WHEN ipo_year=2001 THEN 'year_2001'         END AS per_ipo_yearEOF

上面的sql查询结果是

{"max_at_2000": 20310000000.0, "max_all_times": 522690000000.0, "max_at_2001": 8762940000.0}

Elasticsearch

{  "aggs": {    "year_2001": {      "filter": {        "term": {          "ipo_year": 2001        }      },       "aggs": {        "max_at_2001": {          "max": {            "field": "market_cap"          }        }      }    },     "year_2000": {      "filter": {        "term": {          "ipo_year": 2000        }      },       "aggs": {        "max_at_2000": {          "max": {            "field": "market_cap"          }        }      }    },     "max_all_times": {      "max": {        "field": "market_cap"      }    }  },   "size": 0}

{  "hits": {    "hits": [],     "total": 6714,     "max_score": 0.0  },   "_shards": {    "successful": 1,     "failed": 0,     "total": 1  },   "took": 2,   "aggregations": {    "year_2001": {      "max_at_2001": {        "value": 8762940000.0      },       "doc_count": 38    },     "year_2000": {      "max_at_2000": {        "value": 20310000000.0      },       "doc_count": 58    },     "max_all_times": {      "value": 522690000000.0    }  },   "timed_out": false}

Profile

[  {    "query": [      {        "query_type": "TermQuery",        "lucene": "ipo_year:`Q",        "time": "0.2518270000ms",        "breakdown": {          "score": 0,          "create_weight": 186032,          "next_doc": 0,          "match": 0,          "build_scorer": 48664,          "advance": 17131        }      },      {        "query_type": "TermQuery",        "lucene": "ipo_year:`P",        "time": "0.1200760000ms",        "breakdown": {          "score": 0,          "create_weight": 77254,          "next_doc": 0,          "match": 0,          "build_scorer": 25184,          "advance": 17638        }      },      {        "query_type": "MatchAllDocsQuery",        "lucene": "*:*",        "time": "0.1968800000ms",        "breakdown": {          "score": 0,          "create_weight": 3573,          "next_doc": 180136,          "match": 0,          "build_scorer": 13171,          "advance": 0        }      }    ],    "rewrite_time": 4250,    "collector": [      {        "name": "MultiCollector",        "reason": "search_multi",        "time": "2.459413000ms",        "children": [          {            "name": "TotalHitCountCollector",            "reason": "search_count",            "time": "0.2160950000ms"          },          {            "name": "BucketCollector: [[year_2001, year_2000, max_all_times]]",            "reason": "aggregation",            "time": "1.455703000ms"          }        ]      }    ]  }]

SELECT INSIDE 往同一方向连续过滤下钻

SQL

$ cat << EOF | ./es_query.py http://127.0.0.1:9200    WITH SELECT MAX(market_cap) AS max_all_times FROM symbol AS all_symbols;    WITH SELECT MAX(market_cap) AS max_at_2000 FROM all_symbols         WHERE ipo_year=2000 AS year_2000;    WITH SELECT MAX(market_cap) AS max_at_2001_finance FROM year_2000         WHERE sector='Finance' AS year_2000_financeEOF

{"max_at_2000": 20310000000.0, "max_all_times": 522690000000.0, "max_at_2001_finance": 985668354.0}

Elasticsearch

{  "aggs": {    "year_2000": {      "filter": {        "term": {          "ipo_year": 2000        }      },       "aggs": {        "max_at_2000": {          "max": {            "field": "market_cap"          }        },         "year_2000_finance": {          "filter": {            "term": {              "sector": "Finance"            }          },           "aggs": {            "max_at_2001_finance": {              "max": {                "field": "market_cap"              }            }          }        }      }    },     "max_all_times": {      "max": {        "field": "market_cap"      }    }  },   "size": 0}

{  "hits": {    "hits": [],     "total": 6714,     "max_score": 0.0  },   "_shards": {    "successful": 1,     "failed": 0,     "total": 1  },   "took": 2,   "aggregations": {    "year_2000": {      "max_at_2000": {        "value": 20310000000.0      },       "year_2000_finance": {        "max_at_2001_finance": {          "value": 985668354.0        },         "doc_count": 2      },       "doc_count": 58    },     "max_all_times": {      "value": 522690000000.0    }  },   "timed_out": false}

Profile

[  {    "query": [      {        "query_type": "TermQuery",        "lucene": "ipo_year:`P",        "time": "0.1897790000ms",        "breakdown": {          "score": 0,          "create_weight": 145762,          "next_doc": 0,          "match": 0,          "build_scorer": 26216,          "advance": 17801        }      },      {        "query_type": "TermQuery",        "lucene": "sector:Finance",        "time": "0.2380290000ms",        "breakdown": {          "score": 0,          "create_weight": 57770,          "next_doc": 0,          "match": 0,          "build_scorer": 55497,          "advance": 124762        }      },      {        "query_type": "MatchAllDocsQuery",        "lucene": "*:*",        "time": "0.1965630000ms",        "breakdown": {          "score": 0,          "create_weight": 3500,          "next_doc": 178347,          "match": 0,          "build_scorer": 14716,          "advance": 0        }      }    ],    "rewrite_time": 4190,    "collector": [      {        "name": "MultiCollector",        "reason": "search_multi",        "time": "2.466917000ms",        "children": [          {            "name": "TotalHitCountCollector",            "reason": "search_count",            "time": "0.2712430000ms"          },          {            "name": "BucketCollector: [[year_2000, max_all_times]]",            "reason": "aggregation",            "time": "1.370663000ms"          }        ]      }    ]  }]

有了 GROUP BY 下钻和 FILTER 下钻，很多复杂的查询可以一条就查询出来。而这种边下钻边聚合指标的查询能力甚至是传统SQL都不具备的。而且稍微训练一下，就会非常习惯这种下钻的思维方式，写查询也会很自然。

转载地址：http://lwolo.baihongyu.com/

你可能感兴趣的文章