elasticsearch query与agg分开是否可以提高性能？

Elasticsearch | 作者 Ryze | 发布于2021年07月07日 | 阅读数：3162

假如我有一个名为 article 的索引，有1亿条记录，它的 mappings 文件和数据样式如下。我想问的是，像 query1 这样将 query 子句和 agg 子句拆分开，使用 bulk api（如 _msearch）去执行，是否可以获得比 query2 更高的性能呢？（elasticsearch version 7.x）

mappings：

{

  "article" : {

    "aliases" : { },

    "mappings" : {

      "properties" : {

        "articleID" : {

          "type" : "text",

          "fields" : {

            "keyword" : {

              "type" : "keyword",

              "ignore_above" : 256

            }

          }

        },

        "author_first_name" : {

          "type" : "text",

          "fields" : {

            "keyword" : {

              "type" : "keyword",

              "ignore_above" : 256

            }

          },

          "copy_to" : [

            "new_author_full_name"

          ]

        },

        "author_full_name" : {

          "type" : "text",

          "fields" : {

            "keyword" : {

              "type" : "keyword",

              "ignore_above" : 256

            }

          }

        },

        "author_last_name" : {

          "type" : "text",

          "fields" : {

            "keyword" : {

              "type" : "keyword",

              "ignore_above" : 256

            }

          },

          "copy_to" : [

            "new_author_full_name"

          ]

        },

        "content" : {

          "type" : "text",

          "fields" : {

            "keyword" : {

              "type" : "keyword",

              "ignore_above" : 256

            }

          }

        },

        "follower_num" : {

          "type" : "long"

        },

        "hidden" : {

          "type" : "boolean"

        },

        "new_author_first_name" : {

          "type" : "text",

          "copy_to" : [

            "new_author_full_name"

          ]

        },

        "new_author_full_name" : {

          "type" : "text"

        },

        "new_author_last_name" : {

          "type" : "text",

          "copy_to" : [

            "new_author_full_name"

          ]

        },

        "postDate" : {

          "type" : "date"

        },

        "sub_title" : {

          "type" : "text",

          "fields" : {

            "std" : {

              "type" : "text",

              "analyzer" : "standard"

            }

          },

          "analyzer" : "english"

        },

        "tag" : {

          "type" : "text",

          "fields" : {

            "keyword" : {

              "type" : "keyword",

              "ignore_above" : 256

            }

          }

        },

        "tag_cnt" : {

          "type" : "long"

        },

        "title" : {

          "type" : "text",

          "fields" : {

            "keyword" : {

              "type" : "keyword",

              "ignore_above" : 256

            }

          }

        },

        "userID" : {

          "type" : "long"

        },

        "view_cnt" : {

          "type" : "long"

        }

      }

    },

    "settings" : {

      "index" : {

        "routing" : {

          "allocation" : {

            "include" : {

              "_tier_preference" : "data_content"

            }

          }

        },

        "number_of_shards" : "1",

        "provided_name" : "article",

        "creation_date" : "1609156191282",

        "number_of_replicas" : "1",

        "uuid" : "_LaU_8fuTlCsJ9ZvEqUCjA",

        "version" : {

          "created" : "7100199"

        }

      }

    }

  }

}

数据样式：

{

  "took" : 0,

  "timed_out" : false,

  "_shards" : {

    "total" : 1,

    "successful" : 1,

    "skipped" : 0,

    "failed" : 0

  },

  "hits" : {

    "total" : {

      "value" : 6,

      "relation" : "eq"

    },

    "max_score" : 1.0,

    "hits" : [

      {

        "_index" : "article",

        "_type" : "_doc",

        "_id" : "6",

        "_score" : 1.0,

        "_source" : {

          "title" : "this is java and hadoop blog"

        }

      },

      {

        "_index" : "article",

        "_type" : "_doc",

        "_id" : "1",

        "_score" : 1.0,

        "_source" : {

          "view_cnt" : 30,

          "sub_title" : "learning more courses",

          "author_last_name" : "Smith",

          "hidden" : false,

          "new_author_first_name" : "Peter",

          "articleID" : "XHDK-A-1293-#fJ3",

          "title" : "this is java and elasticsearch blog",

          "userID" : 1,

          "content" : "i like to write best elasticsearch article",

          "author_first_name" : "Peter",

          "tag_cnt" : 2,

          "postDate" : "2017-01-01",

          "tag" : [

            "java",

            "hadoop"

          ],

          "new_author_last_name" : "Smith",

          "follower_num" : 5

        }

      },

      {

        "_index" : "article",

        "_type" : "_doc",

        "_id" : "2",

        "_score" : 1.0,

        "_source" : {

          "view_cnt" : 50,

          "sub_title" : "learned a lot of course",

          "author_last_name" : "Williams",

          "hidden" : false,

          "new_author_first_name" : "Smith",

          "articleID" : "KDKE-B-9947-#kL5",

          "title" : "this is java blog",

          "userID" : 1,

          "content" : "i think java is the best programming language",

          "author_first_name" : "Smith",

          "tag_cnt" : 1,

          "postDate" : "2017-01-02",

          "tag" : [

            "java"

          ],

          "new_author_last_name" : "Williams",

          "follower_num" : 10

        }

      },

      {

        "_index" : "article",

        "_type" : "_doc",

        "_id" : "3",

        "_score" : 1.0,

        "_source" : {

          "view_cnt" : 100,

          "sub_title" : "we have a lot of fun",

          "author_last_name" : "Ma",

          "hidden" : false,

          "new_author_first_name" : "Jack",

          "articleID" : "JODL-X-1937-#pV7",

          "title" : "this is elasticsearch blog",

          "userID" : 2,

          "content" : "i am only an elasticsearch beginner",

          "author_first_name" : "Jack",

          "tag_cnt" : 1,

          "postDate" : "2017-01-01",

          "tag" : [

            "hadoop"

          ],

          "new_author_last_name" : "Ma",

          "follower_num" : 25

        }

      },

      {

        "_index" : "article",

        "_type" : "_doc",

        "_id" : "4",

        "_score" : 1.0,

        "_source" : {

          "view_cnt" : 80,

          "sub_title" : "both of them are good",

          "author_last_name" : "Li",

          "hidden" : true,

          "new_author_first_name" : "Robbin",

          "articleID" : "QQPX-R-3956-#aD8",

          "title" : "this is java, elasticsearch, hadoop blog",

          "userID" : 2,

          "content" : "elasticsearch and hadoop are all very good solution, i am a beginner",

          "author_first_name" : "Robbin",

          "tag_cnt" : 2,

          "postDate" : "2017-01-02",

          "tag" : [

            "java",

            "elasticsearch"

          ],

          "new_author_last_name" : "Li",

          "follower_num" : 3

        }

      },

      {

        "_index" : "article",

        "_type" : "_doc",

        "_id" : "5",

        "_score" : 1.0,

        "_source" : {

          "view_cnt" : 10,

          "sub_title" : "haha, hello world",

          "author_last_name" : "Peter Smith",

          "hidden" : false,

          "new_author_first_name" : "Tonny",

          "articleID" : "DHJK-B-1395-#Ky5",

          "title" : "this is spark blog",

          "userID" : 3,

          "content" : "spark is best big data solution based on scala ,an programming language similar to java",

          "author_first_name" : "Tonny",

          "tag_cnt" : 1,

          "postDate" : "2017-03-01",

          "tag" : [

            "elasticsearch"

          ],

          "new_author_last_name" : "Peter Smith",

          "follower_num" : 60

        }

      }

    ]

  }

}

query1: includes a "query" DSL and an "aggs" DSL

### query

GET /article/_search

{

  "from": 0,

  "size": 1000,

  "query": {

    "function_score": {

      "query": {

        "bool": {

          "must": [

            {

              "term": {

                "hidden": {

                  "value": "false"

                }

              }

            },

            {

              "match": {

                "new_author_last_name": "Smith"

              }

            },

            {

              "range": {

                "view_cnt": {

                  "gte": 1

                }

              }

            },

            {

              "bool": {

                "should": [

                  {

                    "simple_query_string": {

                      "query": "i",

                      "fields": ["content"]

                    }

                  }

                ]

              }

            }

          ],

          "must_not": [

            {

              "match": {

                "author_first_name": "Danny"

              }

            }

          ]

        }

      },

      "functions": [

        {

          "filter": {

            "match_all": {

              "boost": 1

            }

          },

          "field_value_factor": {

            "field": "follower_num",

            "factor": 1,

            "missing": 22.5,

            "modifier": "none"

          }

        }

      ]

    }

  },

  "_source": false,

  "sort": [

    {

      "_score": {

        "order": "desc"

      }

    },

    {

      "follower_num": {

        "order": "desc",

        "missing": 0

      }

    },

    {

      "view_cnt": {

        "order": "desc",

        "missing": 0

      }

    }

  ],



  "collapse": {

    "field": "userID"

  }

}

















### agg

GET /article/_search

{

  "size": 0,

  "query": {

    "function_score": {

      "query": {

        "bool": {

          "must": [

            {

              "term": {

                "hidden": {

                  "value": "false"

                }

              }

            },

            {

              "match": {

                "new_author_last_name": "Smith"

              }

            },

            {

              "range": {

                "view_cnt": {

                  "gte": 1

                }

              }

            },

            {

              "bool": {

                "should": [

                  {

                    "simple_query_string": {

                      "query": "i",

                      "fields": ["content"]

                    }

                  }

                ]

              }

            }

          ],

          "must_not": [

            {

              "match": {

                "author_first_name": "Danny"

              }

            }

          ]

        }

      },

      "functions": [

        {

          "filter": {

            "match_all": {

              "boost": 1

            }

          },

          "field_value_factor": {

            "field": "follower_num",

            "factor": 1,

            "missing": 22.5,

            "modifier": "none"

          }

        }

      ]

    }

  },

  "_source": false,

 

  "aggs": {

    "userID_agg": {

      "terms": {

        "field": "userID",

        "size": 100

      }

    }

  }

}

query2：

GET /article/_search

{

  "from": 0,

  "size": 1000,

  "query": {

    "function_score": {

      "query": {

        "bool": {

          "must": [

            {

              "term": {

                "hidden": {

                  "value": "false"

                }

              }

            },

            {

              "match": {

                "new_author_last_name": "Smith"

              }

            },

            {

              "range": {

                "view_cnt": {

                  "gte": 1

                }

              }

            },

            {

              "bool": {

                "should": [

                  {

                    "simple_query_string": {

                      "query": "i",

                      "fields": ["content"]

                    }

                  }

                ]

              }

            }

          ],

          "must_not": [

            {

              "match": {

                "author_first_name": "Danny"

              }

            }

          ]

        }

      },

      "functions": [

        {

          "filter": {

            "match_all": {

              "boost": 1

            }

          },

          "field_value_factor": {

            "field": "follower_num",

            "factor": 1,

            "missing": 22.5,

            "modifier": "none"

          }

        }

      ]

    }

  },

  "_source": false,

  "sort": [

    {

      "_score": {

        "order": "desc"

      }

    },

    {

      "follower_num": {

        "order": "desc",

        "missing": 0

      }

    },

    {

      "view_cnt": {

        "order": "desc",

        "missing": 0

      }

    }

  ],

  "aggs": {

    "userID_agg": {

      "terms": {

        "field": "userID",

        "size": 100

      }

    }

  }, 

  

  "collapse": {

    "field": "userID"

  }

}

[code]

### query

GET /article/_search

{

  "from": 0,

  "size": 1000,

  "query": {

    "function_score": {

      "query": {

        "bool": {

          "must": [

            {

              "term": {

                "hidden": {

                  "value": "false"

                }

              }

            },

            {

              "match": {

                "new_author_last_name": "Smith"

              }

            },

            {

              "range": {

                "view_cnt": {

                  "gte": 1

                }

              }

            },

            {

              "bool": {

                "should": [

                  {

                    "simple_query_string": {

                      "query": "i",

                      "fields": ["content"]

                    }

                  }

                ]

              }

            }

          ],

          "must_not": [

            {

              "match": {

                "author_first_name": "Danny"

              }

            }

          ]

        }

      },

      "functions": [

        {

          "filter": {

            "match_all": {

              "boost": 1

            }

          },

          "field_value_factor": {

            "field": "follower_num",

            "factor": 1,

            "missing": 22.5,

            "modifier": "none"

          }

        }

      ]

    }

  },

  "_source": false,

  "sort": [

    {

      "_score": {

        "order": "desc"

      }

    },

    {

      "follower_num": {

        "order": "desc",

        "missing": 0

      }

    },

    {

      "view_cnt": {

        "order": "desc",

        "missing": 0

      }

    }

  ],



  "collapse": {

    "field": "userID"

  }

}

















### agg

GET /article/_search

{

  "size": 0,

  "query": {

    "function_score": {

      "query": {

        "bool": {

          "must": [

            {

              "term": {

                "hidden": {

                  "value": "false"

                }

              }

            },

            {

              "match": {

                "new_author_last_name": "Smith"

              }

            },

            {

              "range": {

                "view_cnt": {

                  "gte": 1

                }

              }

            },

            {

              "bool": {

                "should": [

                  {

                    "simple_query_string": {

                      "query": "i",

                      "fields": ["content"]

                    }

                  }

                ]

              }

            }

          ],

          "must_not": [

            {

              "match": {

                "author_first_name": "Danny"

              }

            }

          ]

        }

      },

      "functions": [

        {

          "filter": {

            "match_all": {

              "boost": 1

            }

          },

          "field_value_factor": {

            "field": "follower_num",

            "factor": 1,

            "missing": 22.5,

            "modifier": "none"

          }

        }

      ]

    }

  },

  "_source": false,

 

  "aggs": {

    "userID_agg": {

      "terms": {

        "field": "userID",

        "size": 100

      }

    }

  }

}

[/code]

1 个回复

tongchuan1992 - 学无止境、学以致用

赞同来自: Ryze

不会，es跟数据库一样，都是会对语句进行优化的，你这两条语句命中的文档数是一样的，所以并不会提升性能。我感觉第二种可能更快一点。

要回复问题请先登录或注册

elasticsearch query与agg分开是否可以提高性能？

1 个回复

发起人

活动推荐

相关问题

问题状态

elasticsearch query与agg分开是否可以提高性能？

与内容相关的链接

1 个回复

发起人

活动推荐

相关问题

问题状态