首页 > 代码库 > elasticsearch搜索过滤filter

elasticsearch搜索过滤filter

首先要讲下,为什么需要使用filter过滤

  • 过滤不需要计算匹配度(score),因此它比查询要快很多

  • 过滤查询后的结果能被缓存到内存中,并被多次重复使用.


1.如果我们要查询出account中balance从20000到30000之间的数据

curl -XPOST 'localhost:9200/bank/_search?pretty' -d '{
    "query":{
        "filtered":{
            "query":{
                "match_all":{}
            },
            "filter":{
                "range":{
                    "balance":{
                        "gte":20000,
                        "lte":30000
                    }
                }
            }
        }
    }
}'
{
  "took" : 102,
  "timed_out" : false,
  "_shards" : {
    "total" : 5,
    "successful" : 5,
    "failed" : 0
  },
  "hits" : {
    "total" : 217,
    "max_score" : 1.0,
    "hits" : [ {
      "_index" : "bank",
      "_type" : "account",
      "_id" : "49",
      "_score" : 1.0,
      "_source":{"account_number":49,"balance":29104,"firstname":"Fulton","lastname":"Holt","age":23,"gender":"F","address":"451 Humboldt Street","employer":"Anocha","email":"fultonholt@anocha.com","city":"Sunriver","state":"RI"}
    }, {
      "_index" : "bank",
      "_type" : "account",
      "_id" : "102",
      "_score" : 1.0,
      "_source":{"account_number":102,"balance":29712,"firstname":"Dena","lastname":"Olson","age":27,"gender":"F","address":"759 Newkirk Avenue","employer":"Hinway","email":"denaolson@hinway.com","city":"Choctaw","state":"NJ"}
    }, {
      "_index" : "bank",
      "_type" : "account",
      "_id" : "133",
      "_score" : 1.0,
      "_source":{"account_number":133,"balance":26135,"firstname":"Deena","lastname":"Richmond","age":36,"gender":"F","address":"646 Underhill Avenue","employer":"Sunclipse","email":"deenarichmond@sunclipse.com","city":"Austinburg","state":"SC"}
    }, {
      "_index" : "bank",
      "_type" : "account",
      "_id" : "140",
      "_score" : 1.0,
      "_source":{"account_number":140,"balance":26696,"firstname":"Cotton","lastname":"Christensen","age":32,"gender":"M","address":"878 Schermerhorn Street","employer":"Prowaste","email":"cottonchristensen@prowaste.com","city":"Mayfair","state":"LA"}
    }, {
      "_index" : "bank",
      "_type" : "account",
      "_id" : "203",
      "_score" : 1.0,
      "_source":{"account_number":203,"balance":21890,"firstname":"Eve","lastname":"Wyatt","age":33,"gender":"M","address":"435 Furman Street","employer":"Assitia","email":"evewyatt@assitia.com","city":"Jamestown","state":"MN"}
    }, {
      "_index" : "bank",
      "_type" : "account",
      "_id" : "239",
      "_score" : 1.0,
      "_source":{"account_number":239,"balance":25719,"firstname":"Chang","lastname":"Boyer","age":36,"gender":"M","address":"895 Brigham Street","employer":"Qaboos","email":"changboyer@qaboos.com","city":"Belgreen","state":"NH"}
    }, {
      "_index" : "bank",
      "_type" : "account",
      "_id" : "241",
      "_score" : 1.0,
      "_source":{"account_number":241,"balance":25379,"firstname":"Schroeder","lastname":"Harrington","age":26,"gender":"M","address":"610 Tapscott Avenue","employer":"Otherway","email":"schroederharrington@otherway.com","city":"Ebro","state":"TX"}
    }, {
      "_index" : "bank",
      "_type" : "account",
      "_id" : "246",
      "_score" : 1.0,
      "_source":{"account_number":246,"balance":28405,"firstname":"Katheryn","lastname":"Foster","age":21,"gender":"F","address":"259 Kane Street","employer":"Quantalia","email":"katherynfoster@quantalia.com","city":"Bath","state":"TX"}
    }, {
      "_index" : "bank",
      "_type" : "account",
      "_id" : "253",
      "_score" : 1.0,
      "_source":{"account_number":253,"balance":20240,"firstname":"Melissa","lastname":"Gould","age":31,"gender":"M","address":"440 Fuller Place","employer":"Buzzopia","email":"melissagould@buzzopia.com","city":"Lumberton","state":"MD"}
    }, {
      "_index" : "bank",
      "_type" : "account",
      "_id" : "277",
      "_score" : 1.0,
      "_source":{"account_number":277,"balance":29564,"firstname":"Romero","lastname":"Lott","age":31,"gender":"M","address":"456 Danforth Street","employer":"Plasto","email":"romerolott@plasto.com","city":"Vincent","state":"VT"}
    } ]
  }
}

可以看到查询的结果都是在指定范围内

2.当然elasticsearch能够像sql一样使用聚合函数

curl -XPOST 'localhost:9200/bank/_search?pretty' -d '
{
  "size": 0,
  "aggs": {
    "group_by_state": {
      "terms": {
        "field": "state"
      }
    }
  }
}'

这个例子其实就是根据state字段进行分组.相当于下列sql语句

SELECT state, COUNT(*) FROM bank GROUP BY state ORDER BY COUNT(*) DESC LIMIT 10

结果为:

{
  "took" : 190,
  "timed_out" : false,
  "_shards" : {
    "total" : 5,
    "successful" : 5,
    "failed" : 0
  },
  "hits" : {
    "total" : 1000,
    "max_score" : 0.0,
    "hits" : [ ]
  },
  "aggregations" : {
    "group_by_state" : {
      "buckets" : [ {
        "key" : "al",
        "doc_count" : 21
      }, {
        "key" : "tx",
        "doc_count" : 17
      }, {
        "key" : "id",
        "doc_count" : 15
      }, {
        "key" : "ma",
        "doc_count" : 15
      }, {
        "key" : "md",
        "doc_count" : 15
      }, {
        "key" : "pa",
        "doc_count" : 15
      }, {
        "key" : "dc",
        "doc_count" : 14
      }, {
        "key" : "me",
        "doc_count" : 14
      }, {
        "key" : "mo",
        "doc_count" : 14
      }, {
        "key" : "nd",
        "doc_count" : 14
      } ]
    }
  }
}

其中key就是分组中的state值,doc_count就是个数.group_by_state只是分组的一个别名

我们再使用gender来进行分组

curl -XPOST 'localhost:9200/bank/_search?pretty' -d '
{
  "size": 0,
  "aggs": {
    "group_by_gender": {
      "terms": {
        "field": "gender"
      }
    }
  }
}'
{
  "took" : 30,
  "timed_out" : false,
  "_shards" : {
    "total" : 5,
    "successful" : 5,
    "failed" : 0
  },
  "hits" : {
    "total" : 1000,
    "max_score" : 0.0,
    "hits" : [ ]
  },
  "aggregations" : {
    "group_by_gender" : {
      "buckets" : [ {
        "key" : "m",
        "doc_count" : 507
      }, {
        "key" : "f",
        "doc_count" : 493
      } ]
    }
  }
}

可以发现m的有507个,f的有493个.可以看到这两次查询都设置了size为0,因为我们不想显示匹配的一条条数据,只想看聚合的结果.如果去掉size=0,那么hits节点下hits中会存在数据.

如果我不仅要查询state的分组信息,还要查询出各个分组中balance的平均数

curl -XPOST 'localhost:9200/bank/_search?pretty' -d '
{
  "size": 0,
  "aggs": {
    "group_by_state": {
      "terms": {
        "field": "state"
      },
      "aggs": {
        "average_balance": {
          "avg": {
            "field": "balance"
          }
        }
      }
    }
  }
}'
{
  "took" : 34,
  "timed_out" : false,
  "_shards" : {
    "total" : 5,
    "successful" : 5,
    "failed" : 0
  },
  "hits" : {
    "total" : 1000,
    "max_score" : 0.0,
    "hits" : [ ]
  },
  "aggregations" : {
    "group_by_state" : {
      "buckets" : [ {
        "key" : "al",
        "doc_count" : 21,
        "average_balance" : {
          "value" : 25377.571428571428
        }
      }, {
        "key" : "tx",
        "doc_count" : 17,
        "average_balance" : {
          "value" : 22466.058823529413
        }
      }, {
        "key" : "id",
        "doc_count" : 15,
        "average_balance" : {
          "value" : 23614.933333333334
        }
      }, {
        "key" : "ma",
        "doc_count" : 15,
        "average_balance" : {
          "value" : 29064.666666666668
        }
      }, {
        "key" : "md",
        "doc_count" : 15,
        "average_balance" : {
          "value" : 20143.733333333334
        }
      }, {
        "key" : "pa",
        "doc_count" : 15,
        "average_balance" : {
          "value" : 25320.933333333334
        }
      }, {
        "key" : "dc",
        "doc_count" : 14,
        "average_balance" : {
          "value" : 24543.64285714286
        }
      }, {
        "key" : "me",
        "doc_count" : 14,
        "average_balance" : {
          "value" : 20061.14285714286
        }
      }, {
        "key" : "mo",
        "doc_count" : 14,
        "average_balance" : {
          "value" : 25414.64285714286
        }
      }, {
        "key" : "nd",
        "doc_count" : 14,
        "average_balance" : {
          "value" : 31717.571428571428
        }
      } ]
    }
  }
}

注意average_balance是别名

如果我需要根据查出的balance平均数进行一个排序呢?

curl -XPOST 'localhost:9200/bank/_search?pretty' -d '
{
  "size": 0,
  "aggs": {
    "group_by_state": {
      "terms": {
        "field": "state",
        "order": {
          "average_balance": "desc"
        }
      },
      "aggs": {
        "average_balance": {
          "avg": {
            "field": "balance"
          }
        }
      }
    }
  }
}'

3.我需要将20-29,30-39,40-49这三个年龄段的账户信息进行分组,并在每个年龄段内按gender分组,计算各组balance的平均数

curl -XPOST 'localhost:9200/bank/_search?pretty' -d '
{
  "size": 0,
  "aggs": {
    "group_by_age": {
      "range": {
        "field": "age",
        "ranges": [
          {
            "from": 20,
            "to": 30
          },
          {
            "from": 30,
            "to": 40
          },
          {
            "from": 40,
            "to": 50
          }
        ]
      },
      "aggs": {
        "group_by_gender": {
          "terms": {
            "field": "gender"
          },
          "aggs": {
            "average_balance": {
              "avg": {
                "field": "balance"
              }
            }
          }
        }
      }
    }
  }
}'
{
  "took" : 21,
  "timed_out" : false,
  "_shards" : {
    "total" : 5,
    "successful" : 5,
    "failed" : 0
  },
  "hits" : {
    "total" : 1000,
    "max_score" : 0.0,
    "hits" : [ ]
  },
  "aggregations" : {
    "group_by_age" : {
      "buckets" : [ {
        "key" : "20.0-30.0",
        "from" : 20.0,
        "from_as_string" : "20.0",
        "to" : 30.0,
        "to_as_string" : "30.0",
        "doc_count" : 451,
        "group_by_gender" : {
          "buckets" : [ {
            "key" : "m",
            "doc_count" : 232,
            "average_balance" : {
              "value" : 27374.05172413793
            }
          }, {
            "key" : "f",
            "doc_count" : 219,
            "average_balance" : {
              "value" : 25341.260273972603
            }
          } ]
        }
      }, {
        "key" : "30.0-40.0",
        "from" : 30.0,
        "from_as_string" : "30.0",
        "to" : 40.0,
        "to_as_string" : "40.0",
        "doc_count" : 504,
        "group_by_gender" : {
          "buckets" : [ {
            "key" : "f",
            "doc_count" : 253,
            "average_balance" : {
              "value" : 25670.869565217392
            }
          }, {
            "key" : "m",
            "doc_count" : 251,
            "average_balance" : {
              "value" : 24288.239043824702
            }
          } ]
        }
      }, {
        "key" : "40.0-50.0",
        "from" : 40.0,
        "from_as_string" : "40.0",
        "to" : 50.0,
        "to_as_string" : "50.0",
        "doc_count" : 45,
        "group_by_gender" : {
          "buckets" : [ {
            "key" : "m",
            "doc_count" : 24,
            "average_balance" : {
              "value" : 26474.958333333332
            }
          }, {
            "key" : "f",
            "doc_count" : 21,
            "average_balance" : {
              "value" : 27992.571428571428
            }
          } ]
        }
      } ]
    }
  }
}

可以看到结果如上.

elasticsearch搜索过滤filter