一、制造数据
1.1 新建索引并增加数据
POST /data/article/_bulk { "index": { "_id": 1 }} { "articleID" : "XHDK-A-1293-#fJ3", "userID" : 1, "hidden": false, "postDate": "2022-01-01" } { "index": { "_id": 2 }} { "articleID" : "KDKE-B-9947-#kL5", "userID" : 1, "hidden": false, "postDate": "2022-01-02" } { "index": { "_id": 3 }} { "articleID" : "JODL-X-1937-#pV7", "userID" : 2, "hidden": false, "postDate": "2022-01-01" } { "index": { "_id": 4 }} { "articleID" : "QQPX-R-3956-#aD8", "userID" : 2, "hidden": true, "postDate": "2022-01-02" }
# 追加新增字段 POST /data/article/_bulk {"update":{"_id":"1"}} {"doc":{"title":"this is java and elasticsearch blog"}} {"update":{"_id":"2"}} {"doc":{"title":"this is java blog"}} {"update":{"_id":"3"}} {"doc":{"title":"this is elasticsearch blog"}} {"update":{"_id":"4"}} {"doc":{"title":"this is java, elasticsearch, hadoop blog"}} {"update":{"_id":"5"}} {"doc":{"title":"this is spark blog"}}
# 查看数据 GET data/_search
#! Elasticsearch built-in security features are not enabled. Without authentication, your cluster could be accessible to anyone. See https://www.elastic.co/guide/en/elasticsearch/reference/7.17/security-minimal-setup.html to enable security. { "took" : 151, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 4, "relation" : "eq" }, "max_score" : 1.0, "hits" : [ { "_index" : "data", "_type" : "article", "_id" : "1", "_score" : 1.0, "_source" : { "articleID" : "XHDK-A-1293-#fJ3", "userID" : 1, "hidden" : false, "postDate" : "2022-01-01", "title" : "this is java and elasticsearch blog" } }, { "_index" : "data", "_type" : "article", "_id" : "2", "_score" : 1.0, "_source" : { "articleID" : "KDKE-B-9947-#kL5", "userID" : 1, "hidden" : false, "postDate" : "2022-01-02", "title" : "this is java blog" } }, { "_index" : "data", "_type" : "article", "_id" : "3", "_score" : 1.0, "_source" : { "articleID" : "JODL-X-1937-#pV7", "userID" : 2, "hidden" : false, "postDate" : "2022-01-01", "title" : "this is elasticsearch blog" } }, { "_index" : "data", "_type" : "article", "_id" : "4", "_score" : 1.0, "_source" : { "articleID" : "QQPX-R-3956-#aD8", "userID" : 2, "hidden" : true, "postDate" : "2022-01-02", "title" : "this is java, elasticsearch, hadoop blog" } } ] } }
二、Text分词匹配
2.1 搜索标题中包含java或elasticsearch的条目
- match query是负责进行全文检索的,会先对搜索词进行分词,再做匹配。
GET /data/article/_search { "query": { "match": { "title": "java elasticsearch" } } }
#! Elasticsearch built-in security features are not enabled. Without authentication, your cluster could be accessible to anyone. See https://www.elastic.co/guide/en/elasticsearch/reference/7.17/security-minimal-setup.html to enable security. #! [types removal] Specifying types in search requests is deprecated. { "took" : 0, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 4, "relation" : "eq" }, "max_score" : 0.6593991, "hits" : [ { "_index" : "data", "_type" : "article", "_id" : "1", "_score" : 0.6593991, "_source" : { "articleID" : "XHDK-A-1293-#fJ3", "userID" : 1, "hidden" : false, "postDate" : "2022-01-01", "title" : "this is java and elasticsearch blog" } }, { "_index" : "data", "_type" : "article", "_id" : "4", "_score" : 0.6593991, "_source" : { "articleID" : "QQPX-R-3956-#aD8", "userID" : 2, "hidden" : true, "postDate" : "2022-01-02", "title" : "this is java, elasticsearch, hadoop blog" } }, { "_index" : "data", "_type" : "article", "_id" : "2", "_score" : 0.38845783, "_source" : { "articleID" : "KDKE-B-9947-#kL5", "userID" : 1, "hidden" : false, "postDate" : "2022-01-02", "title" : "this is java blog" } }, { "_index" : "data", "_type" : "article", "_id" : "3", "_score" : 0.38845783, "_source" : { "articleID" : "JODL-X-1937-#pV7", "userID" : 2, "hidden" : false, "postDate" : "2022-01-01", "title" : "this is elasticsearch blog" } } ] } }2.2 搜索标题中包含java和elasticsearch的条目
- 灵活使用operator参数:如果希望所有的搜索关键字都必须匹配,就把operator设为and,可以实现默认(or)的match query无法实现的效果
GET /data/article/_search { "query": { "match": { "title": { "query": "java elasticsearch", "operator": "and" } } } }
#! Elasticsearch built-in security features are not enabled. Without authentication, your cluster could be accessible to anyone. See https://www.elastic.co/guide/en/elasticsearch/reference/7.17/security-minimal-setup.html to enable security. #! [types removal] Specifying types in search requests is deprecated. { "took" : 2, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 2, "relation" : "eq" }, "max_score" : 0.6593991, "hits" : [ { "_index" : "data", "_type" : "article", "_id" : "1", "_score" : 0.6593991, "_source" : { "articleID" : "XHDK-A-1293-#fJ3", "userID" : 1, "hidden" : false, "postDate" : "2022-01-01", "title" : "this is java and elasticsearch blog" } }, { "_index" : "data", "_type" : "article", "_id" : "4", "_score" : 0.6593991, "_source" : { "articleID" : "QQPX-R-3956-#aD8", "userID" : 2, "hidden" : true, "postDate" : "2022-01-02", "title" : "this is java, elasticsearch, hadoop blog" } } ] } }2.3 搜索包含java,elasticsearch,spark,hadoop,4个关键字中,至少3个的条目
- 指定一些关键字中,必须至少匹配其中的多少个关键字,才能作为结果返回。注意:共4个关键字时,"50%"表示至少匹配2个;要做到标题所说的至少3个,应设为"75%"。另外,下面查询中的hadhoop是hadoop的拼写错误,该词不会命中任何文档。
GET /data/article/_search { "query": { "match": { "title": { "query": "java elasticsearch spark hadhoop", "minimum_should_match": "50%" } } } }
#! Elasticsearch built-in security features are not enabled. Without authentication, your cluster could be accessible to anyone. See https://www.elastic.co/guide/en/elasticsearch/reference/7.17/security-minimal-setup.html to enable security. #! [types removal] Specifying types in search requests is deprecated. { "took" : 0, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 2, "relation" : "eq" }, "max_score" : 0.6593991, "hits" : [ { "_index" : "data", "_type" : "article", "_id" : "1", "_score" : 0.6593991, "_source" : { "articleID" : "XHDK-A-1293-#fJ3", "userID" : 1, "hidden" : false, "postDate" : "2022-01-01", "title" : "this is java and elasticsearch blog" } }, { "_index" : "data", "_type" : "article", "_id" : "4", "_score" : 0.6593991, "_source" : { "articleID" : "QQPX-R-3956-#aD8", "userID" : 2, "hidden" : true, "postDate" : "2022-01-02", "title" : "this is java, elasticsearch, hadoop blog" } } ] } }2.4 用bool组合多个搜索条件,来搜索title
GET /data/article/_search { "query": { "bool": { "must": [ { "match": { "title": "java" } } ], "must_not": { "match": { "title": "spark" } }, "should": [ { "match": { "title": "hadoop" } }, { "match": { "title": "elasticsearch" } } ] } } }
- must是确保必须有这个关键字,同时会根据这个must的条件去计算出document对这个搜索条件的relevance score。在满足must的基础之上,should中的条件不匹配也可以,但是如果匹配得更多,那么document的relevance score就会更高。
- should是可以影响相关度分数的
#! Elasticsearch built-in security features are not enabled. Without authentication, your cluster could be accessible to anyone. See https://www.elastic.co/guide/en/elasticsearch/reference/7.17/security-minimal-setup.html to enable security. #! [types removal] Specifying types in search requests is deprecated. { "took" : 1, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 3, "relation" : "eq" }, "max_score" : 1.7723151, "hits" : [ { "_index" : "data", "_type" : "article", "_id" : "4", "_score" : 1.7723151, "_source" : { "articleID" : "QQPX-R-3956-#aD8", "userID" : 2, "hidden" : true, "postDate" : "2022-01-02", "title" : "this is java, elasticsearch, hadoop blog" } }, { "_index" : "data", "_type" : "article", "_id" : "1", "_score" : 0.6593991, "_source" : { "articleID" : "XHDK-A-1293-#fJ3", "userID" : 1, "hidden" : false, "postDate" : "2022-01-01", "title" : "this is java and elasticsearch blog" } }, { "_index" : "data", "_type" : "article", "_id" : "2", "_score" : 0.38845783, "_source" : { "articleID" : "KDKE-B-9947-#kL5", "userID" : 1, "hidden" : false, "postDate" : "2022-01-02", "title" : "this is java blog" } } ] } }2.5 搜索java,hadoop,spark,elasticsearch,至少包含其中3个关键字
- 默认情况下,should是可以不匹配任何一个的,比如上面的搜索中,this is java blog就不匹配任何一个should条件。但是有个例外的情况:如果没有must(也没有filter)的话,那么should中必须至少匹配一个才可以。比如下面的搜索,should中有4个条件,默认情况下,只要满足其中一个条件,就可以匹配作为结果返回。
- 可以精准控制,should的4个条件中,至少匹配几个才能作为结果返回
GET /data/article/_search { "query": { "bool": { "should": [ { "match": { "title": "java" } }, { "match": { "title": "elasticsearch" } }, { "match": { "title": "hadoop" } }, { "match": { "title": "spark" } } ], "minimum_should_match": 3 } } }
#! Elasticsearch built-in security features are not enabled. Without authentication, your cluster could be accessible to anyone. See https://www.elastic.co/guide/en/elasticsearch/reference/7.17/security-minimal-setup.html to enable security. #! [types removal] Specifying types in search requests is deprecated. { "took" : 0, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 1, "relation" : "eq" }, "max_score" : 1.7723153, "hits" : [ { "_index" : "data", "_type" : "article", "_id" : "4", "_score" : 1.7723153, "_source" : { "articleID" : "QQPX-R-3956-#aD8", "userID" : 2, "hidden" : true, "postDate" : "2022-01-02", "title" : "this is java, elasticsearch, hadoop blog" } } ] } }
2.6 普通match如何转换为term+should
- 使用诸如上面的match query进行多值搜索的时候,es会在底层自动将这个match query转换为bool的语法:用bool should指定多个搜索词,每个搜索词使用term query
转换前:
GET /data/article/_search { "query": { "match": { "title": "java elasticsearch" } } }
转换后:
GET /data/article/_search { "query": { "bool": { "should": [ { "term": { "title": "java" } }, { "term": { "title": "elasticsearch" } } ] } } }
查询数据结果:
#! Elasticsearch built-in security features are not enabled. Without authentication, your cluster could be accessible to anyone. See https://www.elastic.co/guide/en/elasticsearch/reference/7.17/security-minimal-setup.html to enable security. #! [types removal] Specifying types in search requests is deprecated. { "took" : 0, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 4, "relation" : "eq" }, "max_score" : 0.6593991, "hits" : [ { "_index" : "data", "_type" : "article", "_id" : "1", "_score" : 0.6593991, "_source" : { "articleID" : "XHDK-A-1293-#fJ3", "userID" : 1, "hidden" : false, "postDate" : "2022-01-01", "title" : "this is java and elasticsearch blog" } }, { "_index" : "data", "_type" : "article", "_id" : "4", "_score" : 0.6593991, "_source" : { "articleID" : "QQPX-R-3956-#aD8", "userID" : 2, "hidden" : true, "postDate" : "2022-01-02", "title" : "this is java, elasticsearch, hadoop blog" } }, { "_index" : "data", "_type" : "article", "_id" : "2", "_score" : 0.38845783, "_source" : { "articleID" : "KDKE-B-9947-#kL5", "userID" : 1, "hidden" : false, "postDate" : "2022-01-02", "title" : "this is java blog" } }, { "_index" : "data", "_type" : "article", "_id" : "3", "_score" : 0.38845783, "_source" : { "articleID" : "JODL-X-1937-#pV7", "userID" : 2, "hidden" : false, "postDate" : "2022-01-01", "title" : "this is elasticsearch blog" } } ] } }2.6 and match如何转换为term+must
转换前:
GET /data/article/_search { "query": { "match": { "title": { "query": "java elasticsearch", "operator": "and" } } } }
转换后:
GET /data/article/_search { "query": { "bool": { "must": [ { "term": { "title": "java" } }, { "term": { "title": "elasticsearch" } } ] } } }
查询数据结果:
#! Elasticsearch built-in security features are not enabled. Without authentication, your cluster could be accessible to anyone. See https://www.elastic.co/guide/en/elasticsearch/reference/7.17/security-minimal-setup.html to enable security. #! [types removal] Specifying types in search requests is deprecated. { "took" : 0, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 2, "relation" : "eq" }, "max_score" : 0.6593991, "hits" : [ { "_index" : "data", "_type" : "article", "_id" : "1", "_score" : 0.6593991, "_source" : { "articleID" : "XHDK-A-1293-#fJ3", "userID" : 1, "hidden" : false, "postDate" : "2022-01-01", "title" : "this is java and elasticsearch blog" } }, { "_index" : "data", "_type" : "article", "_id" : "4", "_score" : 0.6593991, "_source" : { "articleID" : "QQPX-R-3956-#aD8", "userID" : 2, "hidden" : true, "postDate" : "2022-01-02", "title" : "this is java, elasticsearch, hadoop blog" } } ] } }2.7 minimum_should_match如何转换
转换前:
GET /data/article/_search { "query": { "match": { "title": { "query": "java elasticsearch hadoop spark", "minimum_should_match": "75%" } } } }
转换后:
GET /data/article/_search { "query": { "bool": { "should": [ { "term": { "title": "java" } }, { "term": { "title": "elasticsearch" } }, { "term": { "title": "hadoop" } }, { "term": { "title": "spark" } } ], "minimum_should_match": 3 } } }
查询数据结果:
#! Elasticsearch built-in security features are not enabled. Without authentication, your cluster could be accessible to anyone. See https://www.elastic.co/guide/en/elasticsearch/reference/7.17/security-minimal-setup.html to enable security. #! [types removal] Specifying types in search requests is deprecated. { "took" : 0, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 1, "relation" : "eq" }, "max_score" : 1.7723153, "hits" : [ { "_index" : "data", "_type" : "article", "_id" : "4", "_score" : 1.7723153, "_source" : { "articleID" : "QQPX-R-3956-#aD8", "userID" : 2, "hidden" : true, "postDate" : "2022-01-02", "title" : "this is java, elasticsearch, hadoop blog" } } ] } }
2.8 使用sort对查询数据排序,并按照size返回查询的数量
- desc:降序
- asc:升序
GET /data/article/_search?size=2 { "query": { "match": { "title": "java elasticsearch" } }, "sort": { "postDate": { "order": "desc" } } }
#! Elasticsearch built-in security features are not enabled. Without authentication, your cluster could be accessible to anyone. See https://www.elastic.co/guide/en/elasticsearch/reference/7.17/security-minimal-setup.html to enable security. #! [types removal] Specifying types in search requests is deprecated. { "took" : 0, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 4, "relation" : "eq" }, "max_score" : null, "hits" : [ { "_index" : "data", "_type" : "article", "_id" : "2", "_score" : null, "_source" : { "articleID" : "KDKE-B-9947-#kL5", "userID" : 1, "hidden" : false, "postDate" : "2022-01-02", "title" : "this is java blog" }, "sort" : [ 1641081600000 ] }, { "_index" : "data", "_type" : "article", "_id" : "4", "_score" : null, "_source" : { "articleID" : "QQPX-R-3956-#aD8", "userID" : 2, "hidden" : true, "postDate" : "2022-01-02", "title" : "this is java, elasticsearch, hadoop blog" }, "sort" : [ 1641081600000 ] } ] } }