Elasticsearch terms query grouped by geo_point
I want to do a group_by with Elasticsearch.
I store a text field:
"text": {
"type": "string",
"store": true,
"analyzer" : "twittsTokenizor"
},
and a geo_point field:
"geo": {
"type": "geo_point",
"store": true
}
I am trying to get the most frequent terms in my text field, grouped by location, but... it does not work.
A query like this does work (when I define the location myself in the query):
curl -XGET http://localhost:9200/twitter/_search -d '{
    "query" : {
        "match_all" : {}
    },
    "filter" : {
        "bool" : {
            "must" : [{
                "range" : {
                    "created_at" : {
                        "from" : "Mon Feb 22 14:04:23 +0000 2015",
                        "to" : "Wed Feb 23 22:06:25 +0000 2015"
                    }
                }
            }, {
                "geo_distance" : {
                    "distance" : "100km",
                    "geo" : { "lat" : 48.856506, "lon" : 2.352133 }
                }
            }]
        }
    },
    "facets" : {
        "tag" : {
            "terms" : {
                "field" : "text",
                "size" : 10
            }
        }
    }
}'
This does not work:
curl -XGET http://localhost:9200/twitter/_search -d '{
    "query" : {
        "match_all" : {}
    },
    "aggs" : {
        "geo1" : {
            "terms" : {
                "field" : "geo"
            }
        },
        "tag" : {
            "terms" : {
                "field" : "text",
                "size" : 10
            }
        }
    }
}'
Neither does this:
curl -XGET http://localhost:9200/twitter/_search -d '{
    "query" : {
        "match_all" : {}
    },
    "facets" : {
        "tag" : {
            "terms" : {
                "field" : "text",
                "size" : 10
            }
        },
        "geo1" : {
            "terms" : {
                "field" : "geo"
            }
        }
    }
}'
And facet_filters did not do the job either.
What am I doing wrong? Is this even possible?
Thanks a lot.
Edit: here is my mapping:
curl -s -XPUT "http://localhost:9200/twitter" -d '
{
    "settings": {
        "analysis": {
            "analyzer": {
                "twittsTokenizor" : {
                    "type" : "custom",
                    "tokenizer" : "standard",
                    "filter" : [
                        "french_elision",
                        "asciifolding",
                        "lowercase",
                        "french_stop",
                        "english_stop",
                        "twitter_stop"
                    ]
                }
            },
            "filter" : {
                "french_elision" : {
                    "type" : "elision",
                    "articles" : [ "l", "m", "t", "qu", "n", "s",
                                   "j", "d", "c", "jusqu", "quoiqu",
                                   "lorsqu", "puisqu" ]
                },
                "asciifolding" : {
                    "type" : "asciifolding"
                },
                "french_stop": {
                    "type": "stop",
                    "stopwords": "_french_"
                },
                "english_stop": {
                    "type": "stop",
                    "stopwords": "_english_"
                },
                "twitter_stop" : {
                    "type" : "stop",
                    "stopwords": ["RT", "FAV", "TT", "FF", "rt"]
                }
            }
        }
    },
    "mappings": {
        "twitter": {
            "properties": {
                "id": {
                    "type": "long",
                    "store": true
                },
                "text": {
                    "type": "string",
                    "store": true,
                    "analyzer" : "twittsTokenizor"
                },
                "created_at": {
                    "type": "date",
                    "format": "EE MMM d HH:mm:ss Z yyyy",
                    "store": true
                },
                "location": {
                    "type": "string",
                    "store": true
                },
                "geo": {
                    "type": "geo_point",
                    "store": true
                }
            }
        }
    }
}'
and a data sample:
{ "_id" : ObjectId("54eb3c35a710901a698b4567"), "country" : "FR", "created_at" : "Mon Feb 23 14:25:30 +0000 2015", "geo" : { "lat" : 49.119696, "lon" : 6.176355 }, "id" : -812216320, "location" : "Metz ", "text" : "Passer des vacances formidable avec des gens géniaux sans aucunes pression avec pour seul soucis s'éclater et se laisser vivre #BONHEUR"}
If I understand you correctly, you want to get the most frequent terms per geo point? Then you need a two-level aggregation: first a geohash_grid aggregation, with a terms aggregation nested inside it:
{
    "query": {
        "match_all": {}
    },
    "aggs": {
        // Buckets for each geohash grid cell
        "geo1": {
            "geohash_grid": {
                "field": "geo",
                "precision": 5
            },
            "aggs": {
                // Buckets for each unique term in "text" within this geo bucket, at most 10 buckets
                "tags": {
                    "terms": {
                        "field": "text",
                        "size": 10
                    }
                }
            }
        }
    }
}
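For reference, this is how you could run it against your index, in the same style as your other requests (note that the // comments above are illustration only and must be stripped before sending, since JSON does not allow comments):

curl -XGET http://localhost:9200/twitter/_search -d '{
    "query": { "match_all": {} },
    "aggs": {
        "geo1": {
            "geohash_grid": { "field": "geo", "precision": 5 },
            "aggs": {
                "tags": { "terms": { "field": "text", "size": 10 } }
            }
        }
    }
}'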
Or you could use a geo_distance aggregation instead, which buckets documents by distance from a fixed origin rather than by grid cell; a minimal sketch follows.
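Here is a sketch of that variant, assuming the same "geo" field and reusing the Paris origin from your geo_distance filter; the distance rings are arbitrary example values, not anything your data requires:

{
    "aggs": {
        // One bucket per distance ring around the origin
        "geo1": {
            "geo_distance": {
                "field": "geo",
                "origin": { "lat": 48.856506, "lon": 2.352133 },
                "unit": "km",
                "ranges": [
                    { "to": 100 },
                    { "from": 100, "to": 300 },
                    { "from": 300 }
                ]
            },
            "aggs": {
                // Top 10 terms in "text" within each ring
                "tags": {
                    "terms": { "field": "text", "size": 10 }
                }
            }
        }
    }
}

Each distance ring then carries its own top-10 terms sub-bucket, just like the geohash cells above.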