在 elasticsearch 中处理大量分片
Handle large amount of shards in elasticsearch
每个商店都有自己的一组库存,这些库存作为一个独立的索引存储在 Elasticsearch 中。目前我有 11,000 家商店,也就是说有 11,000 个索引及其对应的分片,在一台 32 GB RAM 的服务器上很难查询数据。
谁能指导如何在 elasticsearch 中存储每个商店的库存,因为为每个商店库存创建单独的索引对我没有帮助?
下面是索引的映射。使用的 Elasticsearch 版本是 6.0.1
{
"staging_shop_inventory_558" : {
"mappings" : {
"shop_inventory" : {
"properties" : {
"alternate_name" : {
"type" : "text",
"analyzer" : "standard"
},
"brand" : {
"properties" : {
"created_at" : {
"type" : "date"
},
"id" : {
"type" : "integer"
},
"image" : {
"type" : "text",
"index" : false
},
"is_selected" : {
"type" : "boolean"
},
"name" : {
"type" : "text",
"analyzer" : "standard"
},
"updated_at" : {
"type" : "date"
}
}
},
"brand_autocomplete" : {
"type" : "text",
"analyzer" : "autocomplete",
"search_analyzer" : "autocomplete_search"
},
"brand_suggest" : {
"type" : "text",
"analyzer" : "ngram_analyzer"
},
"category" : {
"properties" : {
"id" : {
"type" : "integer"
},
"image" : {
"type" : "text",
"index" : false
},
"name" : {
"type" : "text",
"analyzer" : "standard"
}
}
},
"created_at" : {
"type" : "date"
},
"deleted_at" : {
"type" : "date"
},
"id" : {
"type" : "integer"
},
"image" : {
"type" : "text",
"index" : false
},
"is_deleted" : {
"type" : "boolean"
},
"name" : {
"type" : "text",
"fields" : {
"raw" : {
"type" : "keyword"
}
},
"analyzer" : "gramAnalyzer",
"search_analyzer" : "whitespace_analyzer"
},
"name_autocomplete" : {
"type" : "text",
"analyzer" : "autocomplete",
"search_analyzer" : "autocomplete_search"
},
"name_suggest" : {
"type" : "text",
"analyzer" : "ngram_analyzer"
},
"product_id" : {
"type" : "integer"
},
"product_sizes" : {
"type" : "nested",
"properties" : {
"deleted_at" : {
"type" : "date"
},
"description" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"ean_code" : {
"type" : "keyword"
},
"id" : {
"type" : "integer"
},
"in_stock" : {
"type" : "boolean"
},
"is_deleted" : {
"type" : "boolean"
},
"price" : {
"type" : "float"
},
"product_id" : {
"type" : "long"
},
"product_update_on" : {
"type" : "date"
},
"product_update_status" : {
"type" : "integer"
},
"uom" : {
"type" : "keyword"
},
"weight" : {
"type" : "float"
}
}
},
"sub_category" : {
"properties" : {
"created_at" : {
"type" : "date"
},
"id" : {
"type" : "integer"
},
"image" : {
"type" : "text",
"index" : false
},
"is_selected" : {
"type" : "boolean"
},
"name" : {
"type" : "text",
"analyzer" : "standard"
},
"updated_at" : {
"type" : "date"
}
}
},
"sub_category_suggest" : {
"type" : "text",
"analyzer" : "gramAnalyzer",
"search_analyzer" : "whitespace_analyzer"
}
}
}
}
}
}
哇,你的节点只有 32 GB RAM,却有 11k 个分片(根据副本数量可能还会更多)。另外请注意,这 32 GB 是服务器内存,并不等于分配给 Elasticsearch 进程的 JVM 堆内存;Elasticsearch 的性能在很大程度上取决于 JVM 堆大小,而堆超过 32 GB 后性能反而会下降。
Elasticsearch 是一个分布式系统,可以轻松扩展到数千个节点,您应该在集群中添加更多节点并将 Elasticsearch 索引分发到集群中的所有节点。
每个商店都有自己的一组库存,这些库存作为一个独立的索引存储在 Elasticsearch 中。目前我有 11,000 家商店,也就是说有 11,000 个索引及其对应的分片,在一台 32 GB RAM 的服务器上很难查询数据。
谁能指导如何在 elasticsearch 中存储每个商店的库存,因为为每个商店库存创建单独的索引对我没有帮助?
下面是索引的映射。使用的 Elasticsearch 版本是 6.0.1
{
"staging_shop_inventory_558" : {
"mappings" : {
"shop_inventory" : {
"properties" : {
"alternate_name" : {
"type" : "text",
"analyzer" : "standard"
},
"brand" : {
"properties" : {
"created_at" : {
"type" : "date"
},
"id" : {
"type" : "integer"
},
"image" : {
"type" : "text",
"index" : false
},
"is_selected" : {
"type" : "boolean"
},
"name" : {
"type" : "text",
"analyzer" : "standard"
},
"updated_at" : {
"type" : "date"
}
}
},
"brand_autocomplete" : {
"type" : "text",
"analyzer" : "autocomplete",
"search_analyzer" : "autocomplete_search"
},
"brand_suggest" : {
"type" : "text",
"analyzer" : "ngram_analyzer"
},
"category" : {
"properties" : {
"id" : {
"type" : "integer"
},
"image" : {
"type" : "text",
"index" : false
},
"name" : {
"type" : "text",
"analyzer" : "standard"
}
}
},
"created_at" : {
"type" : "date"
},
"deleted_at" : {
"type" : "date"
},
"id" : {
"type" : "integer"
},
"image" : {
"type" : "text",
"index" : false
},
"is_deleted" : {
"type" : "boolean"
},
"name" : {
"type" : "text",
"fields" : {
"raw" : {
"type" : "keyword"
}
},
"analyzer" : "gramAnalyzer",
"search_analyzer" : "whitespace_analyzer"
},
"name_autocomplete" : {
"type" : "text",
"analyzer" : "autocomplete",
"search_analyzer" : "autocomplete_search"
},
"name_suggest" : {
"type" : "text",
"analyzer" : "ngram_analyzer"
},
"product_id" : {
"type" : "integer"
},
"product_sizes" : {
"type" : "nested",
"properties" : {
"deleted_at" : {
"type" : "date"
},
"description" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"ean_code" : {
"type" : "keyword"
},
"id" : {
"type" : "integer"
},
"in_stock" : {
"type" : "boolean"
},
"is_deleted" : {
"type" : "boolean"
},
"price" : {
"type" : "float"
},
"product_id" : {
"type" : "long"
},
"product_update_on" : {
"type" : "date"
},
"product_update_status" : {
"type" : "integer"
},
"uom" : {
"type" : "keyword"
},
"weight" : {
"type" : "float"
}
}
},
"sub_category" : {
"properties" : {
"created_at" : {
"type" : "date"
},
"id" : {
"type" : "integer"
},
"image" : {
"type" : "text",
"index" : false
},
"is_selected" : {
"type" : "boolean"
},
"name" : {
"type" : "text",
"analyzer" : "standard"
},
"updated_at" : {
"type" : "date"
}
}
},
"sub_category_suggest" : {
"type" : "text",
"analyzer" : "gramAnalyzer",
"search_analyzer" : "whitespace_analyzer"
}
}
}
}
}
}
哇,你的节点只有 32 GB RAM,却有 11k 个分片(根据副本数量可能还会更多)。另外请注意,这 32 GB 是服务器内存,并不等于分配给 Elasticsearch 进程的 JVM 堆内存;Elasticsearch 的性能在很大程度上取决于 JVM 堆大小,而堆超过 32 GB 后性能反而会下降。
Elasticsearch 是一个分布式系统,可以轻松扩展到数千个节点,您应该在集群中添加更多节点并将 Elasticsearch 索引分发到集群中的所有节点。