Elasticsearch 使用 group by 查询字段的所有值

Elasticsearch query for all values of field with group by

我在构造查询时遇到了问题:我想像 SQL 的 GROUP BY 那样,按字段分组并获取该字段的所有取值。

下面是我的数据结构:

product index:

{
  "createdBy" : "61c1fcdd88dbad1920da8caf",
  "creationTime" : "2021-12-22T11:58:53.576932Z",
  "lastModifiedBy" : "61c1fcdd88dbad1920da8caf",
  "lastModificationTime" : "2021-12-22T11:58:53.576932Z",
  "id" : "61c312fdc6aa620a609db0b2",
  "title" : "string",
  "brand" : "string",
  "longDesc" : "string",
  "categoryId" : "string",
  "imageUrls" : [
    "string",
    "string"
  ],
  "keySpecs" : [
    "string",
    "string"
  ],
  "facets" : [
    {
      "name" : "color",
      "value" : "red"
    },
    {
      "name" : "storage",
      "value" : "16 GB"
    },
    {
      "name" : "brand",
      "value" : "Intex"
    }
  ],
  "categoryName" : "handsets"
}

现在,我想获取所有分面(facets)及其各个不同的取值,并统计每个取值对应的数量。例如:

  1. 产品 A 的颜色为蓝色,产品 B 的颜色为红色
  2. 产品 A 的品牌为 ABC,产品 B 的品牌为 XYZ

所以,我想要列出所有分面(facets)的汇总数据,例如:

  1. 颜色:蓝色(200 个),红色(12 个)
  2. 品牌:ABC(13 个),XYZ(99 个)

此外,不同的产品会有不同类型的分面,例如 iPhone 会有颜色、内存、品牌、尺寸等分面,而一支笔只有颜色和品牌(没有内存/尺寸)。

注意:我使用的是最新版本的 Elasticsearch

=================

更新 1: 以下是 es 映射详情

{
  "settings": {
    "analysis": {
      "filter": {
        "english_stop": {
          "type": "stop",
          "stopwords": "_english_"
        },
        "english_keywords": {
          "type": "keyword_marker",
          "keywords": [
            "example"
          ]
        },
        "english_stemmer": {
          "type": "stemmer",
          "language": "english"
        },
        "english_possessive_stemmer": {
          "type": "stemmer",
          "language": "possessive_english"
        }
      },
      "analyzer": {
        "lalashree_standard_analyzer": {
          "tokenizer": "standard",
          "filter": [
            "english_possessive_stemmer",
            "lowercase",
            "english_stop",
            "english_keywords",
            "english_stemmer"
          ]
        },
        "html_standard_analyzer": {
          "char_filter": [
            "html_strip"
          ],
          "tokenizer": "standard",
          "filter": [
            "english_possessive_stemmer",
            "lowercase",
            "english_stop",
            "english_keywords",
            "english_stemmer"
          ]
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "id": {
        "type": "keyword"
      },
      "createdBy": {
        "type": "keyword"
      },
      "creationTime": {
        "type": "date"
      },
      "lastModifiedBy": {
        "type": "keyword"
      },
      "lastModificationTime": {
        "type": "date"
      },
      "deleted": {
        "type": "boolean"
      },
      "deletedBy": {
        "type": "keyword"
      },
      "deletionTime": {
        "type": "date"
      },

      "title": {
        "type": "text",
        "analyzer": "lalashree_standard_analyzer",
        "fields": {
          "suggest": {
            "type": "completion"
          }
        }
      },
      "shortDesc": {
        "type": "text",
        "analyzer": "lalashree_standard_analyzer"
      },
      "longDesc": {
        "type": "text",
        "analyzer": "lalashree_standard_analyzer"
      },
      "categoryId": {
        "type": "keyword"
      },
      "searchDetails": {
        "type": "object",
        "properties": {
          "desc": {
            "type": "text",
            "analyzer": "lalashree_standard_analyzer"
          },
          "keywords": {
            "type": "text",
            "analyzer": "lalashree_standard_analyzer",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          }
        }
      },
      "imageUrls": {
        "type": "keyword",
        "index": false
      },
      "keySpecs": {
        "type": "text",
        "analyzer": "lalashree_standard_analyzer"
      },
      "sections": {
        "type": "object",
        "properties": {
          "name": {
            "type": "text",
            "index": false
          },
          "shortDesc": {
            "type": "text",
            "analyzer": "lalashree_standard_analyzer"
          },
          "longDesc": {
            "type": "text",
            "analyzer": "lalashree_standard_analyzer"
          },
          "htmlContent": {
            "type": "text",
            "analyzer": "html_standard_analyzer"
          }
        }
      },
      "facets": {
        "type": "nested",
        "properties": {
          "name": {
            "type": "keyword"
          },
          "value": {
            "type": "keyword"
          }
        }
      },
      "specificationItems": {
        "type": "object",
        "properties": {
          "key": {
            "type": "text",
            "analyzer": "lalashree_standard_analyzer",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "values": {
            "type": "text",
            "analyzer": "lalashree_standard_analyzer"
          }
        }
      },
      "categoryName": {
        "type": "keyword"
      },
      "productFamily": {
        "type": "nested",
        "properties": {
          "id": {
            "type": "keyword"
          },
          "familyVariantOptions": {
            "type": "nested",
            "properties": {
              "name": {
                "type": "keyword"
              },
              "values": {
                "type": "keyword"
              }
            }
          },
          "productFamilyItems": {
            "type": "nested",
            "properties": {
              "baseProductId": {
                "type": "keyword"
              },
              "itemVariantInfoSet": {
                "type": "nested",
                "properties": {
                  "name": {
                    "type": "keyword"
                  },
                  "value": {
                    "type": "keyword"
                  }
                }
              }
            }
          }
        }
      },
      "rating": {
        "type": "float"
      },
      "totalReviewsCount": {
        "type": "long"
      },
      "stores": {
        "type": "nested",
        "properties": {
          "id": {
            "type": "keyword"
          },
          "logo": {
            "type": "keyword",
            "index": false
          },
          "active": {
            "type": "boolean"
          },
          "name": {
            "type": "text"
          },
          "quantity": {
            "type": "long"
          },
          "rating": {
            "type": "float"
          },
          "totalReviewsCount": {
            "type": "long"
          },
          "price.mrp": {
            "type": "float"
          },
          "price.sp": {
            "type": "float"
          },
          "location.geoPoint": {
            "type": "geo_point"
          },
          "oos": {
            "type": "boolean"
          }
        }
      }
    }
  }
}

此查询先按分面名称(facets.name)分组,再在每个名称下按取值(facets.value)分组。通过调整两个 terms 聚合的 size 参数,可以分别控制返回的分面数量以及每个分面中取值的数量。我认为它可以满足您的需求。

请注意,如果文档数量很大并且对性能要求较高,此查询的性能可能不佳。

{
  "size": 0, 
  "aggs": {
    "facets": {
      "nested": {
        "path": "facets"
      },
      "aggs": {
        "names": {
          "terms": {
            "field": "facets.name",
            "size": 10
          },
          "aggs": {
            "values": {
              "terms": {
                "field": "facets.value",
                "size": 10
              }
            }
          }
        }
      }
    }
  }
}