Elasticsearch 查询包括它应该排除的文档,反之亦然

Elasticsearch query including documents it should exclude and vice versa

重要编辑: 下面描述的逻辑似乎都是正确的。我的问题的根源实际上是由我们用来将新数据推送到 ES 数据库的实用程序引起的,而不是由查询本身引起的。我接受了表示查询按预期工作的答案。

我有一个 Elasticsearch 服务器,其映射如下所示(由 curl 'elastic:9200/resourcelibrary/_mapping' 输出):

{
    "resourcelibrary": {
        "mappings": {
            "resource": {
                "properties": {
                    "created_at": {
                        "type": "date"
                    },
                    "created_by": {
                        "type": "text"
                    },
                    "custom_key": {
                        "type": "keyword"
                    },
                    "defaultAction": {
                        "type": "text",
                        "fields": {
                            "keyword": {
                                "type": "keyword",
                                "ignore_above": 256
                            }
                        }
                    },
                    "default_action": {
                        "type": "keyword"
                    },
                    "description": {
                        "type": "text"
                    },
                    "id": {
                        "type": "text"
                    },
                    "indexed": {
                        "type": "keyword"
                    },
                    "is_searchable": {
                        "type": "keyword"
                    },
                    "key": {
                        "type": "text"
                    },
                    "licenses": {
                        "type": "keyword"
                    },
                    "raw": {
                        "type": "text"
                    },
                    "require_priv": {
                        "type": "keyword"
                    },
                    "source": {
                        "type": "text",
                        "fields": {
                            "keyword": {
                                "type": "keyword",
                                "ignore_above": 256
                            }
                        },
                        "fielddata": true
                    },
                    "stat": {
                        "type": "text"
                    },
                    "style_def": {
                        "type": "keyword"
                    },
                    "tags": {
                        "type": "text"
                    },
                    "thumbnail": {
                        "type": "text"
                    },
                    "title": {
                        "type": "text",
                        "fields": {
                            "keyword": {
                                "type": "keyword",
                                "ignore_above": 256
                            }
                        }
                    },
                    "type": {
                        "type": "keyword"
                    },
                    "uid": {
                        "type": "text"
                    },
                    "updated_at": {
                        "type": "date"
                    },
                    "updated_by": {
                        "type": "text"
                    }
                }
            }
        }
    }
}

在 resourcelibrary 索引完全为空的情况下,我向其中添加了以下文档:

[
        {
            'type'         : 'video',
            'uid'          : '2c444278-e0d3-497b-9b5b-b70756b0fdc0',
            'key'          : 'test-test',
            'custom_key'   : 'test-test',
            'description'  : 'Random text just to fill up the description. Also, math',
            'privileged'   : [],
            'require_priv' : true,
            'title'        : 'Title!!!',
            'defaultAction': '9dfcdb39-6644-4023-82c3-8227ba184c02',
            'source'       : 'frontline'
        },
        {
            'type'         : 'course',
            'uid'          : '8afb5c95-c7b5-498a-abec-ae829d164964',
            'key'          : 'test-scorm',
            'custom_key'   : 'test-scorm',
            'description'  : 'SCORM!!!',
            'privileged'   : [],
            'require_priv' : true,
            'title'        : 'SCORM!!!',
            'defaultAction': '1302dead-9941-4b90-b35c-30eff4993365',
            'source'       : 'scormcloud'
        },
        {
            'type'         : 'mc',
            'uid'          : '8e66c2fa-6090-49da-91dd-d939124fef90',
            'key'          : 'test-mc',
            'custom_key'   : 'test-mc',
            'description'  : 'MC!!!',
            'require_priv' : false,
            'title'        : 'MC!!!',
            'defaultAction': '7957b8f5-c934-4296-b7bb-70f2cc4b2ad0',
            'source'       : 'edivate'
        },
        {
            'type'         : 'group',
            'uid'          : '80a908c3-dd6c-4902-9f05-647a8af689ac',
            'key'          : 'test-group',
            'custom_key'   : 'test-group',
            'description'  : 'GROUP!!!',
            'require_priv' : false,
            'title'        : 'GROUP!!!',
            'defaultAction': '25b700a5-7563-4d6e-9eab-18465d08a683',
            'source'       : 'two words'
        },
        {
            'type'         : 'video',
            'uid'          : '3d555389-e0d3-497b-9b5b-c81867c10ed1',
            'key'          : 'test-video',
            'custom_key'   : 'test-video',
            'description'  : 'Random text just to fill up the description. Also, science',
            'require_priv' : false,
            'title'        : 'NO-PRIVS-REQUIRED RESOURCE!!!',
            'defaultAction': '9dfcdb39-6644-4023-82c3-8227ba184c02',
            'source'       : 'ets'
        },
        {
            'type'         : 'video',
            'uid'          : 'fbc0f853-9020-4ed7-8d4d-e18ebe75d815',
            'key'          : 'test-test-test',
            'custom_key'   : 'test-test-test',
            'description'  : 'integration testing description',
            'require_priv' : false,
            'title'        : 'Search All Resources Integration Title',
            'defaultAction': '9dfcdb39-6644-4023-82c3-8227ba184c02'
        },
        {
            'type'         : 'file',
            'uid'          : 'cf84e252-1082-4a94-9fe5-45fa73364e2f',
            'key'          : 'test-test-test two',
            'custom_key'   : 'test-test-test two',
            'description'  : 'integration testing description two',
            'require_priv' : false,
            'title'        : 'Search All Resources Integration Title two',
            'defaultAction': '0b7abf9e-c88a-4d19-891d-52fe0b220506'
        },
        {
            'id'           : 'ba462b70-de73-4173-88bf-66bc9d1385b9',
            'type'         : 'course',
            'uid'          : 'scormcloud-course-cf84e252-1082-4a94-9fe5-45fa73364e2f',
            'key'          : 'test-test-test two',
            'custom_key'   : 'test-test-test two',
            'description'  : 'integration testing description two',
            'require_priv' : false,
            'title'        : 'Search All Resources Integration Title two',
            'defaultAction': '0b7abf9e-c88a-4d19-891d-52fe0b220506'
        },
        {
            'id'           : '7f0cbbc6-a1dd-43ca-9108-b31f90904dce',
            'type'         : 'course',
            'uid'          : 'scormcloud-course-7f0cbbc6-a1dd-43ca-9108-b31f90904dce',
            'key'          : 'LD_7f0cbbc6-a1dd-43ca-9108-b31f90904dce',
            'custom_key'   : 'LD_7f0cbbc6-a1dd-43ca-9108-b31f90904dce',
            'description'  : 'This is a LearningDesigner course',
            'require_priv' : false, // Still requires LCR Tooling
            'title'        : 'LearningDesigner Course 1',
            'defaultAction': '1302dead-9941-4b90-b35c-30eff4993365'
        },
        {
            'id'           : '9e195a62-1a53-42c0-8648-4aa35c309d48',
            'type'         : 'course',
            'uid'          : 'scormcloud-course-9e195a62-1a53-42c0-8648-4aa35c309d48',
            'key'          : 'user-SCORM_9e195a62-1a53-42c0-8648-4aa35c309d48',
            'custom_key'   : 'user-SCORM_9e195a62-1a53-42c0-8648-4aa35c309d48',
            'description'  : 'This is a user-uploaded SCORM course',
            'require_priv' : false, // Still requires LCR Tooling
            'title'        : 'User-Uploaded Course 1',
            'defaultAction': '1302dead-9941-4b90-b35c-30eff4993365'
        },
        {
            'id'           : '2b1b2197-48ae-4669-8bfa-7edd440cb027',
            'type'         : 'course',
            'uid'          : 'course-2b1b2197-48ae-4669-8bfa-7edd440cb027',
            'source'       : 'canvas',
            'key'          : '9e195a62-1a53-42c0-8648-4aa35c309d48',
            'custom_key'   : '9e195a62-1a53-42c0-8648-4aa35c309d48',
            'description'  : 'This is a Canvas course',
            'require_priv' : false,
            'title'        : 'Canvas Course 1',
            'defaultAction': '7aef5ef2-e0c1-4188-9e74-3e24057e7e6e'
        },
        {
            'id'           : '5e113295-f905-4a3d-97e0-f5d49926c979',
            'type'         : 'collaborative',
            'uid'          : 'frontline-collaborative-5e113295-f905-4a3d-97e0-f5d49926c979',
            'key'          : '5e113295-f905-4a3d-97e0-f5d49926c979',
            'custom_key'   : '5e113295-f905-4a3d-97e0-f5d49926c979',
            'description'  : 'This is a Collaborative resource',
            'require_priv' : false,
            'title'        : 'Collab Resource 1',
            'defaultAction': 'e153409d-3330-4202-baf2-d602b4cb7d66'
        },
    ]

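我正在使用 BodyBuilder.JS 生成一个查询,该查询返回除满足以下任一条件的文档之外的所有文档:

  1. type 为 "collaborative"
  2. source 为 "canvas"
  3. type 为 "course" 且 custom_key 以 "LD_" 开头
  4. type 为 "course" 且 custom_key 以 "user-SCORM_" 开头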

在我的应用程序中,我像这样实现了这些排除条件(注意:body 是通过 BodyBuilder.JS 的 bodybuilder() 函数创建的对象):

// Build the exclusion clauses as a single fluent chain on the bodybuilder
// instance. Each notFilter() call adds one entry to the query's must_not list.
body
    // Drop every document whose type is exactly "collaborative".
    .notFilter('term', 'type', 'collaborative')
    // Drop every document whose source is "canvas".
    .notFilter('term', 'source', 'canvas')
    // Drop courses whose custom_key starts with "LD_"; both nested
    // filters must match for a document to be excluded.
    .notFilter('bool', nested => nested
        .filter('term', 'type', 'course')
        .filter('regexp', 'custom_key', 'LD_.*'))
    // Drop courses whose custom_key starts with "user-SCORM_".
    .notFilter('bool', nested => nested
        .filter('term', 'type', 'course')
        .filter('regexp', 'custom_key', 'user-SCORM_.*'));

... 当调用 body.build() 时,它会生成以下 Elasticsearch DSL 字符串:

{
  "from": "0",
  "size": 100,
  "query": {
    "bool": {
      "filter": {
        "bool": {
          "must": {
            "bool": {}
          },
          "must_not": [
            {
              "term": {
                "type": "collaborative"
              }
            },
            {
              "term": {
                "source": "canvas"
              }
            },
            {
              "bool": {
                "must": [
                  {
                    "term": {
                      "type": "course"
                    }
                  },
                  {
                    "regexp": {
                      "custom_key": "LD_.*"
                    }
                  }
                ]
              }
            },
            {
              "bool": {
                "must": [
                  {
                    "term": {
                      "type": "course"
                    }
                  },
                  {
                    "regexp": {
                      "custom_key": "user-SCORM_.*"
                    }
                  }
                ]
              }
            }
          ]
        }
      }
    }
  }
}

当我将此查询发送到我的 Elasticsearch 服务器时,它正确地忽略了 type 为“collaborative”和 source 为“canvas”的文档。但是,它做了以下不正确的事情:

  1. 搜索结果缺少第二个文档(uid值“8afb5c95-c7b5-498a-abec-ae829d164964”)
  2. 搜索结果错误地包含了 type 为 "course" 且 custom_key 以 "LD_" 开头的文档
  3. 搜索结果错误地包含了 type 为 "course" 且 custom_key 以 "user-SCORM_" 开头的文档

我真的不确定我做错了什么。我还尝试用 match 子句替换 regexp 子句(例如 subFilter.filter('match', 'custom_key', 'LD_*')),但我得到了完全相同的结果。

我已经盯着这个问题看了一个星期了,尝试了数百种与本帖中描述的内容类似、但略有不同的做法,而这是我最接近所需结果的一次。我做错了什么?

这可能有助于在本地解决和分析您的问题。

我已经创建了索引:

PUT /resourcelibrary

{
  "mappings": {
    "properties": {
      "created_at": {
        "type": "date"
      },
      "created_by": {
        "type": "text"
      },
      "custom_key": {
        "type": "keyword"
      },
      "defaultAction": {
        "type": "text",
        "fields": {
          "keyword": {
            "type": "keyword",
            "ignore_above": 256
          }
        }
      },
      "default_action": {
        "type": "keyword"
      },
      "description": {
        "type": "text"
      },
      "id": {
        "type": "text"
      },
      "indexed": {
        "type": "keyword"
      },
      "is_searchable": {
        "type": "keyword"
      },
      "key": {
        "type": "text"
      },
      "licenses": {
        "type": "keyword"
      },
      "raw": {
        "type": "text"
      },
      "require_priv": {
        "type": "keyword"
      },
      "source": {
        "type": "text",
        "fields": {
          "keyword": {
            "type": "keyword",
            "ignore_above": 256
          }
        },
        "fielddata": true
      },
      "stat": {
        "type": "text"
      },
      "style_def": {
        "type": "keyword"
      },
      "tags": {
        "type": "text"
      },
      "thumbnail": {
        "type": "text"
      },
      "title": {
        "type": "text",
        "fields": {
          "keyword": {
            "type": "keyword",
            "ignore_above": 256
          }
        }
      },
      "type": {
        "type": "keyword"
      },
      "uid": {
        "type": "text"
      },
      "updated_at": {
        "type": "date"
      },
      "updated_by": {
        "type": "text"
      }
    }
  }
}

数据摄取:

POST /resourcelibrary/_doc

{
  "custom_key": "5e113295-f905-4a3d-97e0-f5d49926c979",
  "defaultAction": "e153409d-3330-4202-baf2-d602b4cb7d66",
  "description": "This is a Collaborative resource",
  "id": "5e113295-f905-4a3d-97e0-f5d49926c979",
  "key": "5e113295-f905-4a3d-97e0-f5d49926c979",
  "require_priv": false,
  "title": "Collab Resource 1",
  "type": "collaborative",
  "uid": "frontline-collaborative-5e113295-f905-4a3d-97e0-f5d49926c979"
}

搜索查询(工作正常)

GET /resourcelibrary/_search

{
  "query": {
    "bool": {
      "filter": {
        "bool": {
          "must": {
            "bool": {}
          },
          "must_not": [
            {
              "term": {
                "type": "collaborative"
              }
            },
            {
              "term": {
                "source": "canvas"
              }
            },
            {
              "bool": {
                "must": [
                  {
                    "term": {
                      "type": "course"
                    }
                  },
                  {
                    "regexp": {
                      "custom_key": "LD_.*"
                    }
                  }
                ]
              }
            },
            {
              "bool": {
                "must": [
                  {
                    "term": {
                      "type": "course"
                    }
                  },
                  {
                    "regexp": {
                      "custom_key": "user-SCORM_.*"
                    }
                  }
                ]
              }
            }
          ]
        }
      }
    }
  }
}

请尝试创建一个不同名称的索引,然后在其上执行这些查询,这样您就可以定位到实际问题。

不过,查询本身的生成是正确的。