如何在 ArangoDb 中对以下 json 文档进行全文索引和搜索?
How to do Full Text indexing and search on the below json Document in ArangoDb?
{
"batters":
{
"batter":[
{ "id": "1001", "type": "Regular" },
{ "id": "1002", "type": "Chocolate" },
{ "id": "1003", "type": "Blueberry" },
{ "id": "1004", "type": "Devil's Food" }
]
},
"topping":[
{ "id": "5001", "type": "None" },
{ "id": "5002", "type": "Glazed" },
{ "id": "5005", "type": "Sugar" },
{ "id": "5007", "type": "Powdered Sugar" },
{ "id": "5006", "type": "Chocolate with Sprinkles" },
{ "id": "5003", "type": "Chocolate" },
{ "id": "5004", "type": "Maple" }
]
}
基本上,要在这里进行全文搜索,我需要在 "batters.batter" 和 "batters.topping" 这两个属性上建立索引。这种全文搜索应该如何处理?请解释具体方法,我将通过 REST API 实现搜索。先谢谢了。
解决此问题的最佳方法是稍微更改数据布局,因为全文索引只能对一个属性起作用,并且两次请求索引无论如何都不会很快。因此我们使用匿名图将字符串连接到它们的对象。
因此,我们创建两个顶点集合和一个边集合,其中一个顶点集合带有全文索引:
// Two vertex collections (dishStrings, dish) and one edge collection (dishEdges).
db._create("dishStrings");
db._createEdgeCollection("dishEdges");
db._create("dish");
// The fulltext index goes on the "name" attribute of dishStrings.
db._collection("dishStrings").ensureIndex({
  type: "fulltext",
  fields: ["name"]
});
然后把文档保存到这些集合中,并用边把它们关联起来。
我们为文档指定 _key 属性,这样就可以在边的 _from 和 _to 中引用这些顶点:
// Searchable strings as [_key, name] pairs, listed in the order they are inserted.
var batterStrings = [
  ["1001", "Regular"],
  ["1002", "Chocolate"],
  ["1003", "Blueberry"],
  ["1004", "Devil's Food"]
];
var toppingStrings = [
  ["5001", "None"],
  ["5002", "Glazed"],
  ["5005", "Sugar"],
  ["5007", "Powdered Sugar"],
  ["5006", "Chocolate with Sprinkles"],
  ["5003", "Chocolate"],
  ["5004", "Maple"]
];

// Save one dishStrings document per pair, tagged with the given type.
function saveDishStrings(pairs, type) {
  pairs.forEach(function (pair) {
    db.dishStrings.save({"_key": pair[0], "name": pair[1], type: type});
  });
}
saveDishStrings(batterStrings, "Batter");
saveDishStrings(toppingStrings, "Topping");

// Save one edge per key, pointing from the string vertex to dish/batter.
// The edge carries the same type tag so queries can filter on it.
function linkToDish(keys, type) {
  keys.forEach(function (key) {
    db.dishEdges.save("dishStrings/" + key, "dish/batter", {tasty: true, type: type});
  });
}
linkToDish(["1001", "1002", "1003", "1004"], "Batter");
linkToDish(["5001", "5002", "5003", "5004", "5005", "5006", "5007"], "Topping");

// The dish vertex that every edge above points to.
db.dish.save({_key: "batter", tasty: true});
我们重新验证全文索引是否有效:
// Look up 'Chocolate' through the fulltext index on dishStrings.name
// and materialise the cursor as an array.
db._query([
  "FOR oneDishStr IN FULLTEXT(dishStrings, 'name', 'Chocolate')",
  " RETURN oneDishStr"
].join("")).toArray()
(.toArray() 会把结果打印到控制台)
我们得到 3 个命中结果:一个 Batter,两个 Topping。由于搜索字符串可能来自未经验证的输入,我们最好使用绑定变量(bind variables)来防止注入:
// Same fulltext lookup, with the search term passed as the bind
// variable @searchString instead of being spliced into the AQL text.
db._query([
  "FOR oneDishStr IN FULLTEXT(dishStrings, 'name', @searchString) ",
  " RETURN oneDishStr"
].join(""), {searchString: "Chocolate"});
现在让我们利用边关系来查找与之相连的菜品(dish):
// For every fulltext hit, return the hit together with the result of
// NEIGHBORS() over dishEdges in the outbound direction.
db._query([
  "FOR oneDishStr IN FULLTEXT(dishStrings, 'name', @searchString) ",
  "RETURN {str: oneDishStr, ",
  "dishes: NEIGHBORS(dishStrings, dishEdges, oneDishStr,",
  " 'outbound')}"
].join(""), {searchString: "Chocolate"})
这是处理图的旧写法(适用于 2.7 及更早版本)。由于我们想使用更快的过滤器,下面把它改写成 2.8 的新语法:
// Traversal form: for every fulltext hit, walk exactly one step
// outbound along dishEdges and return the hit with the vertex reached.
db._query([
  "FOR oneDishStr IN FULLTEXT(dishStrings, 'name', @searchString) ",
  " FOR v IN 1..1 OUTBOUND oneDishStr dishEdges RETURN ",
  " {str: oneDishStr, dish: v}"
].join(""), {searchString: "Chocolate"})
我们可以看到,在这两种情况下,针对 Chocolate 的 3 个全文搜索命中,每一个都执行了一次遍历。现在我们只关心 Topping 的命中,因此把所有类型不是 Topping 的边过滤掉:
// Same one-step traversal, also binding the edge as `e` so that
// only edges whose type is 'Topping' are kept.
db._query([
  "FOR oneDishStr IN FULLTEXT(dishStrings, 'name', @searchString) ",
  " FOR v, e IN 1..1 OUTBOUND oneDishStr dishEdges ",
  " FILTER e.type == 'Topping' ",
  " RETURN {str: oneDishStr, dish: v}"
].join(""), {searchString: "Chocolate"})
{
"batters":
{
"batter":[
{ "id": "1001", "type": "Regular" },
{ "id": "1002", "type": "Chocolate" },
{ "id": "1003", "type": "Blueberry" },
{ "id": "1004", "type": "Devil's Food" }
]
},
"topping":[
{ "id": "5001", "type": "None" },
{ "id": "5002", "type": "Glazed" },
{ "id": "5005", "type": "Sugar" },
{ "id": "5007", "type": "Powdered Sugar" },
{ "id": "5006", "type": "Chocolate with Sprinkles" },
{ "id": "5003", "type": "Chocolate" },
{ "id": "5004", "type": "Maple" }
]
}
基本上,要在这里进行全文搜索,我需要在 "batters.batter" 和 "batters.topping" 这两个属性上建立索引。这种全文搜索应该如何处理?请解释具体方法,我将通过 REST API 实现搜索。先谢谢了。
解决此问题的最佳方法是稍微更改数据布局,因为全文索引只能对一个属性起作用,并且两次请求索引无论如何都不会很快。因此我们使用匿名图将字符串连接到它们的对象。
因此,我们创建两个顶点集合和一个边集合,其中一个顶点集合带有全文索引:
// Two vertex collections (dishStrings, dish) and one edge collection (dishEdges).
db._create("dishStrings");
db._createEdgeCollection("dishEdges");
db._create("dish");
// The fulltext index goes on the "name" attribute of dishStrings.
db._collection("dishStrings").ensureIndex({
  type: "fulltext",
  fields: ["name"]
});
然后把文档保存到这些集合中,并用边把它们关联起来。
我们为文档指定 _key 属性,这样就可以在边的 _from 和 _to 中引用这些顶点:
// Searchable strings as [_key, name] pairs, listed in the order they are inserted.
var batterStrings = [
  ["1001", "Regular"],
  ["1002", "Chocolate"],
  ["1003", "Blueberry"],
  ["1004", "Devil's Food"]
];
var toppingStrings = [
  ["5001", "None"],
  ["5002", "Glazed"],
  ["5005", "Sugar"],
  ["5007", "Powdered Sugar"],
  ["5006", "Chocolate with Sprinkles"],
  ["5003", "Chocolate"],
  ["5004", "Maple"]
];

// Save one dishStrings document per pair, tagged with the given type.
function saveDishStrings(pairs, type) {
  pairs.forEach(function (pair) {
    db.dishStrings.save({"_key": pair[0], "name": pair[1], type: type});
  });
}
saveDishStrings(batterStrings, "Batter");
saveDishStrings(toppingStrings, "Topping");

// Save one edge per key, pointing from the string vertex to dish/batter.
// The edge carries the same type tag so queries can filter on it.
function linkToDish(keys, type) {
  keys.forEach(function (key) {
    db.dishEdges.save("dishStrings/" + key, "dish/batter", {tasty: true, type: type});
  });
}
linkToDish(["1001", "1002", "1003", "1004"], "Batter");
linkToDish(["5001", "5002", "5003", "5004", "5005", "5006", "5007"], "Topping");

// The dish vertex that every edge above points to.
db.dish.save({_key: "batter", tasty: true});
我们重新验证全文索引是否有效:
// Look up 'Chocolate' through the fulltext index on dishStrings.name
// and materialise the cursor as an array.
db._query([
  "FOR oneDishStr IN FULLTEXT(dishStrings, 'name', 'Chocolate')",
  " RETURN oneDishStr"
].join("")).toArray()
(.toArray() 会把结果打印到控制台)
我们得到 3 个命中结果:一个 Batter,两个 Topping。由于搜索字符串可能来自未经验证的输入,我们最好使用绑定变量(bind variables)来防止注入:
// Same fulltext lookup, with the search term passed as the bind
// variable @searchString instead of being spliced into the AQL text.
db._query([
  "FOR oneDishStr IN FULLTEXT(dishStrings, 'name', @searchString) ",
  " RETURN oneDishStr"
].join(""), {searchString: "Chocolate"});
现在让我们利用边关系来查找与之相连的菜品(dish):
// For every fulltext hit, return the hit together with the result of
// NEIGHBORS() over dishEdges in the outbound direction.
db._query([
  "FOR oneDishStr IN FULLTEXT(dishStrings, 'name', @searchString) ",
  "RETURN {str: oneDishStr, ",
  "dishes: NEIGHBORS(dishStrings, dishEdges, oneDishStr,",
  " 'outbound')}"
].join(""), {searchString: "Chocolate"})
这是处理图的旧写法(适用于 2.7 及更早版本)。由于我们想使用更快的过滤器,下面把它改写成 2.8 的新语法:
// Traversal form: for every fulltext hit, walk exactly one step
// outbound along dishEdges and return the hit with the vertex reached.
db._query([
  "FOR oneDishStr IN FULLTEXT(dishStrings, 'name', @searchString) ",
  " FOR v IN 1..1 OUTBOUND oneDishStr dishEdges RETURN ",
  " {str: oneDishStr, dish: v}"
].join(""), {searchString: "Chocolate"})
我们可以看到,在这两种情况下,针对 Chocolate 的 3 个全文搜索命中,每一个都执行了一次遍历。现在我们只关心 Topping 的命中,因此把所有类型不是 Topping 的边过滤掉:
// Same one-step traversal, also binding the edge as `e` so that
// only edges whose type is 'Topping' are kept.
db._query([
  "FOR oneDishStr IN FULLTEXT(dishStrings, 'name', @searchString) ",
  " FOR v, e IN 1..1 OUTBOUND oneDishStr dishEdges ",
  " FILTER e.type == 'Topping' ",
  " RETURN {str: oneDishStr, dish: v}"
].join(""), {searchString: "Chocolate"})