以字符串为键的大型文档的 Couchdb 视图超时
Couchdb view on large documents with strings as keys times out
我正在尝试在 CouchDB 2.1 中为非常大的文档(每个文档大约 300k-900k 行,总共大约 15-20 个文档)创建一个视图。
文档如下所示:
{
"222123456": {
"_id": "222123456",
"type": "Order",
"0300": {
"51234567": {
"_id": "51234567",
"type": "Material",
"DS": "M532F1234567",
"HTZ": "M532-F1234-000-00",
"A name for some material": {
"_id": "A name for some material",
"type": "Description",
"0054": {
"600": {
"1": {
"_id": "1",
"type": "Amount",
"X": {
"11220": {
"_id": "11220",
"type": "row"
},
"_id": "X",
"type": "Bulk"
}
},
"_id": "600",
"type": "Site"
},
"_id": "0054",
"type": "Pos"
}
}
},
"51255111": {
// And another material
// ...
},
"_id": "0300",
"type": "Process"
}
// + more orders with more items
},
"222555666": {
// Another order with more processes which contain even more materials
// ...
},
"_id": "FileImport_001",
"_rev": "1-2f77e699332bb7c76a137b86f83bbe91",
"type": "Machine"
}
每个文档有 1-n 个订单,每个订单有 1-n 个流程,每个流程包含我要查询的 1-n materials。我当前的视图使用 for 循环遍历所有订单、流程和 materials。
这是我正在使用的视图:
function (doc) {
var splitMsn = doc._id.split("_"); // Split _id into [FileImport, 001] array
for (var key_order in doc) { // For every order in the document...
if (typeof doc[key_order] == 'object' && doc[key_order] != '') { // where the value is an object and not empty...
var order = doc[key_order]; // Save the order as a value
for (var key_process in order) { // ...and search all processes in that order nr
if (typeof order[key_process] == 'object' && order[key_process] != '') { // If process contains an object as value and it's not empty
var process = order[key_process]; // Save the process as a value
for (var key_matnr in process) { // For every material in the process
if (typeof process[key_matnr] == 'object' && process[key_matnr] != '') { // If material nr contains an object as value and not empty
var matnr = process[key_matnr]; // Save material nr as value
for (var key_matname in matnr) { // For every material name in the material number
if (typeof matnr[key_matname] == 'object' && matnr[key_matname] != '') { // Contains object and not empty
var matname = matnr[key_matname]; // Save material name
emit([splitMsn[1], key_order, key_process, key_matnr], matname); // emit [001, 222123456, 0300, 51234567], Material name
}
}
}
}
}
}
}
}
}
通过此视图,我可以按特定的文档编号、订单、流程和 material 编号进行查询。作为返回结果,我得到 material 名称和数量(例如我想要的那个 1)。
当我使用一个文档时,索引创建得很好,但即使使用第二个文档(更不用说 15 或 20 个),CouchDB 在创建视图时说 "OS process timed out"。
我的问题:是否有更快和/或更优雅的方法来遍历所有这些层级,最终获得我需要的深层嵌套的 "amount" 值?
非常感谢!
系统正在保护自己免受您的伤害。
一般来说,使用大型文档并不能发挥 CouchDB 的长处。再加上深层嵌套的结构和非常复杂的 map 函数,情况只会更糟。
我建议重新考虑您的数据模型。使用(很多)更小的文档(比如每个 material 一个文档)。这样您的 map 函数也会简单得多。
我正在尝试在 CouchDB 2.1 中为非常大的文档(每个文档大约 300k-900k 行,总共大约 15-20 个文档)创建一个视图。
文档如下所示:
{
"222123456": {
"_id": "222123456",
"type": "Order",
"0300": {
"51234567": {
"_id": "51234567",
"type": "Material",
"DS": "M532F1234567",
"HTZ": "M532-F1234-000-00",
"A name for some material": {
"_id": "A name for some material",
"type": "Description",
"0054": {
"600": {
"1": {
"_id": "1",
"type": "Amount",
"X": {
"11220": {
"_id": "11220",
"type": "row"
},
"_id": "X",
"type": "Bulk"
}
},
"_id": "600",
"type": "Site"
},
"_id": "0054",
"type": "Pos"
}
}
},
"51255111": {
// And another material
// ...
},
"_id": "0300",
"type": "Process"
}
// + more orders with more items
},
"222555666": {
// Another order with more processes which contain even more materials
// ...
},
"_id": "FileImport_001",
"_rev": "1-2f77e699332bb7c76a137b86f83bbe91",
"type": "Machine"
}
每个文档有 1-n 个订单,每个订单有 1-n 个流程,每个流程包含我要查询的 1-n materials。我当前的视图使用 for 循环遍历所有订单、流程和 materials。
这是我正在使用的视图:
function (doc) {
var splitMsn = doc._id.split("_"); // Split _id into [FileImport, 001] array
for (var key_order in doc) { // For every order in the document...
if (typeof doc[key_order] == 'object' && doc[key_order] != '') { // where the value is an object and not empty...
var order = doc[key_order]; // Save the order as a value
for (var key_process in order) { // ...and search all processes in that order nr
if (typeof order[key_process] == 'object' && order[key_process] != '') { // If process contains an object as value and it's not empty
var process = order[key_process]; // Save the process as a value
for (var key_matnr in process) { // For every material in the process
if (typeof process[key_matnr] == 'object' && process[key_matnr] != '') { // If material nr contains an object as value and not empty
var matnr = process[key_matnr]; // Save material nr as value
for (var key_matname in matnr) { // For every material name in the material number
if (typeof matnr[key_matname] == 'object' && matnr[key_matname] != '') { // Contains object and not empty
var matname = matnr[key_matname]; // Save material name
emit([splitMsn[1], key_order, key_process, key_matnr], matname); // emit [001, 222123456, 0300, 51234567], Material name
}
}
}
}
}
}
}
}
}
通过此视图,我可以按特定的文档编号、订单、流程和 material 编号进行查询。作为返回结果,我得到 material 名称和数量(例如我想要的那个 1)。
当我使用一个文档时,索引创建得很好,但即使使用第二个文档(更不用说 15 或 20 个),CouchDB 在创建视图时说 "OS process timed out"。
我的问题:是否有更快和/或更优雅的方法来遍历所有这些层级,最终获得我需要的深层嵌套的 "amount" 值?
非常感谢!
系统正在保护自己免受您的伤害。
一般来说,使用大型文档并不能发挥 CouchDB 的长处。再加上深层嵌套的结构和非常复杂的 map 函数,情况只会更糟。
我建议重新考虑您的数据模型。使用(很多)更小的文档(比如每个 material 一个文档)。这样您的 map 函数也会简单得多。