向 Druid 增量上传数据
Upload Data to Druid Incrementally
我需要将数据上传到一个现有的模型中,并且必须每天执行一次。我猜需要对索引文件做一些修改,但我不知道该怎么改。我尝试用相同的模型名称推送数据,结果原有的数据被删除了。
如有任何帮助,我们将不胜感激。
这是摄取 json 文件:
{
  "type": "index",
  "spec": {
    "dataSchema": {
      "dataSource": "mksales",
      "parser": {
        "type": "string",
        "parseSpec": {
          "format": "json",
          "dimensionsSpec": {
            "dimensions": [
              "Address",
              "City",
              "Contract Name",
              "Contract Sub Type",
              "Contract Type",
              "Customer Name",
              "Domain",
              "Nation",
              "Contract Start End Date",
              "Zip",
              "Sales Rep Name"
            ]
          },
          "timestampSpec": {
            "format": "auto",
            "column": "time"
          }
        }
      },
      "metricsSpec": [
        { "type": "count", "name": "count" },
        { "name": "Price", "type": "doubleSum", "fieldName": "Price" },
        { "name": "Sales", "type": "doubleSum", "fieldName": "Sales" },
        { "name": "Units", "type": "longSum", "fieldName": "Units" }
      ],
      "granularitySpec": {
        "type": "uniform",
        "segmentGranularity": "day",
        "queryGranularity": "none",
        "intervals": ["2000-12-01T00:00:00Z/2030-06-30T00:00:00Z"],
        "rollup": true
      }
    },
    "ioConfig": {
      "type": "index",
      "firehose": {
        "type": "local",
        "baseDir": "mksales/",
        "filter": "mksales.json"
      },
      "appendToExisting": false
    },
    "tuningConfig": {
      "type": "index",
      "targetPartitionSize": 10000000,
      "maxRowsInMemory": 40000,
      "forceExtendableShardSpecs": true
    }
  }
}
您可以通过两种方式将数据追加/更新(append/update)到现有的段(segment)中:
重建索引(reindexing)和增量摄取(delta ingestion)。
每次有新数据进入某个特定的段(在您的情况下,一个段对应一天)时,您都需要对数据重新建立索引。重建索引时,您需要提供包含当天数据的所有文件。
对于增量摄取,您需要使用 inputSpec type="multi"
您可以参考以下文档链接了解更多详情:http://druid.io/docs/latest/ingestion/update-existing-data.html
我需要将数据上传到一个现有的模型中,并且必须每天执行一次。我猜需要对索引文件做一些修改,但我不知道该怎么改。我尝试用相同的模型名称推送数据,结果原有的数据被删除了。
如有任何帮助,我们将不胜感激。
这是摄取 json 文件:
{
  "type": "index",
  "spec": {
    "dataSchema": {
      "dataSource": "mksales",
      "parser": {
        "type": "string",
        "parseSpec": {
          "format": "json",
          "dimensionsSpec": {
            "dimensions": [
              "Address",
              "City",
              "Contract Name",
              "Contract Sub Type",
              "Contract Type",
              "Customer Name",
              "Domain",
              "Nation",
              "Contract Start End Date",
              "Zip",
              "Sales Rep Name"
            ]
          },
          "timestampSpec": {
            "format": "auto",
            "column": "time"
          }
        }
      },
      "metricsSpec": [
        { "type": "count", "name": "count" },
        { "name": "Price", "type": "doubleSum", "fieldName": "Price" },
        { "name": "Sales", "type": "doubleSum", "fieldName": "Sales" },
        { "name": "Units", "type": "longSum", "fieldName": "Units" }
      ],
      "granularitySpec": {
        "type": "uniform",
        "segmentGranularity": "day",
        "queryGranularity": "none",
        "intervals": ["2000-12-01T00:00:00Z/2030-06-30T00:00:00Z"],
        "rollup": true
      }
    },
    "ioConfig": {
      "type": "index",
      "firehose": {
        "type": "local",
        "baseDir": "mksales/",
        "filter": "mksales.json"
      },
      "appendToExisting": false
    },
    "tuningConfig": {
      "type": "index",
      "targetPartitionSize": 10000000,
      "maxRowsInMemory": 40000,
      "forceExtendableShardSpecs": true
    }
  }
}
您可以通过两种方式将数据追加/更新(append/update)到现有的段(segment)中:
重建索引(reindexing)和增量摄取(delta ingestion)。
每次有新数据进入某个特定的段(在您的情况下,一个段对应一天)时,您都需要对数据重新建立索引。重建索引时,您需要提供包含当天数据的所有文件。
对于增量摄取,您需要使用 inputSpec type="multi"
您可以参考以下文档链接了解更多详情:http://druid.io/docs/latest/ingestion/update-existing-data.html