删除 Druid 数据源中的数据
Delete data in druid datasource
我在 Druid 中已有一个数据源。我试图通过使用过滤器重新索引数据并覆盖现有数据来删除一些记录。
如果 ioConfig 中的 dataSource 是 my_datasource 并且 dataSchema 中的 dataSource 是 other_datasource,它工作正常并且 other_datasource 显示预期结果。
但是当两个数据源(ioConfig 和 dataSchema)相同时,现有数据不会根据应用的过滤器而改变。
这是配置示例:
{
  "type": "index_parallel",
  "spec": {
    "dataSchema": {
      "dataSource": "my_datasource",
      "timestampSpec": {
        "column": "RecordDate",
        "format": "YYYY-MM-DD"
      },
      "dimensionsSpec": {
        "dimensions": ["RecordDate", "Column1", "Column2"]
      },
      "metricsSpec": [],
      "granularitySpec": {
        "type": "uniform",
        "queryGranularity": "none",
        "segmentGranularity": "day",
        "rollup": "false"
      },
      "transformSpec": {
        "filter": {
          "type": "not",
          "field": {
            "type": "expression",
            "expression": "RecordDate >='1997-02-01' && RecordDate<='1997-02-28'"
          }
        },
        "transforms": []
      }
    },
    "ioConfig": {
      "type": "index_parallel",
      "inputSource": {
        "type": "druid",
        "dataSource": "my_datasource",
        "interval": "1970-01-01/2021-12-26"
      },
      "appendToExisting": "false"
    },
    "tuningConfig": {
      "type": "index_parallel",
      "partitionsSpec": {
        "type": "dynamic"
      },
      "maxNumConcurrentSubTasks": 4
    }
  }
}
我在这里错过了什么?有没有更好的方法来实现我想要做的事情?
感谢你的帮助。谢谢。
我通过将 dropExisting=true 添加到 ioConfig 使其工作。
此外,我将过滤器移到了 ioConfig 块中的 inputSource 里。
这是完整的配置。
{
  "type": "index_parallel",
  "spec": {
    "dataSchema": {
      "dataSource": "my_datasource",
      "timestampSpec": {
        "column": "RecordDate",
        "format": "YYYY-MM-DD"
      },
      "dimensionsSpec": {
        "dimensions": ["RecordDate", "Column1", "Column2"]
      },
      "metricsSpec": [],
      "granularitySpec": {
        "type": "uniform",
        "queryGranularity": "none",
        "segmentGranularity": "day",
        "rollup": false,
        "intervals": ["1970-01-01/2021-12-27"]
      },
      "transformSpec": {
        "transforms": []
      }
    },
    "ioConfig": {
      "type": "index_parallel",
      "inputSource": {
        "type": "druid",
        "dataSource": "my_datasource",
        "interval": "1970-01-01/2021-12-26",
        "filter": {
          "type": "not",
          "field": {
            "type": "expression",
            "expression": "RecordDate >='1997-02-01' && RecordDate<='1997-02-28'"
          }
        }
      },
      "appendToExisting": false,
      "dropExisting": true
    },
    "tuningConfig": {
      "type": "index_parallel",
      "partitionsSpec": {
        "type": "dynamic"
      },
      "maxNumConcurrentSubTasks": 4
    }
  }
}
我在 Druid 中已有一个数据源。我试图通过使用过滤器重新索引数据并覆盖现有数据来删除一些记录。 如果 ioConfig 中的 dataSource 是 my_datasource 并且 dataSchema 中的 dataSource 是 other_datasource,它工作正常并且 other_datasource 显示预期结果。 但是当两个数据源(ioConfig 和 dataSchema)相同时,现有数据不会根据应用的过滤器而改变。 这是配置示例:
{
  "type": "index_parallel",
  "spec": {
    "dataSchema": {
      "dataSource": "my_datasource",
      "timestampSpec": {
        "column": "RecordDate",
        "format": "YYYY-MM-DD"
      },
      "dimensionsSpec": {
        "dimensions": ["RecordDate", "Column1", "Column2"]
      },
      "metricsSpec": [],
      "granularitySpec": {
        "type": "uniform",
        "queryGranularity": "none",
        "segmentGranularity": "day",
        "rollup": "false"
      },
      "transformSpec": {
        "filter": {
          "type": "not",
          "field": {
            "type": "expression",
            "expression": "RecordDate >='1997-02-01' && RecordDate<='1997-02-28'"
          }
        },
        "transforms": []
      }
    },
    "ioConfig": {
      "type": "index_parallel",
      "inputSource": {
        "type": "druid",
        "dataSource": "my_datasource",
        "interval": "1970-01-01/2021-12-26"
      },
      "appendToExisting": "false"
    },
    "tuningConfig": {
      "type": "index_parallel",
      "partitionsSpec": {
        "type": "dynamic"
      },
      "maxNumConcurrentSubTasks": 4
    }
  }
}
我在这里错过了什么?有没有更好的方法来实现我想要做的事情? 感谢你的帮助。谢谢。
我通过将 dropExisting=true 添加到 ioConfig 使其工作。 此外,我将过滤器移到了 ioConfig 块中的 inputSource 里。 这是完整的配置。
{
  "type": "index_parallel",
  "spec": {
    "dataSchema": {
      "dataSource": "my_datasource",
      "timestampSpec": {
        "column": "RecordDate",
        "format": "YYYY-MM-DD"
      },
      "dimensionsSpec": {
        "dimensions": ["RecordDate", "Column1", "Column2"]
      },
      "metricsSpec": [],
      "granularitySpec": {
        "type": "uniform",
        "queryGranularity": "none",
        "segmentGranularity": "day",
        "rollup": false,
        "intervals": ["1970-01-01/2021-12-27"]
      },
      "transformSpec": {
        "transforms": []
      }
    },
    "ioConfig": {
      "type": "index_parallel",
      "inputSource": {
        "type": "druid",
        "dataSource": "my_datasource",
        "interval": "1970-01-01/2021-12-26",
        "filter": {
          "type": "not",
          "field": {
            "type": "expression",
            "expression": "RecordDate >='1997-02-01' && RecordDate<='1997-02-28'"
          }
        }
      },
      "appendToExisting": false,
      "dropExisting": true
    },
    "tuningConfig": {
      "type": "index_parallel",
      "partitionsSpec": {
        "type": "dynamic"
      },
      "maxNumConcurrentSubTasks": 4
    }
  }
}