获取元数据 Activity ADF V2
Get Metadata Activity ADF V2
谁能解释一下,ADF V2 中新引入的 Get Metadata Activity 有什么用?
实际上,docs.microsoft.com 中提供的信息不足以理解此 Activity 的用途。
获取元数据Activity的主要目的是:
- 验证任何数据的元数据信息
- 数据准备就绪/可用时触发管道
以下示例显示如何使用获取元数据从文件夹中增量加载更改的文件 Activity 获取文件名和修改的时间戳:
{
"name": "IncrementalloadfromSingleFolder",
"properties": {
"activities": [
{
"name": "GetFileList",
"type": "GetMetadata",
"policy": {
"timeout": "7.00:00:00",
"retry": 0,
"retryIntervalInSeconds": 30,
"secureOutput": false
},
"typeProperties": {
"dataset": {
"referenceName": "SrcLocalDir",
"type": "DatasetReference"
},
"fieldList": [
"childItems"
]
}
},
{
"name": "ForEachFile",
"type": "ForEach",
"dependsOn": [
{
"activity": "GetFileList",
"dependencyConditions": [
"Succeeded"
]
}
],
"typeProperties": {
"items": {
"value": "@activity('GetFileList').output.childItems",
"type": "Expression"
},
"activities": [
{
"name": "GetLastModifyfromFile",
"type": "GetMetadata",
"policy": {
"timeout": "7.00:00:00",
"retry": 0,
"retryIntervalInSeconds": 30,
"secureOutput": false
},
"typeProperties": {
"dataset": {
"referenceName": "SrcLocalFile",
"type": "DatasetReference"
},
"fieldList": [
"lastModified"
]
}
},
{
"name": "IfNewFile",
"type": "IfCondition",
"dependsOn": [
{
"activity": "GetLastModifyfromFile",
"dependencyConditions": [
"Succeeded"
]
}
],
"typeProperties": {
"expression": {
"value": "@and(less(activity('GetLastModifyfromFile').output.lastModified, pipeline().parameters.current_time), greaterOrEquals(activity('GetLastModifyfromFile').output.lastModified, pipeline().parameters.last_time))",
"type": "Expression"
},
"ifTrueActivities": [
{
"name": "CopyNewFiles",
"type": "Copy",
"policy": {
"timeout": "7.00:00:00",
"retry": 0,
"retryIntervalInSeconds": 30,
"secureOutput": false
},
"typeProperties": {
"source": {
"type": "FileSystemSource",
"recursive": false
},
"sink": {
"type": "BlobSink"
},
"enableStaging": false,
"dataIntegrationUnits": 0
},
"inputs": [
{
"referenceName": "SrcLocalFile",
"type": "DatasetReference"
}
],
"outputs": [
{
"referenceName": "TgtBooksBlob",
"type": "DatasetReference"
}
]
}
]
}
}
]
}
}
],
"parameters": {
"current_time": {
"type": "String",
"defaultValue": "2018-04-01T00:00:00Z"
},
"last_time": {
"type": "String",
"defaultValue": "2018-03-01T00:00:00Z"
}
},
"folder": {
"name": "IncrementalLoadSingleFolder"
}
},
"type": "Microsoft.DataFactory/factories/pipelines"
}
另请参阅最近更新的 documentation。
谁能解释一下,ADF V2 中新引入的 Get Metadata Activity 有什么用?
实际上,docs.microsoft.com 中提供的信息不足以理解此 Activity 的用途。
获取元数据Activity的主要目的是:
- 验证任何数据的元数据信息
- 数据准备就绪/可用时触发管道
以下示例显示如何使用获取元数据从文件夹中增量加载更改的文件 Activity 获取文件名和修改的时间戳:
{
"name": "IncrementalloadfromSingleFolder",
"properties": {
"activities": [
{
"name": "GetFileList",
"type": "GetMetadata",
"policy": {
"timeout": "7.00:00:00",
"retry": 0,
"retryIntervalInSeconds": 30,
"secureOutput": false
},
"typeProperties": {
"dataset": {
"referenceName": "SrcLocalDir",
"type": "DatasetReference"
},
"fieldList": [
"childItems"
]
}
},
{
"name": "ForEachFile",
"type": "ForEach",
"dependsOn": [
{
"activity": "GetFileList",
"dependencyConditions": [
"Succeeded"
]
}
],
"typeProperties": {
"items": {
"value": "@activity('GetFileList').output.childItems",
"type": "Expression"
},
"activities": [
{
"name": "GetLastModifyfromFile",
"type": "GetMetadata",
"policy": {
"timeout": "7.00:00:00",
"retry": 0,
"retryIntervalInSeconds": 30,
"secureOutput": false
},
"typeProperties": {
"dataset": {
"referenceName": "SrcLocalFile",
"type": "DatasetReference"
},
"fieldList": [
"lastModified"
]
}
},
{
"name": "IfNewFile",
"type": "IfCondition",
"dependsOn": [
{
"activity": "GetLastModifyfromFile",
"dependencyConditions": [
"Succeeded"
]
}
],
"typeProperties": {
"expression": {
"value": "@and(less(activity('GetLastModifyfromFile').output.lastModified, pipeline().parameters.current_time), greaterOrEquals(activity('GetLastModifyfromFile').output.lastModified, pipeline().parameters.last_time))",
"type": "Expression"
},
"ifTrueActivities": [
{
"name": "CopyNewFiles",
"type": "Copy",
"policy": {
"timeout": "7.00:00:00",
"retry": 0,
"retryIntervalInSeconds": 30,
"secureOutput": false
},
"typeProperties": {
"source": {
"type": "FileSystemSource",
"recursive": false
},
"sink": {
"type": "BlobSink"
},
"enableStaging": false,
"dataIntegrationUnits": 0
},
"inputs": [
{
"referenceName": "SrcLocalFile",
"type": "DatasetReference"
}
],
"outputs": [
{
"referenceName": "TgtBooksBlob",
"type": "DatasetReference"
}
]
}
]
}
}
]
}
}
],
"parameters": {
"current_time": {
"type": "String",
"defaultValue": "2018-04-01T00:00:00Z"
},
"last_time": {
"type": "String",
"defaultValue": "2018-03-01T00:00:00Z"
}
},
"folder": {
"name": "IncrementalLoadSingleFolder"
}
},
"type": "Microsoft.DataFactory/factories/pipelines"
}
另请参阅最近更新的 documentation。