Marklogic Data Hub 框架 5.2.2 映射
Marklogic Data Hub Frame 5.2.2 Mapping
我正在学习 Data Hub Framework 5.2.2,并尝试将其用于实现一个小项目。有人能帮我理解以下几点吗?
- 创建步骤(摄取、映射)的主要用途是什么?在流程定义中,我们已经很清楚地为各个步骤定义了输入和输出。那么为什么还需要显式地创建步骤,其目的是什么?
- 我正在尝试使用映射文件来映射数据,但映射没有生效,摄取的文件未经映射就原样加载到了最终数据库中。请帮我看看哪里做错了。
ingestionmapping.flow.json
{
"name": "ingestionmapping",
"description": "This is the default flow containing all of the default steps",
"batchSize": 100,
"threadCount": 4,
"options": {
"sourceQuery": null
},
"steps": {
"1": {
"name": "csv-ingest-step-json",
"description": "ingests json docs in JSON format to data-hub-STAGING",
"stepDefinitionName": "productIngestion",
"stepDefinitionType": "INGESTION",
"customHook" : {
"module" : "",
"parameters" : { },
"user" : "",
"runBefore" : false
},
"batchSize" : 100,
"threadCount" : 4,
"fileLocations": {
"inputFilePath": "input",
"outputURIReplacement": ".*input*.,'/mapping-flow/json'",
"inputFileType": "csv"
},
"options": {
"targetDatabase": "data-hub-STAGING",
"sourceQuery": "cts.collectionQuery([])",
"permissions": "data-hub-operator,read,data-hub-operator,update",
"outputFormat": "json",
"collections": [
"mapping-flow-ingestion-json"
],
"headers": {
"sources": [{"name": "ingestion_only-flow"}],
"createdOn" : "currentDateTime",
"createdBy" : "currentUser"
}
}
},
"2": {
"name": "mapping-step",
"description": "This is the default mapping step",
"stepDefinitionName": "productMapping",
"stepDefinitionType": "MAPPING",
"customHook" : {
"module" : "",
"parameters" : { },
"user" : "",
"runBefore" : false
},
"batchSize" : 100,
"threadCount" : 4,
"options": {
"sourceDatabase": "data-hub-STAGING",
"targetDatabase": "data-hub-FINAL",
"sourceQuery": "cts.collectionQuery('mapping-flow-ingestion-json')",
"permissions": "data-hub-operator,read,data-hub-operator,update",
"outputFormat": "json",
"collections": [
"mapping-flow-mapping-json",
"mdm-content"
],
"targetEntity": "modifiedproduct",
"mapping": {
"name": "ingestionmapping-productMapping",
"version": 1
},
"validateEntity": false
}
}
}
}
映射文件:ingestionmapping-productMapping-1.mapping.json
{
"lang" : "zxx",
"name" : "ingestionmapping-productMapping",
"description" : "",
"version" : 1,
"targetEntityType" : "http://marklogic.com/modifiedproduct-0.0.1/modifiedproduct",
"sourceContext" : "/",
"sourceURI" : "/mapping-flow/json/....json",
"properties" : {
"mgame_id" : {
"sourcedFrom" : "game_id"
},
"mSKU" : {
"sourcedFrom" : "SKU"
},
"mtitle" : {
"sourcedFrom" : "title"
},
"mprice" : {
"sourcedFrom" : "price"
},
"mdescription" : {
"sourcedFrom" : "description"
},
"myears_active" : {
"sourcedFrom" : "years_active"
},
"mpublication_date" : {
"sourcedFrom" : "publication_date"
},
"mplayers" : {
"sourcedFrom" : "players"
},
"mage_range" : {
"sourcedFrom" : "age_range"
},
"msetup_time" : {
"sourcedFrom" : "setup_time"
},
"mplaying_time" : {
"sourcedFrom" : "playing_time"
},
"mchance" : {
"sourcedFrom" : "chance"
},
"mcategory" : {
"sourcedFrom" : "category"
},
"mhas_extensions" : {
"sourcedFrom" : "has_extensions"
},
"mhas_accessories" : {
"sourcedFrom" : "has_accessories"
},
"mhas_apparel" : {
"sourcedFrom" : "has_apparel"
},
"mpopularity_tier" : {
"sourcedFrom" : "popularity_tier"
},
"mprobability_apparel" : {
"sourcedFrom" : "probability_apparel"
},
"mprobability_accessories" : {
"sourcedFrom" : "probability_accessories"
},
"mprobability_extensions" : {
"sourcedFrom" : "probability_extensions"
}
}
}
实体名称:modifiedproduct
版本:0.0.1
我已经尝试了很多次调试问题,但无法找到问题所在。
结果,它在不使用映射属性的情况下将相同的 json 存储到最终数据库。
文件夹结构:
Folder structure screenshot
json 文件
{
"envelope": {
"headers": {
"sources": [
{
"name": "ingestion_only-flow"
}
],
"createdOn": "2020-07-02T09:49:57.5876177+02:00",
"createdBy": "admin",
"createdUsingFile": "C:\\Users\\Jhansi\\IdeaProjects\\MarklogicDataHubFramework5.2\\input\\board_games.csv"
},
"triples": [
],
"instance": {
"game_id": "1000130",
"SKU": "177897644317",
"title": "careful crack",
"price": "24.95",
"description": "",
"years_active": "0",
"publication_date": "0",
"players": "2-4",
"age_range": "",
"setup_time": "< 5 minutes",
"playing_time": "1 hour",
"chance": "High",
"category": "Board Game",
"has_extensions": "False",
"has_accessories": "True",
"has_apparel": "False",
"popularity_tier": "3",
"probability_apparel": "0.3",
"probability_accessories": "0.3",
"probability_extensions": "0.3"
},
"attachments": null
}
}
Data Hub will render the desired mapping once MarkLogic Entity Services is properly deployed (note the entity declaration in the mapped document; that is the key takeaway):
https://docs.marklogic.com/datahub//flows/flow-definition.html#flow-definition__custom-step-settings
stepDefinitionName: … Tip: If you are customizing a default step type (ingestion, mapping, or mastering), leave the value as default-ingestion, default-mapping, or default-mastering. …
检查完以上内容后,请遵循 Data Hub 最佳实践,并更正手动编写的错误 Steps 定义。考虑到您对 MarkLogic Data Hub 的熟悉程度,如果使用 QuickStart(快速入门)来创建 Flow 和 Steps,就不会出现以下情况。
"steps": {
"1": {
……………
"stepDefinitionName": "productIngestion",
"stepDefinitionType": "INGESTION",
……………
"2": {
"name": "mapping-step",
"stepDefinitionName": "productMapping",
"stepDefinitionType": "MAPPING",
…………
"mapping": {
"name": "ingestionmapping-productMapping",
- Please clean up your project structure and remove the contents of the step-definitions folder. Project structure example (the pink part):
- A working example of the Steps definitions is below. When in doubt, please validate the step in QuickStart.
{
"name" : "ingestionmapping",
"description" : "",
"batchSize" : 100,
"threadCount" : 4,
"stopOnError" : false,
"options" : { },
"version" : 0,
"steps" : {
"1" : {
"name" : "csv-ingest-step-json",
"description" : "",
"options" : {
"additionalCollections" : [ ],
"headers" : {
"sources" : [ {
"name" : "ingestionmapping"
} ],
"createdOn" : "currentDateTime",
"createdBy" : "currentUser"
},
"sourceQuery" : "cts.collectionQuery([])",
"collections" : [ "mapping-flow-ingestion-json" ],
"permissions" : "data-hub-operator,read,data-hub-operator,update",
"outputFormat" : "json",
"targetDatabase" : "store-hub-STAGING"
},
"customHook" : {
"module" : "",
"parameters" : { },
"user" : "",
"runBefore" : false
},
"retryLimit" : 0,
"batchSize" : 100,
"threadCount" : 4,
"stepDefinitionName" : "default-ingestion",
"stepDefinitionType" : "INGESTION",
"fileLocations" : {
"inputFilePath" : "/mldhf/STORE/data/products/games",
"inputFileType" : "csv",
"outputURIReplacement" : ".*games*.,'/mapping-flow/json'",
"separator" : ","
}
},
"2" : {
"name" : "mapping-step",
"description" : "",
"options" : {
"additionalCollections" : [ ],
"sourceQuery" : "cts.collectionQuery([\"mapping-flow-ingestion-json\"])",
"mapping" : {
"name" : "ingestionmapping-mapping-step",
"version" : 1
},
"targetEntity" : "modifiedproduct",
"sourceDatabase" : "store-hub-STAGING",
"collections" : [ "mapping-flow-mapping-json", "mdm-content" ],
"permissions" : "data-hub-operator,read,data-hub-operator,update",
"validateEntity" : false,
"sourceCollection" : "csv-ingest-step-json",
"outputFormat" : "json",
"targetDatabase" : "store-hub-FINAL"
},
"customHook" : {
"module" : "",
"parameters" : { },
"user" : "",
"runBefore" : false
},
"retryLimit" : null,
"batchSize" : 100,
"threadCount" : 4,
"stepDefinitionName" : "entity-services-mapping",
"stepDefinitionType" : "MAPPING"
}
}
}
我正在学习 Data Hub Framework 5.2.2,并尝试将其用于实现一个小项目。有人能帮我理解以下几点吗?
- 创建步骤(摄取、映射)的主要用途是什么?在流程定义中,我们已经很清楚地为各个步骤定义了输入和输出。那么为什么还需要显式地创建步骤,其目的是什么?
- 我正在尝试使用映射文件来映射数据,但映射没有生效,摄取的文件未经映射就原样加载到了最终数据库中。请帮我看看哪里做错了。
ingestionmapping.flow.json
{
"name": "ingestionmapping",
"description": "This is the default flow containing all of the default steps",
"batchSize": 100,
"threadCount": 4,
"options": {
"sourceQuery": null
},
"steps": {
"1": {
"name": "csv-ingest-step-json",
"description": "ingests json docs in JSON format to data-hub-STAGING",
"stepDefinitionName": "productIngestion",
"stepDefinitionType": "INGESTION",
"customHook" : {
"module" : "",
"parameters" : { },
"user" : "",
"runBefore" : false
},
"batchSize" : 100,
"threadCount" : 4,
"fileLocations": {
"inputFilePath": "input",
"outputURIReplacement": ".*input*.,'/mapping-flow/json'",
"inputFileType": "csv"
},
"options": {
"targetDatabase": "data-hub-STAGING",
"sourceQuery": "cts.collectionQuery([])",
"permissions": "data-hub-operator,read,data-hub-operator,update",
"outputFormat": "json",
"collections": [
"mapping-flow-ingestion-json"
],
"headers": {
"sources": [{"name": "ingestion_only-flow"}],
"createdOn" : "currentDateTime",
"createdBy" : "currentUser"
}
}
},
"2": {
"name": "mapping-step",
"description": "This is the default mapping step",
"stepDefinitionName": "productMapping",
"stepDefinitionType": "MAPPING",
"customHook" : {
"module" : "",
"parameters" : { },
"user" : "",
"runBefore" : false
},
"batchSize" : 100,
"threadCount" : 4,
"options": {
"sourceDatabase": "data-hub-STAGING",
"targetDatabase": "data-hub-FINAL",
"sourceQuery": "cts.collectionQuery('mapping-flow-ingestion-json')",
"permissions": "data-hub-operator,read,data-hub-operator,update",
"outputFormat": "json",
"collections": [
"mapping-flow-mapping-json",
"mdm-content"
],
"targetEntity": "modifiedproduct",
"mapping": {
"name": "ingestionmapping-productMapping",
"version": 1
},
"validateEntity": false
}
}
}
}
映射文件:ingestionmapping-productMapping-1.mapping.json
{
"lang" : "zxx",
"name" : "ingestionmapping-productMapping",
"description" : "",
"version" : 1,
"targetEntityType" : "http://marklogic.com/modifiedproduct-0.0.1/modifiedproduct",
"sourceContext" : "/",
"sourceURI" : "/mapping-flow/json/....json",
"properties" : {
"mgame_id" : {
"sourcedFrom" : "game_id"
},
"mSKU" : {
"sourcedFrom" : "SKU"
},
"mtitle" : {
"sourcedFrom" : "title"
},
"mprice" : {
"sourcedFrom" : "price"
},
"mdescription" : {
"sourcedFrom" : "description"
},
"myears_active" : {
"sourcedFrom" : "years_active"
},
"mpublication_date" : {
"sourcedFrom" : "publication_date"
},
"mplayers" : {
"sourcedFrom" : "players"
},
"mage_range" : {
"sourcedFrom" : "age_range"
},
"msetup_time" : {
"sourcedFrom" : "setup_time"
},
"mplaying_time" : {
"sourcedFrom" : "playing_time"
},
"mchance" : {
"sourcedFrom" : "chance"
},
"mcategory" : {
"sourcedFrom" : "category"
},
"mhas_extensions" : {
"sourcedFrom" : "has_extensions"
},
"mhas_accessories" : {
"sourcedFrom" : "has_accessories"
},
"mhas_apparel" : {
"sourcedFrom" : "has_apparel"
},
"mpopularity_tier" : {
"sourcedFrom" : "popularity_tier"
},
"mprobability_apparel" : {
"sourcedFrom" : "probability_apparel"
},
"mprobability_accessories" : {
"sourcedFrom" : "probability_accessories"
},
"mprobability_extensions" : {
"sourcedFrom" : "probability_extensions"
}
}
}
实体名称:modifiedproduct 版本:0.0.1
我已经尝试了很多次调试问题,但无法找到问题所在。 结果,它在不使用映射属性的情况下将相同的 json 存储到最终数据库。
文件夹结构: Folder structure screenshot
json 文件
{
"envelope": {
"headers": {
"sources": [
{
"name": "ingestion_only-flow"
}
],
"createdOn": "2020-07-02T09:49:57.5876177+02:00",
"createdBy": "admin",
"createdUsingFile": "C:\\Users\\Jhansi\\IdeaProjects\\MarklogicDataHubFramework5.2\\input\\board_games.csv"
},
"triples": [
],
"instance": {
"game_id": "1000130",
"SKU": "177897644317",
"title": "careful crack",
"price": "24.95",
"description": "",
"years_active": "0",
"publication_date": "0",
"players": "2-4",
"age_range": "",
"setup_time": "< 5 minutes",
"playing_time": "1 hour",
"chance": "High",
"category": "Board Game",
"has_extensions": "False",
"has_accessories": "True",
"has_apparel": "False",
"popularity_tier": "3",
"probability_apparel": "0.3",
"probability_accessories": "0.3",
"probability_extensions": "0.3"
},
"attachments": null
}
}
Data Hub will render the desired mapping once MarkLogic Entity Services is properly deployed (note the entity declaration in the mapped document; that is the key takeaway):
https://docs.marklogic.com/datahub//flows/flow-definition.html#flow-definition__custom-step-settings
stepDefinitionName: … Tip: If you are customizing a default step type (ingestion, mapping, or mastering), leave the value as default-ingestion, default-mapping, or default-mastering. …
检查完以上内容后,请遵循 Data Hub 最佳实践,并更正手动编写的错误 Steps 定义。考虑到您对 MarkLogic Data Hub 的熟悉程度,如果使用 QuickStart(快速入门)来创建 Flow 和 Steps,就不会出现以下情况。
"steps": {
"1": {
……………
"stepDefinitionName": "productIngestion",
"stepDefinitionType": "INGESTION",
……………
"2": {
"name": "mapping-step",
"stepDefinitionName": "productMapping",
"stepDefinitionType": "MAPPING",
…………
"mapping": {
"name": "ingestionmapping-productMapping",
- Please clean up your project structure and remove the contents of the step-definitions folder. Project structure example (the pink part):
- A working example of the Steps definitions is below. When in doubt, please validate the step in QuickStart.
{
"name" : "ingestionmapping",
"description" : "",
"batchSize" : 100,
"threadCount" : 4,
"stopOnError" : false,
"options" : { },
"version" : 0,
"steps" : {
"1" : {
"name" : "csv-ingest-step-json",
"description" : "",
"options" : {
"additionalCollections" : [ ],
"headers" : {
"sources" : [ {
"name" : "ingestionmapping"
} ],
"createdOn" : "currentDateTime",
"createdBy" : "currentUser"
},
"sourceQuery" : "cts.collectionQuery([])",
"collections" : [ "mapping-flow-ingestion-json" ],
"permissions" : "data-hub-operator,read,data-hub-operator,update",
"outputFormat" : "json",
"targetDatabase" : "store-hub-STAGING"
},
"customHook" : {
"module" : "",
"parameters" : { },
"user" : "",
"runBefore" : false
},
"retryLimit" : 0,
"batchSize" : 100,
"threadCount" : 4,
"stepDefinitionName" : "default-ingestion",
"stepDefinitionType" : "INGESTION",
"fileLocations" : {
"inputFilePath" : "/mldhf/STORE/data/products/games",
"inputFileType" : "csv",
"outputURIReplacement" : ".*games*.,'/mapping-flow/json'",
"separator" : ","
}
},
"2" : {
"name" : "mapping-step",
"description" : "",
"options" : {
"additionalCollections" : [ ],
"sourceQuery" : "cts.collectionQuery([\"mapping-flow-ingestion-json\"])",
"mapping" : {
"name" : "ingestionmapping-mapping-step",
"version" : 1
},
"targetEntity" : "modifiedproduct",
"sourceDatabase" : "store-hub-STAGING",
"collections" : [ "mapping-flow-mapping-json", "mdm-content" ],
"permissions" : "data-hub-operator,read,data-hub-operator,update",
"validateEntity" : false,
"sourceCollection" : "csv-ingest-step-json",
"outputFormat" : "json",
"targetDatabase" : "store-hub-FINAL"
},
"customHook" : {
"module" : "",
"parameters" : { },
"user" : "",
"runBefore" : false
},
"retryLimit" : null,
"batchSize" : 100,
"threadCount" : 4,
"stepDefinitionName" : "entity-services-mapping",
"stepDefinitionType" : "MAPPING"
}
}
}