MarkLogic Data Hub 框架 5.2.2 映射

MarkLogic Data Hub Framework 5.2.2 Mapping

我正在尝试学习 Data Hub Framework 5.2.2,并将其作为一个小项目实施的一部分。有人能帮助我理解以下几点吗?

ingestionmapping.flow.json

{
  "name": "ingestionmapping",
  "description": "This is the default flow containing all of the default steps",
  "batchSize": 100,
  "threadCount": 4,
  "options": {
    "sourceQuery": null
  },
  "steps": {
    "1": {
      "name": "csv-ingest-step-json",
      "description": "ingests json docs in JSON format to data-hub-STAGING",
      "stepDefinitionName": "productIngestion",
      "stepDefinitionType": "INGESTION",
      "customHook" : {
        "module" : "",
        "parameters" : { },
        "user" : "",
        "runBefore" : false
      },
      "batchSize" : 100,
      "threadCount" : 4,
      "fileLocations": {
        "inputFilePath": "input",
        "outputURIReplacement": ".*input*.,'/mapping-flow/json'",
        "inputFileType": "csv"
      },
      "options": {
        "targetDatabase": "data-hub-STAGING",
        "sourceQuery": "cts.collectionQuery([])",
        "permissions": "data-hub-operator,read,data-hub-operator,update",
        "outputFormat": "json",
        "collections": [
          "mapping-flow-ingestion-json"
        ],
        "headers": {
          "sources": [{"name":  "ingestion_only-flow"}],
          "createdOn" : "currentDateTime",
          "createdBy" : "currentUser"
        }
      }
    },
    "2": {
      "name": "mapping-step",
      "description": "This is the default mapping step",
      "stepDefinitionName": "productMapping",
      "stepDefinitionType": "MAPPING",
      "customHook" : {
        "module" : "",
        "parameters" : { },
        "user" : "",
        "runBefore" : false
      },
      "batchSize" : 100,
      "threadCount" : 4,
      "options": {
        "sourceDatabase": "data-hub-STAGING",
        "targetDatabase": "data-hub-FINAL",
        "sourceQuery": "cts.collectionQuery('mapping-flow-ingestion-json')",
        "permissions": "data-hub-operator,read,data-hub-operator,update",
        "outputFormat": "json",
        "collections": [
          "mapping-flow-mapping-json",
          "mdm-content"
        ],
        "targetEntity": "modifiedproduct",
        "mapping": {
          "name": "ingestionmapping-productMapping",
          "version": 1
        },
        "validateEntity": false
      }
    }
  }
}

映射文件:ingestionmapping-productMapping-1.mapping.json

{
  "lang" : "zxx",
  "name" : "ingestionmapping-productMapping",
  "description" : "",
  "version" : 1,
  "targetEntityType" : "http://marklogic.com/modifiedproduct-0.0.1/modifiedproduct",
  "sourceContext" : "/",
  "sourceURI" : "/mapping-flow/json/....json",
  "properties" : {
    "mgame_id" : {
      "sourcedFrom" : "game_id"
    },
    "mSKU" : {
      "sourcedFrom" : "SKU"
    },
    "mtitle" : {
      "sourcedFrom" : "title"
    },
    "mprice" : {
      "sourcedFrom" : "price"
    },
    "mdescription" : {
      "sourcedFrom" : "description"
    },
    "myears_active" : {
      "sourcedFrom" : "years_active"
    },
    "mpublication_date" : {
      "sourcedFrom" : "publication_date"
    },
    "mplayers" : {
      "sourcedFrom" : "players"
    },
    "mage_range" : {
      "sourcedFrom" : "age_range"
    },
    "msetup_time" : {
      "sourcedFrom" : "setup_time"
    },
    "mplaying_time" : {
      "sourcedFrom" : "playing_time"
    },
    "mchance" : {
      "sourcedFrom" : "chance"
    },
    "mcategory" : {
      "sourcedFrom" : "category"
    },
    "mhas_extensions" : {
      "sourcedFrom" : "has_extensions"
    },
    "mhas_accessories" : {
      "sourcedFrom" : "has_accessories"
    },
    "mhas_apparel" : {
      "sourcedFrom" : "has_apparel"
    },
    "mpopularity_tier" : {
      "sourcedFrom" : "popularity_tier"
    },
    "mprobability_apparel" : {
      "sourcedFrom" : "probability_apparel"
    },
    "mprobability_accessories" : {
      "sourcedFrom" : "probability_accessories"
    },
    "mprobability_extensions" : {
      "sourcedFrom" : "probability_extensions"
    }
  }
}

实体名称:modifiedproduct 版本:0.0.1

我已经多次尝试调试,但仍无法找到问题所在。结果是,它没有应用映射属性,而是将相同的 JSON 原样存储到了 FINAL 数据库。

文件夹结构: Folder structure screenshot

json 文件

{
"envelope": {
"headers": {
"sources": [
{
"name": "ingestion_only-flow"
}
], 
"createdOn": "2020-07-02T09:49:57.5876177+02:00", 
"createdBy": "admin", 
"createdUsingFile": "C:\\Users\\Jhansi\\IdeaProjects\\MarklogicDataHubFramework5.2\\input\\board_games.csv"
}, 
"triples": [
], 
"instance": {
"game_id": "1000130", 
"SKU": "177897644317", 
"title": "careful crack", 
"price": "24.95", 
"description": "", 
"years_active": "0", 
"publication_date": "0", 
"players": "2-4", 
"age_range": "", 
"setup_time": "< 5 minutes", 
"playing_time": "1 hour", 
"chance": "High", 
"category": "Board Game", 
"has_extensions": "False", 
"has_accessories": "True", 
"has_apparel": "False", 
"popularity_tier": "3", 
"probability_apparel": "0.3", 
"probability_accessories": "0.3", 
"probability_extensions": "0.3"
}, 
"attachments": null
}
}

Data Hub will produce the desired mapping once MarkLogic Entity Services is properly deployed (note the entity declaration in the mapped document — that is the key takeaway):

https://docs.marklogic.com/datahub//flows/flow-definition.html#flow-definition__custom-step-settings

stepDefinitionName: .....Tip: If you are customizing a default step type (ingestion, mapping, or mastering), leave the value as default-ingestion, default-mapping, or default-mastering....

检查完以上内容后,请遵循 Data Hub 最佳实践,更正手动编写的错误 Steps 定义。鉴于您对 MarkLogic Data Hub 的熟悉程度,如果您使用 QuickStart 来创建 Flow 和 Steps,就不会出现以下问题。

"steps": {
    "1": {
……………
      "stepDefinitionName": "productIngestion",
      "stepDefinitionType": "INGESTION",
……………

    

"2": {
      "name": "mapping-step",

      "stepDefinitionName": "productMapping",
      "stepDefinitionType": "MAPPING",
…………

        "mapping": {
          "name": "ingestionmapping-productMapping",
  1. Please clean up your project structure and remove the contents of the step-definitions folder. Project structure example (the pink part):

  2. A working example of the Steps definitions is below. When in doubt, please validate the step in QuickStart.
{
  "name" : "ingestionmapping",
  "description" : "",
  "batchSize" : 100,
  "threadCount" : 4,
  "stopOnError" : false,
  "options" : { },
  "version" : 0,
  "steps" : {
    "1" : {
      "name" : "csv-ingest-step-json",
      "description" : "",
      "options" : {
        "additionalCollections" : [ ],
        "headers" : {
          "sources" : [ {
            "name" : "ingestionmapping"
          } ],
          "createdOn" : "currentDateTime",
          "createdBy" : "currentUser"
        },
        "sourceQuery" : "cts.collectionQuery([])",
        "collections" : [ "mapping-flow-ingestion-json" ],
        "permissions" : "data-hub-operator,read,data-hub-operator,update",
        "outputFormat" : "json",
        "targetDatabase" : "store-hub-STAGING"
      },
      "customHook" : {
        "module" : "",
        "parameters" : { },
        "user" : "",
        "runBefore" : false
      },
      "retryLimit" : 0,
      "batchSize" : 100,
      "threadCount" : 4,
      "stepDefinitionName" : "default-ingestion",
      "stepDefinitionType" : "INGESTION",
      "fileLocations" : {
        "inputFilePath" : "/mldhf/STORE/data/products/games",
        "inputFileType" : "csv",
        "outputURIReplacement" : ".*games*.,'/mapping-flow/json'",
        "separator" : ","
      }
    },
    "2" : {
      "name" : "mapping-step",
      "description" : "",
      "options" : {
        "additionalCollections" : [ ],
        "sourceQuery" : "cts.collectionQuery([\"mapping-flow-ingestion-json\"])",
        "mapping" : {
          "name" : "ingestionmapping-mapping-step",
          "version" : 1
        },
        "targetEntity" : "modifiedproduct",
        "sourceDatabase" : "store-hub-STAGING",
        "collections" : [ "mapping-flow-mapping-json", "mdm-content" ],
        "permissions" : "data-hub-operator,read,data-hub-operator,update",
        "validateEntity" : false,
        "sourceCollection" : "csv-ingest-step-json",
        "outputFormat" : "json",
        "targetDatabase" : "store-hub-FINAL"
      },
      "customHook" : {
        "module" : "",
        "parameters" : { },
        "user" : "",
        "runBefore" : false
      },
      "retryLimit" : null,
      "batchSize" : 100,
      "threadCount" : 4,
      "stepDefinitionName" : "entity-services-mapping",
      "stepDefinitionType" : "MAPPING"
    }
  }
}