MarkLogic - 灵活复制
MarkLogic - flexible replication
我们一直在尝试在使用 MarkLogic 数据库的系统中设置灵活的复制。
我们按照 https://docs.marklogic.com/8.0/guide/flexrep/quick_start 的说明进行操作,并且已经能够在两个 MarkLogic 服务器之间设置灵活的复制。我们已验证在主数据库(master)中创建的新文档已复制到副本。但是,主数据库目前有超过 4700 万条在我们配置复制之前就已存在的记录。一旦复制过程被触发,我们观察到文档被复制到副本的速度非常慢。在最初的两个小时内只复制了大约 20,000 份文档。按照这个速度,要完全复制旧记录需要几个月的时间。
我们的问题是:
我们正在研究提高两台服务器的硬件规格,但除此之外,有人对我们如何加速复制有任何建议或文档吗?我找不到与此相关的任何现有文档。
如果做不到这一点,是否可以设置灵活的复制而无需复制初始数据集?仅供参考,我们还尝试克隆主数据库并将克隆用作副本。 (我们认为这可能意味着不必复制旧记录。)但是,在这种情况下,我们在副本服务器上遇到了 XDMP-NEWSTAMP 和 XDMP-EXTIME 错误,因此我们放弃了这种方法。副本上遇到的错误示例如下:
2017-08-03 18:45:04.376 Notice: exp-rest-content-flexrep:
XDMP-NEWSTAMP: Timestamp too new for forest exp-rest-content-001-1
(15017569242290900) 2017-08-03 18:45:04.376 Notice:
exp-rest-content-flexrep: in /apply.xqy [1.0-ml] 2017-08-03
18:45:04.379 Notice: TaskServer: XDMP-EXTIME: try { let
$raw-module-name := module-path($action-to-execute/p:module) let
$module-kind := module-kind($raw-module-name) let $module-name := if
($module-kind = "xquery" or $module-kind = "javascript") then
$raw-module-name else $cpfi:xslt-action return if ($module-name = "")
then fn:error((), "CPF-ACTIONNOTFOUND", "Default success") else if
($module-kind = "javascript") then (xdmp:trace("CPF Action Invoke",
fn:string-join(($caller, xdmp:get-current-user(), $uri,
$state-or-status, $raw-module-name), " ")), xdmp:invoke($module-name,
(fn:QName("","uri"), $uri, xs:QName("cpf:document-uri"), $uri,
fn:QName("","transition"), $chosen-transition,
options-var-js($action-to-execute)), $invoke-options)) else
(xdmp:trace("CPF Action Invoke", fn:string-join(($caller,
xdmp:get-current-user(), $uri, $state-or-status, $raw-module-name), "
")), xdmp:invoke($module-name, ($vars, xs:QName("cpf:transition"),
$chosen-transition, options-var($action-to-execute), if ($module-kind
= "xslt") then (xs:QName("cpf:stylesheet-uri"), $raw-module-name) else ()), $invoke-options)) } catch ($e) { let $trace := let $context :=
fn:concat($caller, " ", $uri, " action failed") return
(cpf:log(fn:string-join(($context, $e/err:format-string), " "),
"error"), cpf:log(($context, $e), "fine")) let $failure-action :=
($pipelines/p:failure-action)[1] let $raw-failure-module :=
module-path($failure-action/p:module) let $failure-kind :=
module-kind($raw-failure-module) let $failure-module := if
($failure-kind = "xquery" or $failure-kind = "javascript") then
$raw-failure-module else $cpfi:xslt-action return if ($failure-module
= "") then fn:error((), "CPF-ACTIONNOTFOUND", "Default failure action") else xdmp:invoke($failure-module, ($vars,
xs:QName("cpf:transition"), $chosen-transition,
options-var($failure-action), xs:QName("cpf:exception"), $e, if
($failure-kind = "xslt") then (xs:QName("cpf:stylesheet-uri"),
$raw-failure-module) else ()), $invoke-options) } -- Time limit
exceeded 2017-08-03 18:45:04.379 Notice: TaskServer: in
/MarkLogic/cpf/triggers/internal-cpf.xqy, at 213:4, 2017-08-03
18:45:04.379 Notice: TaskServer: in execute-action("on-state-enter",
"http://marklogic.com/states/initial", "/_smslogs/5849823.xml",
(xs:QName("trgr:uri"), "/_smslogs/5849823.xml",
xs:QName("trgr:trigger"), ...), different-transactiont...,
(fn:doc("http://marklogic.com/cpf/pipelines/12349495875628658916.xml")/p:pipeline,
fn:doc("http://marklogic.com/cpf/pipelines/3358424510998587926.xml")/p:pipeline,
fn:doc("http://marklogic.com/cpf/pipelines/13179541037342910978.xml")/p:pipeline,
...),
fn:doc("http://marklogic.com/cpf/pipelines/3358424510998587926.xml")/p:pipeline/p:state-transition[3]/p:default-action, fn:doc("http://marklogic.com/cpf/pipelines/3358424510998587926.xml")/p:pipeline/p:state-transition[3])
[1.0-ml] 2017-08-03 18:45:04.379 Notice: TaskServer: $caller =
"on-state-enter" 2017-08-03 18:45:04.379 Notice: TaskServer:
$state-or-status = "http://marklogic.com/states/initial" 2017-08-03
18:45:04.379 Notice: TaskServer: $uri = "/_smslogs/5849823.xml"
2017-08-03 18:45:04.379 Notice: TaskServer: $vars =
(xs:QName("trgr:uri"), "/_smslogs/5849823.xml",
xs:QName("trgr:trigger"), ...) 2017-08-03 18:45:04.379 Notice:
TaskServer: $invoke-options = different-transactiont...
2017-08-03 18:45:04.379 Notice: TaskServer: $pipelines =
(fn:doc("http://marklogic.com/cpf/pipelines/12349495875628658916.xml")/p:pipeline,
fn:doc("http://marklogic.com/cpf/pipelines/3358424510998587926.xml")/p:pipeline,
fn:doc("http://marklogic.com/cpf/pipelines/13179541037342910978.xml")/p:pipeline,
...) 2017-08-03 18:45:04.379 Notice: TaskServer: $action-to-execute
= fn:doc("http://marklogic.com/cpf/pipelines/3358424510998587926.xml")/p:pipeline/p:state-transition[3]/p:default-action 2017-08-03 18:45:04.379 Notice: TaskServer: $chosen-transition =
fn:doc("http://marklogic.com/cpf/pipelines/3358424510998587926.xml")/p:pipeline/p:state-transition[3]
2017-08-03 18:45:04.379 Notice: TaskServer: $e = XDMP-NEWSTAMP 2017-08-03 18:45:04.379 Notice: TaskServer: in
/MarkLogic/cpf/triggers/internal-cpf.xqy, at 342:6, 2017-08-03
18:45:04.379 Notice: TaskServer: in
execute-transition("on-state-enter",
"http://marklogic.com/states/initial", "/_smslogs/5849823.xml",
(xs:QName("trgr:uri"), "/_smslogs/5849823.xml",
xs:QName("trgr:trigger"), ...), 6551367241994447650, (fn:doc("http://marklogic.com/cpf/pipelines/12349495875628658916.xml")/p:pipeline,
fn:doc("http://marklogic.com/cpf/pipelines/3358424510998587926.xml")/p:pipeline,
fn:doc("http://marklogic.com/cpf/pipelines/13179541037342910978.xml")/p:pipeline,
...),
(fn:doc("http://marklogic.com/cpf/pipelines/12349495875628658916.xml")/p:pipeline/p:state-transition[2],
fn:doc("http://marklogic.com/cpf/pipelines/3358424510998587926.xml")/p:pipeline/p:state-transition[3],
fn:doc("http://marklogic.com/cpf/pipelines/13179541037342910978.xml")/p:pipeline/p:state-transition[1],
...), http://marklogic.com/states/initial)
[1.0-ml] 2017-08-03 18:45:04.379 Notice: TaskServer: $caller =
cpf:state("http://marklogic.com/states/initial") 2017-08-03
18:45:04.379 Notice: TaskServer: $state-or-status = () 2017-08-03
18:45:04.379 Notice: TaskServer: $uri = (xs:QName("trgr:uri"),
"/_smslogs/5849823.xml", xs:QName("trgr:trigger"), ...) 2017-08-03
18:45:04.379 Notice: TaskServer: in
/MarkLogic/cpf/triggers/internal-cpf.xqy, at 358:3, 2017-08-03
18:45:04.379 Notice: TaskServer: in
int:execute-state-transition("on-state-enter",
cpf:state("http://marklogic.com/states/initial"),
"/_smslogs/5849823.xml", (xs:QName("trgr:uri"),
"/_smslogs/5849823.xml", xs:QName("trgr:trigger"), ...), 6551367241994447650) [1.0-ml] 2017-08-03 18:45:04.379 Notice: TaskServer: $caller =
cpf:state("http://marklogic.com/states/initial") 2017-08-03
18:45:04.379 Notice: TaskServer: $state = () 2017-08-03 18:45:04.379
Notice: TaskServer: $uri = (xs:QName("trgr:uri"),
"/_smslogs/5849823.xml", xs:QName("trgr:trigger"), ...) 2017-08-03
18:45:04.379 Notice: TaskServer: in
/MarkLogic/cpf/triggers/on-state-enter.xqy, at 41:6 [1.0-ml]
2017-08-03 18:45:04.379 Notice: TaskServer: $state =
cpf:state("http://marklogic.com/states/initial") 2017-08-03
18:45:04.379 Notice: TaskServer: $trace = () 2017-08-03 18:45:04.379
Notice: TaskServer: $vars = (xs:QName("trgr:uri"),
"/_smslogs/5849823.xml", xs:QName("trgr:trigger"), ...) 2017-08-03
18:45:04.379 Notice: TaskServer: XDMP-NEWSTAMP: Timestamp too new for
forest exp-rest-content-001-1 (15017569242290900) 2017-08-03
18:45:04.379 Notice: exp-rest-content-flexrep: XDMP-NEWSTAMP:
Timestamp too new for forest exp-rest-content-001-1
(15017569242290900) 2017-08-03 18:45:04.379 Notice:
exp-rest-content-flexrep: in /apply.xqy [1.0-ml]
根据您引用的说明,您似乎正在使用立即复制,它会复制数据库中所有新增或更新的记录,但不会复制已有的记录。
要启用数据库中现有文档(即 0 Day 文档)的复制,您需要打开灵活复制目标配置页面(数据库 > [数据库名称] > 灵活复制 > 域 > [域名] > [目标名称] : 摘要)。
在页面底部您会看到“立即推送”选项,其默认值为 true。如果将其设置为 false,则复制将由 push-local-forest 进程处理,并开始拾取 0 Day 文档。
至于提高文档从源复制到目标的速率,主要的设置是“每批文档数”。我听到的建议是逐步增大该值,同时观察 push-local-forest.xqy 进程的执行时间(Configure > Groups > [group name] > Task Server:“状态”选项卡,并点击“显示更多”按钮;或者查看 http://servername:8002/dashboard 的“查询执行”选项卡)。
通常 push-local-forest 进程每分钟运行一次,因此您要确保批次大小经过优化,使每一批都能够在这一分钟内完成;否则可能会出现下一个 push-local-forest 进程在前一个进程完成之前就启动的情况,这些进程有可能开始堆积并发生死锁。
提高吞吐量的其他技巧包括:减少目标上的索引数量(如果可能),和/或使用带有巨型帧的 VLAN 进行集群间通信。
我们一直在尝试在使用 MarkLogic 数据库的系统中设置灵活的复制。 我们按照 https://docs.marklogic.com/8.0/guide/flexrep/quick_start 的说明进行操作,并且已经能够在两个 MarkLogic 服务器之间设置灵活的复制。我们已验证在主数据库(master)中创建的新文档已复制到副本。但是,主数据库目前有超过 4700 万条在我们配置复制之前就已存在的记录。一旦复制过程被触发,我们观察到文档被复制到副本的速度非常慢。在最初的两个小时内只复制了大约 20,000 份文档。按照这个速度,要完全复制旧记录需要几个月的时间。
我们的问题是:
我们正在研究提高两台服务器的硬件规格,但除此之外,有人对我们如何加速复制有任何建议或文档吗?我找不到与此相关的任何现有文档。
如果做不到这一点,是否可以设置灵活的复制而无需复制初始数据集?仅供参考,我们还尝试克隆主数据库并将克隆用作副本。 (我们认为这可能意味着不必复制旧记录。)但是,在这种情况下,我们在副本服务器上遇到了 XDMP-NEWSTAMP 和 XDMP-EXTIME 错误,因此我们放弃了这种方法。副本上遇到的错误示例如下:
2017-08-03 18:45:04.376 Notice: exp-rest-content-flexrep: XDMP-NEWSTAMP: Timestamp too new for forest exp-rest-content-001-1 (15017569242290900) 2017-08-03 18:45:04.376 Notice: exp-rest-content-flexrep: in /apply.xqy [1.0-ml] 2017-08-03 18:45:04.379 Notice: TaskServer: XDMP-EXTIME: try { let $raw-module-name := module-path($action-to-execute/p:module) let $module-kind := module-kind($raw-module-name) let $module-name := if ($module-kind = "xquery" or $module-kind = "javascript") then $raw-module-name else $cpfi:xslt-action return if ($module-name = "") then fn:error((), "CPF-ACTIONNOTFOUND", "Default success") else if ($module-kind = "javascript") then (xdmp:trace("CPF Action Invoke", fn:string-join(($caller, xdmp:get-current-user(), $uri, $state-or-status, $raw-module-name), " ")), xdmp:invoke($module-name, (fn:QName("","uri"), $uri, xs:QName("cpf:document-uri"), $uri, fn:QName("","transition"), $chosen-transition, options-var-js($action-to-execute)), $invoke-options)) else (xdmp:trace("CPF Action Invoke", fn:string-join(($caller, xdmp:get-current-user(), $uri, $state-or-status, $raw-module-name), " ")), xdmp:invoke($module-name, ($vars, xs:QName("cpf:transition"), $chosen-transition, options-var($action-to-execute), if ($module-kind = "xslt") then (xs:QName("cpf:stylesheet-uri"), $raw-module-name) else ()), $invoke-options)) } catch ($e) { let $trace := let $context := fn:concat($caller, " ", $uri, " action failed") return (cpf:log(fn:string-join(($context, $e/err:format-string), " "), "error"), cpf:log(($context, $e), "fine")) let $failure-action := ($pipelines/p:failure-action)[1] let $raw-failure-module := module-path($failure-action/p:module) let $failure-kind := module-kind($raw-failure-module) let $failure-module := if ($failure-kind = "xquery" or $failure-kind = "javascript") then $raw-failure-module else $cpfi:xslt-action return if ($failure-module = "") then fn:error((), "CPF-ACTIONNOTFOUND", "Default failure action") else xdmp:invoke($failure-module, 
($vars, xs:QName("cpf:transition"), $chosen-transition, options-var($failure-action), xs:QName("cpf:exception"), $e, if ($failure-kind = "xslt") then (xs:QName("cpf:stylesheet-uri"), $raw-failure-module) else ()), $invoke-options) } -- Time limit exceeded 2017-08-03 18:45:04.379 Notice: TaskServer: in /MarkLogic/cpf/triggers/internal-cpf.xqy, at 213:4, 2017-08-03 18:45:04.379 Notice: TaskServer: in execute-action("on-state-enter", "http://marklogic.com/states/initial", "/_smslogs/5849823.xml", (xs:QName("trgr:uri"), "/_smslogs/5849823.xml", xs:QName("trgr:trigger"), ...), different-transactiont..., (fn:doc("http://marklogic.com/cpf/pipelines/12349495875628658916.xml")/p:pipeline, fn:doc("http://marklogic.com/cpf/pipelines/3358424510998587926.xml")/p:pipeline, fn:doc("http://marklogic.com/cpf/pipelines/13179541037342910978.xml")/p:pipeline, ...), fn:doc("http://marklogic.com/cpf/pipelines/3358424510998587926.xml")/p:pipeline/p:state-transition[3]/p:default-action, fn:doc("http://marklogic.com/cpf/pipelines/3358424510998587926.xml")/p:pipeline/p:state-transition[3]) [1.0-ml] 2017-08-03 18:45:04.379 Notice: TaskServer: $caller = "on-state-enter" 2017-08-03 18:45:04.379 Notice: TaskServer:
$state-or-status = "http://marklogic.com/states/initial" 2017-08-03 18:45:04.379 Notice: TaskServer: $uri = "/_smslogs/5849823.xml" 2017-08-03 18:45:04.379 Notice: TaskServer: $vars = (xs:QName("trgr:uri"), "/_smslogs/5849823.xml", xs:QName("trgr:trigger"), ...) 2017-08-03 18:45:04.379 Notice: TaskServer: $invoke-options = different-transactiont... 2017-08-03 18:45:04.379 Notice: TaskServer: $pipelines = (fn:doc("http://marklogic.com/cpf/pipelines/12349495875628658916.xml")/p:pipeline, fn:doc("http://marklogic.com/cpf/pipelines/3358424510998587926.xml")/p:pipeline, fn:doc("http://marklogic.com/cpf/pipelines/13179541037342910978.xml")/p:pipeline, ...) 2017-08-03 18:45:04.379 Notice: TaskServer: $action-to-execute = fn:doc("http://marklogic.com/cpf/pipelines/3358424510998587926.xml")/p:pipeline/p:state-transition[3]/p:default-action 2017-08-03 18:45:04.379 Notice: TaskServer: $chosen-transition = fn:doc("http://marklogic.com/cpf/pipelines/3358424510998587926.xml")/p:pipeline/p:state-transition[3] 2017-08-03 18:45:04.379 Notice: TaskServer: $e = XDMP-NEWSTAMP 2017-08-03 18:45:04.379 Notice: TaskServer: in /MarkLogic/cpf/triggers/internal-cpf.xqy, at 342:6, 2017-08-03 18:45:04.379 Notice: TaskServer: in execute-transition("on-state-enter", "http://marklogic.com/states/initial", "/_smslogs/5849823.xml", (xs:QName("trgr:uri"), "/_smslogs/5849823.xml", xs:QName("trgr:trigger"), ...), 6551367241994447650, (fn:doc("http://marklogic.com/cpf/pipelines/12349495875628658916.xml")/p:pipeline, fn:doc("http://marklogic.com/cpf/pipelines/3358424510998587926.xml")/p:pipeline, fn:doc("http://marklogic.com/cpf/pipelines/13179541037342910978.xml")/p:pipeline, ...), (fn:doc("http://marklogic.com/cpf/pipelines/12349495875628658916.xml")/p:pipeline/p:state-transition[2], fn:doc("http://marklogic.com/cpf/pipelines/3358424510998587926.xml")/p:pipeline/p:state-transition[3], fn:doc("http://marklogic.com/cpf/pipelines/13179541037342910978.xml")/p:pipeline/p:state-transition[1], ...), 
http://marklogic.com/states/initial) [1.0-ml] 2017-08-03 18:45:04.379 Notice: TaskServer: $caller = cpf:state("http://marklogic.com/states/initial") 2017-08-03 18:45:04.379 Notice: TaskServer: $state-or-status = () 2017-08-03 18:45:04.379 Notice: TaskServer: $uri = (xs:QName("trgr:uri"), "/_smslogs/5849823.xml", xs:QName("trgr:trigger"), ...) 2017-08-03 18:45:04.379 Notice: TaskServer: in /MarkLogic/cpf/triggers/internal-cpf.xqy, at 358:3, 2017-08-03 18:45:04.379 Notice: TaskServer: in int:execute-state-transition("on-state-enter", cpf:state("http://marklogic.com/states/initial"), "/_smslogs/5849823.xml", (xs:QName("trgr:uri"), "/_smslogs/5849823.xml", xs:QName("trgr:trigger"), ...), 6551367241994447650) [1.0-ml] 2017-08-03 18:45:04.379 Notice: TaskServer: $caller = cpf:state("http://marklogic.com/states/initial") 2017-08-03 18:45:04.379 Notice: TaskServer: $state = () 2017-08-03 18:45:04.379 Notice: TaskServer: $uri = (xs:QName("trgr:uri"), "/_smslogs/5849823.xml", xs:QName("trgr:trigger"), ...) 2017-08-03 18:45:04.379 Notice: TaskServer: in /MarkLogic/cpf/triggers/on-state-enter.xqy, at 41:6 [1.0-ml] 2017-08-03 18:45:04.379 Notice: TaskServer: $state = cpf:state("http://marklogic.com/states/initial") 2017-08-03 18:45:04.379 Notice: TaskServer: $trace = () 2017-08-03 18:45:04.379 Notice: TaskServer: $vars = (xs:QName("trgr:uri"), "/_smslogs/5849823.xml", xs:QName("trgr:trigger"), ...) 2017-08-03 18:45:04.379 Notice: TaskServer: XDMP-NEWSTAMP: Timestamp too new for forest exp-rest-content-001-1 (15017569242290900) 2017-08-03 18:45:04.379 Notice: exp-rest-content-flexrep: XDMP-NEWSTAMP: Timestamp too new for forest exp-rest-content-001-1 (15017569242290900) 2017-08-03 18:45:04.379 Notice: exp-rest-content-flexrep: in /apply.xqy [1.0-ml]
根据您引用的说明,您似乎正在使用立即复制,它会复制数据库中所有新增或更新的记录,但不会复制已有的记录。
要启用数据库中现有文档(即 0 Day 文档)的复制,您需要打开灵活复制目标配置页面(数据库 > [数据库名称] > 灵活复制 > 域 > [域名] > [目标名称] : 摘要)。
在页面底部您会看到“立即推送”选项,其默认值为 true。如果将其设置为 false,则复制将由 push-local-forest 进程处理,并开始拾取 0 Day 文档。
至于提高文档从源复制到目标的速率,主要的设置是“每批文档数”。我听到的建议是逐步增大该值,同时观察 push-local-forest.xqy 进程的执行时间(Configure > Groups > [group name] > Task Server:“状态”选项卡,并点击“显示更多”按钮;或者查看 http://servername:8002/dashboard 的“查询执行”选项卡)。
通常 push-local-forest 进程每分钟运行一次,因此您要确保批次大小经过优化,使每一批都能够在这一分钟内完成;否则可能会出现下一个 push-local-forest 进程在前一个进程完成之前就启动的情况,这些进程有可能开始堆积并发生死锁。
提高吞吐量的其他技巧包括:减少目标上的索引数量(如果可能),和/或使用带有巨型帧的 VLAN 进行集群间通信。