Marklogic 中的 NEAR 查询问题

Issue with NEAR query in Marklogic

我遇到了一个关于 NEAR 查询(near-query)的奇怪问题。

(: Sample input document bound to $xml: a <titles> element with six <title>
   children. Only the type="item" title contains the text "Phase II study";
   the other five hold journal-name variants. :)
let $xml :=
  <titles count="6">
    <title type="source">ASIA-PACIFIC JOURNAL OF CLINICAL ONCOLOGY</title>
    <title type="source_abbrev">ASIA-PAC J CLIN ONCO</title>
    <title type="abbrev_iso">Asia-Pac. J. Clin. Oncol.</title>
    <title type="abbrev_11">ASIA-PAC J</title>
    <title type="abbrev_29">ASIA-PAC J CLIN ONCOL</title>
    <title type="item">Phase II study of cetuximab with irinotecan for KRAS wild-type colorectal cancer in Japanese patients</title>
   </titles>

最初我运行这个查询

(: Query 1: match any of the listed "phase N" phrases inside a <title> element.
   The word-query is run with the "case-insensitive" and "wildcarded" options.
   cts:highlight then wraps each matched piece of text ($cts:text) in <b>.
   Relies on $xml being bound by the preceding snippet. :)
let $q1 := 
      cts:element-query((xs:QName("title")),
          cts:word-query(("phase 0","phase 1","phase 2","phase 3","phase 4","phase I","phase ii","phase iii","phase iv"),
          ("case-insensitive", "wildcarded"))
        )
return
  cts:highlight($xml,$q1, <b>{$cts:text}</b>)

我得到了结果,是正确的

现在我运行这个,我得到了以下正确的结果

(: Query 2: match "trial*", "study" or "studies*" inside a <title> element,
   again with the "case-insensitive" and "wildcarded" options, and wrap each
   matched piece of text ($cts:text) in <b>.
   Relies on $xml being bound by an earlier snippet. :)
let $q2 := 
      cts:element-query((xs:QName("title")),
          cts:word-query(("trial*", "study", "studies*"),
          ("case-insensitive", "wildcarded"))
        )

return
  cts:highlight($xml,$q2, <b>{$cts:text}</b>)

然后我运行了下面这个使用 NEAR/0 的查询,但没有得到任何结果

(: Query 3 (NEAR/0): combine the "phase N" element-query and the
   trial/study element-query in a single cts:near-query with distance 0 and
   the 'ordered' option, then highlight matches in $xml.
   The author reports that this variant produces no match. :)
let $q3 :=
    cts:near-query((
              (: first operand: a "phase N" phrase inside <title> :)
              cts:element-query((xs:QName("title")),
                cts:word-query(("phase 0","phase 1","phase 2","phase 3","phase 4","phase I","phase ii","phase iii","phase iv"),
                  ("case-insensitive", "wildcarded")))
          ,
             (: second operand: a trial/study term inside <title> :)
             cts:element-query((xs:QName("title")),
                cts:word-query(("trial*", "study", "studies*"),
                  ("case-insensitive", "wildcarded")))
         ),
         (: distance argument: 0 :)
         0,
         ('ordered'))

return
  cts:highlight($xml,$q3, <b>{$cts:text}</b>)

但后来我运行了使用 NEAR/1 的查询,却得到了结果。这是为什么呢?短语 1(phrase 1,例如 "Phase II")后面紧跟着短语 2(phrase 2,例如 "study"),所以 NEAR 距离应该是 0,对吗?

(: Query 3 (NEAR/1): the same two element-queries combined in a
   cts:near-query with the 'ordered' option, but with distance 1 instead of 0.
   The author reports that this variant does return a match. :)
let $q3 :=
    cts:near-query((
              (: first operand: a "phase N" phrase inside <title> :)
              cts:element-query((xs:QName("title")),
                cts:word-query(("phase 0","phase 1","phase 2","phase 3","phase 4","phase I","phase ii","phase iii","phase iv"),
                  ("case-insensitive", "wildcarded")))
          ,
             (: second operand: a trial/study term inside <title> :)
             cts:element-query((xs:QName("title")),
                cts:word-query(("trial*", "study", "studies*"),
                  ("case-insensitive", "wildcarded")))
         ),
         (: distance argument: 1 :)
         1,
         ('ordered'))

return
  cts:highlight($xml,$q3, <b>{$cts:text}</b>)

我认为 MarkLogic 计算单词距离的方式是:锚点词位于位置 0,紧随其后的词元(token)距离为 1,依此类推。因此,要匹配相邻的单词,需要把 near-query 的距离设为 1。您示例中的查询行为是正确的。

下面引用 MarkLogic cts:near-query 文档中的示例:

xquery version "1.0-ml";
(: Three cts:contains checks against the same sentence, each using an
   'ordered' cts:near-query with a different word pair and distance.
   The trailing comments give the expected result of each check. :)
let $x := <p>Now is the winter of our discontent</p>
return
cts:contains($x, cts:near-query(
                    ("now", "the"),
                    2, "ordered"));

(: => returns true, "the" is 2 words from "now" :)

(: Adjacent words "now" and "is" with distance 1. :)
let $x := <p>Now is the winter of our discontent</p>
return
cts:contains($x, cts:near-query(
                    ("now", "is"),
                    1, "ordered"));
(: => returns true, "is" is 1 word from "now" :)

(: Same adjacent pair with distance 0. :)
let $x := <p>Now is the winter of our discontent</p>
return
cts:contains($x, cts:near-query(
                    ("now", "is"),
                    0, "ordered"));

(: => returns false, "is" is 1 word from "now" :)