MarkLogic 搜索语法问题
MarkLogic search grammar issue
我的印象是,当搜索短语用双引号括起来时,会执行精确(短语)搜索。但我仍然得到了部分匹配的结果(尽管分数很低),而我期望只返回完全匹配的结果。以下是我的示例代码,我是否遗漏了什么?
xquery version "1.0-ml";
(:
 : Runs a Search API query with custom options and returns the first result.
 :
 : Steps: build the query text ($q), build the search options ($options),
 : then call search:search with start = 1 and page length = 1.
 :
 : NOTE(review): the "html" namespace and the functx module are declared and
 : imported below but are not referenced anywhere in the lines shown here.
 :)
declare namespace html = "http://www.w3.org/1999/xhtml";
import module namespace search = "http://marklogic.com/appservices/search" at "/MarkLogic/appservices/search/search.xqy";
import module namespace functx = "http://www.functx.com" at "/MarkLogic/functx/functx-1.0-doc-2007-01.xqy";
(: Query text: a phrase term, a "context" constraint value and a "sort" operator state, joined with AND.
   NOTE: the phrase is delimited by typographic quotes (U+201C and U+201D), not by the ASCII double quote (U+0022). :)
let $q := '(“protein degradation”) AND ((context:PCS)) AND (sort:date_desc)'
(: Search options, in order of appearance:
   - additional-query: a collection query on the envelope collection URI;
   - operator "sort": states date_desc and date_asc, both ordering on the field upload_date as xs:dateTime;
   - constraints "context" and "type": xs:string range constraints with facets, frequency-ordered descending;
   - term: case-, punctuation- and whitespace-insensitive, wildcarded;
   - search-option: unfiltered. :)
let $options :=
<options xmlns="http://marklogic.com/appservices/search">
<additional-query>
<cts:collection-query xmlns:cts="http://marklogic.com/cts">
<cts:uri>http://XXXXX/type/envelope</cts:uri>
</cts:collection-query>
</additional-query>
<operator name="sort">
<state name="date_desc">
<sort-order type="xs:dateTime" direction="descending">
<field name="upload_date"/>
</sort-order>
</state>
<state name="date_asc">
<sort-order type="xs:dateTime" direction="ascending">
<field name="upload_date"/>
</sort-order>
</state>
</operator>
<constraint name="context">
<range type="xs:string" facet="true">
<element name="context" ns="http://XXXXX/metadata"/>
<facet-option>frequency-order</facet-option>
<facet-option>descending</facet-option>
</range>
</constraint>
<constraint name="type">
<range type="xs:string" facet="true">
<element name="type" ns="http://XXXXX/metadata"/>
<facet-option>frequency-order</facet-option>
<facet-option>descending</facet-option>
</range>
</constraint>
<term>
<term-option>case-insensitive</term-option>
<term-option>punctuation-insensitive</term-option>
<term-option>whitespace-insensitive</term-option>
<term-option>wildcarded</term-option>
</term>
<search-option>unfiltered</search-option>
</options>
(: Paging: start at the first result and request a single result. :)
let $start := 1
let $page-length :=1
(: Execute the search and return the response unchanged. :)
let $result := search:search($q, $options, $start, $page-length)
return $result
下面是我得到的结果。我很困惑,不明白为什么下面这条结果会被命中:
<search:result index="1" uri="/documents/PCS/0ba1e4a0190b77a3962e1218c3c1a7f4cb233ddf.xml" path="fn:doc("/documents/PCS/0ba1e4a0190b77a3962e1218c3c1a7f4cb233ddf.xml")" score="58624" confidence="0.329381" fitness="0.5856407">
<search:snippet>
<search:match path="fn:doc("/documents/PCS/0ba1e4a0190b77a3962e1218c3c1a7f4cb233ddf.xml")/*:document-envelope/*:metadata/*:context">
<search:highlight>PCS</search:highlight>
</search:match>
<search:match path="fn:doc("/documents/PCS/0ba1e4a0190b77a3962e1218c3c1a7f4cb233ddf.xml")/*:document-envelope/*:extractedText/*:html/*:body/*:p[1]">
Analysis of the Safety Risks Associated with Hydrazine as a <search:highlight>Degradation</search:highlight> Product in LCIG RD12714 ra-rd12714-hydrazine</search:match>
<search:match path="fn:doc("/documents/PCS/0ba1e4a0190b77a3962e1218c3c1a7f4cb233ddf.xml")/*:document-envelope/*:extractedText/*:html/*:body/*:p[9]">...of the Safety Risks Associated with Hydrazine as a <search:highlight>Degradation</search:highlight> Product in...</search:match>
</search:snippet>
</search:result>
请注意,上面的结果中只匹配了 <search:highlight>Degradation</search:highlight>
…… 为什么在我们尝试进行精确搜索时,它却返回了部分匹配?
----- 添加了 search:parse 输出 ------
<cts:and-query xmlns:cts="http://marklogic.com/cts" xmlns:search="http://marklogic.com/appservices/search">
<cts:word-query>
<cts:text xml:lang="en">“protein</cts:text>
<cts:option>case-insensitive</cts:option>
<cts:option>punctuation-insensitive</cts:option>
<cts:option>whitespace-insensitive</cts:option>
<cts:option>wildcarded</cts:option>
</cts:word-query>
<cts:word-query>
<cts:text xml:lang="en">degradation”</cts:text>
<cts:option>case-insensitive</cts:option>
<cts:option>punctuation-insensitive</cts:option>
<cts:option>whitespace-insensitive</cts:option>
<cts:option>wildcarded</cts:option>
</cts:word-query>
<cts:element-range-query operator="=">
<cts:element xmlns:_1="http://XXXXX/metadata">_1:context</cts:element>
<cts:value xsi:type="xs:string" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">PCS</cts:value>
<cts:option>collation=http://marklogic.com/collation/</cts:option>
</cts:element-range-query>
<cts:annotation operator-ref="sort" state-ref="date_desc">
</cts:annotation>
</cts:and-query>
我建议您使用名为 "exact" 的 term-option。
文档中的说明:
"exact"
精确匹配查询。它是 "case-sensitive"、"diacritic-sensitive"、"punctuation-sensitive"、"whitespace-sensitive"、"unstemmed" 和 "unwildcarded" 的简写。
我认为问题出在弯引号(排版引号 “ ”)上。搜索语法只把 ASCII 直引号 " 当作短语定界符,弯引号会被当成普通字符留在词项中,于是短语被拆成两个独立的词:
import module namespace search = "http://marklogic.com/appservices/search" at "/MarkLogic/appservices/search/search.xqy";
(: Parse the phrase delimited by ASCII double quotes (U+0022); no options are passed. :)
search:parse('"protein degradation"')
给出:
<cts:word-query xmlns:cts="http://marklogic.com/cts">
<cts:text xml:lang="en">protein degradation</cts:text>
</cts:word-query>
而:
import module namespace search = "http://marklogic.com/appservices/search" at "/MarkLogic/appservices/search/search.xqy";
(: Parse the same phrase delimited by typographic quotes (U+201C and U+201D); no options are passed. :)
search:parse('“protein degradation”')
给出:
<cts:and-query xmlns:cts="http://marklogic.com/cts">
<cts:word-query>
<cts:text xml:lang="en">“protein</cts:text>
</cts:word-query>
<cts:word-query>
<cts:text xml:lang="en">degradation”</cts:text>
</cts:word-query>
</cts:and-query>
我的印象是,当搜索短语用双引号括起来时,会执行精确(短语)搜索。但我仍然得到了部分匹配的结果(尽管分数很低),而我期望只返回完全匹配的结果。以下是我的示例代码,我是否遗漏了什么?
xquery version "1.0-ml";
(:
 : Runs a Search API query with custom options and returns the first result.
 :
 : Steps: build the query text ($q), build the search options ($options),
 : then call search:search with start = 1 and page length = 1.
 :
 : NOTE(review): the "html" namespace and the functx module are declared and
 : imported below but are not referenced anywhere in the lines shown here.
 :)
declare namespace html = "http://www.w3.org/1999/xhtml";
import module namespace search = "http://marklogic.com/appservices/search" at "/MarkLogic/appservices/search/search.xqy";
import module namespace functx = "http://www.functx.com" at "/MarkLogic/functx/functx-1.0-doc-2007-01.xqy";
(: Query text: a phrase term, a "context" constraint value and a "sort" operator state, joined with AND.
   NOTE: the phrase is delimited by typographic quotes (U+201C and U+201D), not by the ASCII double quote (U+0022). :)
let $q := '(“protein degradation”) AND ((context:PCS)) AND (sort:date_desc)'
(: Search options, in order of appearance:
   - additional-query: a collection query on the envelope collection URI;
   - operator "sort": states date_desc and date_asc, both ordering on the field upload_date as xs:dateTime;
   - constraints "context" and "type": xs:string range constraints with facets, frequency-ordered descending;
   - term: case-, punctuation- and whitespace-insensitive, wildcarded;
   - search-option: unfiltered. :)
let $options :=
<options xmlns="http://marklogic.com/appservices/search">
<additional-query>
<cts:collection-query xmlns:cts="http://marklogic.com/cts">
<cts:uri>http://XXXXX/type/envelope</cts:uri>
</cts:collection-query>
</additional-query>
<operator name="sort">
<state name="date_desc">
<sort-order type="xs:dateTime" direction="descending">
<field name="upload_date"/>
</sort-order>
</state>
<state name="date_asc">
<sort-order type="xs:dateTime" direction="ascending">
<field name="upload_date"/>
</sort-order>
</state>
</operator>
<constraint name="context">
<range type="xs:string" facet="true">
<element name="context" ns="http://XXXXX/metadata"/>
<facet-option>frequency-order</facet-option>
<facet-option>descending</facet-option>
</range>
</constraint>
<constraint name="type">
<range type="xs:string" facet="true">
<element name="type" ns="http://XXXXX/metadata"/>
<facet-option>frequency-order</facet-option>
<facet-option>descending</facet-option>
</range>
</constraint>
<term>
<term-option>case-insensitive</term-option>
<term-option>punctuation-insensitive</term-option>
<term-option>whitespace-insensitive</term-option>
<term-option>wildcarded</term-option>
</term>
<search-option>unfiltered</search-option>
</options>
(: Paging: start at the first result and request a single result. :)
let $start := 1
let $page-length :=1
(: Execute the search and return the response unchanged. :)
let $result := search:search($q, $options, $start, $page-length)
return $result
下面是我得到的结果。我很困惑,不明白为什么下面这条结果会被命中:
<search:result index="1" uri="/documents/PCS/0ba1e4a0190b77a3962e1218c3c1a7f4cb233ddf.xml" path="fn:doc("/documents/PCS/0ba1e4a0190b77a3962e1218c3c1a7f4cb233ddf.xml")" score="58624" confidence="0.329381" fitness="0.5856407">
<search:snippet>
<search:match path="fn:doc("/documents/PCS/0ba1e4a0190b77a3962e1218c3c1a7f4cb233ddf.xml")/*:document-envelope/*:metadata/*:context">
<search:highlight>PCS</search:highlight>
</search:match>
<search:match path="fn:doc("/documents/PCS/0ba1e4a0190b77a3962e1218c3c1a7f4cb233ddf.xml")/*:document-envelope/*:extractedText/*:html/*:body/*:p[1]">
Analysis of the Safety Risks Associated with Hydrazine as a <search:highlight>Degradation</search:highlight> Product in LCIG RD12714 ra-rd12714-hydrazine</search:match>
<search:match path="fn:doc("/documents/PCS/0ba1e4a0190b77a3962e1218c3c1a7f4cb233ddf.xml")/*:document-envelope/*:extractedText/*:html/*:body/*:p[9]">...of the Safety Risks Associated with Hydrazine as a <search:highlight>Degradation</search:highlight> Product in...</search:match>
</search:snippet>
</search:result>
请注意,上面的结果中只匹配了 <search:highlight>Degradation</search:highlight>
…… 为什么在我们尝试进行精确搜索时,它却返回了部分匹配?
----- 添加了 search:parse 输出 ------
<cts:and-query xmlns:cts="http://marklogic.com/cts" xmlns:search="http://marklogic.com/appservices/search">
<cts:word-query>
<cts:text xml:lang="en">“protein</cts:text>
<cts:option>case-insensitive</cts:option>
<cts:option>punctuation-insensitive</cts:option>
<cts:option>whitespace-insensitive</cts:option>
<cts:option>wildcarded</cts:option>
</cts:word-query>
<cts:word-query>
<cts:text xml:lang="en">degradation”</cts:text>
<cts:option>case-insensitive</cts:option>
<cts:option>punctuation-insensitive</cts:option>
<cts:option>whitespace-insensitive</cts:option>
<cts:option>wildcarded</cts:option>
</cts:word-query>
<cts:element-range-query operator="=">
<cts:element xmlns:_1="http://XXXXX/metadata">_1:context</cts:element>
<cts:value xsi:type="xs:string" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">PCS</cts:value>
<cts:option>collation=http://marklogic.com/collation/</cts:option>
</cts:element-range-query>
<cts:annotation operator-ref="sort" state-ref="date_desc">
</cts:annotation>
</cts:and-query>
我建议您使用名为 "exact" 的 term-option。
文档中的说明:"exact" 表示精确匹配查询。它是 "case-sensitive"、"diacritic-sensitive"、"punctuation-sensitive"、"whitespace-sensitive"、"unstemmed" 和 "unwildcarded" 的简写。
我认为问题出在弯引号(排版引号 “ ”)上。搜索语法只把 ASCII 直引号 " 当作短语定界符,弯引号会被当成普通字符留在词项中,于是短语被拆成两个独立的词:
import module namespace search = "http://marklogic.com/appservices/search" at "/MarkLogic/appservices/search/search.xqy";
(: Parse the phrase delimited by ASCII double quotes (U+0022); no options are passed. :)
search:parse('"protein degradation"')
给出:
<cts:word-query xmlns:cts="http://marklogic.com/cts">
<cts:text xml:lang="en">protein degradation</cts:text>
</cts:word-query>
而:
import module namespace search = "http://marklogic.com/appservices/search" at "/MarkLogic/appservices/search/search.xqy";
(: Parse the same phrase delimited by typographic quotes (U+201C and U+201D); no options are passed. :)
search:parse('“protein degradation”')
给出:
<cts:and-query xmlns:cts="http://marklogic.com/cts">
<cts:word-query>
<cts:text xml:lang="en">“protein</cts:text>
</cts:word-query>
<cts:word-query>
<cts:text xml:lang="en">degradation”</cts:text>
</cts:word-query>
</cts:and-query>