Solr 使用相邻查询关键字获得更好的搜索结果
Solr Better search result with adjacent query keyword
我已经为我的电子商务应用程序配置了 Solr(主要包含书籍数据)。但搜索结果似乎并没有返回我所期望的内容。
配置如下
schema.xml
`
<field name="namespace" type="string" indexed="true" stored="false" />
<field name="id" type="string" indexed="true" stored="true" />
<field name="productId" type="long" indexed="true" stored="true" />
<field name="skuId" type="long" indexed="true" stored="true" />
<field name="category" type="long" indexed="true" stored="false" multiValued="true" />
<field name="explicitCategory" type="long" indexed="true" stored="false" multiValued="true" />
<field name="searchable" type="text_general" indexed="true" stored="false" />
<dynamicField name="*_searchable" type="text_general" indexed="true" stored="false" />
<dynamicField name="*_i" type="int" indexed="true" stored="false" />
<dynamicField name="*_is" type="int" indexed="true" stored="false" multiValued="true" />
<dynamicField name="*_s" type="string" indexed="true" stored="false" />
<dynamicField name="*_ss" type="string" indexed="true" stored="false" multiValued="true" />
<dynamicField name="*_l" type="long" indexed="true" stored="false" />
<dynamicField name="*_ls" type="long" indexed="true" stored="false" multiValued="true" />
<dynamicField name="*_t" type="text_general" indexed="true" stored="false" />
<dynamicField name="*_txt" type="text_general" indexed="true" stored="false" multiValued="true" />
<dynamicField name="*_b" type="boolean" indexed="true" stored="false" />
<dynamicField name="*_bs" type="boolean" indexed="true" stored="false" multiValued="true" />
<dynamicField name="*_d" type="double" indexed="true" stored="false" />
<dynamicField name="*_ds" type="double" indexed="true" stored="false" multiValued="true" />
<dynamicField name="*_p" type="double" indexed="true" stored="false" />
<dynamicField name="*_dt" type="date" indexed="true" stored="false" />
<dynamicField name="*_dts" type="date" indexed="true" stored="false" multiValued="true" />
<!-- some trie-coded dynamic fields for faster range queries -->
<dynamicField name="*_ti" type="tint" indexed="true" stored="false" />
<dynamicField name="*_tl" type="tlong" indexed="true" stored="false" />
<dynamicField name="*_td" type="tdouble" indexed="true" stored="false" />
<dynamicField name="*_tdt" type="tdate" indexed="true" stored="false" />
<!-- Both field types required for geolocation searches. First stores the
lat and lon components for the "coordinate" FieldType. Second stores
the coordinate. -->
<dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false"/>
<dynamicField name="*_c" type="coordinate" indexed="true" stored="false"/>
</fields>
<uniqueKey>id</uniqueKey>
<types>
  <!-- Plain strings: StrField is not analyzed; values are indexed/stored verbatim. -->
  <fieldType name="string" class="solr.StrField" sortMissingLast="true" />

  <!-- Booleans: the value is either "true" or "false". -->
  <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" />

  <!-- Default numeric types. When range queries need to be faster, prefer the
       tint/tlong/tdouble variants declared further down. -->
  <fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0" />
  <fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0" />
  <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" positionIncrementGap="0" />

  <!-- Numeric types indexed at several precision levels, which speeds up range
       queries when many values lie between the range endpoints (see the
       NumericRangeQuery javadoc for the internals). precisionStep is given in
       bits: a smaller step means more tokens per value, a slightly larger
       index and faster range queries; a step of 0 turns multi-precision
       indexing off. -->
  <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" positionIncrementGap="0" />
  <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" positionIncrementGap="0" />
  <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" positionIncrementGap="0" />

  <!-- Dates are written as 1995-12-31T23:59:59Z, a restricted form of the
       canonical dateTime representation
       (http://www.w3.org/TR/xmlschema-2/#dateTime).
       The trailing "Z" (UTC) is mandatory; fractional seconds are optional
       (1995-12-31T23:59:59.999Z); every other component is mandatory.
       Date math relative to "NOW" is also accepted, for example:
         NOW/HOUR                 start of the current hour
         NOW-1DAY                 exactly one day ago
         NOW/DAY+6MONTHS+3DAYS    6 months and 3 days after the start of today
       See the DateField javadocs for details. For faster range queries use
       the tdate type below. -->
  <fieldType name="date" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0" />

  <!-- Trie-based date type for faster date range queries and date faceting. -->
  <fieldType name="tdate" class="solr.TrieDateField" precisionStep="6" positionIncrementGap="0" />

  <!-- General-purpose text with generic cross-language defaults: split with
       StandardTokenizer, then lower-case. The index-time and query-time
       chains are declared separately but are currently the same. -->
  <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
    <analyzer type="index">
      <tokenizer class="solr.StandardTokenizerFactory" />
      <filter class="solr.LowerCaseFilterFactory" />
    </analyzer>
    <analyzer type="query">
      <tokenizer class="solr.StandardTokenizerFactory" />
      <filter class="solr.LowerCaseFilterFactory" />
    </analyzer>
  </fieldType>

  <!-- Geospatial type. When indexed, a field of this type must not be
       multivalued. Its components go into sub-fields named with the
       "_coordinate" suffix. -->
  <fieldType name="coordinate" class="solr.LatLonType" subFieldSuffix="_coordinate"/>
</types>
`
solrconfig.xml
<?xml version="1.0" encoding="UTF-8" ?>
<config>
<!-- Lucene compatibility version this configuration is written against. -->
<luceneMatchVersion>4.10.3</luceneMatchVersion>
<!-- Directory implementation: read from the solr.directoryFactory property,
     falling back to solr.StandardDirectoryFactory when it is not set. -->
<directoryFactory name="DirectoryFactory"
                  class="${solr.directoryFactory:solr.StandardDirectoryFactory}" />
<!-- Update handler implementation; no further update settings are configured here. -->
<updateHandler class="solr.DirectUpdateHandler2" />
<query>
  <!-- Limit on the number of clauses in one boolean query. -->
  <maxBooleanClauses>1024</maxBooleanClauses>

  <!-- Searcher caches. All three hold up to 512 entries and are configured
       with autowarmCount="0". -->
  <filterCache class="solr.FastLRUCache" size="512" initialSize="512" autowarmCount="0" />
  <queryResultCache class="solr.LRUCache" size="512" initialSize="512" autowarmCount="0" />
  <documentCache class="solr.LRUCache" size="512" initialSize="512" autowarmCount="0" />

  <!-- Small user-defined cache named perSegFilter, regenerated with
       solr.NoOpRegenerator. -->
  <cache name="perSegFilter"
         class="solr.search.LRUCache"
         size="10"
         initialSize="0"
         autowarmCount="10"
         regenerator="solr.NoOpRegenerator" />

  <enableLazyFieldLoading>true</enableLazyFieldLoading>
  <queryResultWindowSize>20</queryResultWindowSize>
  <queryResultMaxDocsCached>200</queryResultMaxDocsCached>

  <!-- Warming hooks. The newSearcher listener declares no queries; the
       firstSearcher listener sends the single static query below. -->
  <listener event="newSearcher" class="solr.QuerySenderListener" />
  <listener event="firstSearcher" class="solr.QuerySenderListener">
    <arr name="queries">
      <lst>
        <str name="q">static firstSearcher warming in solrconfig.xml</str>
      </lst>
    </arr>
  </listener>

  <useColdSearcher>false</useColdSearcher>
  <maxWarmingSearchers>2</maxWarmingSearchers>
</query>
<!-- HTTP request handling. handleSelect="false" is set on the dispatcher.
     NOTE(review): enableRemoteStreaming="true" and a multipart limit of
     2048000 KB are permissive settings; confirm they are intended before
     exposing this core to untrusted clients. -->
<requestDispatcher handleSelect="false">
  <requestParsers enableRemoteStreaming="true"
                  multipartUploadLimitInKB="2048000"
                  formdataUploadLimitInKB="2048"
                  addHttpRequestToContext="false"/>
  <!-- never304="true" is set for HTTP caching. -->
  <httpCaching never304="true" />
</requestDispatcher>
<!-- Search handler registered at /select. The values under "defaults" apply
     when a request does not supply the parameter itself. -->
<requestHandler name="/select" class="solr.SearchHandler">
  <lst name="defaults">
    <str name="echoParams">explicit</str>
    <!-- Default page size. The key has to be spelled "rows"; it was previously
         written "rowsa", which is not a query parameter Solr reads, so this
         default never took effect. -->
    <int name="rows">10</int>
    <!-- Field searched when the query does not name one. -->
    <str name="df">name_t</str>
  </lst>
</requestHandler>
<!-- JSON response writer. Responses are labelled text/plain (UTF-8) instead of
     a JSON media type. -->
<queryResponseWriter name="json" class="solr.JSONResponseWriter">
  <str name="content-type">text/plain; charset=UTF-8</str>
</queryResponseWriter>
例如,当我搜索 2 states 时,它会给我很多随机结果,其中一些标题里甚至不包含 "2 states"。
然而,当我把 2 states 作为短语(即加上引号的 "2 States")搜索时,我确实得到了相关结果。
我不想将每次搜索都限制在引号中,因为用户可能会搜索像 "book by author" 这样的组合,如果在词组中搜索肯定会得到 0 个结果,因为它不会匹配确切的词组。
我怎样才能改进我的搜索,以便我可以在顶部列出最相关的结果。
您可以在 edismax 处理程序中使用 pf2 和 pf3 参数:当查询中的两个(pf2)或三个(pf3)词项在指定字段中相邻出现时,对匹配的文档进行加权提升。
defType=edismax&pf2=title^4
常规的 dismax 处理程序也提供 pf 参数,但它建立在所有词项都彼此靠近出现的假设之上。这可能会有所帮助,但 pf2 或 pf3 听起来更适合您的需要。
我已经为我的电子商务应用程序配置了 Solr(主要包含书籍数据)。但搜索结果似乎并没有返回我所期望的内容。
配置如下
schema.xml `
<field name="namespace" type="string" indexed="true" stored="false" />
<field name="id" type="string" indexed="true" stored="true" />
<field name="productId" type="long" indexed="true" stored="true" />
<field name="skuId" type="long" indexed="true" stored="true" />
<field name="category" type="long" indexed="true" stored="false" multiValued="true" />
<field name="explicitCategory" type="long" indexed="true" stored="false" multiValued="true" />
<field name="searchable" type="text_general" indexed="true" stored="false" />
<dynamicField name="*_searchable" type="text_general" indexed="true" stored="false" />
<dynamicField name="*_i" type="int" indexed="true" stored="false" />
<dynamicField name="*_is" type="int" indexed="true" stored="false" multiValued="true" />
<dynamicField name="*_s" type="string" indexed="true" stored="false" />
<dynamicField name="*_ss" type="string" indexed="true" stored="false" multiValued="true" />
<dynamicField name="*_l" type="long" indexed="true" stored="false" />
<dynamicField name="*_ls" type="long" indexed="true" stored="false" multiValued="true" />
<dynamicField name="*_t" type="text_general" indexed="true" stored="false" />
<dynamicField name="*_txt" type="text_general" indexed="true" stored="false" multiValued="true" />
<dynamicField name="*_b" type="boolean" indexed="true" stored="false" />
<dynamicField name="*_bs" type="boolean" indexed="true" stored="false" multiValued="true" />
<dynamicField name="*_d" type="double" indexed="true" stored="false" />
<dynamicField name="*_ds" type="double" indexed="true" stored="false" multiValued="true" />
<dynamicField name="*_p" type="double" indexed="true" stored="false" />
<dynamicField name="*_dt" type="date" indexed="true" stored="false" />
<dynamicField name="*_dts" type="date" indexed="true" stored="false" multiValued="true" />
<!-- some trie-coded dynamic fields for faster range queries -->
<dynamicField name="*_ti" type="tint" indexed="true" stored="false" />
<dynamicField name="*_tl" type="tlong" indexed="true" stored="false" />
<dynamicField name="*_td" type="tdouble" indexed="true" stored="false" />
<dynamicField name="*_tdt" type="tdate" indexed="true" stored="false" />
<!-- Both field types required for geolocation searches. First stores the
lat and lon components for the "coordinate" FieldType. Second stores
the coordinate. -->
<dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false"/>
<dynamicField name="*_c" type="coordinate" indexed="true" stored="false"/>
</fields>
<uniqueKey>id</uniqueKey>
<types>
  <!-- Plain strings: StrField is not analyzed; values are indexed/stored verbatim. -->
  <fieldType name="string" class="solr.StrField" sortMissingLast="true" />

  <!-- Booleans: the value is either "true" or "false". -->
  <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" />

  <!-- Default numeric types. When range queries need to be faster, prefer the
       tint/tlong/tdouble variants declared further down. -->
  <fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0" />
  <fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0" />
  <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" positionIncrementGap="0" />

  <!-- Numeric types indexed at several precision levels, which speeds up range
       queries when many values lie between the range endpoints (see the
       NumericRangeQuery javadoc for the internals). precisionStep is given in
       bits: a smaller step means more tokens per value, a slightly larger
       index and faster range queries; a step of 0 turns multi-precision
       indexing off. -->
  <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" positionIncrementGap="0" />
  <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" positionIncrementGap="0" />
  <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" positionIncrementGap="0" />

  <!-- Dates are written as 1995-12-31T23:59:59Z, a restricted form of the
       canonical dateTime representation
       (http://www.w3.org/TR/xmlschema-2/#dateTime).
       The trailing "Z" (UTC) is mandatory; fractional seconds are optional
       (1995-12-31T23:59:59.999Z); every other component is mandatory.
       Date math relative to "NOW" is also accepted, for example:
         NOW/HOUR                 start of the current hour
         NOW-1DAY                 exactly one day ago
         NOW/DAY+6MONTHS+3DAYS    6 months and 3 days after the start of today
       See the DateField javadocs for details. For faster range queries use
       the tdate type below. -->
  <fieldType name="date" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0" />

  <!-- Trie-based date type for faster date range queries and date faceting. -->
  <fieldType name="tdate" class="solr.TrieDateField" precisionStep="6" positionIncrementGap="0" />

  <!-- General-purpose text with generic cross-language defaults: split with
       StandardTokenizer, then lower-case. The index-time and query-time
       chains are declared separately but are currently the same. -->
  <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
    <analyzer type="index">
      <tokenizer class="solr.StandardTokenizerFactory" />
      <filter class="solr.LowerCaseFilterFactory" />
    </analyzer>
    <analyzer type="query">
      <tokenizer class="solr.StandardTokenizerFactory" />
      <filter class="solr.LowerCaseFilterFactory" />
    </analyzer>
  </fieldType>

  <!-- Geospatial type. When indexed, a field of this type must not be
       multivalued. Its components go into sub-fields named with the
       "_coordinate" suffix. -->
  <fieldType name="coordinate" class="solr.LatLonType" subFieldSuffix="_coordinate"/>
</types>
`
solrconfig.xml
<?xml version="1.0" encoding="UTF-8" ?>
<config>
<!-- Lucene compatibility version this configuration is written against. -->
<luceneMatchVersion>4.10.3</luceneMatchVersion>
<!-- Directory implementation: read from the solr.directoryFactory property,
     falling back to solr.StandardDirectoryFactory when it is not set. -->
<directoryFactory name="DirectoryFactory"
                  class="${solr.directoryFactory:solr.StandardDirectoryFactory}" />
<!-- Update handler implementation; no further update settings are configured here. -->
<updateHandler class="solr.DirectUpdateHandler2" />
<query>
  <!-- Limit on the number of clauses in one boolean query. -->
  <maxBooleanClauses>1024</maxBooleanClauses>

  <!-- Searcher caches. All three hold up to 512 entries and are configured
       with autowarmCount="0". -->
  <filterCache class="solr.FastLRUCache" size="512" initialSize="512" autowarmCount="0" />
  <queryResultCache class="solr.LRUCache" size="512" initialSize="512" autowarmCount="0" />
  <documentCache class="solr.LRUCache" size="512" initialSize="512" autowarmCount="0" />

  <!-- Small user-defined cache named perSegFilter, regenerated with
       solr.NoOpRegenerator. -->
  <cache name="perSegFilter"
         class="solr.search.LRUCache"
         size="10"
         initialSize="0"
         autowarmCount="10"
         regenerator="solr.NoOpRegenerator" />

  <enableLazyFieldLoading>true</enableLazyFieldLoading>
  <queryResultWindowSize>20</queryResultWindowSize>
  <queryResultMaxDocsCached>200</queryResultMaxDocsCached>

  <!-- Warming hooks. The newSearcher listener declares no queries; the
       firstSearcher listener sends the single static query below. -->
  <listener event="newSearcher" class="solr.QuerySenderListener" />
  <listener event="firstSearcher" class="solr.QuerySenderListener">
    <arr name="queries">
      <lst>
        <str name="q">static firstSearcher warming in solrconfig.xml</str>
      </lst>
    </arr>
  </listener>

  <useColdSearcher>false</useColdSearcher>
  <maxWarmingSearchers>2</maxWarmingSearchers>
</query>
<!-- HTTP request handling. handleSelect="false" is set on the dispatcher.
     NOTE(review): enableRemoteStreaming="true" and a multipart limit of
     2048000 KB are permissive settings; confirm they are intended before
     exposing this core to untrusted clients. -->
<requestDispatcher handleSelect="false">
  <requestParsers enableRemoteStreaming="true"
                  multipartUploadLimitInKB="2048000"
                  formdataUploadLimitInKB="2048"
                  addHttpRequestToContext="false"/>
  <!-- never304="true" is set for HTTP caching. -->
  <httpCaching never304="true" />
</requestDispatcher>
<!-- Search handler registered at /select. The values under "defaults" apply
     when a request does not supply the parameter itself. -->
<requestHandler name="/select" class="solr.SearchHandler">
  <lst name="defaults">
    <str name="echoParams">explicit</str>
    <!-- Default page size. The key has to be spelled "rows"; it was previously
         written "rowsa", which is not a query parameter Solr reads, so this
         default never took effect. -->
    <int name="rows">10</int>
    <!-- Field searched when the query does not name one. -->
    <str name="df">name_t</str>
  </lst>
</requestHandler>
<!-- JSON response writer. Responses are labelled text/plain (UTF-8) instead of
     a JSON media type. -->
<queryResponseWriter name="json" class="solr.JSONResponseWriter">
  <str name="content-type">text/plain; charset=UTF-8</str>
</queryResponseWriter>
例如,当我搜索 2 states 时,它会给我很多随机结果,其中一些标题里甚至不包含 "2 states"。
然而,当我把 2 states 作为短语(即加上引号的 "2 States")搜索时,我确实得到了相关结果。
我不想将每次搜索都限制在引号中,因为用户可能会搜索像 "book by author" 这样的组合,如果在词组中搜索肯定会得到 0 个结果,因为它不会匹配确切的词组。
我怎样才能改进我的搜索,以便我可以在顶部列出最相关的结果。
您可以在 edismax 处理程序中使用 pf2 和 pf3 参数:当查询中的两个(pf2)或三个(pf3)词项在指定字段中相邻出现时,对匹配的文档进行加权提升。
defType=edismax&pf2=title^4
常规的 dismax 处理程序也提供 pf 参数,但它建立在所有词项都彼此靠近出现的假设之上。这可能会有所帮助,但 pf2 或 pf3 听起来更适合您的需要。