用于评分搜索的 Solr 配置
Solr configuration for scored search
我正在尝试设置一个 Solr 索引来搜索产品信息数据库。为此,我填充了一个产品详细信息数据库并使用了 Solr 6.0.0。对于给定的产品详细信息(标题、品牌、其他关键字),我想知道数据库中是否有与给定详细信息非常匹配的产品。我已经开始数据导入并创建了索引。但是,我搜索时,尽管列出的产品不同,但匹配的产品的分数都是一样的。我尝试了不同的搜索关键字组合,但结果在每种情况下都是相似的。我也尝试过使用不同的分词器和过滤器。
我尝试过的 schema.xml 示例如下:
<?xml version="1.0" encoding="UTF-8" ?>
<!--
  Sample Solr schema for the product index.
  Every product attribute is indexed and stored with the text_general type, and
  all of them are additionally copied into the multi-valued "catchall" field,
  which is declared as the default search field.
-->
<schema name="example" version="1.5">

  <!-- Per-attribute fields. -->
  <field name="id" type="Int" indexed="true" stored="true"/>
  <field name="name" type="text_general" indexed="true" stored="true" />
  <field name="brand" type="text_general" indexed="true" stored="true"/>
  <field name="category" type="text_general" indexed="true" stored="true"/>
  <field name="description" type="text_general" indexed="true" stored="true" />
  <!-- NOTE(review): the accompanying import config and the sample query refer to
       a field named "mrp", which is not declared in this excerpt. Confirm that
       the full schema declares it. -->

  <!-- Aggregate field: receives a copy of every attribute above. -->
  <field name="catchall" type="text_general" indexed="true" stored="true" multiValued="true" />
  <copyField source="id" dest="catchall" />
  <copyField source="name" dest="catchall" />
  <copyField source="brand" dest="catchall" />
  <copyField source="category" dest="catchall" />
  <copyField source="description" dest="catchall" />

  <uniqueKey>id</uniqueKey>
  <defaultSearchField>catchall</defaultSearchField>

  <types>
    <fieldtype name="string" class="solr.StrField" sortMissingLast="true" />
    <fieldtype name="Int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0"/>

    <!-- Text type shared by all searchable fields. The index and query chains
         are identical except that the query chain adds a synonym filter. -->
    <fieldtype name="text_general" class="solr.TextField" positionIncrementGap="100">

      <analyzer type="index">
        <charFilter class="solr.HTMLStripCharFilterFactory"/>
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.WordDelimiterFilterFactory"
                generateWordParts="1" generateNumberParts="1"
                splitOnNumerics="1" splitOnCaseChange="1"
                catenateWords="0" catenateNumbers="0" catenateAll="0"
                preserveOriginal="1"/>
        <filter class="solr.ASCIIFoldingFilterFactory" preserveOriginal="true"/>
        <filter class="solr.ICUFoldingFilterFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>

      <analyzer type="query">
        <charFilter class="solr.HTMLStripCharFilterFactory"/>
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.WordDelimiterFilterFactory"
                generateWordParts="1" generateNumberParts="1"
                splitOnNumerics="1" splitOnCaseChange="1"
                catenateWords="0" catenateNumbers="0" catenateAll="0"
                preserveOriginal="1"/>
        <filter class="solr.ASCIIFoldingFilterFactory" preserveOriginal="true"/>
        <filter class="solr.ICUFoldingFilterFactory"/>
        <!-- Query-time only: expand terms using synonyms.txt. -->
        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
        <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>

    </fieldtype>

    <!-- Neither indexed nor stored. -->
    <fieldtype name="ignored" class="solr.StrField" indexed="false" stored="false" multiValued="true" />
  </types>

</schema>
编辑
data-config.xml 中的实体定义如下:
<!--
  Root entity: one Solr document per product row, joined with its brand row.
  NOTE(review): pk is "id" but deltaImportQuery filters on product_name, and no
  deltaQuery is visible in this excerpt. Confirm how delta imports are meant to
  select changed rows.
  NOTE(review): "select p.*, b.*" returns the columns of both tables; since the
  join uses b.id, there may be two columns named "id" in the result. Verify which
  one ends up mapped to the "id" field.
-->
<entity name="master_products"
        pk="id"
        query="select p.* ,b.* from master_products p ,master_brands b where b.id=p.brand_id"
        deltaImportQuery="SELECT * FROM master_products WHERE product_name='${dataimporter.delta.product_name}' "
        >
  <!-- or b.brand='${dataimporter.delta.brand}' -->
  <field column="product_name" name="name"/>
  <field column="product_description" name="description"/>
  <field column="id" name="id"/>
  <field column="mrp" name="mrp"/>
  <field column="brand" name="brand"/>

  <!--
    Cached child entities.
    The delta queries reference parent columns with the ${entity.column}
    placeholder syntax; the leading "$" is required and the name must not
    contain spaces, otherwise the placeholder is not substituted.
    NOTE(review): cacheImpl is set but no cacheKey/cacheLookup is given, so it is
    unclear how cached rows are matched to the parent row. Confirm and add the
    key attributes if a per-product lookup is intended.
  -->
  <entity name="master_brands"
          query="select * from master_brands"
          deltaImportQuery="select * from master_brands where id = ${master_products.brand_id}"
          processor="SqlEntityProcessor"
          cacheImpl="SortedMapBackedCache">
  </entity>

  <entity name="master_product_categories"
          query="select * from master_product_categories"
          deltaImportQuery="select * from master_product_categories where id = ${master_products.product_category_id}"
          processor="SqlEntityProcessor"
          cacheImpl="SortedMapBackedCache">
    <field column="category" name="category" />
  </entity>
</entity>
编辑
查询如下
http://localhost:8983/solr/myproducts/select?fl=* score&fq=brand:Nikon&fq=mrp:28950*&indent=on&q=name:*"Nikon D3200 (Black) DSLR with AF-S 18-55mm VR Kit Lens"*&wt=json
我希望获得帮助以实现我的目标。您能否指导我创建符合我目的的正确配置?提前致谢。
通配符查询(wildcard queries)是常量评分(constant scoring)的,这意味着它们不会改变匹配文档的分数。您可能需要使用常规查询(而不是通配符查询),才能让不同文档得到有区分度的评分。
Range queries [a TO z], prefix queries a*, and wildcard queries a*b are constant-scoring (all matching documents get an equal score). The scoring factors tf, idf, index boost, and coord are not used. There is no limitation on the number of terms that match (as there was in past versions of Lucene).
fq 子句不影响分数,它们只是过滤结果集。
我正在尝试设置一个 Solr 索引来搜索产品信息数据库。为此,我填充了一个产品详细信息数据库并使用了 Solr 6.0.0。对于给定的产品详细信息(标题、品牌、其他关键字),我想知道数据库中是否有与给定详细信息非常匹配的产品。我已经开始数据导入并创建了索引。但是,我搜索时,尽管列出的产品不同,但匹配的产品的分数都是一样的。我尝试了不同的搜索关键字组合,但结果在每种情况下都是相似的。我也尝试过使用不同的分词器和过滤器。
我尝试过的 schema.xml 示例如下:
<?xml version="1.0" encoding="UTF-8" ?>
<!--
  Sample Solr schema for the product index.
  Every product attribute is indexed and stored with the text_general type, and
  all of them are additionally copied into the multi-valued "catchall" field,
  which is declared as the default search field.
-->
<schema name="example" version="1.5">

  <!-- Per-attribute fields. -->
  <field name="id" type="Int" indexed="true" stored="true"/>
  <field name="name" type="text_general" indexed="true" stored="true" />
  <field name="brand" type="text_general" indexed="true" stored="true"/>
  <field name="category" type="text_general" indexed="true" stored="true"/>
  <field name="description" type="text_general" indexed="true" stored="true" />
  <!-- NOTE(review): the accompanying import config and the sample query refer to
       a field named "mrp", which is not declared in this excerpt. Confirm that
       the full schema declares it. -->

  <!-- Aggregate field: receives a copy of every attribute above. -->
  <field name="catchall" type="text_general" indexed="true" stored="true" multiValued="true" />
  <copyField source="id" dest="catchall" />
  <copyField source="name" dest="catchall" />
  <copyField source="brand" dest="catchall" />
  <copyField source="category" dest="catchall" />
  <copyField source="description" dest="catchall" />

  <uniqueKey>id</uniqueKey>
  <defaultSearchField>catchall</defaultSearchField>

  <types>
    <fieldtype name="string" class="solr.StrField" sortMissingLast="true" />
    <fieldtype name="Int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0"/>

    <!-- Text type shared by all searchable fields. The index and query chains
         are identical except that the query chain adds a synonym filter. -->
    <fieldtype name="text_general" class="solr.TextField" positionIncrementGap="100">

      <analyzer type="index">
        <charFilter class="solr.HTMLStripCharFilterFactory"/>
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.WordDelimiterFilterFactory"
                generateWordParts="1" generateNumberParts="1"
                splitOnNumerics="1" splitOnCaseChange="1"
                catenateWords="0" catenateNumbers="0" catenateAll="0"
                preserveOriginal="1"/>
        <filter class="solr.ASCIIFoldingFilterFactory" preserveOriginal="true"/>
        <filter class="solr.ICUFoldingFilterFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>

      <analyzer type="query">
        <charFilter class="solr.HTMLStripCharFilterFactory"/>
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.WordDelimiterFilterFactory"
                generateWordParts="1" generateNumberParts="1"
                splitOnNumerics="1" splitOnCaseChange="1"
                catenateWords="0" catenateNumbers="0" catenateAll="0"
                preserveOriginal="1"/>
        <filter class="solr.ASCIIFoldingFilterFactory" preserveOriginal="true"/>
        <filter class="solr.ICUFoldingFilterFactory"/>
        <!-- Query-time only: expand terms using synonyms.txt. -->
        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
        <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>

    </fieldtype>

    <!-- Neither indexed nor stored. -->
    <fieldtype name="ignored" class="solr.StrField" indexed="false" stored="false" multiValued="true" />
  </types>

</schema>
编辑
data-config.xml 中的实体定义如下:
<!--
  Root entity: one Solr document per product row, joined with its brand row.
  NOTE(review): pk is "id" but deltaImportQuery filters on product_name, and no
  deltaQuery is visible in this excerpt. Confirm how delta imports are meant to
  select changed rows.
  NOTE(review): "select p.*, b.*" returns the columns of both tables; since the
  join uses b.id, there may be two columns named "id" in the result. Verify which
  one ends up mapped to the "id" field.
-->
<entity name="master_products"
        pk="id"
        query="select p.* ,b.* from master_products p ,master_brands b where b.id=p.brand_id"
        deltaImportQuery="SELECT * FROM master_products WHERE product_name='${dataimporter.delta.product_name}' "
        >
  <!-- or b.brand='${dataimporter.delta.brand}' -->
  <field column="product_name" name="name"/>
  <field column="product_description" name="description"/>
  <field column="id" name="id"/>
  <field column="mrp" name="mrp"/>
  <field column="brand" name="brand"/>

  <!--
    Cached child entities.
    The delta queries reference parent columns with the ${entity.column}
    placeholder syntax; the leading "$" is required and the name must not
    contain spaces, otherwise the placeholder is not substituted.
    NOTE(review): cacheImpl is set but no cacheKey/cacheLookup is given, so it is
    unclear how cached rows are matched to the parent row. Confirm and add the
    key attributes if a per-product lookup is intended.
  -->
  <entity name="master_brands"
          query="select * from master_brands"
          deltaImportQuery="select * from master_brands where id = ${master_products.brand_id}"
          processor="SqlEntityProcessor"
          cacheImpl="SortedMapBackedCache">
  </entity>

  <entity name="master_product_categories"
          query="select * from master_product_categories"
          deltaImportQuery="select * from master_product_categories where id = ${master_products.product_category_id}"
          processor="SqlEntityProcessor"
          cacheImpl="SortedMapBackedCache">
    <field column="category" name="category" />
  </entity>
</entity>
编辑 查询如下
http://localhost:8983/solr/myproducts/select?fl=* score&fq=brand:Nikon&fq=mrp:28950*&indent=on&q=name:*"Nikon D3200 (Black) DSLR with AF-S 18-55mm VR Kit Lens"*&wt=json
我希望获得帮助以实现我的目标。您能否指导我创建符合我目的的正确配置?提前致谢。
通配符查询(wildcard queries)是常量评分(constant scoring)的,这意味着它们不会改变匹配文档的分数。您可能需要使用常规查询(而不是通配符查询),才能让不同文档得到有区分度的评分。
Range queries [a TO z], prefix queries a*, and wildcard queries a*b are constant-scoring (all matching documents get an equal score). The scoring factors tf, idf, index boost, and coord are not used. There is no limitation on the number of terms that match (as there was in past versions of Lucene).
fq 子句不影响分数,它们只是过滤结果集。