基于文档字段值分布的 Solr 特征(feature)
Solr feature that is based on the distribution of a document value
我今天正在尝试对查询结果进行重排序(re-rank)。
我的 features.json 看起来像这样:
[
{
"name" : "documentRecency",
"class" : "org.apache.solr.ltr.feature.SolrFeature",
"params" : {
"q" : "{!func}recip( ms(NOW,timestamp), 3.16e-11, 1, 1)"
}
},
{
"name" : "textLengthScore",
"class" : "org.apache.solr.ltr.feature.SolrFeature",
"params" : {
"q" : "{!func}recip(rord(scale(textLength, 0, 1)), 1,1000,1000)"
}
},
{
"name" : "numCategoriesScore",
"class" : "org.apache.solr.ltr.feature.SolrFeature",
"params" : {
"q" : "{!func}recip(rord(scale(numCategories, 0, 1)), 1,1000,1000)"
}
},
{
"name" : "numSectionsScore",
"class" : "org.apache.solr.ltr.feature.SolrFeature",
"params" : {
"q" : "{!func}recip( rord(scale(numSections, 0, 1)), 1,1000,1000)"
}
},
{
"name" : "numLinksScore",
"class" : "org.apache.solr.ltr.feature.SolrFeature",
"params" : {
"q" : "{!func}recip( rord(scale(numLinks, 0, 1)), 1,1000,1000)"
}
},
{
"name" : "originalScore",
"class" : "org.apache.solr.ltr.feature.OriginalScoreFeature",
"params" : {}
}
]
我的 model.json 看起来像这样:
{
"class" : "org.apache.solr.ltr.model.LinearModel",
"name" : "myModel",
"features" : [
{ "name" : "documentRecency" },
{ "name" : "textLengthScore" },
{ "name" : "numCategoriesScore" },
{ "name" : "numSectionsScore" },
{ "name" : "numLinksScore" },
{ "name" : "originalScore" }
],
"params" : {
"weights" : {
"documentRecency" : 0.2,
"textLengthScore" : 0.5,
"numCategoriesScore" : 0.3,
"numSectionsScore": 0.6,
"numLinksScore" : 0.4,
"originalScore" : 0.3
}
}
}
对结果进行重排序时出现以下错误:
"java.lang.RuntimeException: Exception from createWeight for SolrFeature [name=textLengthScore, params={q={!func}recip(rord(scale(textLength, 0, 1)), 1,1000,1000)}] Failed to parse feature query."
这个查询有什么问题?
我想先把所有 textLength 的值缩放到 0 到 1 的范围内,再像 documentRecency 那样对它们打分,从而得到 “textLengthScore”。
哦,是的,这是 schema.xml:
<?xml version="1.0" encoding="UTF-8"?>
<schema name="sem" version="1.6">
<uniqueKey>id</uniqueKey>
<fieldType name="string" class="solr.StrField"/>
<fieldType name="longstring" class="solr.TextField">
<analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.SnowballPorterFilterFactory" language="English"/>
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true" tokenizerFactory="solr.StandardTokenizerFactory"/>
</analyzer>
<similarity class="solr.ClassicSimilarityFactory"/>
</fieldType>
<fieldType name="tstamp" class="solr.DatePointField"/>
<fieldType name="number" class="solr.TrieIntField" precisionStep="0" docValues="true"/>
<field name="id" type="string" indexed="true" required="true" stored="true"/>
<field name="title" type="longstring" indexed="true" required="true" stored="true"/>
<field name="timestamp" type="tstamp" indexed="true" required="true" stored="true"/>
<field name="categories" type="longstring" indexed="true" multiValued="true" stored="true"/>
<field name="text" type="longstring" indexed="true" multiValued="true" stored="true"/>
<field name="fullText" type="longstring" indexed="true" stored="true"/>
<field name="links" type="longstring" indexed="true" multiValued="true" stored="true"/>
<field name="textLength" type="number" indexed="true" stored="true"/>
<field name="numCategories" type="number" indexed="true" stored="true"/>
<field name="numLinks" type="number" indexed="true" stored="true"/>
<field name="numSections" type="number" indexed="true" stored="true"/>
</schema>
我认为错误来自 rord():
它只接受一个字段名作为参数,而这里传入的是 scale(...) 这样的函数表达式。
在使用 rord() 或 ord() 之前不需要先对值进行缩放:缩放不会改变它们的字典顺序,因此每个值的序号(ordinal)与不缩放时完全相同。
也就是说,"{!func}recip(rord(textLength), 1,1000,1000)"
应该就可以正常工作。
我今天正在尝试对查询结果进行重排序(re-rank)。
我的 features.json 看起来像这样:
[
{
"name" : "documentRecency",
"class" : "org.apache.solr.ltr.feature.SolrFeature",
"params" : {
"q" : "{!func}recip( ms(NOW,timestamp), 3.16e-11, 1, 1)"
}
},
{
"name" : "textLengthScore",
"class" : "org.apache.solr.ltr.feature.SolrFeature",
"params" : {
"q" : "{!func}recip(rord(scale(textLength, 0, 1)), 1,1000,1000)"
}
},
{
"name" : "numCategoriesScore",
"class" : "org.apache.solr.ltr.feature.SolrFeature",
"params" : {
"q" : "{!func}recip(rord(scale(numCategories, 0, 1)), 1,1000,1000)"
}
},
{
"name" : "numSectionsScore",
"class" : "org.apache.solr.ltr.feature.SolrFeature",
"params" : {
"q" : "{!func}recip( rord(scale(numSections, 0, 1)), 1,1000,1000)"
}
},
{
"name" : "numLinksScore",
"class" : "org.apache.solr.ltr.feature.SolrFeature",
"params" : {
"q" : "{!func}recip( rord(scale(numLinks, 0, 1)), 1,1000,1000)"
}
},
{
"name" : "originalScore",
"class" : "org.apache.solr.ltr.feature.OriginalScoreFeature",
"params" : {}
}
]
我的 model.json 看起来像这样:
{
"class" : "org.apache.solr.ltr.model.LinearModel",
"name" : "myModel",
"features" : [
{ "name" : "documentRecency" },
{ "name" : "textLengthScore" },
{ "name" : "numCategoriesScore" },
{ "name" : "numSectionsScore" },
{ "name" : "numLinksScore" },
{ "name" : "originalScore" }
],
"params" : {
"weights" : {
"documentRecency" : 0.2,
"textLengthScore" : 0.5,
"numCategoriesScore" : 0.3,
"numSectionsScore": 0.6,
"numLinksScore" : 0.4,
"originalScore" : 0.3
}
}
}
对结果进行重排序时出现以下错误:
"java.lang.RuntimeException: Exception from createWeight for SolrFeature [name=textLengthScore, params={q={!func}recip(rord(scale(textLength, 0, 1)), 1,1000,1000)}] Failed to parse feature query."
这个查询有什么问题?我想先把所有 textLength 的值缩放到 0 到 1 的范围内,再像 documentRecency 那样对它们打分,从而得到 “textLengthScore”。
哦,是的,这是 schema.xml:
<?xml version="1.0" encoding="UTF-8"?>
<schema name="sem" version="1.6">
<uniqueKey>id</uniqueKey>
<fieldType name="string" class="solr.StrField"/>
<fieldType name="longstring" class="solr.TextField">
<analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.SnowballPorterFilterFactory" language="English"/>
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true" tokenizerFactory="solr.StandardTokenizerFactory"/>
</analyzer>
<similarity class="solr.ClassicSimilarityFactory"/>
</fieldType>
<fieldType name="tstamp" class="solr.DatePointField"/>
<fieldType name="number" class="solr.TrieIntField" precisionStep="0" docValues="true"/>
<field name="id" type="string" indexed="true" required="true" stored="true"/>
<field name="title" type="longstring" indexed="true" required="true" stored="true"/>
<field name="timestamp" type="tstamp" indexed="true" required="true" stored="true"/>
<field name="categories" type="longstring" indexed="true" multiValued="true" stored="true"/>
<field name="text" type="longstring" indexed="true" multiValued="true" stored="true"/>
<field name="fullText" type="longstring" indexed="true" stored="true"/>
<field name="links" type="longstring" indexed="true" multiValued="true" stored="true"/>
<field name="textLength" type="number" indexed="true" stored="true"/>
<field name="numCategories" type="number" indexed="true" stored="true"/>
<field name="numLinks" type="number" indexed="true" stored="true"/>
<field name="numSections" type="number" indexed="true" stored="true"/>
</schema>
我认为错误来自 rord():
它只接受一个字段名作为参数,而这里传入的是 scale(...) 这样的函数表达式。
在使用 rord() 或 ord() 之前不需要先对值进行缩放:缩放不会改变它们的字典顺序,因此每个值的序号(ordinal)与不缩放时完全相同。
也就是说,"{!func}recip(rord(textLength), 1,1000,1000)"
应该就可以正常工作。