ElasticSearch 2.x 属性映射忽略 "NotAnalyzed"
ElasticSearch 2.x attribute mapping ignores "NotAnalyzed"
我正在尝试将我的应用程序从 ElasticSearch NEST 1.7 升级到 2.4。基于属性的映射看起来应该可以正常工作,但实际上并没有(完全)生效。我有一个这样的模型类:
/// <summary>
/// Document model whose Elasticsearch mapping is declared through NEST attributes
/// and which is additionally annotated as a data contract.
/// </summary>
// NOTE(review): IdProperty names "Id", but no Id member is visible in this snippet - confirm it exists elsewhere.
[DataContract, ElasticsearchType(IdProperty = "Id")]
public class Series
{
    /// <summary>
    /// Text mapped as an analyzed string field that uses the "custom_en" analyzer.
    /// </summary>
    [DataMember, String(Analyzer = "custom_en", Index = FieldIndexOption.Analyzed)]
    public string Description { get; set; }

    /// <summary>
    /// Set of <c>Role</c> values mapped as a not-analyzed string field.
    /// </summary>
    // NOTE(review): Role is declared outside this snippet; presumably it serializes as a string - confirm.
    [DataMember, String(Index = FieldIndexOption.NotAnalyzed)]
    public HashSet<Role> ReleasableTo { get; set; }
}
在 NEST 1.x 中,等效的声明可以正常工作,对该字段的 term 查询会返回我期望的结果。升级后查询不到任何结果,于是我检查了映射,惊讶地发现 Index = FieldIndexOption.NotAnalyzed
没有生效。我生成的映射是这样的:
"properties" : {
"description" : {
"type": "string"
},
"releasableTo" : {
"type": "string"
}
}
也就是说,无论是我设置了自定义分析器的字段,还是我不需要分析的字段,都没有被正确标记。
这是我用来初始化一切的代码:
// Describe the index: attribute-based mapping for Series, plus the analysis
// settings that define the "custom_en" analyzer referenced by that mapping.
var indexDescriptor = new CreateIndexDescriptor(DefaultIndex)
    .Mappings(ms => ms
        .Map<Series>(m => m.AutoMap())
    );

indexDescriptor.Settings(s => s
    .NumberOfShards(3)
    .NumberOfReplicas(2)
    .Analysis(a => a
        .CharFilters(c => c.Mapping("&_to_and", mf => mf.Mappings("&=> and ")))
        .TokenFilters(t => t.Stop("en_stopwords", tf => tf.StopWords(new StopWords(stopwords)).IgnoreCase()))
        .Analyzers(z => z
            .Custom("custom_en", ca => ca
                .CharFilters("html_strip", "&_to_and")
                .Tokenizer("standard")
                .Filters("lowercase", "en_stopwords")
            )
        )
    )
);

// Inspect the response instead of discarding it: an invalid response means the
// index (and therefore the mapping) was not created as described above.
var createIndexResponse = client.CreateIndex(indexDescriptor);
if (!createIndexResponse.IsValid)
{
    throw new System.InvalidOperationException(
        "Creating the index failed: " + createIndexResponse.DebugInformation);
}
注意:client
是 elasticsearch 客户端。
我知道 DataContract
属性并不严格适用于 ElasticSearch,但我还需要将这些对象序列化到磁盘进行处理。使用 Nest 1.x 没有冲突,因此没有造成任何问题。
我并不担心分析器的创建,我担心的是映射忽略了除类型之外的所有更具体的设置。
如何让 Nest 2.x 遵守属性中的附加信息,这样我就不必在声明映射时手动映射它们?
事实证明,映射的问题与同时映射的其他类型有关:我没有检查创建索引时返回的无效响应。排查过程非常令人沮丧,但现在映射已经可以正常工作了。
我不确定这是否是一个错字,但是你的属性类型是 Series
但你正在映射一个类型 Service
.
我无法使用 NEST 2.5.0 重现您所看到的内容。这是一个完整的例子
/// <summary>
/// Builds the create-index request for <see cref="Series"/> and writes every
/// request/response pair handled by the client to the console.
/// </summary>
void Main()
{
    var indexName = "default-index";
    var nodePool = new SingleNodeConnectionPool(new Uri("http://localhost:9200"));

    // An InMemoryConnection is supplied as the connection for this client.
    var settings = new ConnectionSettings(nodePool, new InMemoryConnection())
        .DefaultIndex(indexName)
        .PrettyJson()
        .DisableDirectStreaming()
        .OnRequestCompleted(call =>
        {
            // Request: method and URI, followed by the body when one is present.
            var requestLine = $"{call.HttpMethod} {call.Uri}";
            Console.WriteLine(call.RequestBodyInBytes == null
                ? requestLine
                : requestLine + " \n" + Encoding.UTF8.GetString(call.RequestBodyInBytes));
            Console.WriteLine();

            // Response: status code, the body when one is present, then a 30-dash separator.
            var responseBody = call.ResponseBodyInBytes == null
                ? string.Empty
                : Encoding.UTF8.GetString(call.ResponseBodyInBytes) + "\n";
            Console.WriteLine($"Status: {call.HttpStatusCode}\n{responseBody}{new string('-', 30)}\n");
        });

    var client = new ElasticClient(settings);
    var stopwordsName = "stopwords";

    // Attribute-based mapping for Series first, then the index and analysis settings
    // that define the "custom_en" analyzer used by that mapping.
    var createIndex = new CreateIndexDescriptor(indexName)
        .Mappings(ms => ms.Map<Series>(m => m.AutoMap()))
        .Settings(s => s
            .NumberOfShards(3)
            .NumberOfReplicas(2)
            .Analysis(a => a
                .CharFilters(c => c.Mapping("&_to_and", mf => mf.Mappings("&=> and ")))
                .TokenFilters(t => t.Stop("en_stopwords", tf => tf.StopWords(new StopWords(stopwordsName)).IgnoreCase()))
                .Analyzers(z => z.Custom("custom_en", ca => ca
                    .CharFilters("html_strip", "&_to_and")
                    .Tokenizer("standard")
                    .Filters("lowercase", "en_stopwords")))));

    client.CreateIndex(createIndex);
}
/// <summary>
/// Document model whose Elasticsearch mapping is declared through NEST attributes
/// and which is additionally annotated as a data contract.
/// </summary>
// NOTE(review): IdProperty names "Id", but no Id member is visible in this snippet - confirm it exists elsewhere.
[DataContract, ElasticsearchType(IdProperty = "Id")]
public class Series
{
    /// <summary>
    /// Text mapped as an analyzed string field that uses the "custom_en" analyzer.
    /// </summary>
    [DataMember, String(Analyzer = "custom_en", Index = FieldIndexOption.Analyzed)]
    public string Description { get; set; }

    /// <summary>
    /// Set of <c>Role</c> values mapped as a not-analyzed string field.
    /// </summary>
    // NOTE(review): Role is declared outside this snippet; presumably it serializes as a string - confirm.
    [DataMember, String(Index = FieldIndexOption.NotAnalyzed)]
    public HashSet<Role> ReleasableTo { get; set; }
}
这里使用了 InMemoryConnection
,因此不会向 Elasticsearch 发出任何实际请求(去掉它即可真正发送请求)。创建索引的请求如下所示
{
"settings": {
"index.number_of_replicas": 2,
"index.number_of_shards": 3,
"analysis": {
"analyzer": {
"custom_en": {
"type": "custom",
"char_filter": [
"html_strip",
"&_to_and"
],
"filter": [
"lowercase",
"en_stopwords"
],
"tokenizer": "standard"
}
},
"char_filter": {
"&_to_and": {
"type": "mapping",
"mappings": [
"&=> and "
]
}
},
"filter": {
"en_stopwords": {
"type": "stop",
"stopwords": "stopwords",
"ignore_case": true
}
}
}
},
"mappings": {
"series": {
"properties": {
"description": {
"type": "string",
"index": "analyzed",
"analyzer": "custom_en"
},
"releasableTo": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}
其中包含了与属性相对应的映射。请记住,如果索引已经存在,映射更改将不会被应用,这种情况下您需要先删除索引再重新创建。
我正在尝试将我的应用程序从 ElasticSearch NEST 1.7 升级到 2.4。基于属性的映射看起来应该可以正常工作,但实际上并没有(完全)生效。我有一个这样的模型类:
/// <summary>
/// Document model whose Elasticsearch mapping is declared through NEST attributes
/// and which is additionally annotated as a data contract.
/// </summary>
// NOTE(review): IdProperty names "Id", but no Id member is visible in this snippet - confirm it exists elsewhere.
[DataContract, ElasticsearchType(IdProperty = "Id")]
public class Series
{
    /// <summary>
    /// Text mapped as an analyzed string field that uses the "custom_en" analyzer.
    /// </summary>
    [DataMember, String(Analyzer = "custom_en", Index = FieldIndexOption.Analyzed)]
    public string Description { get; set; }

    /// <summary>
    /// Set of <c>Role</c> values mapped as a not-analyzed string field.
    /// </summary>
    // NOTE(review): Role is declared outside this snippet; presumably it serializes as a string - confirm.
    [DataMember, String(Index = FieldIndexOption.NotAnalyzed)]
    public HashSet<Role> ReleasableTo { get; set; }
}
在 NEST 1.x 中,等效的声明可以正常工作,对该字段的 term 查询会返回我期望的结果。升级后查询不到任何结果,于是我检查了映射,惊讶地发现 Index = FieldIndexOption.NotAnalyzed
没有生效。我生成的映射是这样的:
"properties" : {
"description" : {
"type": "string"
},
"releasableTo" : {
"type": "string"
}
}
也就是说,无论是我设置了自定义分析器的字段,还是我不需要分析的字段,都没有被正确标记。
这是我用来初始化一切的代码:
// Describe the index: attribute-based mapping for Series, plus the analysis
// settings that define the "custom_en" analyzer referenced by that mapping.
var indexDescriptor = new CreateIndexDescriptor(DefaultIndex)
    .Mappings(ms => ms
        .Map<Series>(m => m.AutoMap())
    );

indexDescriptor.Settings(s => s
    .NumberOfShards(3)
    .NumberOfReplicas(2)
    .Analysis(a => a
        .CharFilters(c => c.Mapping("&_to_and", mf => mf.Mappings("&=> and ")))
        .TokenFilters(t => t.Stop("en_stopwords", tf => tf.StopWords(new StopWords(stopwords)).IgnoreCase()))
        .Analyzers(z => z
            .Custom("custom_en", ca => ca
                .CharFilters("html_strip", "&_to_and")
                .Tokenizer("standard")
                .Filters("lowercase", "en_stopwords")
            )
        )
    )
);

// Inspect the response instead of discarding it: an invalid response means the
// index (and therefore the mapping) was not created as described above.
var createIndexResponse = client.CreateIndex(indexDescriptor);
if (!createIndexResponse.IsValid)
{
    throw new System.InvalidOperationException(
        "Creating the index failed: " + createIndexResponse.DebugInformation);
}
注意:client
是 elasticsearch 客户端。
我知道 DataContract
属性并不严格适用于 ElasticSearch,但我还需要将这些对象序列化到磁盘进行处理。使用 Nest 1.x 没有冲突,因此没有造成任何问题。
我并不担心分析器的创建,我担心的是映射忽略了除类型之外的所有更具体的设置。
如何让 Nest 2.x 遵守属性中的附加信息,这样我就不必在声明映射时手动映射它们?
事实证明,映射的问题与同时映射的其他类型有关:我没有检查创建索引时返回的无效响应。排查过程非常令人沮丧,但现在映射已经可以正常工作了。
我不确定这是否是一个错字,但是你的属性类型是 Series
但你正在映射一个类型 Service
.
我无法使用 NEST 2.5.0 重现您所看到的内容。这是一个完整的例子
/// <summary>
/// Builds the create-index request for <see cref="Series"/> and writes every
/// request/response pair handled by the client to the console.
/// </summary>
void Main()
{
    var indexName = "default-index";
    var nodePool = new SingleNodeConnectionPool(new Uri("http://localhost:9200"));

    // An InMemoryConnection is supplied as the connection for this client.
    var settings = new ConnectionSettings(nodePool, new InMemoryConnection())
        .DefaultIndex(indexName)
        .PrettyJson()
        .DisableDirectStreaming()
        .OnRequestCompleted(call =>
        {
            // Request: method and URI, followed by the body when one is present.
            var requestLine = $"{call.HttpMethod} {call.Uri}";
            Console.WriteLine(call.RequestBodyInBytes == null
                ? requestLine
                : requestLine + " \n" + Encoding.UTF8.GetString(call.RequestBodyInBytes));
            Console.WriteLine();

            // Response: status code, the body when one is present, then a 30-dash separator.
            var responseBody = call.ResponseBodyInBytes == null
                ? string.Empty
                : Encoding.UTF8.GetString(call.ResponseBodyInBytes) + "\n";
            Console.WriteLine($"Status: {call.HttpStatusCode}\n{responseBody}{new string('-', 30)}\n");
        });

    var client = new ElasticClient(settings);
    var stopwordsName = "stopwords";

    // Attribute-based mapping for Series first, then the index and analysis settings
    // that define the "custom_en" analyzer used by that mapping.
    var createIndex = new CreateIndexDescriptor(indexName)
        .Mappings(ms => ms.Map<Series>(m => m.AutoMap()))
        .Settings(s => s
            .NumberOfShards(3)
            .NumberOfReplicas(2)
            .Analysis(a => a
                .CharFilters(c => c.Mapping("&_to_and", mf => mf.Mappings("&=> and ")))
                .TokenFilters(t => t.Stop("en_stopwords", tf => tf.StopWords(new StopWords(stopwordsName)).IgnoreCase()))
                .Analyzers(z => z.Custom("custom_en", ca => ca
                    .CharFilters("html_strip", "&_to_and")
                    .Tokenizer("standard")
                    .Filters("lowercase", "en_stopwords")))));

    client.CreateIndex(createIndex);
}
/// <summary>
/// Document model whose Elasticsearch mapping is declared through NEST attributes
/// and which is additionally annotated as a data contract.
/// </summary>
// NOTE(review): IdProperty names "Id", but no Id member is visible in this snippet - confirm it exists elsewhere.
[DataContract, ElasticsearchType(IdProperty = "Id")]
public class Series
{
    /// <summary>
    /// Text mapped as an analyzed string field that uses the "custom_en" analyzer.
    /// </summary>
    [DataMember, String(Analyzer = "custom_en", Index = FieldIndexOption.Analyzed)]
    public string Description { get; set; }

    /// <summary>
    /// Set of <c>Role</c> values mapped as a not-analyzed string field.
    /// </summary>
    // NOTE(review): Role is declared outside this snippet; presumably it serializes as a string - confirm.
    [DataMember, String(Index = FieldIndexOption.NotAnalyzed)]
    public HashSet<Role> ReleasableTo { get; set; }
}
这里使用了 InMemoryConnection
,因此不会向 Elasticsearch 发出任何实际请求(去掉它即可真正发送请求)。创建索引的请求如下所示
{
"settings": {
"index.number_of_replicas": 2,
"index.number_of_shards": 3,
"analysis": {
"analyzer": {
"custom_en": {
"type": "custom",
"char_filter": [
"html_strip",
"&_to_and"
],
"filter": [
"lowercase",
"en_stopwords"
],
"tokenizer": "standard"
}
},
"char_filter": {
"&_to_and": {
"type": "mapping",
"mappings": [
"&=> and "
]
}
},
"filter": {
"en_stopwords": {
"type": "stop",
"stopwords": "stopwords",
"ignore_case": true
}
}
}
},
"mappings": {
"series": {
"properties": {
"description": {
"type": "string",
"index": "analyzed",
"analyzer": "custom_en"
},
"releasableTo": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}
其中包含了与属性相对应的映射。请记住,如果索引已经存在,映射更改将不会被应用,这种情况下您需要先删除索引再重新创建。