如何搜索 Elasticsearch 索引中附件文档的内容
How to search the content of a document attached to an Elasticsearch index
我已经在 Elasticsearch 中创建了索引:
// Create the "documents" index with a DocUpload mapping whose Document
// property is mapped as an attachment field.
this.client.CreateIndex("documents", index => index
    .Mappings(mappings => mappings
        .Map<DocUpload>(typeMap => typeMap
            .Properties(props => props
                .Attachment(att => att
                    .Name(doc => doc.Document)
                    // The attachment's title sub-field is bound to Title and keeps
                    // term vectors with positions and offsets.
                    .TitleField(title => title
                        .Name(doc => doc.Title)
                        .TermVector(TermVectorOption.WithPositionsOffsets)))))));
附件在建立索引之前已经过 base64 编码,但我无法搜索到任何文档的内容。base64 编码会不会导致这个问题?有人可以帮忙吗?
浏览器返回的响应类似于:
{
"documents": {
"aliases": {},
"mappings": {
"indexdocument": {
"properties": {
"document": {
"type": "attachment",
"fields": {
"content": {
"type": "string"
},
"author": {
"type": "string"
},
"title": {
"type": "string",
"term_vector": "with_positions_offsets"
},
"name": {
"type": "string"
},
"date": {
"type": "date",
"format": "strict_date_optional_time||epoch_millis"
},
"keywords": {
"type": "string"
},
"content_type": {
"type": "string"
},
"content_length": {
"type": "integer"
},
"language": {
"type": "string"
}
}
},
"documentType": {
"type": "string"
},
"id": {
"type": "long"
},
"lastModifiedDate": {
"type": "date",
"format": "strict_date_optional_time||epoch_millis"
},
"location": {
"type": "string"
},
"title": {
"type": "string"
}
}
}
},
"settings": {
"index": {
"creation_date": "1465193502636",
"number_of_shards": "5",
"number_of_replicas": "1",
"uuid": "5kCRvhmsQAGyndkswLhLrg",
"version": {
"created": "2030399"
}
}
},
"warmers": {}
}
}
我通过添加自定义分析器解决了这个问题:
// Token filters used by the custom "full" analyzer, in the order listed.
var fullNameFilters = new List<string> { "lowercase", "snowball" };

// Create the "mydocs" index with a custom analyzer and an attachment mapping
// whose file and title sub-fields both use that analyzer.
client.CreateIndex("mydocs", index => index
    .Settings(settings => settings
        .Analysis(analysis => analysis
            // "full": the standard tokenizer plus the filters declared above.
            .Analyzers(analyzers => analyzers
                .Custom("full", custom => custom
                    .Tokenizer("standard")
                    .Filters(fullNameFilters)))
            // Register an English snowball token filter under the name "snowball".
            .TokenFilters(tokenFilters => tokenFilters
                .Snowball("snowball", stemmer => stemmer
                    .Language(SnowballLanguage.English)))))
    .Mappings(mappings => mappings
        .Map<IndexDocument>(typeMap => typeMap
            .AutoMap()
            .Properties(props => props
                .Nested<Attachment>(nested => nested
                    .Name(doc => doc.File)
                    .AutoMap()))
            // NOTE(review): File is mapped a second time below, as an attachment;
            // confirm which of the two mappings is meant to take effect.
            .Properties(props => props
                .Attachment(att => att
                    .Name(doc => doc.File)
                    .FileField(file => file.Analyzer("full"))
                    .TitleField(title => title
                        .Name(doc => doc.Title)
                        .Analyzer("full")
                        .TermVector(TermVectorOption.WithPositionsOffsets)))))));
我已经在 Elasticsearch 中创建了索引:
// Create the "documents" index with a DocUpload mapping whose Document
// property is mapped as an attachment field.
this.client.CreateIndex("documents", index => index
    .Mappings(mappings => mappings
        .Map<DocUpload>(typeMap => typeMap
            .Properties(props => props
                .Attachment(att => att
                    .Name(doc => doc.Document)
                    // The attachment's title sub-field is bound to Title and keeps
                    // term vectors with positions and offsets.
                    .TitleField(title => title
                        .Name(doc => doc.Title)
                        .TermVector(TermVectorOption.WithPositionsOffsets)))))));
附件在建立索引之前已经过 base64 编码,但我无法搜索到任何文档的内容。base64 编码会不会导致这个问题?有人可以帮忙吗?
浏览器返回的响应类似于:
{
"documents": {
"aliases": {},
"mappings": {
"indexdocument": {
"properties": {
"document": {
"type": "attachment",
"fields": {
"content": {
"type": "string"
},
"author": {
"type": "string"
},
"title": {
"type": "string",
"term_vector": "with_positions_offsets"
},
"name": {
"type": "string"
},
"date": {
"type": "date",
"format": "strict_date_optional_time||epoch_millis"
},
"keywords": {
"type": "string"
},
"content_type": {
"type": "string"
},
"content_length": {
"type": "integer"
},
"language": {
"type": "string"
}
}
},
"documentType": {
"type": "string"
},
"id": {
"type": "long"
},
"lastModifiedDate": {
"type": "date",
"format": "strict_date_optional_time||epoch_millis"
},
"location": {
"type": "string"
},
"title": {
"type": "string"
}
}
}
},
"settings": {
"index": {
"creation_date": "1465193502636",
"number_of_shards": "5",
"number_of_replicas": "1",
"uuid": "5kCRvhmsQAGyndkswLhLrg",
"version": {
"created": "2030399"
}
}
},
"warmers": {}
}
}
我通过添加自定义分析器解决了这个问题:
// Token filters used by the custom "full" analyzer, in the order listed.
var fullNameFilters = new List<string> { "lowercase", "snowball" };

// Create the "mydocs" index with a custom analyzer and an attachment mapping
// whose file and title sub-fields both use that analyzer.
client.CreateIndex("mydocs", index => index
    .Settings(settings => settings
        .Analysis(analysis => analysis
            // "full": the standard tokenizer plus the filters declared above.
            .Analyzers(analyzers => analyzers
                .Custom("full", custom => custom
                    .Tokenizer("standard")
                    .Filters(fullNameFilters)))
            // Register an English snowball token filter under the name "snowball".
            .TokenFilters(tokenFilters => tokenFilters
                .Snowball("snowball", stemmer => stemmer
                    .Language(SnowballLanguage.English)))))
    .Mappings(mappings => mappings
        .Map<IndexDocument>(typeMap => typeMap
            .AutoMap()
            .Properties(props => props
                .Nested<Attachment>(nested => nested
                    .Name(doc => doc.File)
                    .AutoMap()))
            // NOTE(review): File is mapped a second time below, as an attachment;
            // confirm which of the two mappings is meant to take effect.
            .Properties(props => props
                .Attachment(att => att
                    .Name(doc => doc.File)
                    .FileField(file => file.Analyzer("full"))
                    .TitleField(title => title
                        .Name(doc => doc.Title)
                        .Analyzer("full")
                        .TermVector(TermVectorOption.WithPositionsOffsets)))))));