忽略 Elasticsearch 中的空格
Ignore spaces in Elasticsearch
在我的搜索中,我希望过滤请求里的空格(space)字符不是必需的。
例如:
当我过滤 "THE ONE" 时,可以看到相应的文档。
当我输入 "THEONE" 时,我也希望能看到该文档。
这就是我今天构建查询的方式:
boolQueryBuilder.must(QueryBuilders.boolQuery()
.should(QueryBuilders.wildcardQuery("description", "*" +
searchedWord.toLowerCase() + "*"))
.should(QueryBuilders.wildcardQuery("id", "*" +
searchedWord.toUpperCase() + "*"))
.should(QueryBuilders.wildcardQuery("label", "*" +
searchedWord.toUpperCase() + "*"))
.minimumShouldMatch("1"));
我想要添加下面这个过滤器(参见:Writing a space-ignoring autocompleter with ElasticSearch):
"word_joiner": {
"type": "word_delimiter",
"catenate_all": true
}
但我不知道如何使用 API 执行此操作。
有什么想法吗?
谢谢!
编辑:按照@raam86 的建议,我添加了自己的自定义分析器:
{
"index": {
"number_of_shards": 1,
"analysis": {
"filter": {
"word_joiner": {
"type": "word_delimiter",
"catenate_all": true
}
},
"analyzer": {
"custom_analyzer": {
"type": "custom",
"tokenizer": "standard",
"filter": [
"word_joiner"
]
}
}
}
}
}
这是文档:
/**
 * Entity annotated for the "cake" index with type "pa". The index settings are
 * referenced through the settings path "/elasticsearch/config/settings.json".
 */
@Document(indexName = "cake", type = "pa")
@Setting(settingPath = "/elasticsearch/config/settings.json")
public class PaElasticEntity implements Serializable {
// String field declared with the analyzer named "custom_analyzer"
// (presumably the analyzer defined in the referenced settings file; verify).
@Field(type = FieldType.String, analyzer = "custom_analyzer")
private String maker;
}
还是不行...
您需要使用 shingle token filter。下面是一个简单的例子。
1. 使用以下设置创建索引
PUT joinword
{
"settings": {
"analysis": {
"filter": {
"word_joiner": {
"type": "shingle",
"output_unigrams": "true",
"token_separator": ""
}
},
"analyzer": {
"word_join_analyzer": {
"type": "custom",
"tokenizer": "standard",
"filter": [
"lowercase",
"word_joiner"
]
}
}
}
}
}
2. 检查分析器是否按预期工作
GET joinword/_analyze?pretty
{
"analyzer": "word_join_analyzer",
"text": "ONE TWO"
}
输出:
{
"tokens" : [ {
"token" : "one",
"start_offset" : 0,
"end_offset" : 3,
"type" : "<ALPHANUM>",
"position" : 0
}, {
"token" : "onetwo",
"start_offset" : 0,
"end_offset" : 7,
"type" : "shingle",
"position" : 0
}, {
"token" : "two",
"start_offset" : 4,
"end_offset" : 7,
"type" : "<ALPHANUM>",
"position" : 1
} ]
}
现在您可以通过 one、two 或 onetwo 找到该文档。搜索将不区分大小写。
可运行的 Spring 示例
完整项目可在 GitHub 上获取。
实体:
/**
 * Entity annotated for the "document" index and "document" type, with automatic
 * index creation switched off ({@code createIndex = false}). Index settings are
 * referenced through "elasticsearch/document_index_settings.json".
 */
@Document(indexName = "document", type = "document", createIndex = false)
@Setting(settingPath = "elasticsearch/document_index_settings.json")
public class DocumentES {

    /** Document identifier. */
    @Id
    private String id;

    /** Title, declared with the analyzer named "word_join_analyzer". */
    @Field(type = String, analyzer = "word_join_analyzer")
    private String title;

    /** Creates an entity with neither id nor title set. */
    public DocumentES() {
    }

    /**
     * Creates an entity with the given title and no id.
     *
     * @param title the title to store
     */
    public DocumentES(String title) {
        this.title = title;
    }

    /** @return the document identifier, possibly {@code null} */
    public String getId() {
        return id;
    }

    /** @param id the document identifier */
    public void setId(String id) {
        this.id = id;
    }

    /** @return the title, possibly {@code null} */
    public String getTitle() {
        return title;
    }

    /** @param title the title to store */
    public void setTitle(String title) {
        this.title = title;
    }

    /** Renders the entity as {@code DocumentES{id='...', title='...'}}. */
    @Override
    public String toString() {
        return new StringBuilder("DocumentES{id='")
                .append(id)
                .append("', title='")
                .append(title)
                .append("'}")
                .toString();
    }
}
主类:
/**
 * Spring Boot demo application: creates the index for {@link DocumentES}, indexes a
 * single document titled "ONE TWO" and then searches the title field for "ONEtWO",
 * printing every hit to standard output.
 */
@SpringBootApplication
@EnableConfigurationProperties(value = {ElasticsearchProperties.class})
public class Application implements CommandLineRunner {

    @Autowired
    ElasticsearchTemplate elasticsearchTemplate;

    /**
     * Starts the Spring application.
     *
     * @param args command-line arguments, forwarded to Spring Boot
     */
    public static void main(String[] args) {
        // Forward the arguments so command-line options and property overrides
        // reach Spring Boot and the CommandLineRunner instead of being dropped.
        SpringApplication.run(Application.class, args);
    }

    /**
     * Creates the index and mapping, indexes one sample document and runs a match
     * query against it.
     *
     * @param args command-line arguments (not used by the demo)
     * @throws Exception if the thread is interrupted while sleeping or an
     *                   Elasticsearch operation fails
     */
    @Override
    public void run(String... args) throws Exception {
        // The entity declares createIndex = false, so the index and its mapping
        // are created explicitly here.
        elasticsearchTemplate.createIndex(DocumentES.class);
        elasticsearchTemplate.putMapping(DocumentES.class);

        elasticsearchTemplate.index(new IndexQueryBuilder()
                .withIndexName("document")
                .withType("document")
                .withObject(new DocumentES("ONE TWO"))
                .build());

        // Presumably gives Elasticsearch time to make the new document searchable.
        // NOTE(review): an explicit index refresh would be more deterministic than
        // a fixed sleep - confirm which refresh API this library version offers.
        Thread.sleep(2000);

        // Query text has no space and mixed case, while the indexed title is "ONE TWO".
        NativeSearchQuery query = new NativeSearchQueryBuilder()
                .withIndices("document")
                .withTypes("document")
                .withQuery(matchQuery("title", "ONEtWO"))
                .build();

        List<DocumentES> result = elasticsearchTemplate.queryForList(query, DocumentES.class);
        result.forEach(System.out::println);
    }
}
在我的搜索中,我希望过滤请求里的空格(space)字符不是必需的。
例如:
当我过滤 "THE ONE" 时,可以看到相应的文档。
当我输入 "THEONE" 时,我也希望能看到该文档。
这就是我今天构建查询的方式:
boolQueryBuilder.must(QueryBuilders.boolQuery()
.should(QueryBuilders.wildcardQuery("description", "*" +
searchedWord.toLowerCase() + "*"))
.should(QueryBuilders.wildcardQuery("id", "*" +
searchedWord.toUpperCase() + "*"))
.should(QueryBuilders.wildcardQuery("label", "*" +
searchedWord.toUpperCase() + "*"))
.minimumShouldMatch("1"));
我想要添加下面这个过滤器(参见:Writing a space-ignoring autocompleter with ElasticSearch):
"word_joiner": {
"type": "word_delimiter",
"catenate_all": true
}
但我不知道如何使用 API 执行此操作。
有什么想法吗?
谢谢!
编辑:按照@raam86 的建议,我添加了自己的自定义分析器:
{
"index": {
"number_of_shards": 1,
"analysis": {
"filter": {
"word_joiner": {
"type": "word_delimiter",
"catenate_all": true
}
},
"analyzer": {
"custom_analyzer": {
"type": "custom",
"tokenizer": "standard",
"filter": [
"word_joiner"
]
}
}
}
}
}
这是文档:
/**
 * Entity annotated for the "cake" index with type "pa". The index settings are
 * referenced through the settings path "/elasticsearch/config/settings.json".
 */
@Document(indexName = "cake", type = "pa")
@Setting(settingPath = "/elasticsearch/config/settings.json")
public class PaElasticEntity implements Serializable {
// String field declared with the analyzer named "custom_analyzer"
// (presumably the analyzer defined in the referenced settings file; verify).
@Field(type = FieldType.String, analyzer = "custom_analyzer")
private String maker;
}
还是不行...
您需要使用 shingle token filter。下面是一个简单的例子。
1. 使用以下设置创建索引
PUT joinword
{
"settings": {
"analysis": {
"filter": {
"word_joiner": {
"type": "shingle",
"output_unigrams": "true",
"token_separator": ""
}
},
"analyzer": {
"word_join_analyzer": {
"type": "custom",
"tokenizer": "standard",
"filter": [
"lowercase",
"word_joiner"
]
}
}
}
}
}
2. 检查分析器是否按预期工作
GET joinword/_analyze?pretty
{
"analyzer": "word_join_analyzer",
"text": "ONE TWO"
}
输出:
{
"tokens" : [ {
"token" : "one",
"start_offset" : 0,
"end_offset" : 3,
"type" : "<ALPHANUM>",
"position" : 0
}, {
"token" : "onetwo",
"start_offset" : 0,
"end_offset" : 7,
"type" : "shingle",
"position" : 0
}, {
"token" : "two",
"start_offset" : 4,
"end_offset" : 7,
"type" : "<ALPHANUM>",
"position" : 1
} ]
}
现在您可以通过 one、two 或 onetwo 找到该文档。搜索将不区分大小写。
可运行的 Spring 示例
完整项目可在 GitHub 上获取。
实体:
/**
 * Entity annotated for the "document" index and "document" type, with automatic
 * index creation switched off ({@code createIndex = false}). Index settings are
 * referenced through "elasticsearch/document_index_settings.json".
 */
@Document(indexName = "document", type = "document", createIndex = false)
@Setting(settingPath = "elasticsearch/document_index_settings.json")
public class DocumentES {

    /** Document identifier. */
    @Id
    private String id;

    /** Title, declared with the analyzer named "word_join_analyzer". */
    @Field(type = String, analyzer = "word_join_analyzer")
    private String title;

    /** Creates an entity with neither id nor title set. */
    public DocumentES() {
    }

    /**
     * Creates an entity with the given title and no id.
     *
     * @param title the title to store
     */
    public DocumentES(String title) {
        this.title = title;
    }

    /** @return the document identifier, possibly {@code null} */
    public String getId() {
        return id;
    }

    /** @param id the document identifier */
    public void setId(String id) {
        this.id = id;
    }

    /** @return the title, possibly {@code null} */
    public String getTitle() {
        return title;
    }

    /** @param title the title to store */
    public void setTitle(String title) {
        this.title = title;
    }

    /** Renders the entity as {@code DocumentES{id='...', title='...'}}. */
    @Override
    public String toString() {
        return new StringBuilder("DocumentES{id='")
                .append(id)
                .append("', title='")
                .append(title)
                .append("'}")
                .toString();
    }
}
主类:
/**
 * Spring Boot demo application: creates the index for {@link DocumentES}, indexes a
 * single document titled "ONE TWO" and then searches the title field for "ONEtWO",
 * printing every hit to standard output.
 */
@SpringBootApplication
@EnableConfigurationProperties(value = {ElasticsearchProperties.class})
public class Application implements CommandLineRunner {

    @Autowired
    ElasticsearchTemplate elasticsearchTemplate;

    /**
     * Starts the Spring application.
     *
     * @param args command-line arguments, forwarded to Spring Boot
     */
    public static void main(String[] args) {
        // Forward the arguments so command-line options and property overrides
        // reach Spring Boot and the CommandLineRunner instead of being dropped.
        SpringApplication.run(Application.class, args);
    }

    /**
     * Creates the index and mapping, indexes one sample document and runs a match
     * query against it.
     *
     * @param args command-line arguments (not used by the demo)
     * @throws Exception if the thread is interrupted while sleeping or an
     *                   Elasticsearch operation fails
     */
    @Override
    public void run(String... args) throws Exception {
        // The entity declares createIndex = false, so the index and its mapping
        // are created explicitly here.
        elasticsearchTemplate.createIndex(DocumentES.class);
        elasticsearchTemplate.putMapping(DocumentES.class);

        elasticsearchTemplate.index(new IndexQueryBuilder()
                .withIndexName("document")
                .withType("document")
                .withObject(new DocumentES("ONE TWO"))
                .build());

        // Presumably gives Elasticsearch time to make the new document searchable.
        // NOTE(review): an explicit index refresh would be more deterministic than
        // a fixed sleep - confirm which refresh API this library version offers.
        Thread.sleep(2000);

        // Query text has no space and mixed case, while the indexed title is "ONE TWO".
        NativeSearchQuery query = new NativeSearchQueryBuilder()
                .withIndices("document")
                .withTypes("document")
                .withQuery(matchQuery("title", "ONEtWO"))
                .build();

        List<DocumentES> result = elasticsearchTemplate.queryForList(query, DocumentES.class);
        result.forEach(System.out::println);
    }
}