在弹性搜索中配置分析器
Configuring analyzer in elastic search
我写了下面的程序来理解如何使用弹性搜索来进行全文搜索。在这里,当我搜索单个词时,它可以正常工作,但我想搜索词的组合,但它不起作用。
package in.blogspot.randomcompiler.elastic_search_demo;
import in.blogspot.randomcompiler.elastic_search_impl.Event;
import java.util.Date;
import org.elasticsearch.action.count.CountRequestBuilder;
import org.elasticsearch.action.count.CountResponse;
import org.elasticsearch.action.delete.DeleteResponse;
import org.elasticsearch.action.index.IndexResponse;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.transport.InetSocketTransportAddress;
import org.elasticsearch.index.query.FilterBuilder;
import org.elasticsearch.index.query.FilterBuilders;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import com.fasterxml.jackson.core.JsonProcessingException;
public class ElasticSearchDemo
{
public static void main( String[] args ) throws JsonProcessingException
{
Client client = new TransportClient()
.addTransportAddress(new InetSocketTransportAddress("localhost", 9301));
DeleteResponse deleteResponse1 = client.prepareDelete("chat-data", "event", "1").execute().actionGet();
DeleteResponse deleteResponse2 = client.prepareDelete("chat-data", "event", "2").execute().actionGet();
DeleteResponse deleteResponse3 = client.prepareDelete("chat-data", "event", "3").execute().actionGet();
Event e1 = new Event("LOGIN", new Date(), "Agent1 logged into chat");
String e1Json = e1.prepareJson();
System.out.println("JSON: " + e1Json);
IndexResponse indexResponse1 = client.prepareIndex("chat-data", "event", "1").setSource(e1Json).execute().actionGet();
printIndexResponse("e1", indexResponse1);
Event e2 = new Event("LOGOUT", new Date(), "Agent1 logged out of chat");
String e2Json = e2.prepareJson();
System.out.println("JSON: " + e2Json);
IndexResponse indexResponse2 = client.prepareIndex("chat-data", "event", "2").setSource(e2Json).execute().actionGet();
printIndexResponse("e2", indexResponse2);
Event e3 = new Event("BREAK", new Date(), "Agent1 went on break in the middle of a chat");
String e3Json = e3.prepareJson();
System.out.println("JSON: " + e3Json);
IndexResponse indexResponse3 = client.prepareIndex("chat-data", "event", "3").setSource(e3Json).execute().actionGet();
printIndexResponse("e3", indexResponse3);
FilterBuilder filterBuilder = FilterBuilders.termFilter("value", "break middle");
SearchRequestBuilder searchBuilder = client.prepareSearch();
searchBuilder.setPostFilter(filterBuilder);
CountRequestBuilder countBuilder = client.prepareCount();
countBuilder.setQuery(QueryBuilders.constantScoreQuery(filterBuilder));
CountResponse countResponse1 = countBuilder.execute().actionGet();
System.out.println("HITS: " + countResponse1.getCount());
SearchResponse searchResponse1 = searchBuilder.execute().actionGet();
SearchHits hits = searchResponse1.getHits();
for(int i=0; i<hits.hits().length; i++) {
SearchHit hit = hits.getAt(i);
System.out.println("[" + i + "] " + hit.getId() + " : " +hit.sourceAsString());
}
client.close();
}
private static void printIndexResponse(String description, IndexResponse response) {
System.out.println("Index response for: " + description);
System.out.println("Index name: " + response.getIndex());
System.out.println("Index type: " + response.getType());
System.out.println("Index id: " + response.getId());
System.out.println("Index version: " + response.getVersion());
}
}
我面临的问题是,当我搜索 "break middle" 时,它 return 什么都没有,期望它应该 return 第三个事件。
我知道我需要配置一个不同的分析器而不是默认的分析器来使其正确索引。
有人可以帮助我理解如何做到这一点。一些完整的例子会很棒。
问题是因为您使用了Term过滤器:
FilterBuilder filterBuilder = FilterBuilders.termFilter("value", "break middle");
术语过滤器不分析查询字符串中的数据 - 因此 Elasticsearch 正在寻找确切的字符串 "break middle"。
然而,第三个文档可能已被 ES 分解为如下单独的术语:
Agent1
went
on
break
in
the
middle
of
a
chat
要解决此问题,请使用过滤器或查询来分析您传递的字符串 - 例如使用 Query_String query or Match 查询。
例如:
QueryBuilder qb = QueryBuilders.matchQuery("event", "break middle");
或:
QueryBuilder qb = QueryBuilders.queryString("break middle");
有关详细信息,请参阅 Java API documentation for Elasticsearch。
我写了下面的程序来理解如何使用弹性搜索来进行全文搜索。在这里,当我搜索单个词时,它可以正常工作,但我想搜索词的组合,但它不起作用。
package in.blogspot.randomcompiler.elastic_search_demo;
import in.blogspot.randomcompiler.elastic_search_impl.Event;
import java.util.Date;
import org.elasticsearch.action.count.CountRequestBuilder;
import org.elasticsearch.action.count.CountResponse;
import org.elasticsearch.action.delete.DeleteResponse;
import org.elasticsearch.action.index.IndexResponse;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.transport.InetSocketTransportAddress;
import org.elasticsearch.index.query.FilterBuilder;
import org.elasticsearch.index.query.FilterBuilders;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import com.fasterxml.jackson.core.JsonProcessingException;
public class ElasticSearchDemo
{
public static void main( String[] args ) throws JsonProcessingException
{
Client client = new TransportClient()
.addTransportAddress(new InetSocketTransportAddress("localhost", 9301));
DeleteResponse deleteResponse1 = client.prepareDelete("chat-data", "event", "1").execute().actionGet();
DeleteResponse deleteResponse2 = client.prepareDelete("chat-data", "event", "2").execute().actionGet();
DeleteResponse deleteResponse3 = client.prepareDelete("chat-data", "event", "3").execute().actionGet();
Event e1 = new Event("LOGIN", new Date(), "Agent1 logged into chat");
String e1Json = e1.prepareJson();
System.out.println("JSON: " + e1Json);
IndexResponse indexResponse1 = client.prepareIndex("chat-data", "event", "1").setSource(e1Json).execute().actionGet();
printIndexResponse("e1", indexResponse1);
Event e2 = new Event("LOGOUT", new Date(), "Agent1 logged out of chat");
String e2Json = e2.prepareJson();
System.out.println("JSON: " + e2Json);
IndexResponse indexResponse2 = client.prepareIndex("chat-data", "event", "2").setSource(e2Json).execute().actionGet();
printIndexResponse("e2", indexResponse2);
Event e3 = new Event("BREAK", new Date(), "Agent1 went on break in the middle of a chat");
String e3Json = e3.prepareJson();
System.out.println("JSON: " + e3Json);
IndexResponse indexResponse3 = client.prepareIndex("chat-data", "event", "3").setSource(e3Json).execute().actionGet();
printIndexResponse("e3", indexResponse3);
FilterBuilder filterBuilder = FilterBuilders.termFilter("value", "break middle");
SearchRequestBuilder searchBuilder = client.prepareSearch();
searchBuilder.setPostFilter(filterBuilder);
CountRequestBuilder countBuilder = client.prepareCount();
countBuilder.setQuery(QueryBuilders.constantScoreQuery(filterBuilder));
CountResponse countResponse1 = countBuilder.execute().actionGet();
System.out.println("HITS: " + countResponse1.getCount());
SearchResponse searchResponse1 = searchBuilder.execute().actionGet();
SearchHits hits = searchResponse1.getHits();
for(int i=0; i<hits.hits().length; i++) {
SearchHit hit = hits.getAt(i);
System.out.println("[" + i + "] " + hit.getId() + " : " +hit.sourceAsString());
}
client.close();
}
private static void printIndexResponse(String description, IndexResponse response) {
System.out.println("Index response for: " + description);
System.out.println("Index name: " + response.getIndex());
System.out.println("Index type: " + response.getType());
System.out.println("Index id: " + response.getId());
System.out.println("Index version: " + response.getVersion());
}
}
我面临的问题是,当我搜索 "break middle" 时,它 return 什么都没有,期望它应该 return 第三个事件。
我知道我需要配置一个不同的分析器而不是默认的分析器来使其正确索引。
有人可以帮助我理解如何做到这一点。一些完整的例子会很棒。
问题是因为您使用了Term过滤器:
FilterBuilder filterBuilder = FilterBuilders.termFilter("value", "break middle");
术语过滤器不分析查询字符串中的数据 - 因此 Elasticsearch 正在寻找确切的字符串 "break middle"。
然而,第三个文档可能已被 ES 分解为如下单独的术语:
Agent1
went
on
break
in
the
middle
of
a
chat
要解决此问题,请使用过滤器或查询来分析您传递的字符串 - 例如使用 Query_String query or Match 查询。
例如:
QueryBuilder qb = QueryBuilders.matchQuery("event", "break middle");
或:
QueryBuilder qb = QueryBuilders.queryString("break middle");
有关详细信息,请参阅 Java API documentation for Elasticsearch。