Hibernate Search 没有索引电子邮件地址?
Hibernate Search not indexing email address?
我想使用 Hibernate Search 对实体中的电子邮件地址进行全文搜索。
给定以下具有索引字段 "email" 的实体 "Person":
Person.groovy
package com.example
import javax.persistence.Entity
import javax.persistence.GeneratedValue
import javax.persistence.GenerationType
import javax.persistence.Id
import org.hibernate.search.annotations.Field
import org.hibernate.search.annotations.Indexed
/**
 * JPA entity describing a person; annotated so that Hibernate Search indexes it.
 */
@Indexed
@Entity
class Person {

    /** Primary key, generated with the AUTO strategy. */
    @Id
    @GeneratedValue(strategy = GenerationType.AUTO)
    Long id

    /** E-mail address, declared as an index field with the default {@code @Field} settings. */
    @Field
    String email
}
并给出存储库
SearchRepository.groovy
package com.example
import javax.persistence.EntityManager
import org.apache.lucene.search.Query
import org.hibernate.search.jpa.FullTextEntityManager
import org.hibernate.search.jpa.Search
import org.hibernate.search.query.dsl.QueryBuilder
import org.springframework.beans.factory.annotation.Autowired
import org.springframework.stereotype.Repository
/**
 * Repository that runs Hibernate Search full-text queries against {@link Person}.
 */
@Repository
class SearchRepository {

    @Autowired
    EntityManager entityManager

    /**
     * Wraps the injected {@link EntityManager} in a {@link FullTextEntityManager}.
     */
    FullTextEntityManager getFullTextEntityManager() {
        Search.getFullTextEntityManager(entityManager)
    }

    /**
     * Finds people whose indexed {@code email} field matches the given text as a prefix.
     *
     * The text is lower-cased and suffixed with {@code *} before it is handed to a
     * wildcard keyword query on the {@code email} field.
     *
     * @param searchText text the e-mail should start with
     * @return the result list of the full-text query
     */
    List<Person> findPeople(String searchText) {
        // Locale.ROOT keeps the lower-casing independent of the JVM default locale
        // (under a Turkish locale 'I' would otherwise become a dotless 'ı' and no
        // longer match the terms written to the index).
        String pattern = searchText.toLowerCase(Locale.ROOT) + '*'

        QueryBuilder qb = fullTextEntityManager.searchFactory
                .buildQueryBuilder()
                .forEntity(Person)
                .get()

        Query query = qb.keyword()
                .wildcard()
                .onField('email')
                .matching(pattern)
                .createQuery()

        javax.persistence.Query jpaQuery =
                fullTextEntityManager.createFullTextQuery(query, Person)
        jpaQuery.resultList
    }
}
然后以下测试失败:
SearchWildcardTest.groovy
package com.example
import javax.persistence.EntityManager
import org.hibernate.search.jpa.FullTextEntityManager
import org.hibernate.search.jpa.Search
import org.junit.Test
import org.junit.runner.RunWith
import org.springframework.beans.factory.annotation.Autowired
import org.springframework.boot.test.SpringApplicationConfiguration
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner
import org.springframework.transaction.annotation.Transactional
/**
 * Integration test: stores a {@link Person}, runs the indexer and then searches
 * for that person by e-mail through {@link SearchRepository}.
 */
@RunWith(SpringJUnit4ClassRunner)
@SpringApplicationConfiguration(classes = HibernateSearchWildcardApplication)
@Transactional
class SearchWildcardTest {

    @Autowired
    SearchRepository searchRepo

    @Autowired
    PersonRepository personRepo

    @Autowired
    EntityManager em

    /** Full-text wrapper around the injected entity manager. */
    FullTextEntityManager getFullTextEntityManager() {
        return Search.getFullTextEntityManager(em)
    }

    @Test
    void findTeamsByNameWithWildcard() {
        Person saved = personRepo.save(new Person(email: 'foo@bar.com'))

        // Run the indexer to completion, then flush to the indexes before searching.
        getFullTextEntityManager().createIndexer().startAndWait()
        getFullTextEntityManager().flushToIndexes()

        List<Person> hits = searchRepo.findPeople('foo@bar.com')
        assert hits.contains(saved) // this assertion fails! Why?
    }
}
PersonRepository.groovy
package com.example
import org.springframework.data.repository.CrudRepository
/**
 * Spring Data CRUD repository for {@link Person} entities with {@code Long} ids.
 */
interface PersonRepository extends CrudRepository<Person, Long> {
}
build.gradle
// Build-script classpath: the Gradle plugins applied further down are resolved from here.
buildscript {
ext {
// Interpolated into the spring-boot-gradle-plugin coordinate below.
springBootVersion = '1.2.7.RELEASE'
}
repositories {
mavenCentral()
}
dependencies {
classpath("org.springframework.boot:spring-boot-gradle-plugin:${springBootVersion}")
classpath('io.spring.gradle:dependency-management-plugin:0.5.2.RELEASE')
}
}
// Plugins applied to this project.
apply plugin: 'groovy'
apply plugin: 'eclipse'
apply plugin: 'spring-boot'
apply plugin: 'io.spring.dependency-management'
// Name and version of the produced jar.
jar {
baseName = 'hibernate-search-email'
version = '0.0.1-SNAPSHOT'
}
// Source and target compatibility are both set to 1.8.
sourceCompatibility = 1.8
targetCompatibility = 1.8
// Repository used to resolve the project dependencies below.
repositories {
mavenCentral()
}
dependencies {
// No versions are given for the Spring Boot starters, groovy and h2;
// presumably they come from the dependency-management plugin - verify.
compile('org.springframework.boot:spring-boot-starter-data-jpa')
compile('org.codehaus.groovy:groovy')
// Hibernate Search is pinned explicitly.
compile('org.hibernate:hibernate-search:5.3.0.Final')
testCompile('com.h2database:h2')
testCompile('org.springframework.boot:spring-boot-starter-test')
}
// Wrapper task configured for Gradle 2.8.
task wrapper(type: Wrapper) {
gradleVersion = '2.8'
}
以下是运行测试后,Luke 从生成的 Lucene 索引中显示的内容:
在我看来,电子邮件地址 "foo@bar.com" 并没有被完整地存入索引,而是被拆分成了两个字符串 "foo" 和 "bar.com"。
Hibernate Search 官网的官方 "Getting started" 指南中写道:
[...] The standard tokenizer splits words at punctuation characters and hyphens while keeping email addresses and internet hostnames intact. It is a good general purpose tokenizer. [...]
我一定是这里遗漏了一些东西,但没能弄明白。
我的问题:
- 为什么我的代码没有索引完整的电子邮件地址?
- 如何实现对地址进行索引以便测试通过?
文档似乎没有正确反映底层 Lucene API 的变化。
[K]eeping email addresses and internet hostnames intact...
这对于旧版的 StandardTokenizer 来说曾经是正确的,但此后 Lucene 一侧已做了更改。现在这种行为由 ClassicTokenizer 提供。
所以下面的配置应该能满足您的需求:
// Declares the "emailanalyzer" definition: classic tokenizer followed by a lower-case filter.
@Entity
@Indexed
@AnalyzerDef(
    name = "emailanalyzer",
    tokenizer = @TokenizerDef(factory = ClassicTokenizerFactory.class),
    filters = @TokenFilterDef(factory = LowerCaseFilterFactory.class)
)
class Person {
    // ...

    // Index this field with the "emailanalyzer" definition declared on the class.
    @Field
    @Analyzer(definition = "emailanalyzer")
    String email;
}
请注意,此配置同样会进行小写转换(lower-casing)。我们将相应地调整 HSEARCH 文档,感谢您发现这一点!
我想使用 Hibernate Search 对实体中的电子邮件地址进行全文搜索。
给定以下具有索引字段 "email" 的实体 "Person":
Person.groovy
package com.example
import javax.persistence.Entity
import javax.persistence.GeneratedValue
import javax.persistence.GenerationType
import javax.persistence.Id
import org.hibernate.search.annotations.Field
import org.hibernate.search.annotations.Indexed
/**
 * JPA entity describing a person; annotated so that Hibernate Search indexes it.
 */
@Indexed
@Entity
class Person {

    /** Primary key, generated with the AUTO strategy. */
    @Id
    @GeneratedValue(strategy = GenerationType.AUTO)
    Long id

    /** E-mail address, declared as an index field with the default {@code @Field} settings. */
    @Field
    String email
}
并给出存储库
SearchRepository.groovy
package com.example
import javax.persistence.EntityManager
import org.apache.lucene.search.Query
import org.hibernate.search.jpa.FullTextEntityManager
import org.hibernate.search.jpa.Search
import org.hibernate.search.query.dsl.QueryBuilder
import org.springframework.beans.factory.annotation.Autowired
import org.springframework.stereotype.Repository
/**
 * Repository that runs Hibernate Search full-text queries against {@link Person}.
 */
@Repository
class SearchRepository {

    @Autowired
    EntityManager entityManager

    /**
     * Wraps the injected {@link EntityManager} in a {@link FullTextEntityManager}.
     */
    FullTextEntityManager getFullTextEntityManager() {
        Search.getFullTextEntityManager(entityManager)
    }

    /**
     * Finds people whose indexed {@code email} field matches the given text as a prefix.
     *
     * The text is lower-cased and suffixed with {@code *} before it is handed to a
     * wildcard keyword query on the {@code email} field.
     *
     * @param searchText text the e-mail should start with
     * @return the result list of the full-text query
     */
    List<Person> findPeople(String searchText) {
        // Locale.ROOT keeps the lower-casing independent of the JVM default locale
        // (under a Turkish locale 'I' would otherwise become a dotless 'ı' and no
        // longer match the terms written to the index).
        String pattern = searchText.toLowerCase(Locale.ROOT) + '*'

        QueryBuilder qb = fullTextEntityManager.searchFactory
                .buildQueryBuilder()
                .forEntity(Person)
                .get()

        Query query = qb.keyword()
                .wildcard()
                .onField('email')
                .matching(pattern)
                .createQuery()

        javax.persistence.Query jpaQuery =
                fullTextEntityManager.createFullTextQuery(query, Person)
        jpaQuery.resultList
    }
}
然后以下测试失败:
SearchWildcardTest.groovy
package com.example
import javax.persistence.EntityManager
import org.hibernate.search.jpa.FullTextEntityManager
import org.hibernate.search.jpa.Search
import org.junit.Test
import org.junit.runner.RunWith
import org.springframework.beans.factory.annotation.Autowired
import org.springframework.boot.test.SpringApplicationConfiguration
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner
import org.springframework.transaction.annotation.Transactional
/**
 * Integration test: stores a {@link Person}, runs the indexer and then searches
 * for that person by e-mail through {@link SearchRepository}.
 */
@RunWith(SpringJUnit4ClassRunner)
@SpringApplicationConfiguration(classes = HibernateSearchWildcardApplication)
@Transactional
class SearchWildcardTest {

    @Autowired
    SearchRepository searchRepo

    @Autowired
    PersonRepository personRepo

    @Autowired
    EntityManager em

    /** Full-text wrapper around the injected entity manager. */
    FullTextEntityManager getFullTextEntityManager() {
        return Search.getFullTextEntityManager(em)
    }

    @Test
    void findTeamsByNameWithWildcard() {
        Person saved = personRepo.save(new Person(email: 'foo@bar.com'))

        // Run the indexer to completion, then flush to the indexes before searching.
        getFullTextEntityManager().createIndexer().startAndWait()
        getFullTextEntityManager().flushToIndexes()

        List<Person> hits = searchRepo.findPeople('foo@bar.com')
        assert hits.contains(saved) // this assertion fails! Why?
    }
}
PersonRepository.groovy
package com.example
import org.springframework.data.repository.CrudRepository
/**
 * Spring Data CRUD repository for {@link Person} entities with {@code Long} ids.
 */
interface PersonRepository extends CrudRepository<Person, Long> {
}
build.gradle
// Build-script classpath: the Gradle plugins applied further down are resolved from here.
buildscript {
ext {
// Interpolated into the spring-boot-gradle-plugin coordinate below.
springBootVersion = '1.2.7.RELEASE'
}
repositories {
mavenCentral()
}
dependencies {
classpath("org.springframework.boot:spring-boot-gradle-plugin:${springBootVersion}")
classpath('io.spring.gradle:dependency-management-plugin:0.5.2.RELEASE')
}
}
// Plugins applied to this project.
apply plugin: 'groovy'
apply plugin: 'eclipse'
apply plugin: 'spring-boot'
apply plugin: 'io.spring.dependency-management'
// Name and version of the produced jar.
jar {
baseName = 'hibernate-search-email'
version = '0.0.1-SNAPSHOT'
}
// Source and target compatibility are both set to 1.8.
sourceCompatibility = 1.8
targetCompatibility = 1.8
// Repository used to resolve the project dependencies below.
repositories {
mavenCentral()
}
dependencies {
// No versions are given for the Spring Boot starters, groovy and h2;
// presumably they come from the dependency-management plugin - verify.
compile('org.springframework.boot:spring-boot-starter-data-jpa')
compile('org.codehaus.groovy:groovy')
// Hibernate Search is pinned explicitly.
compile('org.hibernate:hibernate-search:5.3.0.Final')
testCompile('com.h2database:h2')
testCompile('org.springframework.boot:spring-boot-starter-test')
}
// Wrapper task configured for Gradle 2.8.
task wrapper(type: Wrapper) {
gradleVersion = '2.8'
}
以下是运行测试后,Luke 从生成的 Lucene 索引中显示的内容:
在我看来,电子邮件地址 "foo@bar.com" 并没有被完整地存入索引,而是被拆分成了两个字符串 "foo" 和 "bar.com"。
Hibernate Search 官网的官方 "Getting started" 指南中写道:
[...] The standard tokenizer splits words at punctuation characters and hyphens while keeping email addresses and internet hostnames intact. It is a good general purpose tokenizer. [...]
我一定是这里遗漏了一些东西,但没能弄明白。
我的问题:
- 为什么我的代码没有索引完整的电子邮件地址?
- 如何实现对地址进行索引以便测试通过?
文档似乎没有正确反映底层 Lucene API 的变化。
[K]eeping email addresses and internet hostnames intact...
这对于旧版的 StandardTokenizer 来说曾经是正确的,但此后 Lucene 一侧已做了更改。现在这种行为由 ClassicTokenizer 提供。
所以下面的配置应该能满足您的需求:
// Declares the "emailanalyzer" definition: classic tokenizer followed by a lower-case filter.
@Entity
@Indexed
@AnalyzerDef(
    name = "emailanalyzer",
    tokenizer = @TokenizerDef(factory = ClassicTokenizerFactory.class),
    filters = @TokenFilterDef(factory = LowerCaseFilterFactory.class)
)
class Person {
    // ...

    // Index this field with the "emailanalyzer" definition declared on the class.
    @Field
    @Analyzer(definition = "emailanalyzer")
    String email;
}
请注意,此配置同样会进行小写转换(lower-casing)。我们将相应地调整 HSEARCH 文档,感谢您发现这一点!