无法使用 Scala 在 Lucene 6.2 中搜索
Can't search in Lucene 6.2 using Scala
我正在尝试使用 Lucene 6.2 索引来自 MySQL(在 Scala 中使用 Slick)的数据。下面是代码
package oc.api.services
/**
* Created by sujit on 9/7/16.
*/
import org.apache.lucene.document._
import org.apache.lucene.analysis.standard.StandardAnalyzer
import org.apache.lucene.index._
import org.apache.lucene.search.IndexSearcher
import java.io.{File, IOException}
import java.nio.file.Paths
import akka.actor.ActorSystem
import akka.event.{Logging, LoggingAdapter}
import akka.stream.ActorMaterializer
import oc.api.utils.{Config, DatabaseService}
import org.apache.lucene.analysis.core.KeywordAnalyzer
import org.apache.lucene.index.IndexWriterConfig.OpenMode
import org.apache.lucene.queryparser.classic.{QueryParser}
import org.apache.lucene.store.FSDirectory
import scala.concurrent.ExecutionContext
class Indexer extends Config {
implicit val actorSystem = ActorSystem()
implicit val executor: ExecutionContext = actorSystem.dispatcher
implicit val log: LoggingAdapter = Logging(actorSystem, getClass)
implicit val materializer: ActorMaterializer = ActorMaterializer()
val databaseService = new DatabaseService(jdbcUrl, dbUser, dbPassword)
val notesService = new NotesService(databaseService)
def setIndex = {
val IndexStoreDir = Paths.get("/var/www/html/LuceneIndex")
val analyzer = new KeywordAnalyzer()
val writerConfig = new IndexWriterConfig(analyzer)
writerConfig.setOpenMode(OpenMode.CREATE)
writerConfig.setRAMBufferSizeMB(500)
val directory = FSDirectory.open(IndexStoreDir)
var writer = new IndexWriter(directory, writerConfig)
val notes = notesService.getNotes() //Gets all notes from slick. Data is coming in getNotes()
var doc = new Document()
var count = 0
val stringType = new FieldType()
notes.map(_.foreach{
case(note) =>
doc = new Document()
var field = new TextField("title", note.title, Field.Store.YES)
doc.add(field)
field = new TextField("teaser", note.teaser, Field.Store.YES)
doc.add(field)
field = new TextField("description", note.description, Field.Store.YES)
doc.add(field)
writer.addDocument(doc)
})
writer.commit()
}
def search(keyword: String) = {
val IndexStoreDir = Paths.get("/var/www/html/LuceneIndex")
var directoryReader = DirectoryReader.open(FSDirectory.open(IndexStoreDir))
val analyzer = new StandardAnalyzer()
val searcher = new IndexSearcher(directoryReader)
val mqp = new QueryParser("title", analyzer) //MultiFieldQueryParser(filesToSearch,analyzer)
val query = mqp.parse(keyword)
val hits = searcher.search(query,10)
val scoreDoc = hits.scoreDocs
println(scoreDoc.length)
}
}
object Indexer extends App {
val index = new Indexer
index.setIndex
index.search("Donec")
}
setIndex 函数在提供的路径中按预期工作。但是当我根据关键字搜索索引时,它会抛出 0 个结果。 seach功能有没有错误?如何解决?
这里的主要原因可能是您的分析器不匹配。您使用 KeywordAnalyzer
进行索引,根本不分析。对于搜索,您使用 StandardAnalyzer
。在您的示例中,查询 "Donec"
将被解析并分析为 title:donec
,就像您使用了 new TermQuery(new Term("title", "donec"))
一样。这只会匹配具有确切标题 donec
的文档,因为您在索引时使用了关键字分析器。您也应该尝试使用相同的分析器进行索引。
另一件事可能是——我只能猜测——notesService.getNotes()
可能是 Future[_]
(或类似的异步类型),因为它涉及 slick。如果是,您将所有文档添加到 .map()
的调用中,计划在未来解决后发生。然而,writer.commit()
调用发生在调用线程中,可能在您添加所有文档之前,因此您也应该将提交移至 map
回调中。
我正在尝试使用 Lucene 6.2 索引来自 MySQL(在 Scala 中使用 Slick)的数据。下面是代码
package oc.api.services
/**
* Created by sujit on 9/7/16.
*/
import org.apache.lucene.document._
import org.apache.lucene.analysis.standard.StandardAnalyzer
import org.apache.lucene.index._
import org.apache.lucene.search.IndexSearcher
import java.io.{File, IOException}
import java.nio.file.Paths
import akka.actor.ActorSystem
import akka.event.{Logging, LoggingAdapter}
import akka.stream.ActorMaterializer
import oc.api.utils.{Config, DatabaseService}
import org.apache.lucene.analysis.core.KeywordAnalyzer
import org.apache.lucene.index.IndexWriterConfig.OpenMode
import org.apache.lucene.queryparser.classic.{QueryParser}
import org.apache.lucene.store.FSDirectory
import scala.concurrent.ExecutionContext
class Indexer extends Config {
implicit val actorSystem = ActorSystem()
implicit val executor: ExecutionContext = actorSystem.dispatcher
implicit val log: LoggingAdapter = Logging(actorSystem, getClass)
implicit val materializer: ActorMaterializer = ActorMaterializer()
val databaseService = new DatabaseService(jdbcUrl, dbUser, dbPassword)
val notesService = new NotesService(databaseService)
def setIndex = {
val IndexStoreDir = Paths.get("/var/www/html/LuceneIndex")
val analyzer = new KeywordAnalyzer()
val writerConfig = new IndexWriterConfig(analyzer)
writerConfig.setOpenMode(OpenMode.CREATE)
writerConfig.setRAMBufferSizeMB(500)
val directory = FSDirectory.open(IndexStoreDir)
var writer = new IndexWriter(directory, writerConfig)
val notes = notesService.getNotes() //Gets all notes from slick. Data is coming in getNotes()
var doc = new Document()
var count = 0
val stringType = new FieldType()
notes.map(_.foreach{
case(note) =>
doc = new Document()
var field = new TextField("title", note.title, Field.Store.YES)
doc.add(field)
field = new TextField("teaser", note.teaser, Field.Store.YES)
doc.add(field)
field = new TextField("description", note.description, Field.Store.YES)
doc.add(field)
writer.addDocument(doc)
})
writer.commit()
}
def search(keyword: String) = {
val IndexStoreDir = Paths.get("/var/www/html/LuceneIndex")
var directoryReader = DirectoryReader.open(FSDirectory.open(IndexStoreDir))
val analyzer = new StandardAnalyzer()
val searcher = new IndexSearcher(directoryReader)
val mqp = new QueryParser("title", analyzer) //MultiFieldQueryParser(filesToSearch,analyzer)
val query = mqp.parse(keyword)
val hits = searcher.search(query,10)
val scoreDoc = hits.scoreDocs
println(scoreDoc.length)
}
}
object Indexer extends App {
val index = new Indexer
index.setIndex
index.search("Donec")
}
setIndex 函数在提供的路径中按预期工作。但是当我根据关键字搜索索引时,它会抛出 0 个结果。 seach功能有没有错误?如何解决?
这里的主要原因可能是您的分析器不匹配。您使用 KeywordAnalyzer
进行索引,根本不分析。对于搜索,您使用 StandardAnalyzer
。在您的示例中,查询 "Donec"
将被解析并分析为 title:donec
,就像您使用了 new TermQuery(new Term("title", "donec"))
一样。这只会匹配具有确切标题 donec
的文档,因为您在索引时使用了关键字分析器。您也应该尝试使用相同的分析器进行索引。
另一件事可能是——我只能猜测——notesService.getNotes()
可能是 Future[_]
(或类似的异步类型),因为它涉及 slick。如果是,您将所有文档添加到 .map()
的调用中,计划在未来解决后发生。然而,writer.commit()
调用发生在调用线程中,可能在您添加所有文档之前,因此您也应该将提交移至 map
回调中。