如何使用 PyLucene 从所有索引文档中检索特定字段？

Question

在 Java 中，可以使用 `MatchAllDocsQuery()` 来实现，但 PyLucene 的文档中没有提到该如何实现。

下面是一段 Python 代码，用于提交单个查询，然后从检索到的文档中提取所有字段。

# Filesystem location of the Lucene index opened by the script below.
# The value is a placeholder; replace it with the real index directory.
INDEX_DIR = "directory/where/the/document/index/is/stored"

import sys, os, lucene

from java.nio.file import Paths
from org.apache.lucene.analysis.standard import StandardAnalyzer
from org.apache.lucene.index import DirectoryReader
from org.apache.lucene.queryparser.classic import QueryParser
from org.apache.lucene.store import SimpleFSDirectory
from org.apache.lucene.search import IndexSearcher

def run(searcher, analyzer):
    """Interactively query the index and print the ``doi`` field of each hit.

    Repeatedly prompts for a query string, parses it against the
    ``contents`` field with ``QueryParser``, runs it through ``searcher``
    and prints the ``doi`` value of every returned document. Returns as
    soon as the user submits an empty line.

    Args:
        searcher: Object providing ``search(query, n)`` and ``doc(doc_id)``;
            the script passes an ``IndexSearcher``.
        analyzer: Analyzer handed to ``QueryParser``; the script passes a
            ``StandardAnalyzer``.
    """
    while True:
        # A bare ``print`` is a no-op expression in Python 3; it has to be
        # called to actually emit the intended blank line.
        print()
        print("Hit enter with no input to quit.")
        command = input("Query:")
        if command == '':
            return

        print()
        print("Searching for:", command)
        query = QueryParser("contents", analyzer).parse(command)
        #query = "MatchAllDocsQuery()"
        # Only the top 50 hits are requested, so the count printed below
        # never exceeds 50 even when more documents match.
        scoreDocs = searcher.search(query, 50).scoreDocs
        print("%s total matching documents." % len(scoreDocs))

        for scoreDoc in scoreDocs:
            doc = searcher.doc(scoreDoc.doc)
            # Map every stored field name to its string value.
            table = {field.name(): field.stringValue() for field in doc.getFields()}
            # .get() keeps the loop going (printing None) for a document
            # that has no 'doi' field instead of aborting with KeyError.
            print(table.get('doi'))
            #print('path:', doc.get("path"), 'name:', doc.get("name"), 'title:', doc.get("text"))


if __name__ == '__main__':
    # Script entry point: start the JVM, open the index at INDEX_DIR and
    # hand a searcher/analyzer pair to run().
    lucene.initVM()
    print('lucene', lucene.VERSION)
    directory = SimpleFSDirectory.open(Paths.get(INDEX_DIR))
    try:
        print("Directory name is given below")
        print(directory)

        reader = DirectoryReader.open(directory)
        try:
            searcher = IndexSearcher(reader)
            print(searcher)
            analyzer = StandardAnalyzer()

            # Calling the run function for execution
            run(searcher, analyzer)
        finally:
            # ``del searcher`` only unbinds the Python name; the reader has
            # to be closed explicitly to release the index files.
            reader.close()
    finally:
        directory.close()

Answer 1

只需对查询做一个很小的改动，就可以让 Lucene 检索所有已索引的文档：把 command 变量替换为 `command = ".*."` 即可。`.*.` 会（借助星号通配符）搜索所有文档中的所有字段和字段值。

# Filesystem location of the Lucene index opened by the script below.
# The value is a placeholder; replace it with the real index directory.
INDEX_DIR = "directory/where/the/document/index/is/stored"

import sys, os, lucene

from java.nio.file import Paths
from org.apache.lucene.analysis.standard import StandardAnalyzer
from org.apache.lucene.index import DirectoryReader
from org.apache.lucene.queryparser.classic import QueryParser
from org.apache.lucene.store import SimpleFSDirectory
from org.apache.lucene.search import IndexSearcher

def run(searcher, analyzer):
    """Run the fixed ``.*.`` query and print the ``doi`` field of each hit.

    The query string is parsed against the ``contents`` field with
    ``QueryParser`` and executed through ``searcher``; the ``doi`` value of
    every returned document is printed.

    Args:
        searcher: Object providing ``search(query, n)`` and ``doc(doc_id)``;
            the script passes an ``IndexSearcher``.
        analyzer: Analyzer handed to ``QueryParser``; the script passes a
            ``StandardAnalyzer``.
    """
    command = ".*."
    print("Searching for:", command)
    query = QueryParser("contents", analyzer).parse(command)
    #query = "MatchAllDocsQuery()"
    # Only the top 50 hits are requested, so the count printed below never
    # exceeds 50 even when more documents match.
    scoreDocs = searcher.search(query, 50).scoreDocs
    print("%s total matching documents." % len(scoreDocs))

    for scoreDoc in scoreDocs:
        doc = searcher.doc(scoreDoc.doc)
        # Map every stored field name to its string value.
        table = {field.name(): field.stringValue() for field in doc.getFields()}
        # .get() keeps the loop going (printing None) for a document that
        # has no 'doi' field instead of aborting with KeyError.
        print(table.get('doi'))
        #print('path:', doc.get("path"), 'name:', doc.get("name"), 'title:', doc.get("text"))


if __name__ == '__main__':
    # Script entry point: start the JVM, open the index at INDEX_DIR and
    # hand a searcher/analyzer pair to run().
    lucene.initVM()
    print('lucene', lucene.VERSION)
    directory = SimpleFSDirectory.open(Paths.get(INDEX_DIR))
    try:
        print("Directory name is given below")
        print(directory)

        reader = DirectoryReader.open(directory)
        try:
            searcher = IndexSearcher(reader)
            print(searcher)
            analyzer = StandardAnalyzer()

            # Calling the run function for execution
            run(searcher, analyzer)
        finally:
            # ``del searcher`` only unbinds the Python name; the reader has
            # to be closed explicitly to release the index files.
            reader.close()
    finally:
        directory.close()

如何使用 PyLucene 从所有索引文档中检索特定字段？

How can one retrieve a particular field from all the indexed documents using PyLucene?

python

lucene

pylucene