Nest:索引 PDF,接收状态代码 400,"No Content Provided"

Nest: Indexing a PDF, receiving Status Code 400, "No Content Provided"

我正在尝试根据我在这里学到的知识索引和搜索 PDF: ElasticSearch & attachment type (NEST C#)

但是我收到 "Status Code 400, No Content Provided" 错误。这个 PDF 大约只有 7KB,已经是我能做出的最小文件了,里面只有一些文字。

有什么建议吗?代码和输出如下。谢谢!

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Nest;
using System.IO;
using System.Threading;

namespace SearchPDFConsole
{
    /// <summary>
    /// Console repro: recreates an Elasticsearch index, indexes one plain document and
    /// one document carrying a base64-encoded PDF attachment, then runs a query-string
    /// search and prints the titles of the hits.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point. Prints the HTTP status / connection status of every step to the console.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // create es client
            string index = "pdftestitems";

            Console.WriteLine("client stuff...");

            var node = new Uri("http://tns-dev.pts-eden.org:9200");
            var settings = new ConnectionSettings(node).SetDefaultIndex(index);
            var client = new ElasticClient(settings);

            Console.WriteLine("deleting index...");

            // delete index if any
            var di = client.DeleteIndex(index);

            Console.WriteLine(di.ConnectionStatus.HttpStatusCode);

            Console.WriteLine("creating index...");

            // Create your index explicitly before you index any instances of your class. If you don't do this, it will use dynamic mapping and ignore your attribute mapping. If you change your mapping in the future, always recreate the index.
            var ci = client.CreateIndex(index, c => c.AddMapping<Document>(m => m.MapFromAttributes()));

            // Report the create-index response (ci), not the delete-index response (di);
            // printing di here would hide a failed index/mapping creation.
            Console.WriteLine(ci.ConnectionStatus.HttpStatusCode);

            string path = "test2.pdf";

            // The attachment content is sent as the base64 encoding of the raw file bytes.
            var attachment = new Attachment
            {
                Content = Convert.ToBase64String(File.ReadAllBytes(path)),
                ContentType = "application/pdf",
                Name = "test2.pdf"
            };

            // Document without an attachment.
            var doc = new Document()
            {
                ID = 2,
                Title = "test2",
                Content = "This is a test."
            };

            // Document carrying the PDF attachment.
            var doc2 = new Document()
            {
                ID = 1,
                Title = "test",
                Content = "good luck",
                File = attachment
            };

            Console.WriteLine("Indexing document 1...");

            var status = client.Index<Document>(doc);

            Console.WriteLine(status.ConnectionStatus);

            Console.WriteLine("Indexing document 2...");

            var status2 = client.Index<Document>(doc2);

            Console.WriteLine(status2.ConnectionStatus);

            // NOTE(review): presumably this pause gives the freshly indexed documents
            // time to become searchable - confirm against the index refresh settings.
            Console.WriteLine("sleeping 1s...");
            Thread.Sleep(1000);

            string stringsearch = "test";

            //var searchResults = client.Search<Document>(s => s.Type("document").Query(qs => qs.QueryString(q => q.Query(stringsearch))));
            var searchResults = client.Search<Document>(s => s.Query(qs => qs.QueryString(q => q.Query(stringsearch))));

            Console.WriteLine(searchResults.Documents.Count());

            foreach (var sd in searchResults.Documents)
            {
                Console.WriteLine(sd.Title);
            }

        }
    }

    /// <summary>
    /// The document type indexed by this program; mapped under the Elasticsearch
    /// type name "document" via <c>ElasticType</c>.
    /// </summary>
    [ElasticType(Name = "document")]
    public class Document
    {
        /// <summary>Numeric identifier assigned by the caller.</summary>
        public int ID { get; set; }

        /// <summary>Title text; mapped with <c>Store = true</c>.</summary>
        [ElasticProperty(Store = true)]
        public string Title { get; set; }

        /// <summary>Plain-text body; mapped with <c>Store = true</c>.</summary>
        [ElasticProperty(Store = true)]
        public string Content { get; set; }

        /// <summary>
        /// Optional file payload, mapped as an attachment field with term vectors
        /// (positions and offsets) and <c>Store = true</c>.
        /// </summary>
        // NOTE(review): the attachment field type presumably requires the
        // mapper-attachments plugin on the Elasticsearch node - confirm it is installed.
        [ElasticProperty(Type = FieldType.Attachment, TermVector = TermVectorOption.WithPositionsOffsets, Store = true)]
        public Attachment File { get; set; }
    }

    /// <summary>
    /// Payload object for an attachment field. Each property is mapped to an
    /// underscore-prefixed field name through <c>ElasticProperty(Name = ...)</c>.
    /// </summary>
    public class Attachment
    {
        /// <summary>File content, mapped to "_content"; the caller supplies it as a base64 string.</summary>
        [ElasticProperty(Name = "_content")]
        public string Content { get; set; }

        /// <summary>Content type of the file (e.g. "application/pdf"), mapped to "_content_type".</summary>
        [ElasticProperty(Name = "_content_type")]
        public string ContentType { get; set; }

        /// <summary>File name, mapped to "_name".</summary>
        [ElasticProperty(Name = "_name")]
        public string Name { get; set; }
    }
}

我的程序输出:

C:\PROGRAMMING\SearchPDFTest\SearchPDFConsole\bin\Debug>SearchPDFConsole.exe
client stuff...
deleting index...
200
creating index...
200
Indexing document 1...
StatusCode: 201,
        Method: PUT,
        Url: http://tns-dev.pts-eden.org:9200/pdftestitems/document/2,
        Request: {
  "iD": 2,
  "title": "test2",
  "content": "This is a test."
},
        Response: <Response stream not captured or already read to completion by
 serializer, set ExposeRawResponse() on connectionsettings to force it to be set
 on>
Indexing document 2...
StatusCode: 400,
        Method: PUT,
        Url: http://tns-dev.pts-eden.org:9200/pdftestitems/document/1,
        Request: {
  "iD": 1,
  "title": "test",
  "content": "good luck",
  "file": {
    "_content": "JVBERi0xLjYNJeLjz9MNCjE5IDAgb2JqDTw8L0ZpbHRlci9GbGF0ZURlY29kZS9
GaXJzdCA5L0xlbmd0aCAxMzkvTiAyL1R5cGUvT2JqU3RtPj5zdHJlYW0NCmjeTM3BCsIwDAbgV/mfwLQ
brQijB3cUYQxvY4figuzSjrYDfXvbenCH/JCEL2khoKAVuo56v7sESbd1iZPKi3GmOy+rvfr3JE4CpbR
scp4vOWd6fDamwb44GlMPuMQuRcimaOqD36pFdUUdLx6ngw1Zoq1u5Oj38OQI/et9sokh/v+M+QowAEh
[LOTS MORE BASE64 ENCODED STUFF]
mL1dbMSAyIDFdPj5zdHJlYW0NCmjeYmIAAiZGpmUMTAwMPkDWP0UGpv/8a4Gsj8GMQDHG/yACxGIAsRi
YIaz/Aun/gKwaoDamDJDeqSBWIZBgfAoiZoAIRyDx8g1I9iWIuwpISGkyAAQYAArYEhcNCmVuZHN0cmV
hbQ1lbmRvYmoNc3RhcnR4cmVmDQo3MTE2DQolJUVPRg0K",
    "_content_type": "application/pdf",
    "_name": "test2.pdf"
  }
},
        Response: <Response stream not captured or already read to completion by
 serializer, set ExposeRawResponse() on connectionsettings to force it to be set
 on>
        ExceptionMessage: No content is provided.
         StackTrace:
sleeping 1s...
1
test2

我用在 Word 中创建的简单 PDF 尝试了您的代码,它似乎对我来说工作正常。

我运行的是 ES 1.4.4 和 elasticsearch-mapper-attachments 2.4.3。两个索引操作都返回了正确的 2xx 状态代码,之后我也可以搜索到该 PDF。

你能确定你的 PDF 文件是正确的吗?试试用另一个程序创建一个新的 PDF?