如何搜索 Elasticsearch 索引中附件文档的内容

How to search the content of a document attached in an Elasticsearch index

我已经在 elasticsearch 中创建了索引

// Creates the "documents" index with a mapping for DocUpload in which the
// Document property is mapped as an attachment field.
this.client.CreateIndex("documents", index => index
    .Mappings(mappings => mappings
        .Map<DocUpload>(typeMap => typeMap
            .Properties(props => props
                .Attachment(attachment => attachment
                    .Name(upload => upload.Document)
                    // The attachment's title sub-field is named after Title and
                    // keeps term vectors with positions and offsets.
                    .TitleField(title => title
                        .Name(doc => doc.Title)
                        .TermVector(TermVectorOption.WithPositionsOffsets)))))));

附件在索引之前经过了 base64 编码。我无法搜索到任何文档中的内容。base64 编码是否会导致这个问题？有人可以帮忙吗？

浏览器中返回的响应类似如下：

    {
 "documents": {
   "aliases": {},
   "mappings": {
  "indexdocument": {
    "properties": {
      "document": {
        "type": "attachment",
        "fields": {
          "content": {
            "type": "string"
          },
          "author": {
            "type": "string"
          },
          "title": {
            "type": "string",
            "term_vector": "with_positions_offsets"
          },
          "name": {
            "type": "string"
          },
          "date": {
            "type": "date",
            "format": "strict_date_optional_time||epoch_millis"
          },
          "keywords": {
            "type": "string"
          },
          "content_type": {
            "type": "string"
          },
          "content_length": {
            "type": "integer"
          },
          "language": {
            "type": "string"
          }
        }
      },
      "documentType": {
        "type": "string"
      },
      "id": {
        "type": "long"
      },
      "lastModifiedDate": {
        "type": "date",
        "format": "strict_date_optional_time||epoch_millis"
      },
      "location": {
        "type": "string"
      },
      "title": {
        "type": "string"
      }
    }
  }
},
"settings": {
  "index": {
    "creation_date": "1465193502636",
    "number_of_shards": "5",
    "number_of_replicas": "1",
    "uuid": "5kCRvhmsQAGyndkswLhLrg",
    "version": {
      "created": "2030399"
    }
  }
},
"warmers": {}
}
 }

我通过添加分析器找到了解决方案。

// Token filters used by the custom "full" analyzer defined below.
var fullNameFilters = new List<string> { "lowercase", "snowball" };

// Creates the "mydocs" index with a custom analyzer and applies that analyzer
// to the attachment's file and title fields.
client.CreateIndex("mydocs", index => index
    .Settings(settings => settings
        .Analysis(analysis => analysis
            // "full": standard tokenizer followed by the filters listed above.
            .Analyzers(analyzers => analyzers
                .Custom("full", custom => custom
                    .Filters(fullNameFilters)
                    .Tokenizer("standard")))
            // Registers the "snowball" token filter configured for English.
            .TokenFilters(tokenFilters => tokenFilters
                .Snowball("snowball", snowball => snowball
                    .Language(SnowballLanguage.English)))))
    .Mappings(mappings => mappings
        .Map<IndexDocument>(typeMap => typeMap
            .AutoMap()
            // NOTE(review): File is mapped here as a nested Attachment and again
            // below as an attachment field; confirm both mappings are intended.
            .Properties(nestedProps => nestedProps
                .Nested<Attachment>(nested => nested
                    .Name(doc => doc.File)
                    .AutoMap()))
            .Properties(attachmentProps => attachmentProps
                .Attachment(attachment => attachment
                    .Name(doc => doc.File)
                    .FileField(file => file.Analyzer("full"))
                    .TitleField(title => title
                        .Name(doc => doc.Title)
                        .Analyzer("full")
                        .TermVector(TermVectorOption.WithPositionsOffsets)))))));