RavenDB:为什么我在这个 multi-map/reduce 索引中得到字段的空值?

RavenDB: Why do I get null-values for fields in this multi-map/reduce index?

受 Ayende 的文章 https://ayende.com/blog/89089/ravendb-multi-maps-reduce-indexes 启发,我有以下索引,它是这样工作的:

/// <summary>
/// Multi-map/reduce index over <c>Post</c> and <c>PostView</c> documents. Both maps emit the
/// same four fields (<c>ViewedByUserId</c>, <c>ViewCount</c>, <c>Id</c>, <c>PostTitle</c>) and the
/// reduce step sums <c>ViewCount</c> per (<c>Id</c>, <c>ViewedByUserId</c>) group.
/// </summary>
public class Posts_WithViewCountByUser : AbstractMultiMapIndexCreationTask<Posts_WithViewCountByUser.Result>
{
    /// <summary>
    /// Registers the two maps, the reduce function and the stored <c>PostTitle</c> field.
    /// </summary>
    public Posts_WithViewCountByUser()
    {
        // Map 1: one entry per Post. It carries the title, contributes 0 views and
        // always has a null ViewedByUserId.
        AddMap<Post>(posts => from p in posts
            select new
            {
                ViewedByUserId = (string) null,
                ViewCount = 0,

                Id = p.Id,
                PostTitle = p.PostTitle,
            });

        // Map 2: one entry per PostView. It contributes 1 view for the viewing user,
        // is keyed by the viewed post's id and always has a null PostTitle.
        AddMap<PostView>(postViews => from postView in postViews
            select new
            {
                ViewedByUserId = postView.ViewedByUserId,
                ViewCount = 1,

                Id = (string) postView.PostId,
                PostTitle = (string) null,
            });

        // Reduce: the group key is (Id, ViewedByUserId). Because the Post map always emits a
        // null ViewedByUserId, its entries end up in the (Id, null) group, while groups with a
        // non-null user contain only PostView entries, whose PostTitle is null. In those groups
        // the FirstOrDefault() below therefore has no non-null title to pick and yields null.
        Reduce = results => from result in results
            group result by new
            {
                result.Id,
                result.ViewedByUserId
            }
            into g
            select new Result
            {
                ViewCount = g.Sum(x => x.ViewCount),
                Id = g.Key.Id,
                ViewedByUserId = g.Key.ViewedByUserId,
                PostTitle = g.Select(x => x.PostTitle).Where(x => x != null).FirstOrDefault(),
            };

        // Store PostTitle in the index in addition to indexing it.
        Store(x => x.PostTitle, FieldStorage.Yes);
    }

    /// <summary>
    /// Shape shared by the map outputs and the reduce result.
    /// </summary>
    public class Result
    {
        // Post id: Post.Id from the Post map, PostView.PostId from the PostView map.
        public string Id { get; set; }
        // Viewing user; null for entries produced by the Post map.
        public string ViewedByUserId { get; set; }
        // Sum of the per-entry counts (0 per Post entry, 1 per PostView entry) within the group.
        public int ViewCount { get; set; }
        // First non-null title found in the group, or null when the group has none.
        public string PostTitle { get; set; }
    }
}

我想这样查询这个索引:

返回所有 post,并且对于给定用户,附带一个表示该用户查看该 post 次数的整数。"views" 存储在单独的文档类型 PostView 中。请注意,我的真实文档类型已在此处重命名以匹配文章中的示例(我当然不会以这种方式实现 "most-viewed")。

我得到的查询结果是正确的 - 即,我总是得到所有 Post 文档,并为用户提供正确的查看次数。但我的问题是,PostTitle 字段在结果集中始终为空(所有 Post 文档在数据集中都具有非空值)。

我按 userId 和 (post)Id 的组合进行分组,作为我的 "uniqueness" 依据。我的理解是(如果我错了请纠正我):在 reduce 的这个阶段,我有一堆具有相同 userId/postId 组合的伪文档,一部分来自 Post 的 map,一部分来自 PostView 的 map。现在我只需找到其中任何一个确实带有 PostTitle 值的伪文档,即源自 Post map 的伪文档。这些值显然都应该相同,因为它们属于同一个 post,只是被 "outer-joined" 了而已。.Select(....).Where(....).FirstOrDefault() 链取自我用作基础的示例。然后我为最终文档设置这个 ViewCount 值,并将其投影到结果中。

我的问题是:如何获取结果中 PostTitle 字段的非空值?

问题是您有:

       ViewedByUserId = (string) null,

并且:

        group result by new
        {
            result.Id,
            result.ViewedByUserId
        }
        into g

换句话说,您实际上是按 null 分组,我假设这不是您的意图。

在 PostView 上建立一个 map/reduce 索引,并通过 include 或 transformer 获取 PostTitle,会简单得多。

您对正在发生的事情的理解是正确的,因为您正在创建带有 userId / postId 的索引结果。

但您实际上所做的是:从 PostView 创建键为 userId/postId 的结果,同时从 Post 创建键为 null/postId 的结果。

这就是您没有得到想要的匹配结果的原因。

索引中的分组不正确。使用以下示例数据:

new Post { Id = "Post-1", PostTitle = "Post Title", AuthorId = "Author-1" }
new PostView { ViewedByUserId = "User-1", PostId = "Post-1" }
new PostView { ViewedByUserId = "User-1", PostId = "Post-1" }
new PostView { ViewedByUserId = "User-2", PostId = "Post-1" }

索引结果是这样的:

ViewCount | Id     | ViewedByUserId | PostTitle
--------- | ------ | -------------- | ----------
 0        | Post-1 | null           | Post Title
 2        | Post-1 | User-1         | null
 1        | Post-1 | User-2         | null

索引中的 map 操作只是为所有源文档创建统一结构的文档。因此,Post-1 文档产生一行,Post-1/User-1 的两个文档产生两行(随后被 reduce 为 ViewCount == 2 的单行),Post-1/User-2 的文档生成最后一行。

reduce 操作将所有 map 产生的行分组,并在索引中生成结果文档。在这种情况下,源自 Post 的文档与源自 PostView 的文档被分开存储,因为 ViewedByUserId 中的 null 值不会与来自 PostView 集合的值分到同一组。

如果你能改变存储数据的方式,可以通过直接在 PostView 中存储查看次数来解决这个问题。这将大大减少数据库中的重复数据,同时更新查看次数的成本几乎相同。

完整测试(需要 xunit 和 RavenDB.Tests.Helpers 这两个 NuGet 包):

using Raven.Abstractions.Indexing;
using Raven.Client;
using Raven.Client.Indexes;
using Raven.Tests.Helpers;
using System.Linq;
using Xunit;

namespace SO41559770Answer
{
    /// <summary>
    /// Integration test showing that <see cref="PostViewsIndex"/> returns, for a given post and
    /// user, the stored view count together with the title of the viewed post.
    /// </summary>
    public class SO41559770 : RavenTestBase
    {
        /// <summary>
        /// Stores one post and one view document per user, waits for indexing, then checks the
        /// projected index entry for each user.
        /// </summary>
        [Fact]
        public void SO41559770Test()
        {
            using (var server = GetNewServer())
            using (var store = NewRemoteDocumentStore(ravenDbServer: server))
            {
                new PostViewsIndex().Execute(store);

                using (IDocumentSession session = store.OpenSession())
                {
                    session.Store(new Post { Id = "Post-1", PostTitle = "Post Title", AuthorId = "Author-1" });
                    session.Store(new PostView { Id = "Views-1-1", ViewedByUserId = "User-1", PostId = "Post-1", ViewCount = 2 });
                    session.Store(new PostView { Id = "Views-1-2", ViewedByUserId = "User-2", PostId = "Post-1", ViewCount = 1 });
                    session.SaveChanges();
                }

                WaitForAllRequestsToComplete(server);
                WaitForIndexing(store);

                using (IDocumentSession session = store.OpenSession())
                {
                    // The query is deferred, so every First() call executes it again.
                    // Materialize each result once and assert on the local instead.
                    PostViewsIndex.Result resultForUser1 = session
                        .Query<PostViewsIndex.Result, PostViewsIndex>()
                        .ProjectFromIndexFieldsInto<PostViewsIndex.Result>()
                        .Where(x => x.PostId == "Post-1" && x.UserId == "User-1")
                        .First();
                    Assert.Equal(2, resultForUser1.ViewCount);
                    Assert.Equal("Post Title", resultForUser1.PostTitle);

                    PostViewsIndex.Result resultForUser2 = session
                        .Query<PostViewsIndex.Result, PostViewsIndex>()
                        .ProjectFromIndexFieldsInto<PostViewsIndex.Result>()
                        .Where(x => x.PostId == "Post-1" && x.UserId == "User-2")
                        .First();
                    Assert.Equal(1, resultForUser2.ViewCount);
                    Assert.Equal("Post Title", resultForUser2.PostTitle);
                }
            }
        }
    }

    /// <summary>
    /// Map-only index over <see cref="PostView"/> documents. Each entry combines the view
    /// document's own fields with the id and title of the <see cref="Post"/> loaded through
    /// <c>PostId</c>, and every field is stored so it can be projected from the index.
    /// </summary>
    public class PostViewsIndex : AbstractIndexCreationTask<PostView, PostViewsIndex.Result>
    {
        /// <summary>
        /// Projection target matching the fields emitted by the map.
        /// </summary>
        public class Result
        {
            public string Id { get; set; }
            public string PostId { get; set; }
            public string PostTitle { get; set; }
            public string UserId { get; set; }
            public int ViewCount { get; set; }
        }

        /// <summary>
        /// Defines the map and marks all fields as stored.
        /// </summary>
        public PostViewsIndex()
        {
            Map = views =>
                from view in views
                // Load the referenced post so its title can be emitted next to the view data.
                let viewedPost = LoadDocument<Post>(view.PostId)
                select new
                {
                    view.Id,
                    PostId = viewedPost.Id,
                    viewedPost.PostTitle,
                    UserId = view.ViewedByUserId,
                    view.ViewCount,
                };

            StoreAllFields(FieldStorage.Yes);
        }
    }

    /// <summary>
    /// Post document used by the test; its title is what the index copies into each entry.
    /// </summary>
    public class Post
    {
        // Document id (e.g. "Post-1" in the test); PostView.PostId refers to it.
        public string Id { get; set; }
        // Title emitted by PostViewsIndex as PostTitle.
        public string PostTitle { get; set; }
        // Set in the test data but not read by the index.
        public string AuthorId { get; set; }
    }

    /// <summary>
    /// View document that holds a view count for one user and one post.
    /// </summary>
    public class PostView
    {
        // Document id (e.g. "Views-1-1" in the test).
        public string Id { get; set; }
        // Id of the viewing user; emitted by PostViewsIndex as UserId.
        public string ViewedByUserId { get; set; }
        // Id of the viewed Post; the index passes it to LoadDocument<Post>.
        public string PostId { get; set; }
        // View count stored directly on this document.
        public int ViewCount { get; set; }
    }
}