在 map(映射)中合并嵌套值的正确方法?

Proper way to merge nested values within maps?

鉴于以下情况:

M1 = #{ "Robert" => #{"Scott" => #{}} },
M2 = #{ "Robert" => #{"Adams" => #{}} }

合并应该是:

M3 = #{ "Robert" => #{ "Scott" => #{}, "Adams" => #{} } }

现在,如果我们合并以下内容:

M4 = #{ "William" => #{ "Robert" => #{ "Scott" => {} }}}
M5 = #{ "William" => #{ "Robert" => #{ "Fitzgerald" => {} }}}

我们应该得到以下信息:

M6 = #{ "Robert" => #{ "Scott" => #{}, "Adams" => #{} },
        "William" => #{ "Robert" => #{ "Fitzgerald" => {}, "Scott" => {} }}}

我的想法是逐层迭代:取出每一层的键(key)并遍历它们。检查两边的键是否相同,如果不相同则直接合并 map;再检查对应的值是否为 map,如果不是则停止并合并,否则再次调用自身。我遇到的问题是函数不断崩溃,有没有更好的方法?

这是我目前的代码:

% Entry point of the attempted deep merge: collects the keys of M1 and hands
% them, together with both maps, to do_merge/3.
% NOTE: the result is whatever do_merge/3 returns, and its base clause returns
% the atom ok, so this function never yields a merged map.
merger(M1, M2) ->
   M1_Keys = maps:keys(M1),
   % M2_Keys is bound but never used (the compiler warns about it).
   M2_Keys = maps:keys(M2),
   do_merge(M1, M2, M1_Keys).

% Walks the key list and, for every element, prints whether M1 and M2 have
% equal top-level key lists. Head is never used, so the same whole-map
% comparison is repeated once per key; nothing is merged or accumulated.
do_merge(M1, M2, [Head|Tail]) ->
   Check = check_if_same(M1, M2),
   io:fwrite("Check is: ~p\n", [Check]),
   case Check of 
     {ok, true} -> 
       io:fwrite("true\n");
     {ok, false} ->
       io:fwrite("false\n")
   end,
   do_merge(M1, M2, Tail);
% Disabled earlier attempt: shallow-merge the two values stored under Head
% and print the updated copy of M1.
%   P1 = maps:get(Head, M1),
%   P2 = maps:get(Head, M2),
%   P3 = maps:merge(P1, P2),
%   M4 = maps:update(Head, P3, M1),
%   io:fwrite("~p \n", [M4]),
%   do_merge(M1, M2, Tail);

% Base case: the key list is exhausted. Returns the atom ok, not a map, so
% this is the value the whole recursion (and merger/2) evaluates to.
do_merge(M1, M2, []) -> 
   ok.
% Returns {ok, Boolean}: true when the sorted top-level key lists of M1 and M2
% compare equal (==). Values are not looked at.
check_if_same(M1, M2) -> 
   {ok, lists:sort( maps:keys(M1) ) == lists:sort( maps:keys(M2) )}.

但是,它因以下错误而崩溃:

$erlc *.erl
helloworld.erl:10: Warning: variable 'M2_Keys' is unused
helloworld.erl:13: Warning: variable 'Head' is unused
helloworld.erl:30: Warning: variable 'M1' is unused
helloworld.erl:30: Warning: variable 'M2' is unused
$erl -noshell -s helloworld start -s init stop
Check is: {ok,true}
true
{"init terminating in do_boot",{{badmap,ok},[{maps,keys,[ok],[]},{helloworld,merger,2,[{file,"helloworld.erl"},{line,10}]},{init,start_em,1,[]},{init,do_boot,3,[]}]}}
init terminating in do_boot ()

Crash dump is being written to: erl_crash.dump...done

你的 do_merge 总是返回 ok(即递归的基本情形)。

这里有两个解决方案,第一个更易读,但我会选择第二个

% Recursively merges M2 into M1.
% When both arguments are maps, keys found in only one of them are copied
% as-is and keys found in both have their values merged recursively.
% When either argument is not a map, the second argument wins.
deep_map_merge(M1, M2) when is_map(M1), is_map(M2) ->
    % Merge both as if they had no common keys
    FlatMerge = maps:merge(M1, M2),
    % Keys present in both maps. maps:is_key/2 uses exact key matching, so
    % 1 and 1.0 are treated as different keys, just like the maps themselves
    % do; it also avoids comparing every key of M1 with every key of M2.
    CommonKeys = [K || K <- maps:keys(M1), maps:is_key(K, M2)],
    % Update the merged map with the merge of the common keys
    lists:foldl(fun(K, MergeAcc) ->
                        MergeAcc#{K => deep_map_merge(maps:get(K, M1), maps:get(K, M2))}
                end, FlatMerge, CommonKeys);
deep_map_merge(_, Override) ->
    Override.


% Recursively merges M2 into M1 by folding over the entries of M2.
% A key already present in the running result has its value merged
% recursively with the incoming one; a new key is simply inserted.
% When either argument is not a map, the second argument wins.
deep_map_merge2(M1, M2) when is_map(M1), is_map(M2) ->
    Step = fun(Key, Incoming, Merged) ->
                   case maps:find(Key, Merged) of
                       {ok, Existing} ->
                           maps:put(Key, deep_map_merge2(Existing, Incoming), Merged);
                       error ->
                           maps:put(Key, Incoming, Merged)
                   end
           end,
    maps:fold(Step, M1, M2);
deep_map_merge2(_, Override) ->
    Override.

正如我在之前的 post 中回答的那样,我不明白您为什么会得到这个结果,需要更多信息来了解您如何启动 shell、键入命令以及完整的结果。

不幸的是,我没有足够的时间来详细介绍和评论你的代码,我在这里放了一个代码来做你想要的,如果可以的话我稍后会添加评论:

-module (merger).

-compile(export_all).

% yourType = maps(Key :: term() => Value :: yourType()) | #{}.

% merge operation:
%   get all keys from 2 inputs
%   if a key belongs to one input only, insert key => value in the result
%   if a key belongs to 2 inputs, insert key => merge(Value1,value2) in the result
%   
% lets write this

% Recursively merges two terms.
% When both are maps, the result holds every key of either map; a key found
% in both gets the recursive merge of its two values.
% When either term is not a map (for example a leaf such as {}), the second
% term wins, so leaves no longer crash the merge with badmap.
merger(M1, M2) when is_map(M1), is_map(M2) ->
    % Union of the keys of both maps. Taking the keys of a flat merge keeps
    % exact key semantics: lists:usort/1 would treat 1 and 1.0 as duplicates
    % and silently drop one of them.
    Keys = maps:keys(maps:merge(M1, M2)),
    lists:foldl(fun(Key, Acc) -> do_merge(Key, M1, M2, Acc) end, #{}, Keys);
merger(_, Override) ->
    Override.

% Adds the entry for Key to Acc: the merged value when Key is in both maps,
% otherwise the value from whichever map holds it.
do_merge(Key, M1, M2, Acc) ->
    case {maps:find(Key, M1), maps:find(Key, M2)} of
        {{ok, V1}, {ok, V2}} ->
            maps:put(Key, merger(V1, V2), Acc);
        {{ok, V1}, error} ->
            maps:put(Key, V1, Acc);
        {error, {ok, V2}} ->
            maps:put(Key, V2, Acc)
    end.

% Merges the sample maps from the question one after another, left to right,
% and returns the final result.
test() ->
    [First | Rest] = [#{ "Robert" => #{"Scott" => #{}} },
                      #{ "Robert" => #{"Adams" => #{}} },
                      #{ "William" => #{ "Robert" => #{ "Scott" => #{} }}},
                      #{ "William" => #{ "Robert" => #{ "Fitzgerald" => #{} }}}],
    lists:foldl(fun(Next, Merged) -> merger(Merged, Next) end, First, Rest).

在shell中给出:

1> c(merger).    
merger.erl:3: Warning: export_all flag enabled - all functions will be exported
{ok,merger}
2> merger:test().
#{"Robert" => #{"Adams" => #{},"Scott" => #{}},
  "William" =>
      #{"Robert" => #{"Fitzgerald" => #{},"Scott" => #{}}}}
3>

[编辑]

这是一个带有 2 种合并方法的注释版本

-module (merger).

-compile(export_all).

% yourType = maps(Key :: term() => Value :: yourType()) | #{}.

% This first version sticks to the description in natural language
% merge operation:
%   get all keys from 2 inputs
%   if a key belongs to one input only, insert key => value in the result
%   if a key belongs to 2 inputs, insert key => merge(Value1,value2) in the result
%   
% let's write this

% Recursively merges two terms.
% When both are maps, do_merge/4 is run once per key of either map and the
% results are accumulated with lists:foldl/3, starting from an empty map.
% https://erlang.org/doc/man/lists.html#foldl-3
% When either term is not a map (for example a leaf such as {}), the second
% term wins, so leaves no longer crash the merge with badmap.
merger(M1, M2) when is_map(M1), is_map(M2) ->
    % Union of the keys of both maps. Taking the keys of a flat merge keeps
    % exact key semantics: lists:usort/1 would treat 1 and 1.0 as duplicates
    % and silently drop one of them.
    Keys = maps:keys(maps:merge(M1, M2)),
    lists:foldl(fun(Key, Acc) -> do_merge(Key, M1, M2, Acc) end, #{}, Keys);
merger(_, Override) ->
    Override.


% This function is the direct translation of the description above.
% It adds the entry for Key to Acc: the merged value when Key is in both
% maps, otherwise the value from whichever map holds it.
do_merge(Key, M1, M2, Acc) ->
    % The case expression returns the result of the matching branch.
    case {maps:find(Key, M1), maps:find(Key, M2)} of
        {{ok, V1}, {ok, V2}} ->
            maps:put(Key, merger(V1, V2), Acc);
        {{ok, V1}, error} ->
            maps:put(Key, V1, Acc);
        {error, {ok, V2}} ->
            maps:put(Key, V2, Acc)
    end.

% the previous algorithm does a lot of useless operations: it builds the union of the keys of both maps
% and uses 3 maps to build the result.
% a more efficient method is to break the symmetry of M1 and M2, and consider that you merge M2 into M1,
% so M1 is the initial value of the algorithm.
% then, rather than extract the keys from M2, it is more direct to use the maps:fold function.
% https://erlang.org/doc/man/maps.html#fold-3
% now the merge operation is :
%   insert {key, Value} in the accumulator.
%       If the key already exists in the accumulator, then the new value is the merge of the accumulator value and of the parameter value,
%       If not then simply put Key,Value in the accumulator

% Recursively merges M2 into M1.
% When both are maps, maps:fold/3 calls do_merge2/4 with each Key and Value
% from M2, the result of the previous steps, and the value currently stored
% under Key in that result (undefined if Key is not there yet).
% The initial value is M1.
% When either argument is not a map (for example a leaf such as {}), the
% second argument wins instead of crashing with badmap.
merger2(M1, M2) when is_map(M1), is_map(M2) ->
    maps:fold(fun(Key, Value, AccIn) ->
                      do_merge2(Key, Value, AccIn, maps:get(Key, AccIn, undefined))
              end, M1, M2);
merger2(_, Override) ->
    Override.

% The last parameter is the result of maps:get/3: the stored value if the key
% exists, otherwise the default undefined. Dispatching on it in the function
% head replaces a case or if expression.
% A value that really is the atom undefined is handled like a missing key;
% the outcome is the same, because a non-map value is always replaced by the
% incoming one.
do_merge2(Key, Value, Acc, undefined) ->
    % the Key was not present in Acc
    maps:put(Key, Value, Acc);
do_merge2(Key, Value, Acc, Existing) ->
    % the Key was present in Acc associated to Existing; the stored value goes
    % first so that the incoming Value wins wherever the two cannot be merged
    maps:put(Key, merger2(Existing, Value), Acc).

% The nice thing is that the whole merge now needs only two small functions.
% It is pretty neat, maybe less easy to start with.

% For the test, I now pass the merger function name to use as a parameter
 % Applies the given two-argument merge fun to the sample maps from the
 % question one after another, left to right, and returns the final result.
 test(Merger) ->
     [First | Rest] = [#{ "Robert" => #{"Scott" => #{}} },
                       #{ "Robert" => #{"Adams" => #{}}},
                       #{ "William" => #{ "Robert" => #{ "Scott" => #{}}}},
                       #{ "William" => #{ "Robert" => #{ "Fitzgerald" => #{}}}}],
     lists:foldl(fun(Next, Merged) -> Merger(Merged, Next) end, First, Rest).

 % Runs the sample scenario with each merge implementation in turn and
 % prints the result under its label.
 test1() ->
     Runs = [{"using merger  :~n~p~n~n", fun merger:merger/2},
             {"using merger2 :~n~p~n~n", fun merger:merger2/2}],
     lists:foreach(fun({Format, Merger}) ->
                           io:format(Format, [test(Merger)])
                   end, Runs).

在 shell 中,它给出:

$ erl
Erlang/OTP 22 [erts-10.6] [64-bit] [smp:8:8] [ds:8:8:10] [async-threads:1]

Eshell V10.6  (abort with ^G)
1> c(merger).           
merger.erl:3: Warning: export_all flag enabled - all functions will be exported
{ok,merger}
2> merger:test(fun merger:merger/2). 
#{"Robert" => #{"Adams" => #{},"Scott" => #{}},
  "William" =>
      #{"Robert" => #{"Fitzgerald" => #{},"Scott" => #{}}}}
3> merger:test(fun merger:merger2/2).
#{"Robert" => #{"Adams" => #{},"Scott" => #{}},
  "William" =>
      #{"Robert" => #{"Fitzgerald" => #{},"Scott" => #{}}}}
4>

或者在 PowerShell 窗口中调用:

PS C:\git\test_area\src> erlc merger.erl
merger.erl:3: Warning: export_all flag enabled - all functions will be exported
PS C:\git\test_area\src> erl -noshell -s merger test1 -s init stop
using merger  :
#{"Robert" => #{"Adams" => #{},"Scott" => #{}},
  "William" => #{"Robert" => #{"Fitzgerald" => #{},"Scott" => #{}}}}

using merger2 :
#{"Robert" => #{"Adams" => #{},"Scott" => #{}},
  "William" => #{"Robert" => #{"Fitzgerald" => #{},"Scott" => #{}}}}

PS C:\git\test_area\src>

关于你得到崩溃转储的原因,我不得不做一些猜测(你没有提供 start 函数 :o)。我认为你和我一样写了一个把多次求值串联起来的测试。这种情况下的问题是,在第一次求值(在我的例子中是 R1 = Merger(#{ "Robert" => #{"Scott" => #{}} },#{ "Robert" => #{"Adams" => #{}}}))的递归结束时,你得到的返回值是 ok(即代码中的 do_merge(M1, M2, []) -> ok)。随后这个结果被用于下一次求值,程序在调用 maps:keys(ok) 时失败,并报告它得到了一个 badmap:{badmap,ok}。