在 Ruby 中构建交集矩阵

Build an intersection matrix in Ruby

我有一组学生,他们每个人都选择了一定数量的他们想在下学期修读的课程,用哈希数组表示:

[
{"student"=>"1", "English"=>true, "Algebra"=>true, "History"=>false},
{"student"=>"2", "English"=>false, "Algebra"=>false, "History"=>true},
{"student"=>"3", "English"=>false, "Algebra"=>true, "History"=>false},
{"student"=>"4", "English"=>true, "Algebra"=>false, "History"=>true}
]

我想建立一个矩阵来显示每门课程之间有多少冲突,最终结果是这样的:

        English Algebra History
English    2       1       1
Algebra    1       2       -
History    1       -       2

其中交叉点上的数字是同时选择了对应两门课程的学生人数,例如 (English, English) 交叉点上的数字是 2,即选择英语的学生总数。(History, Algebra) 处是“-”,因为没有任何学生同时选择这两门课程。

我查看了 Ruby 的 Matrix 类文档,它似乎更多是面向数学意义上的矩阵——我不确定如何把它用于这个目的,也不确定它是否适合解决这个问题。

要高效地构建这样的矩阵,我应该研究或搜索哪些方法?

以下适用于任意数量的学校科目和任意分组大小(即,不仅仅是 2)。

代码

# Counts, for every group of +group_size+ subjects (repetition allowed), how
# many students selected all subjects in that group.
#
# @param arr [Array<Hash>] one hash per student; the "student" key is ignored
#   and every other key is a subject name whose value is +true+ when selected
# @param group_size [Integer] number of subjects per group
# @return [Hash{Array<String> => Integer}] group => number of matching students
def count_groupings(arr, group_size)
  subjects = arr.flat_map(&:keys).uniq - ["student"]
  groups = subjects.repeated_combination(group_size).to_a

  # Start every group at zero so groups nobody selected still appear.
  tallies = groups.each_with_object({}) { |group, acc| acc[group] = 0 }

  arr.each do |record|
    # Only values that are exactly +true+ count as a selection.
    chosen = record.select { |_, flag| flag == true }.keys
    groups.each do |group|
      # Nothing left after removing the chosen subjects => all were chosen.
      tallies[group] += 1 if (group - chosen).empty?
    end
  end

  tallies
end

例子

# Sample input: one hash per student. "student" holds the id; every other key
# is a course name whose value is true when that student selected the course.
arr = [
  {"student"=>"1", "English"=>true,  "Algebra"=>true,  "History"=>false},
  {"student"=>"2", "English"=>false, "Algebra"=>false, "History"=>true},
  {"student"=>"3", "English"=>false, "Algebra"=>true,  "History"=>false},
  {"student"=>"4", "English"=>true,  "Algebra"=>false, "History"=>true}
]

# Group size 1: number of students per course.
count_groupings(arr, 1)
  #=> {["English"]=>2, ["Algebra"]=>2, ["History"]=>2}
# Group size 2: the cells of the intersection matrix. A key that repeats one
# course, e.g. ["English", "English"], is the count for that single course.
count_groupings(arr, 2)
  #=> {["English", "English"]=>2, ["English", "Algebra"]=>1, ["English", "History"]=>1,
  #    ["Algebra", "Algebra"]=>2, ["Algebra", "History"]=>0, ["History", "History"]=>2}
# Group size 3: students who selected every course named in the triple.
count_groupings(arr, 3)
  #=> {["English", "English", "English"]=>2, ["English", "English", "Algebra"]=>1,
  #    ["English", "English", "History"]=>1, ["English", "Algebra", "Algebra"]=>1,
  #    ["English", "Algebra", "History"]=>0, ["English", "History", "History"]=>1,
  #    ["Algebra", "Algebra", "Algebra"]=>2, ["Algebra", "Algebra", "History"]=>0,
  #    ["Algebra", "History", "History"]=>0, ["History", "History", "History"]=>2}

说明

参见 Array#repeated_combination

以 group_size = 2 为例,步骤如下:

# Step-by-step construction of the zero-initialised counter hash.
# NOTE(review): assumes arr is the example array and group_size is 2 (the
# Enumerator inspect output below shows repeated_combination(2)).

# Collect every key of every student hash (duplicates included).
a = arr.flat_map { |h| h.keys }
  #=> ["student", "English", "Algebra", "History", "student", "English",
  #    "Algebra", "History", "student", "English", "Algebra", "History",
  #    "student", "English", "Algebra", "History"]
b = a.uniq
  #=> ["student", "English", "Algebra", "History"]
# "student" is the id column, not a course.
c = b - ["student"]
  #=> ["English", "Algebra", "History"]
d = c.repeated_combination(group_size)
  #=> #<Enumerator: ["English", "Algebra", "History"]:repeated_combination(2)>
e = d.to_a
  #=> [["English", "English"], ["English", "Algebra"], ["English", "History"],
  #    ["Algebra", "Algebra"], ["Algebra", "History"], ["History", "History"]]
# Pair every combination with an initial count of 0.
f = e.product([0])
  #=> [[["English", "English"], 0], [["English", "Algebra"], 0],
  #    [["English", "History"], 0], [["Algebra", "Algebra"], 0],
  #    [["Algebra", "History"], 0], [["History", "History"], 0]]
combos = f.to_h
  #=> {["English", "English"]=>0, ["English", "Algebra"]=>0, ["English", "History"]=>0,
  #    ["Algebra", "Algebra"]=>0, ["Algebra", "History"]=>0, ["History", "History"]=>0}

# Walk through the counting loop by hand for the first student and the first
# two combinations, using enumerators to step one element at a time.
g = arr.each
  #=> #<Enumerator: [{"student"=>"1", "English"=>true, "Algebra"=>true, "History"=>false},
  # ...
h = g.next
  #=> {"student"=>"1", "English"=>true, "Algebra"=>true, "History"=>false}
i = h.keys
  #=> ["student", "English", "Algebra", "History"]
# Keep only keys whose value is exactly true; "student" maps to "1" and drops out.
keys = i.select { |k| h[k] == true }
  #=> ["English", "Algebra"]
j = combos.keys
  #=> [["English", "English"], ["English", "Algebra"], ["English", "History"],
  #    ["Algebra", "Algebra"], ["Algebra", "History"], ["History", "History"]]
m = j.each
  #=> #<Enumerator: [["English", "English"], ["English", "Algebra"],
  #   ...]:each>
k = m.next
  #=> ["English", "English"]
# An empty difference means every course in k is among this student's courses.
(k-keys).empty?
  #=> (["English", "English"] - ["English", "Algebra"]).empty?
  #=> [].empty?
  #=> true
combos[k] += 1
combos
  #=> {["English", "English"]=>1, ["English", "Algebra"]=>0, ["English", "History"]=>0,
  #    ["Algebra", "Algebra"]=>0, ["Algebra", "History"]=>0, ["History", "History"]=>0}

# Second combination for the same student.
k = m.next
  #=> ["English", "Algebra"]
(k-keys).empty?
  #=> (["English", "Algebra"] - ["English", "Algebra"]).empty?
  #=> [].empty?
  #=> true
combos[k] += 1
combos
  #=> {["English", "English"]=>1, ["English", "Algebra"]=>1, ["English", "History"]=>0,
  #    ["Algebra", "Algebra"]=>0, ["Algebra", "History"]=>0, ["History", "History"]=>0}

其余计算类似。如果愿意,可以写

# Variant initialisation: map(&:uniq) collapses repeated names inside each
# combination, so ["English", "English"] becomes the key ["English"].
# NOTE(review): the output shown corresponds to group_size = 2.
combos = (arr.flat_map { |h| h.keys }.uniq - ["student"]).
    repeated_combination(group_size).map(&:uniq).product([0]).to_h
  #=> {["English"]=>0, ["English", "Algebra"]=>0, ["English", "History"]=>0,
  #    ["Algebra"]=>0, ["Algebra", "History"]=>0, ["History"]=>0}

注意,当 group_size = 1 时,combos 的初始化结果如下:

# Initial counter hash when each group holds a single course
# (NOTE(review): assumes group_size is 1 at this point).
combos = (arr.flat_map { |h| h.keys }.uniq - ["student"]).
    repeated_combination(group_size).to_a.product([0]).to_h
  #=> {["English"]=>0, ["Algebra"]=>0, ["History"]=>0}

group_size = 3

# With three courses per group there are ten combinations (repetition allowed).
combos = (arr.flat_map { |h| h.keys }.uniq - ["student"]).
    repeated_combination(group_size).to_a.product([0]).to_h
  #=> {["English", "English", "English"]=>0, ["English", "English", "Algebra"]=>0,
  #    ["English", "English", "History"]=>0, ["English", "Algebra", "Algebra"]=>0,
  #    ["English", "Algebra", "History"]=>0, ["English", "History", "History"]=>0,
  #    ["Algebra", "Algebra", "Algebra"]=>0, ["Algebra", "Algebra", "History"]=>0,
  #    ["Algebra", "History", "History"]=>0, ["History", "History", "History"]=>0}

替代数据结构

如果可以自行选择数据结构,使用一个散列可能比题目给出的散列数组更方便,而且没有必要列出学生没有选的课程(尤其是在开设的课程多达数百门时)。

# Alternative layout: student id => array of the course names that student
# selected. Courses a student did not select are simply omitted.
courses_by_student = {
  1 => %w| English Algebra|,
  2 => %w| History |,
  3 => %w| Algebra |,
  4 => %w| English History |
}
# Sample enrolment records: one hash per student. "student" holds the id and
# every other key is a course name mapped to whether that student chose it.
data = [
  {"student"=>"1", "English"=>true, "Algebra"=>true, "History"=>false},
  {"student"=>"2", "English"=>false, "Algebra"=>false, "History"=>true},
  {"student"=>"3", "English"=>false, "Algebra"=>true, "History"=>false},
  {"student"=>"4", "English"=>true, "Algebra"=>false, "History"=>true}
]

# Builds the course-by-course intersection table as an array of text lines.
#
# @param records [Array<Hash>] enrolment hashes keyed by course name
# @param courses [Array<String>] course names used for both rows and columns
# @param width [Integer] width of every column, including the row-label column
# @return [Array<String>] the header line followed by one line per course
def intersection_table_lines(records, courses, width = 10)
  # Blank corner cell so the header lines up with the row labels.
  header = (' ' * width) + courses.map { |course| course.center(width) }.join

  rows = courses.map do |row_course|
    cells = courses.map do |col_course|
      # A student contributes to a cell only when both flags are truthy.
      shared = records.count { |record| record[row_course] && record[col_course] }
      shared.to_s.center(width)
    end
    row_course.ljust(width) + cells.join
  end

  [header, *rows]
end

# Course names come from the first record; "student" is the id column, not a course.
classes = data.first.keys.reject { |class_name| class_name == "student" }
puts intersection_table_lines(data, classes)

这将输出

           English   Algebra   History  
English       2         1         1     
Algebra       1         2         0     
History       1         0         2