在汇源图中寻找最小加权匹配
Finding minimum weighted matching in sink source graph
我有三个节点列表。源、汇和管道。
有一个从源到管道再到汇的有向加权图。
源仅连接到管道,管道仅连接到汇。但是源并不直接连接到汇。管道是零和的,这意味着从源到每个管道的权重总和等于从该管道到汇的边的总和。
我想在此图中添加最少数量的、从汇点回到源点的边,使汇点和源点也成为零和。我知道这个问题是 NP 完全的,但我很想知道是否存在可以在实际中使用的、效果较好的多项式时间近似算法。
简单来说:
我有一个汇点列表和一个源点列表。每个汇点带有一个负数,每个源点带有一个正数,并且图中所有节点上的数字之和为零(此时图中还没有任何边)。我想向该图中添加最少数量的边,使得每个节点的出边/入边的权重总和等于该节点上的数值。
这是一个示例代码,用于测试一个图是否总结了另一个图:
from functools import reduce
from collections import Counter
# Weighted edges of the original three-layer graph, keyed by the non-pipe end:
#   source_edges[source][pipe] -> weight on the edge source -> pipe
#   sink_edges[sink][pipe]     -> weight on the edge pipe -> sink
source_edges = {
    "a0": {"p0": 1, "p2": 5},
    "a1": {"p0": 2},
    "a2": {"p1": 3},
}
sink_edges = {
    "b0": {"p0": 1},
    "b1": {"p0": 1, "p1": 1},
    "b2": {"p0": 1, "p1": 2, "p2": 5},
}
# Candidate summary graph that skips the pipes: res[source][sink] -> weight.
res = {
    "a0": {"b0": 1, "b2": 5},
    "a1": {"b1": 2},
    "a2": {"b2": 3},
}


def _row_totals(edges):
    """Return ``{outer key: sum of its inner weights}`` for a nested edge dict."""
    return {node: sum(targets.values()) for node, targets in edges.items()}


def _column_totals(edges):
    """Return ``{inner key: total weight over all outer keys}``.

    Accumulates with ``Counter.update`` rather than ``Counter + Counter``:
    addition discards totals that are zero or negative, and folding an empty
    mapping with ``reduce`` (no initial value) raises ``TypeError``.
    """
    totals = Counter()
    for targets in edges.values():
        totals.update(targets)
    return dict(totals)


# Total weight per sink, once from the original graph and once from `res`.
sink_degs1 = _row_totals(sink_edges)
sink_degs2 = _column_totals(res)
# Total weight per source, once from `res` and once from the original graph.
source_degs1 = _row_totals(res)
source_degs2 = _row_totals(source_edges)

# `res` is accepted when every source and every sink carries the same total
# weight in both graphs.
if sink_degs1 == sink_degs2 and source_degs1 == source_degs2:
    print('res summerizes the graph')
else:
    print('res does not summerize this graph')
以及此图的可视化:
这给出了少于 n-1 条边的次优解。
from numpy.random import randint
from collections import defaultdict
import copy
def create_sample(source_count=5000, sink_count=200):
    """Build a random instance whose source and sink weights sum to the same total.

    Sink weights are drawn with ``randint(source_count)`` and source weights
    with ``randint(sink_count)``; the sources are then topped up until both
    totals agree.

    Args:
        source_count: Number of sources to generate (named ``"a0"``, ``"a1"``, ...).
        sink_count: Number of sinks to generate (named ``"b0"``, ``"b1"``, ...).

    Returns:
        A ``(sources, sinks)`` tuple; each is a list of ``[name, weight]`` lists.

    Raises:
        ValueError: If either count is smaller than 1.
    """
    if source_count < 1 or sink_count < 1:
        raise ValueError("source_count and sink_count must both be at least 1")

    # Redraw until the sinks total at least as much as the sources, so the gap
    # can be closed purely by adding weight to sources.
    diff = -1
    while diff < 0:
        sinks = [["b" + str(i), randint(source_count)] for i in range(sink_count)]
        sources = [["a" + str(i), randint(sink_count)] for i in range(source_count)]
        diff = sum(weight for _, weight in sinks) - sum(weight for _, weight in sources)

    avg_refill = diff // source_count + 1
    # randint needs an integer bound; truncate explicitly instead of passing a float.
    refill_cap = int(avg_refill * 2.5)

    # `diff` is exactly the weight still missing on the source side, so it
    # reaching zero is the same as both totals matching.
    while diff:
        for source in sources:
            if not diff:
                break
            # Hand out random portions while plenty is left, then give the
            # whole remainder to a single source.
            rnd = randint(refill_cap) if diff > 10 * avg_refill else diff
            diff -= rnd
            source[1] += rnd
    return sources, sinks
def solve(sources, sinks):
    """Greedily connect sources to sinks, both taken in ascending weight order.

    The lightest unfinished source is paired with the lightest unfinished
    sink; the edge carries the smaller of the two remaining weights, and
    whichever side reaches zero is retired. Nodes with weight 0 still receive
    a zero-weight edge.

    The inputs are never modified, and any iterable of ``(name, weight)``
    pairs is accepted (lists, tuples, ``dict.items()``, ...).

    Args:
        sources: Iterable of ``(name, weight)`` pairs.
        sinks: Iterable of ``(name, weight)`` pairs.

    Returns:
        A list of ``(source_name, sink_name, weight)`` tuples.

    Raises:
        ValueError: If the sources run out before every sink is served, i.e.
            the sinks need more weight than the sources provide. Surplus
            source weight is not an error and is simply left unused.
    """
    src = sorted(((item[0], item[1]) for item in sources), key=lambda pair: pair[1])
    snk = sorted(((item[0], item[1]) for item in sinks), key=lambda pair: pair[1])

    edges = []
    # Sweep with an index instead of deleting list heads, which is O(n) each.
    src_pos = 0
    supply = src[0][1] if src else 0
    for sink_name, demand in snk:
        while True:
            if src_pos == len(src):
                raise ValueError(
                    "sources exhausted before all sinks were served; "
                    "total sink weight exceeds total source weight"
                )
            # On a tie min() returns its first argument, i.e. the sink's value.
            edges.append((src[src_pos][0], sink_name, min(demand, supply)))
            if supply > demand:
                # Sink fully served; the current source keeps the remainder.
                supply -= demand
                break
            # Current source is used up; move on to the next one.
            demand -= supply
            src_pos += 1
            supply = src[src_pos][1] if src_pos < len(src) else 0
            if not demand:
                # Weights were equal, so the sink is finished as well.
                break
    return edges
def test(sources, sinks):
    """Run ``solve`` on an instance and print a short report about its edges.

    The report states whether the per-node weight carried by the returned
    edges equals the input weights, followed by the total and maximum number
    of edges per source and per sink.

    Args:
        sources: Sequence of ``[name, weight]`` pairs, as passed to ``solve``.
        sinks: Sequence of ``[name, weight]`` pairs, as passed to ``solve``.
    """
    res = solve(sources, sinks)
    d_sources = defaultdict(int)  # number of edges leaving each source
    d_sinks = defaultdict(int)    # number of edges entering each sink
    w_sources = defaultdict(int)  # total edge weight leaving each source
    w_sinks = defaultdict(int)    # total edge weight entering each sink
    for a, b, c in res:
        d_sources[a] += 1
        d_sinks[b] += 1
        w_sources[a] += c
        w_sinks[b] += c
    print("source " + ("is" if dict(sources) == w_sources else "isn't") + " source")
    print("sink " + ("is" if dict(sinks) == w_sinks else "isn't") + " sink")
    # default=0 keeps the report working when solve() returned no edges;
    # max() of an empty sequence would otherwise raise ValueError.
    print(
        f"source:\n \tdeg_sum = {sum(d_sources.values())}\n\tmax_deg = {max(d_sources.values(), default=0)}"
    )
    print(
        f"sink:\n \tdeg_sum = {sum(d_sinks.values())}\n\tmax_deg = {max(d_sinks.values(), default=0)}"
    )
这是一次示例运行:
In [1]: %run solver.py
In [2]: test(*create_sample())
source is source
sink is sink
source:
deg_sum = 5196
max_deg = 3
sink:
deg_sum = 5196
max_deg = 56
下面是其工作原理的说明:
sources: 4,5,3,2
sinks: 2,7,2,2,1
sorted:
55555|44|44|33|32|2
77777|77|22|22|22|1
So we have 6 edges.
这是使用此算法排序和未排序解决方案之间的比较:
---------------------------------------------
| (1000,1000) |
---------------------------------------------
| criteria | sorted | random order |
| source degree sum | 1991 | 1999 |
| source max degree | 3 | 7 |
| sink degree sum | 1991 | 1999 |
| sink max degree | 3 | 8 |
---------------------------------------------
---------------------------------------------
| (200,5000) |
---------------------------------------------
| criteria | sorted | random order |
| source degree sum | 5198 | 5198 |
| source max degree | 2 | 3 |
| sink degree sum | 5198 | 5198 |
| sink max degree | 43 | 54 |
---------------------------------------------
我有三个节点列表。源、汇和管道。 有一个从源到管道再到汇的有向加权图。 源仅连接到管道,管道仅连接到汇。但是源并不直接连接到汇。管道是零和的,这意味着从源到每个管道的权重总和等于从该管道到汇的边的总和。
我想在此图中添加最少数量的、从汇点回到源点的边,使汇点和源点也成为零和。我知道这个问题是 NP 完全的,但我很想知道是否存在可以在实际中使用的、效果较好的多项式时间近似算法。
简单来说: 我有一个汇点列表和一个源点列表。每个汇点带有一个负数,每个源点带有一个正数,并且图中所有节点上的数字之和为零(此时图中还没有任何边)。我想向该图中添加最少数量的边,使得每个节点的出边/入边的权重总和等于该节点上的数值。
这是一个示例代码,用于测试一个图是否总结了另一个图:
from functools import reduce
from collections import Counter
# Weighted edges of the original three-layer graph, keyed by the non-pipe end:
#   source_edges[source][pipe] -> weight on the edge source -> pipe
#   sink_edges[sink][pipe]     -> weight on the edge pipe -> sink
source_edges = {
    "a0": {"p0": 1, "p2": 5},
    "a1": {"p0": 2},
    "a2": {"p1": 3},
}
sink_edges = {
    "b0": {"p0": 1},
    "b1": {"p0": 1, "p1": 1},
    "b2": {"p0": 1, "p1": 2, "p2": 5},
}
# Candidate summary graph that skips the pipes: res[source][sink] -> weight.
res = {
    "a0": {"b0": 1, "b2": 5},
    "a1": {"b1": 2},
    "a2": {"b2": 3},
}


def _row_totals(edges):
    """Return ``{outer key: sum of its inner weights}`` for a nested edge dict."""
    return {node: sum(targets.values()) for node, targets in edges.items()}


def _column_totals(edges):
    """Return ``{inner key: total weight over all outer keys}``.

    Accumulates with ``Counter.update`` rather than ``Counter + Counter``:
    addition discards totals that are zero or negative, and folding an empty
    mapping with ``reduce`` (no initial value) raises ``TypeError``.
    """
    totals = Counter()
    for targets in edges.values():
        totals.update(targets)
    return dict(totals)


# Total weight per sink, once from the original graph and once from `res`.
sink_degs1 = _row_totals(sink_edges)
sink_degs2 = _column_totals(res)
# Total weight per source, once from `res` and once from the original graph.
source_degs1 = _row_totals(res)
source_degs2 = _row_totals(source_edges)

# `res` is accepted when every source and every sink carries the same total
# weight in both graphs.
if sink_degs1 == sink_degs2 and source_degs1 == source_degs2:
    print('res summerizes the graph')
else:
    print('res does not summerize this graph')
以及此图的可视化:
这给出了少于 n-1 条边的次优解。
from numpy.random import randint
from collections import defaultdict
import copy
def create_sample(source_count=5000, sink_count=200):
    """Build a random instance whose source and sink weights sum to the same total.

    Sink weights are drawn with ``randint(source_count)`` and source weights
    with ``randint(sink_count)``; the sources are then topped up until both
    totals agree.

    Args:
        source_count: Number of sources to generate (named ``"a0"``, ``"a1"``, ...).
        sink_count: Number of sinks to generate (named ``"b0"``, ``"b1"``, ...).

    Returns:
        A ``(sources, sinks)`` tuple; each is a list of ``[name, weight]`` lists.

    Raises:
        ValueError: If either count is smaller than 1.
    """
    if source_count < 1 or sink_count < 1:
        raise ValueError("source_count and sink_count must both be at least 1")

    # Redraw until the sinks total at least as much as the sources, so the gap
    # can be closed purely by adding weight to sources.
    diff = -1
    while diff < 0:
        sinks = [["b" + str(i), randint(source_count)] for i in range(sink_count)]
        sources = [["a" + str(i), randint(sink_count)] for i in range(source_count)]
        diff = sum(weight for _, weight in sinks) - sum(weight for _, weight in sources)

    avg_refill = diff // source_count + 1
    # randint needs an integer bound; truncate explicitly instead of passing a float.
    refill_cap = int(avg_refill * 2.5)

    # `diff` is exactly the weight still missing on the source side, so it
    # reaching zero is the same as both totals matching.
    while diff:
        for source in sources:
            if not diff:
                break
            # Hand out random portions while plenty is left, then give the
            # whole remainder to a single source.
            rnd = randint(refill_cap) if diff > 10 * avg_refill else diff
            diff -= rnd
            source[1] += rnd
    return sources, sinks
def solve(sources, sinks):
    """Greedily connect sources to sinks, both taken in ascending weight order.

    The lightest unfinished source is paired with the lightest unfinished
    sink; the edge carries the smaller of the two remaining weights, and
    whichever side reaches zero is retired. Nodes with weight 0 still receive
    a zero-weight edge.

    The inputs are never modified, and any iterable of ``(name, weight)``
    pairs is accepted (lists, tuples, ``dict.items()``, ...).

    Args:
        sources: Iterable of ``(name, weight)`` pairs.
        sinks: Iterable of ``(name, weight)`` pairs.

    Returns:
        A list of ``(source_name, sink_name, weight)`` tuples.

    Raises:
        ValueError: If the sources run out before every sink is served, i.e.
            the sinks need more weight than the sources provide. Surplus
            source weight is not an error and is simply left unused.
    """
    src = sorted(((item[0], item[1]) for item in sources), key=lambda pair: pair[1])
    snk = sorted(((item[0], item[1]) for item in sinks), key=lambda pair: pair[1])

    edges = []
    # Sweep with an index instead of deleting list heads, which is O(n) each.
    src_pos = 0
    supply = src[0][1] if src else 0
    for sink_name, demand in snk:
        while True:
            if src_pos == len(src):
                raise ValueError(
                    "sources exhausted before all sinks were served; "
                    "total sink weight exceeds total source weight"
                )
            # On a tie min() returns its first argument, i.e. the sink's value.
            edges.append((src[src_pos][0], sink_name, min(demand, supply)))
            if supply > demand:
                # Sink fully served; the current source keeps the remainder.
                supply -= demand
                break
            # Current source is used up; move on to the next one.
            demand -= supply
            src_pos += 1
            supply = src[src_pos][1] if src_pos < len(src) else 0
            if not demand:
                # Weights were equal, so the sink is finished as well.
                break
    return edges
def test(sources, sinks):
    """Run ``solve`` on an instance and print a short report about its edges.

    The report states whether the per-node weight carried by the returned
    edges equals the input weights, followed by the total and maximum number
    of edges per source and per sink.

    Args:
        sources: Sequence of ``[name, weight]`` pairs, as passed to ``solve``.
        sinks: Sequence of ``[name, weight]`` pairs, as passed to ``solve``.
    """
    res = solve(sources, sinks)
    d_sources = defaultdict(int)  # number of edges leaving each source
    d_sinks = defaultdict(int)    # number of edges entering each sink
    w_sources = defaultdict(int)  # total edge weight leaving each source
    w_sinks = defaultdict(int)    # total edge weight entering each sink
    for a, b, c in res:
        d_sources[a] += 1
        d_sinks[b] += 1
        w_sources[a] += c
        w_sinks[b] += c
    print("source " + ("is" if dict(sources) == w_sources else "isn't") + " source")
    print("sink " + ("is" if dict(sinks) == w_sinks else "isn't") + " sink")
    # default=0 keeps the report working when solve() returned no edges;
    # max() of an empty sequence would otherwise raise ValueError.
    print(
        f"source:\n \tdeg_sum = {sum(d_sources.values())}\n\tmax_deg = {max(d_sources.values(), default=0)}"
    )
    print(
        f"sink:\n \tdeg_sum = {sum(d_sinks.values())}\n\tmax_deg = {max(d_sinks.values(), default=0)}"
    )
这是一次示例运行:
In [1]: %run solver.py
In [2]: test(*create_sample())
source is source
sink is sink
source:
deg_sum = 5196
max_deg = 3
sink:
deg_sum = 5196
max_deg = 56
下面是其工作原理的说明:
sources: 4,5,3,2
sinks: 2,7,2,2,1
sorted:
55555|44|44|33|32|2
77777|77|22|22|22|1
So we have 6 edges.
这是使用此算法排序和未排序解决方案之间的比较:
---------------------------------------------
| (1000,1000) |
---------------------------------------------
| criteria | sorted | random order |
| source degree sum | 1991 | 1999 |
| source max degree | 3 | 7 |
| sink degree sum | 1991 | 1999 |
| sink max degree | 3 | 8 |
---------------------------------------------
---------------------------------------------
| (200,5000) |
---------------------------------------------
| criteria | sorted | random order |
| source degree sum | 5198 | 5198 |
| source max degree | 2 | 3 |
| sink degree sum | 5198 | 5198 |
| sink max degree | 43 | 54 |
---------------------------------------------