使用邻接表实现图形表示时使用哪种数据结构
Which data structure to use when implementing graph representation using adjacency list
我有一个非常大的图,大约有 1,000,000 个节点和许多边。这就是我想知道在实现邻接表时哪种数据结构最合适。这是我跟踪的对象
- 边缘列表
- 节点到节点连接列表
我正在用 python 编码,所以我使用了一组(因为根据 this it has a o(1) average insertion time) for edge list and a dictionary to node to node connection list(by making it completely hashable according to How to make an object properly hashable?)。这是我的代码
class node:
def __init__(self, name = ""):
self.__name = name
def getName(self):
return self.__name
def __str__(self):
return self.__name
def __hash__(self):
return hash(self.__name)
def __lt__(self, other):
if(type(self) != type(other)):
return NotImplemented
return self.__name.__lt__(other.__name)
def __eq__(self, other):
if(type(self)) != type(other):
return NotImplemented
return self.__name == other.__name
class Edge:
def __init__(self, name = "", node1 = None, node2 = None, weight = 0):
self.__name = name
self.__firstNode = node1
self.__secondNode = node2
self.__weight = weight
def getName(self):
return self.__name
def getFirstNode(self):
return self.__firstNode
def getSecondNode(self):
return self.__secondNode
def getWeight(self):
return self.__weight
def __lt__(self, other):
if(type(self) != type(other)):
return NotImplemented
return self.__name.__lt__(other.__name) and self.__firstNode.__lt__(other.__firstNode) and self.__secondNode.__lt__(other.__secondNode) and self.__weight.__lt__(other.__weight)
def __eq__(self, other):
if(type(self) != type(other)):
return NotImplemented
return self.__name == other.__name and self.__firstNode == other.__firstNode and self.__secondNode == other.__secondNode and self.__weight == other.__weight
def __str__(self):
return self.__name + " " + str(self.__firstNode) + " " + str(self.__secondNode) + " " + str(self.__weight)
def __hash__(self):
return hash(hash(self.__name) + hash(self.__firstNode) + hash(self.__secondNode) + hash(self.__weight))
class graph:
def __init__(self):
self.__nodeToNode = {}
self.__edgeList = set()
def addEdge(self, edge):
if(type(edge) != type(Edge())):
return False
self.__edgeList.add(edge)
if(not edge.getFirstNode() in self.__nodeToNode):
self.__nodeToNode[edge.getFirstNode()] = set()
self.__nodeToNode[edge.getFirstNode()].add(edge.getSecondNode())
if(not edge.getSecondNode() in self.__nodeToNode):
self.__nodeToNode[edge.getSecondNode()] = set()
self.__nodeToNode[edge.getSecondNode()].add(edge.getSecondNode())
return True
def getNodes(self):
return dict(self.__nodeToNode)
def getEdges(self):
return set(self.__edgeList)
import string
import random
import time
grp = graph()
nodes = [None] * 20000
for i in range(20000):
st = ''.join(random.SystemRandom().choice(string.ascii_letters) for i in range(10))
node1 = node(st)
nodes[i] = node1
current = time.time()
for i in range(3000000):
rdm = random.randint(0, 199)
rdm2 = random.randint(0, 199)
st = ''.join(random.SystemRandom().choice(string.ascii_letters) for i in range(10))
eg = Edge(st, nodes[rdm], nodes[rdm2])
grp.addEdge(eg)
last = time.time()
print((last - current))
nodes = grp.getNodes()
edges = grp.getEdges()
但是这段代码运行的很慢我可以让它更快吗?如果是,使用什么数据结构?
让我向您介绍一种创建邻接表的方法:
假设您有这样的输入:
4 4
1 2
3 2
4 3
1 4
第一行包含 2 个数字 V
和 E
,接下来的 E
行定义了两个顶点之间的边。
您可以创建一个 .txt 文件并读取输入或通过 sys.stdin.read()
:
直接输入
input = sys.stdin.read()
data = list(map(int, input.split()))
n, m = data[0:2]
data = data[2:]
edges = list(zip(data[0:(2 * m):2], data[1:(2 * m):2]))
x, y = data[2 * m:]
adj = [[] for _ in range(n)]
x, y = x - 1, y - 1
for (a, b) in edges:
adj[a - 1].append(b - 1)
adj[b - 1].append(a - 1)
让我们输出邻接表adj
:
>>> print(adj)
[[1, 3], [0, 2], [1, 3], [2, 0]]
adj[0]
有两个 adj 节点:1 和 3。这意味着节点 1 有两个 adj 节点:2 和 4。
现在,如果你想要一个有向的加权图,你只需要像这样修改输入:
4 4
1 2 3 # edge(1, 2) has the weight of 3
3 2 1
4 3 1
1 4 2
input = sys.stdin.read()
data = list(map(int, input.split()))
n, m = data[0:2]
data = data[2:]
edges = list(zip(zip(data[0:(3 * m):3], data[1:(3 * m):3]), data[2:(3 * m):3]))
data = data[3 * m:]
adj = [[] for _ in range(n)]
cost = [[] for _ in range(n)]
for ((a, b), w) in edges:
adj[a - 1].append(b - 1)
cost[a - 1].append(w)
你将权重存储在cost
中,例如cost[0][1]
= 3,cost[0][3]
= 2。
希望对您有所帮助!
我有一个非常大的图,大约有 1,000,000 个节点和许多边。这就是我想知道在实现邻接表时哪种数据结构最合适。这是我跟踪的对象
- 边缘列表
- 节点到节点连接列表
我正在用 python 编码,所以我使用了一组(因为根据 this it has a o(1) average insertion time) for edge list and a dictionary to node to node connection list(by making it completely hashable according to How to make an object properly hashable?)。这是我的代码
class node:
def __init__(self, name = ""):
self.__name = name
def getName(self):
return self.__name
def __str__(self):
return self.__name
def __hash__(self):
return hash(self.__name)
def __lt__(self, other):
if(type(self) != type(other)):
return NotImplemented
return self.__name.__lt__(other.__name)
def __eq__(self, other):
if(type(self)) != type(other):
return NotImplemented
return self.__name == other.__name
class Edge:
def __init__(self, name = "", node1 = None, node2 = None, weight = 0):
self.__name = name
self.__firstNode = node1
self.__secondNode = node2
self.__weight = weight
def getName(self):
return self.__name
def getFirstNode(self):
return self.__firstNode
def getSecondNode(self):
return self.__secondNode
def getWeight(self):
return self.__weight
def __lt__(self, other):
if(type(self) != type(other)):
return NotImplemented
return self.__name.__lt__(other.__name) and self.__firstNode.__lt__(other.__firstNode) and self.__secondNode.__lt__(other.__secondNode) and self.__weight.__lt__(other.__weight)
def __eq__(self, other):
if(type(self) != type(other)):
return NotImplemented
return self.__name == other.__name and self.__firstNode == other.__firstNode and self.__secondNode == other.__secondNode and self.__weight == other.__weight
def __str__(self):
return self.__name + " " + str(self.__firstNode) + " " + str(self.__secondNode) + " " + str(self.__weight)
def __hash__(self):
return hash(hash(self.__name) + hash(self.__firstNode) + hash(self.__secondNode) + hash(self.__weight))
class graph:
def __init__(self):
self.__nodeToNode = {}
self.__edgeList = set()
def addEdge(self, edge):
if(type(edge) != type(Edge())):
return False
self.__edgeList.add(edge)
if(not edge.getFirstNode() in self.__nodeToNode):
self.__nodeToNode[edge.getFirstNode()] = set()
self.__nodeToNode[edge.getFirstNode()].add(edge.getSecondNode())
if(not edge.getSecondNode() in self.__nodeToNode):
self.__nodeToNode[edge.getSecondNode()] = set()
self.__nodeToNode[edge.getSecondNode()].add(edge.getSecondNode())
return True
def getNodes(self):
return dict(self.__nodeToNode)
def getEdges(self):
return set(self.__edgeList)
import string
import random
import time
grp = graph()
nodes = [None] * 20000
for i in range(20000):
st = ''.join(random.SystemRandom().choice(string.ascii_letters) for i in range(10))
node1 = node(st)
nodes[i] = node1
current = time.time()
for i in range(3000000):
rdm = random.randint(0, 199)
rdm2 = random.randint(0, 199)
st = ''.join(random.SystemRandom().choice(string.ascii_letters) for i in range(10))
eg = Edge(st, nodes[rdm], nodes[rdm2])
grp.addEdge(eg)
last = time.time()
print((last - current))
nodes = grp.getNodes()
edges = grp.getEdges()
但是这段代码运行的很慢我可以让它更快吗?如果是,使用什么数据结构?
让我向您介绍一种创建邻接表的方法:
假设您有这样的输入:
4 4
1 2
3 2
4 3
1 4
第一行包含 2 个数字 V
和 E
,接下来的 E
行定义了两个顶点之间的边。
您可以创建一个 .txt 文件并读取输入或通过 sys.stdin.read()
:
input = sys.stdin.read()
data = list(map(int, input.split()))
n, m = data[0:2]
data = data[2:]
edges = list(zip(data[0:(2 * m):2], data[1:(2 * m):2]))
x, y = data[2 * m:]
adj = [[] for _ in range(n)]
x, y = x - 1, y - 1
for (a, b) in edges:
adj[a - 1].append(b - 1)
adj[b - 1].append(a - 1)
让我们输出邻接表adj
:
>>> print(adj)
[[1, 3], [0, 2], [1, 3], [2, 0]]
adj[0]
有两个 adj 节点:1 和 3。这意味着节点 1 有两个 adj 节点:2 和 4。
现在,如果你想要一个有向的加权图,你只需要像这样修改输入:
4 4
1 2 3 # edge(1, 2) has the weight of 3
3 2 1
4 3 1
1 4 2
input = sys.stdin.read()
data = list(map(int, input.split()))
n, m = data[0:2]
data = data[2:]
edges = list(zip(zip(data[0:(3 * m):3], data[1:(3 * m):3]), data[2:(3 * m):3]))
data = data[3 * m:]
adj = [[] for _ in range(n)]
cost = [[] for _ in range(n)]
for ((a, b), w) in edges:
adj[a - 1].append(b - 1)
cost[a - 1].append(w)
你将权重存储在cost
中,例如cost[0][1]
= 3,cost[0][3]
= 2。
希望对您有所帮助!