按照添加边的顺序获取 networkx 中的连通分量
Getting the connected components in networkx in the order in which edges were added
doc2 中的每个句子都表示为一个图。现在,边依次取自各自的 subject_list、object_list 和 verb_list,并按 s-o-v(主语-宾语-动词)的形式添加。
我想输出各个连通分量,但得到的句子顺序与边的添加顺序不一致。
# This Python file uses the following encoding: utf-8
%matplotlib notebook
import codecs
import itertools
import re
import networkx as nx
import matplotlib.pyplot as pl
from matplotlib.font_manager import FontProperties
# Module-level state shared by the later stages of the script.
prop = FontProperties()
graph = nx.Graph()
labels = {}
each_one = []
list_of_sentences = []
subject_list = []
object_list = []
verb_list = []
newDict = {}  # word -> tag, filled from doc2_tag.txt below

# Read the document and split it into sentences on '.', '?' or '!',
# optionally followed by closing quotes/brackets and surrounding spaces.
with codecs.open('doc2.txt', encoding='utf-8') as f:
    text = f.read()
sentences = re.split(r' *[\.\?!][\'"\)\]]* *', text)
list_of_sentences.extend(sentences)

# Strip the listed words and all commas from every sentence.
new_list_of_sentences = [
    d.replace(u'वतीन', '').replace(u'आनी', '').replace(u'हिणें', '').replace(',', '')
    for d in list_of_sentences
]

# Load the "word/tag" pairs, one per line.  The file is decoded once while
# reading (instead of decoding every key by hand) and the context manager
# closes it even if a malformed line raises.
with codecs.open('doc2_tag.txt', encoding='utf-8') as f:
    for line in f:
        line = line.strip()
        if not line:
            continue  # a blank line carries no word/tag pair
        k, v = line.split('/')
        newDict[k.strip()] = v.strip()
# Tags that route a word into the subject, object or verb phrase.
subject_tag = 'N-NNP'
verb_tag = 'V-VM-VF'
object_tags = ('N-NN', 'JJ', 'QT-QTC', 'RB', 'N-NST', 'PR-PRP', 'PSP', 'CC-CCD')

# For every sentence build three space-terminated phrases (a: subject,
# b: object, c: verb) and append them to the parallel result lists.
for sentence in new_list_of_sentences:
    a = b = c = ""
    for word in sentence.split():
        tag = newDict[word]  # raises KeyError for a word without a tag
        chunk = word + " "
        if tag == subject_tag:
            a += chunk
        elif tag in object_tags:
            b += chunk
        elif tag == verb_tag:
            c += chunk
    subject_list.append(a)
    object_list.append(b)
    verb_list.append(c)
# Maps inflected forms of a subject to the single form used as node name.
konkani_dict = {u'सनरायझर्साक': u'सनरायझर्स', u'सनरायझर्सान': u'सनरायझर्स', u'सनरायझर्साच्या': u'सनरायझर्स'}

# Replace whole tokens only, and apply every replacement to the same result.
# Restarting from the unmodified subject for each token would keep only the
# last replacement, and substring replacement could alter unrelated words.
# Splitting and joining on a single space keeps the original spacing,
# including the trailing space of each phrase.
for idx, sub in enumerate(subject_list):
    subject_list[idx] = " ".join(
        konkani_dict.get(token, token) for token in sub.split(" ")
    )
# Add one node per non-empty subject, object and verb phrase.  Emptiness is
# tested by truthiness: comparing with `is not ""` checks object identity,
# which is not a reliable way to detect an empty string.
for s in subject_list:
    if s:
        graph.add_node(s)
        labels[s] = s
for o in object_list:
    if o:
        graph.add_node(o)
        labels[o] = o  # label the object node itself, not a leftover variable
for v in verb_list:
    if v:
        graph.add_node(v)
        labels[v] = v

# Connect subject -> object and object -> verb for every sentence, skipping
# a link when either of its endpoints is empty.
for (s, o, v) in zip(subject_list, object_list, verb_list):
    if s and o:
        graph.add_edge(s, o)
    if o and v:
        graph.add_edge(o, v)
# Compute the spring layout once and hand it to the drawing call; without
# `pos=` the computed positions would be ignored.
pos = nx.spring_layout(graph, k=0.15, iterations=20)
nx.draw(
    graph,
    pos=pos,
    with_labels=True,
    font_family="Nirmala UI",
    node_size=40,
    font_size=9,
    node_color="darkblue",
)
pl.show()
# Build one string per connected component from the edges whose two
# endpoints both belong to that component, then print the strings with
# their space-separated tokens reversed.
# NOTE(review): the indentation of this section is not visible here, so
# which statements belong to which loop body should be confirmed against
# the original script.
sentences=[]
for component in nx.connected_components(graph):
# Keep only the edges lying entirely inside the current component.
g=(
filter(
lambda x: x[0] in component and x[1] in component,
graph.edges
)
)
# This empty list is overwritten by the joined string on the next line.
p=[]
# Concatenate both endpoint labels of every kept edge, in the order the
# edges are iterated, with no separator.
p= ''.join(item for tuple_ in g for item in tuple_)
print p
sentences.append(p)
print sentences
# This list is later rebound to the joined string.
output=[]
for i in sentences:
# Split on single spaces, reverse the token order and join again.
inputWords = i.split(" ")
inputWords=inputWords[-1::-1]
output = ' '.join(inputWords)
print output
预期输出是这样的:
शिखर धवनान सगळ्यांत चड ४५ धांवड्यो केल्यो ,
सनरायझर्स दीपर हुडा जैतांत पर्जळ्ळो
这是我得到的输出:sentences displayed
networkx 不会存储节点/边的创建顺序,因为这类信息几乎没有用处。如果你需要这些信息,应当手动添加。以你的程序为例(针对边):
# Store a running counter on every edge as its 'index' attribute so the
# insertion order can be recovered later.
edge_index = 0
for (s, o, v) in zip(subject_list, object_list, verb_list):
    # Test emptiness by truthiness; `is not ""` compares object identity.
    if s and o:
        graph.add_edge(s, o, index=edge_index)
        edge_index += 1
    if o and v:
        graph.add_edge(o, v, index=edge_index)
        edge_index += 1
然后你应该打印排序的边:
# NOTE(review): `g` is presumably a graph (for example the subgraph of one
# connected component) whose edges carry the 'index' attribute -- confirm.
sorted(                         # Sorted list of edges
    g.edges.data('index'),      # (u, v, index) triples
    key=lambda edge: edge[2],   # Sorted by 'index' data
)
doc2 中的每个句子都表示为一个图。现在,边依次取自各自的 subject_list、object_list 和 verb_list,并按 s-o-v(主语-宾语-动词)的形式添加。我想输出各个连通分量,但得到的句子顺序与边的添加顺序不一致。
# This Python file uses the following encoding: utf-8
%matplotlib notebook
import codecs
import itertools
import re
import networkx as nx
import matplotlib.pyplot as pl
from matplotlib.font_manager import FontProperties
# Module-level state shared by the later stages of the script.
prop = FontProperties()
graph = nx.Graph()
labels = {}
each_one = []
list_of_sentences = []
subject_list = []
object_list = []
verb_list = []
newDict = {}  # word -> tag, filled from doc2_tag.txt below

# Read the document and split it into sentences on '.', '?' or '!',
# optionally followed by closing quotes/brackets and surrounding spaces.
with codecs.open('doc2.txt', encoding='utf-8') as f:
    text = f.read()
sentences = re.split(r' *[\.\?!][\'"\)\]]* *', text)
list_of_sentences.extend(sentences)

# Strip the listed words and all commas from every sentence.
new_list_of_sentences = [
    d.replace(u'वतीन', '').replace(u'आनी', '').replace(u'हिणें', '').replace(',', '')
    for d in list_of_sentences
]

# Load the "word/tag" pairs, one per line.  The file is decoded once while
# reading (instead of decoding every key by hand) and the context manager
# closes it even if a malformed line raises.
with codecs.open('doc2_tag.txt', encoding='utf-8') as f:
    for line in f:
        line = line.strip()
        if not line:
            continue  # a blank line carries no word/tag pair
        k, v = line.split('/')
        newDict[k.strip()] = v.strip()
# Tags that route a word into the subject, object or verb phrase.
subject_tag = 'N-NNP'
verb_tag = 'V-VM-VF'
object_tags = ('N-NN', 'JJ', 'QT-QTC', 'RB', 'N-NST', 'PR-PRP', 'PSP', 'CC-CCD')

# For every sentence build three space-terminated phrases (a: subject,
# b: object, c: verb) and append them to the parallel result lists.
for sentence in new_list_of_sentences:
    a = b = c = ""
    for word in sentence.split():
        tag = newDict[word]  # raises KeyError for a word without a tag
        chunk = word + " "
        if tag == subject_tag:
            a += chunk
        elif tag in object_tags:
            b += chunk
        elif tag == verb_tag:
            c += chunk
    subject_list.append(a)
    object_list.append(b)
    verb_list.append(c)
# Maps inflected forms of a subject to the single form used as node name.
konkani_dict = {u'सनरायझर्साक': u'सनरायझर्स', u'सनरायझर्सान': u'सनरायझर्स', u'सनरायझर्साच्या': u'सनरायझर्स'}

# Replace whole tokens only, and apply every replacement to the same result.
# Restarting from the unmodified subject for each token would keep only the
# last replacement, and substring replacement could alter unrelated words.
# Splitting and joining on a single space keeps the original spacing,
# including the trailing space of each phrase.
for idx, sub in enumerate(subject_list):
    subject_list[idx] = " ".join(
        konkani_dict.get(token, token) for token in sub.split(" ")
    )
# Add one node per non-empty subject, object and verb phrase.  Emptiness is
# tested by truthiness: comparing with `is not ""` checks object identity,
# which is not a reliable way to detect an empty string.
for s in subject_list:
    if s:
        graph.add_node(s)
        labels[s] = s
for o in object_list:
    if o:
        graph.add_node(o)
        labels[o] = o  # label the object node itself, not a leftover variable
for v in verb_list:
    if v:
        graph.add_node(v)
        labels[v] = v

# Connect subject -> object and object -> verb for every sentence, skipping
# a link when either of its endpoints is empty.
for (s, o, v) in zip(subject_list, object_list, verb_list):
    if s and o:
        graph.add_edge(s, o)
    if o and v:
        graph.add_edge(o, v)
# Compute the spring layout once and hand it to the drawing call; without
# `pos=` the computed positions would be ignored.
pos = nx.spring_layout(graph, k=0.15, iterations=20)
nx.draw(
    graph,
    pos=pos,
    with_labels=True,
    font_family="Nirmala UI",
    node_size=40,
    font_size=9,
    node_color="darkblue",
)
pl.show()
# Build one string per connected component from the edges whose two
# endpoints both belong to that component, then print the strings with
# their space-separated tokens reversed.
# NOTE(review): the indentation of this section is not visible here, so
# which statements belong to which loop body should be confirmed against
# the original script.
sentences=[]
for component in nx.connected_components(graph):
# Keep only the edges lying entirely inside the current component.
g=(
filter(
lambda x: x[0] in component and x[1] in component,
graph.edges
)
)
# This empty list is overwritten by the joined string on the next line.
p=[]
# Concatenate both endpoint labels of every kept edge, in the order the
# edges are iterated, with no separator.
p= ''.join(item for tuple_ in g for item in tuple_)
print p
sentences.append(p)
print sentences
# This list is later rebound to the joined string.
output=[]
for i in sentences:
# Split on single spaces, reverse the token order and join again.
inputWords = i.split(" ")
inputWords=inputWords[-1::-1]
output = ' '.join(inputWords)
print output
预期输出是这样的:
शिखर धवनान सगळ्यांत चड ४५ धांवड्यो केल्यो ,
सनरायझर्स दीपर हुडा जैतांत पर्जळ्ळो
这是我得到的输出:sentences displayed
networkx 不会存储节点/边的创建顺序,因为这类信息几乎没有用处。如果你需要这些信息,应当手动添加。以你的程序为例(针对边):
# Store a running counter on every edge as its 'index' attribute so the
# insertion order can be recovered later.
edge_index = 0
for (s, o, v) in zip(subject_list, object_list, verb_list):
    # Test emptiness by truthiness; `is not ""` compares object identity.
    if s and o:
        graph.add_edge(s, o, index=edge_index)
        edge_index += 1
    if o and v:
        graph.add_edge(o, v, index=edge_index)
        edge_index += 1
然后你应该打印排序的边:
# NOTE(review): `g` is presumably a graph (for example the subgraph of one
# connected component) whose edges carry the 'index' attribute -- confirm.
sorted(                         # Sorted list of edges
    g.edges.data('index'),      # (u, v, index) triples
    key=lambda edge: edge[2],   # Sorted by 'index' data
)