Stellargraph 和 Node2Vec 嵌入
Stellargraph and Node2Vec embedding
我正在尝试按照文档中的教程(tutorial),使用 stellargraph 进行链接预测(link prediction)。
当我到达这一部分时:
def node2vec_embedding(graph, name):
    """Train a Node2Vec embedding for ``graph`` and return a lookup function.

    Biased random walks are started from every node of ``graph`` and passed
    to a Word2Vec model as its training sentences. The walk and training
    settings (``num_walks``, ``walk_length``, ``p``, ``q``, ``dimensions``,
    ``window_size``, ``workers``, ``num_iter``) are read from the enclosing
    scope rather than passed in.

    Args:
        graph: Graph handed to ``BiasedRandomWalk``; its ``nodes()`` are the
            starting points of the walks.
        name: Label used only in the printed progress message.

    Returns:
        A function that maps a node ``u`` to its vector in the trained
        model (``wv[u]``).
    """
    walker = BiasedRandomWalk(graph)
    walks = walker.run(
        graph.nodes(), n=num_walks, length=walk_length, p=p, q=q
    )
    print(f"Number of random walks for '{name}': {len(walks)}")

    # sg=1 selects skip-gram training; min_count=0 keeps every node that
    # appears in a walk in the vocabulary.
    word2vec_options = dict(
        size=dimensions,
        window=window_size,
        min_count=0,
        sg=1,
        workers=workers,
        iter=num_iter,
    )
    trained = Word2Vec(walks, **word2vec_options)

    def get_embedding(u):
        return trained.wv[u]

    return get_embedding


embedding_train = node2vec_embedding(graph_train, "Train Graph")
我得到一个 UFuncTypeError:
UFuncTypeError: ufunc 'add' did not contain a loop with signature matching types (dtype('<U21'), dtype('<U21')) -> dtype('<U21')
我不知道是什么原因造成的...
我考虑过这样做:
from node2vec import Node2Vec
graph_train = StellarGraph.to_networkx(graph_train)
node2vec = Node2Vec(graph_train)
model = node2vec.fit()
但我担心,如果把图转换为 networkX 格式,我会丢失边特征(edge features)...
任何帮助将不胜感激:)
编辑:
我在另一个更简单、没有边特征的数据集上尝试了教程中的方法,得到了同样的错误。
编辑 2:
为了以防万一,我在这里附上完整的错误回溯(traceback):
UFuncTypeError: ufunc 'add' did not contain a loop with signature matching types (dtype('<U21'), dtype('<U21')) -> dtype('<U21')
---------------------------------------------------------------------------
UFuncTypeError Traceback (most recent call last)
<ipython-input-188-673a72292ea4> in <module>
----> 1 embedding_train = node2vec_embedding(graph_train, "Train Graph")
<ipython-input-187-aecb7f480f86> in node2vec_embedding(graph, name)
15 sg=1,
16 workers=workers,
---> 17 iter=num_iter,
18 )
19
D:\environnements\stel2\lib\site-packages\gensim\models\word2vec.py in __init__(self, sentences, corpus_file, size, alpha, window, min_count, max_vocab_size, sample, seed, workers, min_alpha, sg, hs, negative, ns_exponent, cbow_mean, hashfxn, iter, null_word, trim_rule, sorted_vocab, batch_words, compute_loss, callbacks, max_final_vocab)
598 sentences=sentences, corpus_file=corpus_file, workers=workers, vector_size=size, epochs=iter,
599 callbacks=callbacks, batch_words=batch_words, trim_rule=trim_rule, sg=sg, alpha=alpha, window=window,
--> 600 seed=seed, hs=hs, negative=negative, cbow_mean=cbow_mean, min_alpha=min_alpha, compute_loss=compute_loss)
601
602 def _do_train_epoch(self, corpus_file, thread_id, offset, cython_vocab, thread_private_mem, cur_epoch,
D:\environnements\stel2\lib\site-packages\gensim\models\base_any2vec.py in __init__(self, sentences, corpus_file, workers, vector_size, epochs, callbacks, batch_words, trim_rule, sg, alpha, window, seed, hs, negative, ns_exponent, cbow_mean, min_alpha, compute_loss, **kwargs)
743 raise TypeError("You can't pass a generator as the sentences argument. Try a sequence.")
744
--> 745 self.build_vocab(sentences=sentences, corpus_file=corpus_file, trim_rule=trim_rule)
746 self.train(
747 sentences=sentences, corpus_file=corpus_file, total_examples=self.corpus_count,
D:\environnements\stel2\lib\site-packages\gensim\models\base_any2vec.py in build_vocab(self, sentences, corpus_file, update, progress_per, keep_raw_vocab, trim_rule, **kwargs)
927 trim_rule=trim_rule, **kwargs)
928 report_values['memory'] = self.estimate_memory(vocab_size=report_values['num_retained_words'])
--> 929 self.trainables.prepare_weights(self.hs, self.negative, self.wv, update=update, vocabulary=self.vocabulary)
930
931 def build_vocab_from_freq(self, word_freq, keep_raw_vocab=False, corpus_count=None, trim_rule=None, update=False):
D:\environnements\stel2\lib\site-packages\gensim\models\word2vec.py in prepare_weights(self, hs, negative, wv, update, vocabulary)
1685 # set initial input/projection and hidden weights
1686 if not update:
-> 1687 self.reset_weights(hs, negative, wv)
1688 else:
1689 self.update_weights(hs, negative, wv)
D:\environnements\stel2\lib\site-packages\gensim\models\word2vec.py in reset_weights(self, hs, negative, wv)
1702 for i in range(len(wv.vocab)):
1703 # construct deterministic seed from word AND seed argument
-> 1704 wv.vectors[i] = self.seeded_vector(wv.index2word[i] + str(self.seed), wv.vector_size)
1705 if hs:
1706 self.syn1 = zeros((len(wv.vocab), self.layer1_size), dtype=REAL)
UFuncTypeError: ufunc 'add' did not contain a loop with signature matching types (dtype('<U21'), dtype('<U21')) -> dtype('<U21')
```
我终于找到了解决办法。文档中对此说得并不清楚(至少对我而言),但节点标签必须是字符串,而不能是整数。
所以,在我的数据框上简单地调用一下 .astype(str) 就解决了这个问题。
我希望这对以后的其他人有所帮助!
我正在尝试按照文档中的教程(tutorial),使用 stellargraph 进行链接预测(link prediction)。
当我到达这一部分时:
def node2vec_embedding(graph, name):
    """Train a Node2Vec embedding for ``graph`` and return a lookup function.

    Biased random walks are started from every node of ``graph`` and passed
    to a Word2Vec model as its training sentences. The walk and training
    settings (``num_walks``, ``walk_length``, ``p``, ``q``, ``dimensions``,
    ``window_size``, ``workers``, ``num_iter``) are read from the enclosing
    scope rather than passed in.

    Args:
        graph: Graph handed to ``BiasedRandomWalk``; its ``nodes()`` are the
            starting points of the walks.
        name: Label used only in the printed progress message.

    Returns:
        A function that maps a node ``u`` to its vector in the trained
        model (``wv[u]``).
    """
    walker = BiasedRandomWalk(graph)
    walks = walker.run(
        graph.nodes(), n=num_walks, length=walk_length, p=p, q=q
    )
    print(f"Number of random walks for '{name}': {len(walks)}")

    # sg=1 selects skip-gram training; min_count=0 keeps every node that
    # appears in a walk in the vocabulary.
    word2vec_options = dict(
        size=dimensions,
        window=window_size,
        min_count=0,
        sg=1,
        workers=workers,
        iter=num_iter,
    )
    trained = Word2Vec(walks, **word2vec_options)

    def get_embedding(u):
        return trained.wv[u]

    return get_embedding


embedding_train = node2vec_embedding(graph_train, "Train Graph")
我得到一个 UFuncTypeError:
UFuncTypeError: ufunc 'add' did not contain a loop with signature matching types (dtype('<U21'), dtype('<U21')) -> dtype('<U21')
我不知道是什么原因造成的...
我考虑过这样做:
from node2vec import Node2Vec
graph_train = StellarGraph.to_networkx(graph_train)
node2vec = Node2Vec(graph_train)
model = node2vec.fit()
但我担心,如果把图转换为 networkX 格式,我会丢失边特征(edge features)...
任何帮助将不胜感激:)
编辑: 我在另一个更简单、没有边特征的数据集上尝试了教程中的方法,得到了同样的错误。
编辑 2: 为了以防万一,我在这里附上完整的错误回溯(traceback):
UFuncTypeError: ufunc 'add' did not contain a loop with signature matching types (dtype('<U21'), dtype('<U21')) -> dtype('<U21')
---------------------------------------------------------------------------
UFuncTypeError Traceback (most recent call last)
<ipython-input-188-673a72292ea4> in <module>
----> 1 embedding_train = node2vec_embedding(graph_train, "Train Graph")
<ipython-input-187-aecb7f480f86> in node2vec_embedding(graph, name)
15 sg=1,
16 workers=workers,
---> 17 iter=num_iter,
18 )
19
D:\environnements\stel2\lib\site-packages\gensim\models\word2vec.py in __init__(self, sentences, corpus_file, size, alpha, window, min_count, max_vocab_size, sample, seed, workers, min_alpha, sg, hs, negative, ns_exponent, cbow_mean, hashfxn, iter, null_word, trim_rule, sorted_vocab, batch_words, compute_loss, callbacks, max_final_vocab)
598 sentences=sentences, corpus_file=corpus_file, workers=workers, vector_size=size, epochs=iter,
599 callbacks=callbacks, batch_words=batch_words, trim_rule=trim_rule, sg=sg, alpha=alpha, window=window,
--> 600 seed=seed, hs=hs, negative=negative, cbow_mean=cbow_mean, min_alpha=min_alpha, compute_loss=compute_loss)
601
602 def _do_train_epoch(self, corpus_file, thread_id, offset, cython_vocab, thread_private_mem, cur_epoch,
D:\environnements\stel2\lib\site-packages\gensim\models\base_any2vec.py in __init__(self, sentences, corpus_file, workers, vector_size, epochs, callbacks, batch_words, trim_rule, sg, alpha, window, seed, hs, negative, ns_exponent, cbow_mean, min_alpha, compute_loss, **kwargs)
743 raise TypeError("You can't pass a generator as the sentences argument. Try a sequence.")
744
--> 745 self.build_vocab(sentences=sentences, corpus_file=corpus_file, trim_rule=trim_rule)
746 self.train(
747 sentences=sentences, corpus_file=corpus_file, total_examples=self.corpus_count,
D:\environnements\stel2\lib\site-packages\gensim\models\base_any2vec.py in build_vocab(self, sentences, corpus_file, update, progress_per, keep_raw_vocab, trim_rule, **kwargs)
927 trim_rule=trim_rule, **kwargs)
928 report_values['memory'] = self.estimate_memory(vocab_size=report_values['num_retained_words'])
--> 929 self.trainables.prepare_weights(self.hs, self.negative, self.wv, update=update, vocabulary=self.vocabulary)
930
931 def build_vocab_from_freq(self, word_freq, keep_raw_vocab=False, corpus_count=None, trim_rule=None, update=False):
D:\environnements\stel2\lib\site-packages\gensim\models\word2vec.py in prepare_weights(self, hs, negative, wv, update, vocabulary)
1685 # set initial input/projection and hidden weights
1686 if not update:
-> 1687 self.reset_weights(hs, negative, wv)
1688 else:
1689 self.update_weights(hs, negative, wv)
D:\environnements\stel2\lib\site-packages\gensim\models\word2vec.py in reset_weights(self, hs, negative, wv)
1702 for i in range(len(wv.vocab)):
1703 # construct deterministic seed from word AND seed argument
-> 1704 wv.vectors[i] = self.seeded_vector(wv.index2word[i] + str(self.seed), wv.vector_size)
1705 if hs:
1706 self.syn1 = zeros((len(wv.vocab), self.layer1_size), dtype=REAL)
UFuncTypeError: ufunc 'add' did not contain a loop with signature matching types (dtype('<U21'), dtype('<U21')) -> dtype('<U21')
```
我终于找到了解决办法。文档中对此说得并不清楚(至少对我而言),但节点标签必须是字符串,而不能是整数。
所以,在我的数据框上简单地调用一下 .astype(str) 就解决了这个问题。
我希望这对以后的其他人有所帮助!