Stellargraph 和 Node2Vec 嵌入

Stellargraph and Node2Vec embedding

我正在尝试按照文档中的教程(tutorial),使用 stellargraph 进行链接预测(link prediction)。
当我运行到下面这一部分时:

def node2vec_embedding(graph, name):
    """Train embeddings from biased random walks and return a lookup function.

    Walks are started from every node returned by ``graph.nodes()`` and are
    passed to ``Word2Vec`` as its sentences. The hyper-parameters
    (``num_walks``, ``walk_length``, ``p``, ``q``, ``dimensions``,
    ``window_size``, ``workers``, ``num_iter``) are not arguments; they are
    read from the enclosing scope.

    Args:
        graph: Graph handed to ``BiasedRandomWalk``; must provide ``nodes()``.
        name: Label used only in the printed progress message.

    Returns:
        A one-argument function mapping a node id ``u`` to ``model.wv[u]``.
    """
    walker = BiasedRandomWalk(graph)
    start_nodes = graph.nodes()
    walks = walker.run(start_nodes, n=num_walks, length=walk_length, p=p, q=q)
    print(f"Number of random walks for '{name}': {len(walks)}")

    # NOTE(review): gensim seeds each vector with `word + str(seed)`, so the
    # walk items presumably need to be `str` — confirm the node id dtype.
    word2vec_options = {
        "size": dimensions,
        "window": window_size,
        "min_count": 0,
        "sg": 1,
        "workers": workers,
        "iter": num_iter,
    }
    trained_model = Word2Vec(walks, **word2vec_options)

    def lookup_vector(u):
        # Resolve the vector through the trained model at call time.
        return trained_model.wv[u]

    return lookup_vector

# Build the embedding lookup for the training graph; "Train Graph" is only
# the label shown in the printed walk-count message.
embedding_train = node2vec_embedding(graph_train, "Train Graph")

我得到一个 UFuncTypeError:

UFuncTypeError: ufunc 'add' did not contain a loop with signature matching types (dtype('<U21'), dtype('<U21')) -> dtype('<U21')

我不知道是什么原因造成的...

我考虑过这样做:

from node2vec import Node2Vec
# Convert the StellarGraph into a networkx graph; graph_train is rebound to
# the converted object, so the original StellarGraph reference is replaced.
graph_train = StellarGraph.to_networkx(graph_train)
# Only the graph is passed, so every other Node2Vec setting keeps its default.
node2vec = Node2Vec(graph_train)
# NOTE(review): fit() presumably trains and returns the embedding model —
# confirm against the node2vec package documentation.
model = node2vec.fit()

但我担心,如果把图转换为 networkx 格式,我会丢失边特征(edge features)...

任何帮助将不胜感激:)

编辑: 我在另一个更简单、没有边特征的数据集上尝试了教程中的方法,结果得到了同样的错误。

编辑 2: 为了以防万一,我在下面附上完整的错误回溯信息(traceback):

UFuncTypeError: ufunc 'add' did not contain a loop with signature matching types (dtype('<U21'), dtype('<U21')) -> dtype('<U21')
---------------------------------------------------------------------------
UFuncTypeError                            Traceback (most recent call last)
<ipython-input-188-673a72292ea4> in <module>
----> 1 embedding_train = node2vec_embedding(graph_train, "Train Graph")

<ipython-input-187-aecb7f480f86> in node2vec_embedding(graph, name)
     15         sg=1,
     16         workers=workers,
---> 17         iter=num_iter,
     18     )
     19 

D:\environnements\stel2\lib\site-packages\gensim\models\word2vec.py in __init__(self, sentences, corpus_file, size, alpha, window, min_count, max_vocab_size, sample, seed, workers, min_alpha, sg, hs, negative, ns_exponent, cbow_mean, hashfxn, iter, null_word, trim_rule, sorted_vocab, batch_words, compute_loss, callbacks, max_final_vocab)
    598             sentences=sentences, corpus_file=corpus_file, workers=workers, vector_size=size, epochs=iter,
    599             callbacks=callbacks, batch_words=batch_words, trim_rule=trim_rule, sg=sg, alpha=alpha, window=window,
--> 600             seed=seed, hs=hs, negative=negative, cbow_mean=cbow_mean, min_alpha=min_alpha, compute_loss=compute_loss)
    601 
    602     def _do_train_epoch(self, corpus_file, thread_id, offset, cython_vocab, thread_private_mem, cur_epoch,

D:\environnements\stel2\lib\site-packages\gensim\models\base_any2vec.py in __init__(self, sentences, corpus_file, workers, vector_size, epochs, callbacks, batch_words, trim_rule, sg, alpha, window, seed, hs, negative, ns_exponent, cbow_mean, min_alpha, compute_loss, **kwargs)
    743                 raise TypeError("You can't pass a generator as the sentences argument. Try a sequence.")
    744 
--> 745             self.build_vocab(sentences=sentences, corpus_file=corpus_file, trim_rule=trim_rule)
    746             self.train(
    747                 sentences=sentences, corpus_file=corpus_file, total_examples=self.corpus_count,

D:\environnements\stel2\lib\site-packages\gensim\models\base_any2vec.py in build_vocab(self, sentences, corpus_file, update, progress_per, keep_raw_vocab, trim_rule, **kwargs)
    927             trim_rule=trim_rule, **kwargs)
    928         report_values['memory'] = self.estimate_memory(vocab_size=report_values['num_retained_words'])
--> 929         self.trainables.prepare_weights(self.hs, self.negative, self.wv, update=update, vocabulary=self.vocabulary)
    930 
    931     def build_vocab_from_freq(self, word_freq, keep_raw_vocab=False, corpus_count=None, trim_rule=None, update=False):

D:\environnements\stel2\lib\site-packages\gensim\models\word2vec.py in prepare_weights(self, hs, negative, wv, update, vocabulary)
   1685         # set initial input/projection and hidden weights
   1686         if not update:
-> 1687             self.reset_weights(hs, negative, wv)
   1688         else:
   1689             self.update_weights(hs, negative, wv)

D:\environnements\stel2\lib\site-packages\gensim\models\word2vec.py in reset_weights(self, hs, negative, wv)
   1702         for i in range(len(wv.vocab)):
   1703             # construct deterministic seed from word AND seed argument
-> 1704             wv.vectors[i] = self.seeded_vector(wv.index2word[i] + str(self.seed), wv.vector_size)
   1705         if hs:
   1706             self.syn1 = zeros((len(wv.vocab), self.layer1_size), dtype=REAL)

UFuncTypeError: ufunc 'add' did not contain a loop with signature matching types (dtype('<U21'), dtype('<U21')) -> dtype('<U21')
```

我终于找到了解决办法。文档中(至少对我而言)说得并不清楚:节点标签必须是字符串,而不是整数。因此,只需对我的数据框调用一次 .astype(str) 就解决了问题。希望这对以后遇到同样问题的人有所帮助!