Sklearn ValueError: Complex data not supported from K-ways Spectral partitioning function

Sklearn ValueError: Complex data not supported from K-ways Spectral partitioning function

我在研究谱聚类时看到了 Satyaki Sikdar 的一篇关于谱社区检测的论文。 资料来源:https://www3.nd.edu/~kogge/courses/cse60742-Fall2018/Public/StudentWork/KernelPaperFinal/SCD-Sikdar-final.pdf

在那篇论文中,Python 实现了连通图 G 的 k 向谱划分。 所以我试试看。

import networkx as nx
import numpy as np
import scipy.sparse.linalg
from sklearn.cluster import KMeans
import sklearn.preprocessing

def graphinit() -> nx.Graph:
    G = nx.Graph()
    G.add_edges_from([
        (1, 2),(1, 3),(1, 4),(2, 5),(2, 4),(3, 4),(4, 7),(4, 9),(4, 10),(5, 6),(5, 8),(5, 9),(6, 7),(6, 8),(6, 10),(7, 10),
        (8, 10),(8, 12),(9, 10),(9, 13),(11, 12),(11, 13),(11, 14),(11, 15),(11, 18),(12, 13),(12, 15),(12, 16),(12, 18),
        (13, 15),(13, 16),(13, 19),(14, 15),(14, 17),(15, 16),(15, 18),(16, 18),(16, 19),(17, 18),(17, 19),(18, 19),(18, 20)
        ])
    return G


# This function is copy-paste from the paper.
def k_way_spectral(G, k):
    assert nx.is_connected(G), "the graph must be connnected"

    clusters = []
    if G.order() < k:
        clusters = list(G.nodes())
    else:
        L = nx.laplacian_matrix(G)
        # compute the first k + 1 eigenvectors
        _, eigenvecs = scipy.sparse.linalg.eigs(L.asfptype(), k=k+1, which='SM')
        # discard the first trivial eigenvector
        eigenvecs = eigenvecs[:, 1:]
        # normalize each row by its L2 norm
        eigenvecs = sklearn.preprocessing.normalize(eigenvecs)
        # run K-means
        kmeans = KMeans(n_clusters=k).fit(eigenvecs)
        cluster_labels = kmeans.labels_
        clusters = [[] for _ in range(max(cluster_labels) + 1)]
        for node_id, cluster_id in zip(G.nodes(), cluster_labels):
            clusters[cluster_id].append(node_id)
    return clusters

k_way_spectral(graphinit(),2)

我遇到了这个奇怪的错误。

---------------------------------------------------------------------------
ComplexWarning                            Traceback (most recent call last)
c:\users\taextream\anaconda3\lib\site-packages\sklearn\utils\validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator)
    597                 else:
--> 598                     array = np.asarray(array, order=order, dtype=dtype)
    599             except ComplexWarning:

c:\users\taextream\anaconda3\lib\site-packages\numpy\core\_asarray.py in asarray(a, dtype, order)
     82     "\""
---> 83     return array(a, dtype, copy=False, order=order)
     84 

ComplexWarning: Casting complex values to real discards the imaginary part

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
<ipython-input-4-00f95d3b8e31> in <module>
----> 1 k_way_spectral(graphinit(),2)

<ipython-input-3-b9dd84b7165d> in k_way_spectral(G, k)
     12         eigenvecs = eigenvecs[:, 1:]
     13         # normalize each row by its L2 norm
---> 14         eigenvecs = sklearn.preprocessing.normalize(eigenvecs)
     15         # run K-means
     16         kmeans = KMeans(n_clusters=k).fit(eigenvecs)

c:\users\taextream\anaconda3\lib\site-packages\sklearn\utils\validation.py in inner_f(*args, **kwargs)
     70                           FutureWarning)
     71         kwargs.update({k: arg for k, arg in zip(sig.parameters, args)})
---> 72         return f(**kwargs)
     73     return inner_f
     74 

c:\users\taextream\anaconda3\lib\site-packages\sklearn\preprocessing\_data.py in normalize(X, norm, axis, copy, return_norm)
   1709 
   1710     X = check_array(X, accept_sparse=sparse_format, copy=copy,
-> 1711                     estimator='the normalize function', dtype=FLOAT_DTYPES)
   1712     if axis == 0:
   1713         X = X.T

c:\users\taextream\anaconda3\lib\site-packages\sklearn\utils\validation.py in inner_f(*args, **kwargs)
     70                           FutureWarning)
     71         kwargs.update({k: arg for k, arg in zip(sig.parameters, args)})
---> 72         return f(**kwargs)
     73     return inner_f
     74 

c:\users\taextream\anaconda3\lib\site-packages\sklearn\utils\validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator)
    599             except ComplexWarning:
    600                 raise ValueError("Complex data not supported\n"
--> 601                                  "{}\n".format(array))
    602 
    603         # It is possible that the np.array(..) gave no warning. This happens

ValueError: Complex data not supported
[[-0.30232058+0.j -0.17231718+0.j]
 [-0.26779348+0.j -0.0786565 +0.j]
 [-0.31487312+0.j -0.21823149+0.j]
 [-0.24889819+0.j -0.05462207+0.j]
 [-0.18537559+0.j  0.06648773+0.j]
 [-0.22911675+0.j  0.05705642+0.j]
 [-0.12660584+0.j  0.06675646+0.j]
 [-0.19060563+0.j  0.07010246+0.j]
 [-0.19070607+0.j  0.1009091 +0.j]
 [-0.11016539+0.j  0.1131071 +0.j]
 [ 0.15350034+0.j  0.10633514+0.j]
 [ 0.15003078+0.j  0.12096495+0.j]
 [ 0.20614164+0.j  0.11914376+0.j]
 [ 0.24117332+0.j  0.17252741+0.j]
 [ 0.20554584+0.j  0.1161731 +0.j]
 [ 0.22904745+0.j -0.03467156+0.j]
 [ 0.20211962+0.j  0.1012013 +0.j]
 [ 0.22206629+0.j  0.10004164+0.j]
 [ 0.25168529+0.j  0.11662205+0.j]
 [ 0.30515005+0.j -0.86892982+0.j]]

任何人都知道如何解决此错误或导致错误的原因。

我现在发现我需要使我的特征向量具有真正的 float32 类型。

所以我将特征向量线从 eigenvecs = eigenvecs[:, 1:] 更改为 eigenvecs = eigenvecs[:, 1:].real.astype(np.float32)