如何在 python 中获得 dbscan 创建的聚类的置信度
How to get the confidence of the clusters created by DBSCAN in Python
我在 Python 中使用了 sklearn 的 dbscan,结果只给出了每个簇的标签,但我还想计算聚类的置信度,或者只是簇之间的平均距离。
你们有什么想法吗?
我认为 scikit-learn 不支持此功能。聚类置信度无从谈起,因为 DBSCAN 并不使用聚类概率。不过,计算簇距离相对简单。
import numpy as np
from sklearn.datasets import load_iris
from sklearn.cluster import dbscan
# Load the iris feature matrix and cluster it with DBSCAN (default parameters).
data = load_iris().data
# dbscan() returns a (core_sample_indices, labels) pair; only the labels are used.
_core_samples, labels = dbscan(data)
import numpy as np
from sklearn.datasets import load_iris
from sklearn.cluster import dbscan
# Get data & labels
data = load_iris()['data']
labels = dbscan(data)[1]

# The label -1 is treated as noise and is not a cluster. Collect the real
# cluster ids once, in sorted order, so that:
#   * the result arrays have exactly one slot per real cluster, whether or not
#     any noise points exist, and
#   * each cluster gets a stable index that does not depend on set ordering.
unique_labels = np.unique(labels)
cluster_ids = unique_labels[unique_labels != -1]

# Initialize results
# cluster_means[k]        -> mean (centroid) of the points in cluster_ids[k]
# cluster_distances[n, k] -> Euclidean distance from sample n to that centroid
cluster_means = np.zeros((len(cluster_ids), data.shape[1]))
cluster_distances = np.zeros((len(data), len(cluster_ids)))

# Loop through clusters
for i, cluster in enumerate(cluster_ids):
    # Centroid of all samples assigned to this cluster
    cluster_mean = np.mean(data[labels == cluster], axis=0)
    cluster_means[i, :] = cluster_mean
    # Distance from every sample (noise included) to this centroid
    cluster_distances[:, i] = np.linalg.norm(data - cluster_mean, axis=1)
我在 Python 中使用了 sklearn 的 dbscan,结果只给出了每个簇的标签,但我还想计算聚类的置信度,或者只是簇之间的平均距离。
你们有什么想法吗?
我认为 scikit-learn 不支持此功能。聚类置信度无从谈起,因为 DBSCAN 并不使用聚类概率。不过,计算簇距离相对简单。
import numpy as np
from sklearn.datasets import load_iris
from sklearn.cluster import dbscan
# Load the iris feature matrix and cluster it with DBSCAN (default parameters).
data = load_iris().data
# dbscan() returns a (core_sample_indices, labels) pair; only the labels are used.
_core_samples, labels = dbscan(data)
import numpy as np
from sklearn.datasets import load_iris
from sklearn.cluster import dbscan
# Get data & labels
data = load_iris()['data']
labels = dbscan(data)[1]

# The label -1 is treated as noise and is not a cluster. Collect the real
# cluster ids once, in sorted order, so that:
#   * the result arrays have exactly one slot per real cluster, whether or not
#     any noise points exist, and
#   * each cluster gets a stable index that does not depend on set ordering.
unique_labels = np.unique(labels)
cluster_ids = unique_labels[unique_labels != -1]

# Initialize results
# cluster_means[k]        -> mean (centroid) of the points in cluster_ids[k]
# cluster_distances[n, k] -> Euclidean distance from sample n to that centroid
cluster_means = np.zeros((len(cluster_ids), data.shape[1]))
cluster_distances = np.zeros((len(data), len(cluster_ids)))

# Loop through clusters
for i, cluster in enumerate(cluster_ids):
    # Centroid of all samples assigned to this cluster
    cluster_mean = np.mean(data[labels == cluster], axis=0)
    cluster_means[i, :] = cluster_mean
    # Distance from every sample (noise included) to this centroid
    cluster_distances[:, i] = np.linalg.norm(data - cluster_mean, axis=1)