使用 Python 和 Bokeh 进行聚类:通过 Select 小部件让用户更改聚类算法
Clustering on Python and Bokeh; select widget which allows user to change clustering algorithm
我正在尝试在 Bokeh 仪表板中构建一个功能,让用户可以对数据进行聚类。我使用以下示例作为模板,链接如下:
Clustering in Bokeh example
下面是这个例子的代码:-
import numpy as np
from sklearn import cluster, datasets
from sklearn.preprocessing import StandardScaler
from bokeh.layouts import column, row
from bokeh.plotting import figure, output_file, show
# Reference script: cluster four synthetic 2-D datasets with one scikit-learn
# estimator and show the results as a 2x2 grid of Bokeh scatter plots.
print("\n\n*** This example may take several seconds to run before displaying. ***\n\n")

N = 50000        # number of samples generated for each dataset
PLOT_SIZE = 400  # passed as both width and height of every figure

# generate datasets.
np.random.seed(0)
noisy_circles = datasets.make_circles(n_samples=N, factor=.5, noise=.04)
noisy_moons = datasets.make_moons(n_samples=N, noise=.05)
centers = [(-2, 3), (2, 3), (-2, -3), (2, -3)]
blobs1 = datasets.make_blobs(centers=centers, n_samples=N, cluster_std=0.4, random_state=8)
blobs2 = datasets.make_blobs(centers=centers, n_samples=N, cluster_std=0.7, random_state=8)

# Six base colours repeated 20 times (120 entries), indexed by cluster label
# so that each predicted label picks a colour.
colors = np.array(['#00f', '#0f0', '#f00', '#0ff', '#f0f', '#ff0'])
colors = np.hstack([colors] * 20)

# create clustering algorithms
dbscan = cluster.DBSCAN(eps=.2)
birch = cluster.Birch(n_clusters=2)
means = cluster.MiniBatchKMeans(n_clusters=2)
spectral = cluster.SpectralClustering(n_clusters=2, eigen_solver='arpack', affinity="nearest_neighbors")
affinity = cluster.AffinityPropagation(damping=.9, preference=-200)

# change here, to select clustering algorithm (note: spectral is slow)
algorithm = dbscan  # <- SELECT ALG

plots = []
for dataset in (noisy_circles, noisy_moons, blobs1, blobs2):
    # Each dataset is a (points, labels) pair; only the points are used.
    X, _ = dataset
    X = StandardScaler().fit_transform(X)

    # predict cluster memberships
    algorithm.fit(X)
    if hasattr(algorithm, 'labels_'):
        # Estimator exposes the labels of the fitted data directly.
        y_pred = algorithm.labels_.astype(int)
    else:
        y_pred = algorithm.predict(X)

    p = figure(output_backend="webgl", title=algorithm.__class__.__name__,
               width=PLOT_SIZE, height=PLOT_SIZE)
    p.circle(X[:, 0], X[:, 1], color=colors[y_pred].tolist(), alpha=0.1,)
    plots.append(p)

# generate layout for the plots: first two figures on top, last two below
layout = column(row(plots[:2]), row(plots[2:]))
output_file("clustering.html", title="clustering with sklearn")
show(layout)
该示例允许用户对数据进行聚类。在代码中,您可以指定要使用的算法;在上面粘贴的代码中,算法是 dbscan。我试图修改代码,以便我可以添加一个小部件,允许用户指定要使用的算法:-
from bokeh.models.annotations import Label
import numpy as np
from sklearn import cluster, datasets
from sklearn.preprocessing import StandardScaler
from bokeh.layouts import column, row
from bokeh.plotting import figure, output_file, show
from bokeh.models import CustomJS, Select
# Asker's first attempt: the reference clustering script plus a Select widget
# intended to choose the algorithm. Kept as posted (including the failing
# assignment) because the question is about the error it produces.
print("\n\n*** This example may take several seconds to run before displaying. ***\n\n")

N = 50000        # number of samples generated for each dataset
PLOT_SIZE = 400  # passed as both width and height of every figure

# generate datasets.
np.random.seed(0)
noisy_circles = datasets.make_circles(n_samples=N, factor=.5, noise=.04)
noisy_moons = datasets.make_moons(n_samples=N, noise=.05)
centers = [(-2, 3), (2, 3), (-2, -3), (2, -3)]
blobs1 = datasets.make_blobs(centers=centers, n_samples=N, cluster_std=0.4, random_state=8)
blobs2 = datasets.make_blobs(centers=centers, n_samples=N, cluster_std=0.7, random_state=8)

# Six base colours repeated 20 times (120 entries), indexed by cluster label.
colors = np.array(['#00f', '#0f0', '#f00', '#0ff', '#f0f', '#ff0'])
colors = np.hstack([colors] * 20)

# create clustering algorithms
dbscan = cluster.DBSCAN(eps=.2)
birch = cluster.Birch(n_clusters=2)
means = cluster.MiniBatchKMeans(n_clusters=2)
spectral = cluster.SpectralClustering(n_clusters=2, eigen_solver='arpack', affinity="nearest_neighbors")
affinity = cluster.AffinityPropagation(damping=.9, preference=-200)
kmeans = cluster.KMeans(n_clusters=2)

############################select widget for different clustering algorithms############
# NOTE(review): Bokeh documents tuple options as (value, label); if so, the
# first element of each pair is what select.value holds -- confirm.
menu = [('DBSCAN', 'dbscan'), ('Birch', 'birch'), ('MiniBatchKmeans', 'means'),
        ('Spectral', 'spectral'), ('Affinity', 'affinity'), ('K-means', 'kmeans')]
select = Select(title="Option:", value="DBSCAN", options=menu)
# JavaScript callback: logs the new value to the browser console only.
select.js_on_change("value", CustomJS(code="""
console.log('select: value=' + this.value, this.toString())
"""))

# change here, to select clustering algorithm (note: spectral is slow)
# NOTE: select.value is the string "DBSCAN" set above, not one of the
# estimator objects, so algorithm.fit(X) below raises
# AttributeError: 'str' object has no attribute 'fit'.
algorithm = select.value
############################################################

plots = []
for dataset in (noisy_circles, noisy_moons, blobs1, blobs2):
    # Each dataset is a (points, labels) pair; only the points are used.
    X, _ = dataset
    X = StandardScaler().fit_transform(X)

    # predict cluster memberships
    algorithm.fit(X)
    if hasattr(algorithm, 'labels_'):
        y_pred = algorithm.labels_.astype(int)
    else:
        y_pred = algorithm.predict(X)

    p = figure(output_backend="webgl", title=algorithm.__class__.__name__,
               width=PLOT_SIZE, height=PLOT_SIZE)
    p.circle(X[:, 0], X[:, 1], color=colors[y_pred].tolist(), alpha=0.1,)
    plots.append(p)

# generate layout for the plots: widget on top, then a 2x2 grid of figures
layout = column(select, row(plots[:2]), row(plots[2:]))
output_file("clustering.html", title="clustering with sklearn")
show(layout)
但是,当我尝试运行时出现以下错误:
AttributeError: 'str' object has no attribute 'fit'
谁能告诉我我缺少什么来解决这个问题?
另外,如果不太难的话,我还想添加一个数字输入小部件,让用户可以选择每个算法要查找的簇数。有什么建议吗?
非常感谢:)
编辑
这是按照 @Tony 的解决方案修改后,代码的当前状态。
''' Example inspired by an example from the scikit-learn project:
http://scikit-learn.org/stable/auto_examples/cluster/plot_cluster_comparison.html
'''
#https://github.com/bokeh/bokeh/blob/branch-2.4/examples/webgl/clustering.py
from bokeh.models.annotations import Label
import numpy as np
from sklearn import cluster, datasets
from sklearn.preprocessing import StandardScaler
from bokeh.layouts import column, row
from bokeh.plotting import figure, output_file, show
from bokeh.models import CustomJS, Select
# Asker's edited attempt after applying the answer's if/elif suggestion.
# NOTE(review): indentation was lost in this paste; the code is left exactly
# as posted and only annotated.
print("\n\n*** This example may take several seconds to run before displaying. ***\n\n")
N = 50000
PLOT_SIZE = 400
# generate datasets.
np.random.seed(0)
noisy_circles = datasets.make_circles(n_samples=N, factor=.5, noise=.04)
noisy_moons = datasets.make_moons(n_samples=N, noise=.05)
centers = [(-2, 3), (2, 3), (-2, -3), (2, -3)]
blobs1 = datasets.make_blobs(centers=centers, n_samples=N, cluster_std=0.4, random_state=8)
blobs2 = datasets.make_blobs(centers=centers, n_samples=N, cluster_std=0.7, random_state=8)
# Six base colours repeated 20 times, later indexed by predicted cluster label.
colors = np.array([x for x in ('#00f', '#0f0', '#f00', '#0ff', '#f0f', '#ff0')])
colors = np.hstack([colors] * 20)
# create clustering algorithms
dbscan = cluster.DBSCAN(eps=.2)
birch = cluster.Birch(n_clusters=2)
means = cluster.MiniBatchKMeans(n_clusters=2)
spectral = cluster.SpectralClustering(n_clusters=2, eigen_solver='arpack', affinity="nearest_neighbors")
affinity = cluster.AffinityPropagation(damping=.9, preference=-200)
kmeans = cluster.KMeans(n_clusters=2)
# NOTE(review): Bokeh documents tuple options as (value, label). If that holds
# here, the widget values are 'DBSCAN', 'Birch', ... and the lowercase strings
# are only display labels -- confirm against the Select documentation.
menu =[('DBSCAN','dbscan'),('Birch','birch'),('MiniBatchKmeans','means'),('Spectral','spectral'),('Affinity','affinity'),('K-means','kmeans')]
select = Select(title="Option:", value="DBSCAN", options=menu)
# JavaScript callback: logs the new value to the browser console only.
select.js_on_change("value", CustomJS(code="""
console.log('select: value=' + this.value, this.toString())
"""))
# change here, to select clustering algorithm (note: spectral is slow)
#algorithm = select.value
# Map the widget's current value to an estimator object. select.value was set
# to "DBSCAN" above, which equals none of the lowercase literals compared
# below, so algorithm is still None after this chain when the script runs.
algorithm = None
if select.value == 'dbscan':
algorithm = dbscan # use dbscan algorithm function
elif select.value == 'birch':
algorithm = birch # use birch algorithm function
elif select.value == 'means':
algorithm = means # use means algorithm function
elif select.value == 'spectral':
algorithm = spectral
elif select.value == 'affinity':
algorithm = affinity
elif select.value == 'kmeans':
# NOTE(review): assigns the string 'kmeans', not the kmeans estimator created
# above; a str has no fit() method.
algorithm = 'kmeans'
# NOTE(review): with the indentation gone it cannot be determined which of the
# following statements belong to this `if`, to the `for`, or to the `else`.
# The traceback quoted after this code shows algorithm.fit(X) running while
# algorithm is None, i.e. the fit call was not protected by this guard.
if algorithm is not None:
plots =[]
for dataset in (noisy_circles, noisy_moons, blobs1, blobs2):
X, y = dataset
X = StandardScaler().fit_transform(X)
# predict cluster memberships
algorithm.fit(X) ######################This is what appears to be the problem######################
if hasattr(algorithm, 'labels_'):
y_pred = algorithm.labels_.astype(int)
else:
y_pred = algorithm.predict(X)
p = figure(output_backend="webgl", title=algorithm.__class__.__name__,
width=PLOT_SIZE, height=PLOT_SIZE)
p.circle(X[:, 0], X[:, 1], color=colors[y_pred].tolist(), alpha=0.1,)
plots.append(p)
else:
print('Please select an algorithm first')
# generate layout for the plots
# NOTE(review): plots is only assigned after the `if algorithm is not None`
# line above; if that assignment is skipped this line has nothing to slice.
layout = column(select,row(plots[:2]), row(plots[2:]))
output_file("clustering.html", title="clustering with sklearn")
show(layout)
参见algorithm.fit(X)
这是错误发生的地方。
错误信息:-
AttributeError: 'NoneType' object has no attribute 'fit'
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
m:\bokehdash\clusteringbokeh.py in
67
68 # predict cluster memberships
---> 69 algorithm.fit(X)
70 if hasattr(algorithm, 'labels_'):
71 y_pred = algorithm.labels_.astype(int)
AttributeError: 'NoneType' object has no attribute 'fit'
我不熟悉 sklearn,但比较你的两个例子,我可以看到以下内容:
`Select` 是一个 Bokeh 模型,它的 `value` 属性是字符串(string)类型,所以 `select.value` 是一个字符串。
`dbscan` 是一个算法对象(它带有 `fit()` 方法)。
所以,当你写 `algorithm = dbscan` 时,你给 `algorithm` 变量分配的是一个算法对象;而在第二个例子中写 `algorithm = select.value` 时,你只给它分配了一个字符串,所以它无法工作,因为字符串没有 `fit()` 方法。你应该这样做:
# Translate the Select widget's string value into the estimator object to run.
# Keys are the first element of each tuple in the question's `menu` list.
algorithms = {
    'DBSCAN': dbscan,
    'Birch': birch,
    'MiniBatchKmeans': means,
    'Spectral': spectral,
    'Affinity': affinity,
    'K-means': kmeans,
}
# .get() returns None for a value with no entry instead of raising KeyError.
algorithm = algorithms.get(select.value)

# Defined on both paths so the layout code that follows can always slice it.
plots = []
if algorithm is not None:
    for dataset in (noisy_circles, noisy_moons, blobs1, blobs2):
        ...  # same loop body as in the question: scale, fit, predict, plot
else:
    print('Please select an algorithm first')
我正在尝试在 Bokeh 仪表板中构建一个功能,让用户可以对数据进行聚类。我使用以下示例作为模板,链接如下:Clustering in Bokeh example
下面是这个例子的代码:-
import numpy as np
from sklearn import cluster, datasets
from sklearn.preprocessing import StandardScaler
from bokeh.layouts import column, row
from bokeh.plotting import figure, output_file, show
# Reference script: cluster four synthetic 2-D datasets with one scikit-learn
# estimator and show the results as a 2x2 grid of Bokeh scatter plots.
print("\n\n*** This example may take several seconds to run before displaying. ***\n\n")

N = 50000        # number of samples generated for each dataset
PLOT_SIZE = 400  # passed as both width and height of every figure

# generate datasets.
np.random.seed(0)
noisy_circles = datasets.make_circles(n_samples=N, factor=.5, noise=.04)
noisy_moons = datasets.make_moons(n_samples=N, noise=.05)
centers = [(-2, 3), (2, 3), (-2, -3), (2, -3)]
blobs1 = datasets.make_blobs(centers=centers, n_samples=N, cluster_std=0.4, random_state=8)
blobs2 = datasets.make_blobs(centers=centers, n_samples=N, cluster_std=0.7, random_state=8)

# Six base colours repeated 20 times (120 entries), indexed by cluster label
# so that each predicted label picks a colour.
colors = np.array(['#00f', '#0f0', '#f00', '#0ff', '#f0f', '#ff0'])
colors = np.hstack([colors] * 20)

# create clustering algorithms
dbscan = cluster.DBSCAN(eps=.2)
birch = cluster.Birch(n_clusters=2)
means = cluster.MiniBatchKMeans(n_clusters=2)
spectral = cluster.SpectralClustering(n_clusters=2, eigen_solver='arpack', affinity="nearest_neighbors")
affinity = cluster.AffinityPropagation(damping=.9, preference=-200)

# change here, to select clustering algorithm (note: spectral is slow)
algorithm = dbscan  # <- SELECT ALG

plots = []
for dataset in (noisy_circles, noisy_moons, blobs1, blobs2):
    # Each dataset is a (points, labels) pair; only the points are used.
    X, _ = dataset
    X = StandardScaler().fit_transform(X)

    # predict cluster memberships
    algorithm.fit(X)
    if hasattr(algorithm, 'labels_'):
        # Estimator exposes the labels of the fitted data directly.
        y_pred = algorithm.labels_.astype(int)
    else:
        y_pred = algorithm.predict(X)

    p = figure(output_backend="webgl", title=algorithm.__class__.__name__,
               width=PLOT_SIZE, height=PLOT_SIZE)
    p.circle(X[:, 0], X[:, 1], color=colors[y_pred].tolist(), alpha=0.1,)
    plots.append(p)

# generate layout for the plots: first two figures on top, last two below
layout = column(row(plots[:2]), row(plots[2:]))
output_file("clustering.html", title="clustering with sklearn")
show(layout)
该示例允许用户对数据进行聚类。在代码中,您可以指定要使用的算法;在上面粘贴的代码中,算法是 dbscan。我试图修改代码,以便我可以添加一个小部件,允许用户指定要使用的算法:-
from bokeh.models.annotations import Label
import numpy as np
from sklearn import cluster, datasets
from sklearn.preprocessing import StandardScaler
from bokeh.layouts import column, row
from bokeh.plotting import figure, output_file, show
from bokeh.models import CustomJS, Select
# Asker's first attempt: the reference clustering script plus a Select widget
# intended to choose the algorithm. Kept as posted (including the failing
# assignment) because the question is about the error it produces.
print("\n\n*** This example may take several seconds to run before displaying. ***\n\n")

N = 50000        # number of samples generated for each dataset
PLOT_SIZE = 400  # passed as both width and height of every figure

# generate datasets.
np.random.seed(0)
noisy_circles = datasets.make_circles(n_samples=N, factor=.5, noise=.04)
noisy_moons = datasets.make_moons(n_samples=N, noise=.05)
centers = [(-2, 3), (2, 3), (-2, -3), (2, -3)]
blobs1 = datasets.make_blobs(centers=centers, n_samples=N, cluster_std=0.4, random_state=8)
blobs2 = datasets.make_blobs(centers=centers, n_samples=N, cluster_std=0.7, random_state=8)

# Six base colours repeated 20 times (120 entries), indexed by cluster label.
colors = np.array(['#00f', '#0f0', '#f00', '#0ff', '#f0f', '#ff0'])
colors = np.hstack([colors] * 20)

# create clustering algorithms
dbscan = cluster.DBSCAN(eps=.2)
birch = cluster.Birch(n_clusters=2)
means = cluster.MiniBatchKMeans(n_clusters=2)
spectral = cluster.SpectralClustering(n_clusters=2, eigen_solver='arpack', affinity="nearest_neighbors")
affinity = cluster.AffinityPropagation(damping=.9, preference=-200)
kmeans = cluster.KMeans(n_clusters=2)

############################select widget for different clustering algorithms############
# NOTE(review): Bokeh documents tuple options as (value, label); if so, the
# first element of each pair is what select.value holds -- confirm.
menu = [('DBSCAN', 'dbscan'), ('Birch', 'birch'), ('MiniBatchKmeans', 'means'),
        ('Spectral', 'spectral'), ('Affinity', 'affinity'), ('K-means', 'kmeans')]
select = Select(title="Option:", value="DBSCAN", options=menu)
# JavaScript callback: logs the new value to the browser console only.
select.js_on_change("value", CustomJS(code="""
console.log('select: value=' + this.value, this.toString())
"""))

# change here, to select clustering algorithm (note: spectral is slow)
# NOTE: select.value is the string "DBSCAN" set above, not one of the
# estimator objects, so algorithm.fit(X) below raises
# AttributeError: 'str' object has no attribute 'fit'.
algorithm = select.value
############################################################

plots = []
for dataset in (noisy_circles, noisy_moons, blobs1, blobs2):
    # Each dataset is a (points, labels) pair; only the points are used.
    X, _ = dataset
    X = StandardScaler().fit_transform(X)

    # predict cluster memberships
    algorithm.fit(X)
    if hasattr(algorithm, 'labels_'):
        y_pred = algorithm.labels_.astype(int)
    else:
        y_pred = algorithm.predict(X)

    p = figure(output_backend="webgl", title=algorithm.__class__.__name__,
               width=PLOT_SIZE, height=PLOT_SIZE)
    p.circle(X[:, 0], X[:, 1], color=colors[y_pred].tolist(), alpha=0.1,)
    plots.append(p)

# generate layout for the plots: widget on top, then a 2x2 grid of figures
layout = column(select, row(plots[:2]), row(plots[2:]))
output_file("clustering.html", title="clustering with sklearn")
show(layout)
但是,当我尝试运行时出现以下错误:
AttributeError: 'str' object has no attribute 'fit'
谁能告诉我我缺少什么来解决这个问题?
另外,如果不太难的话,我还想添加一个数字输入小部件,让用户可以选择每个算法要查找的簇数。有什么建议吗?
非常感谢:)
编辑
这是按照 @Tony 的解决方案修改后,代码的当前状态。
''' Example inspired by an example from the scikit-learn project:
http://scikit-learn.org/stable/auto_examples/cluster/plot_cluster_comparison.html
'''
#https://github.com/bokeh/bokeh/blob/branch-2.4/examples/webgl/clustering.py
from bokeh.models.annotations import Label
import numpy as np
from sklearn import cluster, datasets
from sklearn.preprocessing import StandardScaler
from bokeh.layouts import column, row
from bokeh.plotting import figure, output_file, show
from bokeh.models import CustomJS, Select
# Asker's edited attempt after applying the answer's if/elif suggestion.
# NOTE(review): indentation was lost in this paste; the code is left exactly
# as posted and only annotated.
print("\n\n*** This example may take several seconds to run before displaying. ***\n\n")
N = 50000
PLOT_SIZE = 400
# generate datasets.
np.random.seed(0)
noisy_circles = datasets.make_circles(n_samples=N, factor=.5, noise=.04)
noisy_moons = datasets.make_moons(n_samples=N, noise=.05)
centers = [(-2, 3), (2, 3), (-2, -3), (2, -3)]
blobs1 = datasets.make_blobs(centers=centers, n_samples=N, cluster_std=0.4, random_state=8)
blobs2 = datasets.make_blobs(centers=centers, n_samples=N, cluster_std=0.7, random_state=8)
# Six base colours repeated 20 times, later indexed by predicted cluster label.
colors = np.array([x for x in ('#00f', '#0f0', '#f00', '#0ff', '#f0f', '#ff0')])
colors = np.hstack([colors] * 20)
# create clustering algorithms
dbscan = cluster.DBSCAN(eps=.2)
birch = cluster.Birch(n_clusters=2)
means = cluster.MiniBatchKMeans(n_clusters=2)
spectral = cluster.SpectralClustering(n_clusters=2, eigen_solver='arpack', affinity="nearest_neighbors")
affinity = cluster.AffinityPropagation(damping=.9, preference=-200)
kmeans = cluster.KMeans(n_clusters=2)
# NOTE(review): Bokeh documents tuple options as (value, label). If that holds
# here, the widget values are 'DBSCAN', 'Birch', ... and the lowercase strings
# are only display labels -- confirm against the Select documentation.
menu =[('DBSCAN','dbscan'),('Birch','birch'),('MiniBatchKmeans','means'),('Spectral','spectral'),('Affinity','affinity'),('K-means','kmeans')]
select = Select(title="Option:", value="DBSCAN", options=menu)
# JavaScript callback: logs the new value to the browser console only.
select.js_on_change("value", CustomJS(code="""
console.log('select: value=' + this.value, this.toString())
"""))
# change here, to select clustering algorithm (note: spectral is slow)
#algorithm = select.value
# Map the widget's current value to an estimator object. select.value was set
# to "DBSCAN" above, which equals none of the lowercase literals compared
# below, so algorithm is still None after this chain when the script runs.
algorithm = None
if select.value == 'dbscan':
algorithm = dbscan # use dbscan algorithm function
elif select.value == 'birch':
algorithm = birch # use birch algorithm function
elif select.value == 'means':
algorithm = means # use means algorithm function
elif select.value == 'spectral':
algorithm = spectral
elif select.value == 'affinity':
algorithm = affinity
elif select.value == 'kmeans':
# NOTE(review): assigns the string 'kmeans', not the kmeans estimator created
# above; a str has no fit() method.
algorithm = 'kmeans'
# NOTE(review): with the indentation gone it cannot be determined which of the
# following statements belong to this `if`, to the `for`, or to the `else`.
# The traceback quoted after this code shows algorithm.fit(X) running while
# algorithm is None, i.e. the fit call was not protected by this guard.
if algorithm is not None:
plots =[]
for dataset in (noisy_circles, noisy_moons, blobs1, blobs2):
X, y = dataset
X = StandardScaler().fit_transform(X)
# predict cluster memberships
algorithm.fit(X) ######################This is what appears to be the problem######################
if hasattr(algorithm, 'labels_'):
y_pred = algorithm.labels_.astype(int)
else:
y_pred = algorithm.predict(X)
p = figure(output_backend="webgl", title=algorithm.__class__.__name__,
width=PLOT_SIZE, height=PLOT_SIZE)
p.circle(X[:, 0], X[:, 1], color=colors[y_pred].tolist(), alpha=0.1,)
plots.append(p)
else:
print('Please select an algorithm first')
# generate layout for the plots
# NOTE(review): plots is only assigned after the `if algorithm is not None`
# line above; if that assignment is skipped this line has nothing to slice.
layout = column(select,row(plots[:2]), row(plots[2:]))
output_file("clustering.html", title="clustering with sklearn")
show(layout)
参见algorithm.fit(X)
这是错误发生的地方。
错误信息:-
AttributeError: 'NoneType' object has no attribute 'fit'
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
m:\bokehdash\clusteringbokeh.py in
67
68 # predict cluster memberships
---> 69 algorithm.fit(X)
70 if hasattr(algorithm, 'labels_'):
71 y_pred = algorithm.labels_.astype(int)
AttributeError: 'NoneType' object has no attribute 'fit'
我不熟悉 sklearn,但比较你的两个例子,我可以看到以下内容:
`Select` 是一个 Bokeh 模型,它的 `value` 属性是字符串(string)类型,所以 `select.value` 是一个字符串。
`dbscan` 是一个算法对象(它带有 `fit()` 方法)。
所以,当你写 `algorithm = dbscan` 时,你给 `algorithm` 变量分配的是一个算法对象;而在第二个例子中写 `algorithm = select.value` 时,你只给它分配了一个字符串,所以它无法工作,因为字符串没有 `fit()` 方法。你应该这样做:
# Translate the Select widget's string value into the estimator object to run.
# Keys are the first element of each tuple in the question's `menu` list.
algorithms = {
    'DBSCAN': dbscan,
    'Birch': birch,
    'MiniBatchKmeans': means,
    'Spectral': spectral,
    'Affinity': affinity,
    'K-means': kmeans,
}
# .get() returns None for a value with no entry instead of raising KeyError.
algorithm = algorithms.get(select.value)

# Defined on both paths so the layout code that follows can always slice it.
plots = []
if algorithm is not None:
    for dataset in (noisy_circles, noisy_moons, blobs1, blobs2):
        ...  # same loop body as in the question: scale, fit, predict, plot
else:
    print('Please select an algorithm first')