如何自动获取树状图聚类层次结构返回的颜色数量和颜色?
How to automatically get the number of colors and the colors returned from a dendrogram clustering hierarchy?
scikit-learn 提供了一个生成树状图(dendrogram)的 Python 示例代码。我把该代码复制粘贴在下面。此代码生成一个树状图,该树状图显示 3 种不同的颜色:蓝色、绿色和橙色。
问题:能否用代码从这个树状图示例中自动得到:
- 树状图生成的颜色数?
- 这些颜色(或它们的颜色代码)的列表?
import numpy as np
from matplotlib import pyplot as plt
from scipy.cluster.hierarchy import dendrogram
from sklearn.datasets import load_iris
from sklearn.cluster import AgglomerativeClustering
def plot_dendrogram(model, **kwargs):
    """Plot the dendrogram of a fitted hierarchical clustering model.

    Builds a linkage matrix from ``model.children_``, ``model.distances_``
    and the number of samples under each merge node, then passes it to
    ``scipy.cluster.hierarchy.dendrogram``.

    Args:
        model: Fitted estimator exposing ``children_``, ``distances_`` and
            ``labels_``.
        **kwargs: Forwarded unchanged to ``dendrogram``.

    Returns:
        The dictionary produced by ``dendrogram``. Its ``"color_list"``
        entry holds the colour of every drawn link, so the distinct colours
        of the plot are ``set(result["color_list"])``.
    """
    n_samples = len(model.labels_)

    # Count the samples under each merge node.
    counts = np.zeros(model.children_.shape[0])
    for i, merge in enumerate(model.children_):
        current_count = 0
        for child_idx in merge:
            if child_idx < n_samples:
                current_count += 1  # leaf node
            else:
                # Internal node: reuse the count stored for that earlier merge.
                current_count += counts[child_idx - n_samples]
        counts[i] = current_count

    linkage_matrix = np.column_stack(
        [model.children_, model.distances_, counts]
    ).astype(float)

    # Return the plotting data instead of discarding it, so callers can
    # inspect the colours that were actually used.
    return dendrogram(linkage_matrix, **kwargs)
# Use the iris feature matrix as the data to cluster.
iris = load_iris()
X = iris.data

# distance_threshold=0 (with n_clusters=None) ensures the full tree is computed.
model = AgglomerativeClustering(n_clusters=None, distance_threshold=0).fit(X)

plt.title("Hierarchical Clustering Dendrogram")
# Only the top three levels of the dendrogram are drawn.
plot_dendrogram(model, p=3, truncate_mode="level")
plt.xlabel("Number of points in node (or index of point if no parenthesis).")
plt.show()
如果您阅读 dendrogram 的文档,会看到颜色数由 color_threshold 决定,其默认值为 0.7*max(Z[:,2])。
所以你只需要找出高度超过该阈值的合并数:
首先修改你的代码以获得链接矩阵:
def get_linkage(model):
    """Build the linkage matrix of a fitted hierarchical clustering model.

    Args:
        model: Fitted estimator exposing ``children_``, ``distances_`` and
            ``labels_``.

    Returns:
        A float array with one row per merge: the two child indices, the
        merge distance, and the number of samples under the merged node.
    """
    merges = model.children_
    leaf_total = len(model.labels_)

    # sizes[k] is the number of samples under the node created by merge k.
    sizes = np.zeros(merges.shape[0])
    for row, pair in enumerate(merges):
        # An index below leaf_total is a single sample; anything else points
        # at an earlier merge whose size is already stored.
        sizes[row] = sum(
            1 if node < leaf_total else sizes[node - leaf_total]
            for node in pair
        )

    columns = [merges, model.distances_, sizes]
    return np.column_stack(columns).astype(float)
# Use the iris feature matrix as the data to cluster.
iris = load_iris()
X = iris.data

# distance_threshold=0 (with n_clusters=None) ensures the full tree is computed.
model = AgglomerativeClustering(n_clusters=None, distance_threshold=0).fit(X)

# Keep the linkage matrix around so it can be analysed without plotting.
linkage_matrix = get_linkage(model)
然后从中算出颜色的个数:
from scipy.cluster.hierarchy import cut_tree

# Default colour threshold used by dendrogram(): 70% of the largest merge distance.
color_threshold = 0.7 * linkage_matrix[:, 2].max()

# Cluster label of every sample when the tree is cut at the colour threshold.
cluster_labels = cut_tree(linkage_matrix, height=color_threshold).ravel()

# A cluster made of a single sample contains no link, so it draws nothing and
# uses no colour; only clusters with two or more samples count.
n_linked_clusters = int(np.count_nonzero(np.bincount(cluster_labels) > 1))

# With SciPy's default palette, links at or above the threshold are "C0" and
# the clusters below it cycle through "C1".."C9", so at most 10 distinct
# colours can appear. This describes an untruncated dendrogram.
color_codes = ["C0"] + [
    "C" + str(i) for i in range(1, min(n_linked_clusters, 9) + 1)
]
n_color = len(color_codes)
scikit-learn 提供了一个生成树状图(dendrogram)的 Python 示例代码。我把该代码复制粘贴在下面。此代码生成一个树状图,该树状图显示 3 种不同的颜色:蓝色、绿色和橙色。
问题:能否用代码从这个树状图示例中自动得到:
- 树状图生成的颜色数?
- 这些颜色(或它们的颜色代码)的列表?
import numpy as np
from matplotlib import pyplot as plt
from scipy.cluster.hierarchy import dendrogram
from sklearn.datasets import load_iris
from sklearn.cluster import AgglomerativeClustering
def plot_dendrogram(model, **kwargs):
    """Plot the dendrogram of a fitted hierarchical clustering model.

    Builds a linkage matrix from ``model.children_``, ``model.distances_``
    and the number of samples under each merge node, then passes it to
    ``scipy.cluster.hierarchy.dendrogram``.

    Args:
        model: Fitted estimator exposing ``children_``, ``distances_`` and
            ``labels_``.
        **kwargs: Forwarded unchanged to ``dendrogram``.

    Returns:
        The dictionary produced by ``dendrogram``. Its ``"color_list"``
        entry holds the colour of every drawn link, so the distinct colours
        of the plot are ``set(result["color_list"])``.
    """
    n_samples = len(model.labels_)

    # Count the samples under each merge node.
    counts = np.zeros(model.children_.shape[0])
    for i, merge in enumerate(model.children_):
        current_count = 0
        for child_idx in merge:
            if child_idx < n_samples:
                current_count += 1  # leaf node
            else:
                # Internal node: reuse the count stored for that earlier merge.
                current_count += counts[child_idx - n_samples]
        counts[i] = current_count

    linkage_matrix = np.column_stack(
        [model.children_, model.distances_, counts]
    ).astype(float)

    # Return the plotting data instead of discarding it, so callers can
    # inspect the colours that were actually used.
    return dendrogram(linkage_matrix, **kwargs)
# Use the iris feature matrix as the data to cluster.
iris = load_iris()
X = iris.data

# distance_threshold=0 (with n_clusters=None) ensures the full tree is computed.
model = AgglomerativeClustering(n_clusters=None, distance_threshold=0).fit(X)

plt.title("Hierarchical Clustering Dendrogram")
# Only the top three levels of the dendrogram are drawn.
plot_dendrogram(model, p=3, truncate_mode="level")
plt.xlabel("Number of points in node (or index of point if no parenthesis).")
plt.show()
如果您阅读 dendrogram 的文档,会看到颜色数由 color_threshold 决定,其默认值为 0.7*max(Z[:,2])。
所以你只需要找出高度超过该阈值的合并数:
首先修改你的代码以获得链接矩阵:
def get_linkage(model):
    """Build the linkage matrix of a fitted hierarchical clustering model.

    Args:
        model: Fitted estimator exposing ``children_``, ``distances_`` and
            ``labels_``.

    Returns:
        A float array with one row per merge: the two child indices, the
        merge distance, and the number of samples under the merged node.
    """
    merges = model.children_
    leaf_total = len(model.labels_)

    # sizes[k] is the number of samples under the node created by merge k.
    sizes = np.zeros(merges.shape[0])
    for row, pair in enumerate(merges):
        # An index below leaf_total is a single sample; anything else points
        # at an earlier merge whose size is already stored.
        sizes[row] = sum(
            1 if node < leaf_total else sizes[node - leaf_total]
            for node in pair
        )

    columns = [merges, model.distances_, sizes]
    return np.column_stack(columns).astype(float)
# Use the iris feature matrix as the data to cluster.
iris = load_iris()
X = iris.data

# distance_threshold=0 (with n_clusters=None) ensures the full tree is computed.
model = AgglomerativeClustering(n_clusters=None, distance_threshold=0).fit(X)

# Keep the linkage matrix around so it can be analysed without plotting.
linkage_matrix = get_linkage(model)
然后从中算出颜色的个数:
from scipy.cluster.hierarchy import cut_tree

# Default colour threshold used by dendrogram(): 70% of the largest merge distance.
color_threshold = 0.7 * linkage_matrix[:, 2].max()

# Cluster label of every sample when the tree is cut at the colour threshold.
cluster_labels = cut_tree(linkage_matrix, height=color_threshold).ravel()

# A cluster made of a single sample contains no link, so it draws nothing and
# uses no colour; only clusters with two or more samples count.
n_linked_clusters = int(np.count_nonzero(np.bincount(cluster_labels) > 1))

# With SciPy's default palette, links at or above the threshold are "C0" and
# the clusters below it cycle through "C1".."C9", so at most 10 distinct
# colours can appear. This describes an untruncated dendrogram.
color_codes = ["C0"] + [
    "C" + str(i) for i in range(1, min(n_linked_clusters, 9) + 1)
]
n_color = len(color_codes)