从 dendextend::circlize_dendrogram() 的输出中提取簇颜色
Extract cluster color from output of dendextend::circlize_dendrogram()
我正在尝试提取 circlize_dendrogram
的聚类中使用的颜色。这是示例代码:
library(magrittr)
library(dendextend)
# Palette to use for the five clusters.
cols <- c("#009000", "#FF033E", "#CB410B", "#3B444B", "#007FFF")
# Cluster the first 40 iris rows (fifth column dropped) and turn the result
# into a dendrogram.
dend <- as.dendrogram(hclust(dist(iris[1:40, -5])))
# Colour branches and labels by the 5-cluster cut, then tweak label size and
# branch width.
dend <- color_branches(dend, k = 5, col = cols)
dend <- set(dend, "labels_col", value = cols, k = 5)
dend <- set(dend, "labels_cex", .8)
dend <- set(dend, "branches_lwd", 2)
# Draw the circular dendrogram.
circlize_dendrogram(dend)
我可以使用 cutree(dend, k = 5) 提取各元素所属的簇(表格化的聚类结果)。有没有办法根据给定的 cols 提取树状图中各个簇的颜色?我需要用它通过 grid 包在图中插入图例。
图例示例:簇 1 - #009000;簇 2 - #FF033E;簇 3 - #CB410B;簇 4 - #3B444B;簇 5 - #007FFF。circlize_dendrogram 的问题在于,各簇实际使用的颜色顺序与上述预期顺序不同。
虽然我可以手动执行此操作,但如果我可以自动执行则效率会更高。如果我可以提取簇的颜色,那是可能的。
好的,这是一个非常 hacky 的解决方案。我相信一定还有更好的办法,但这是我的第一次尝试,所以请多包涵。
想法是在 dend
对象(内部是一个列表)中搜索各个元素名称(在本例中只是数字)并提取相应的颜色,将其保存在数据框中,然后用这个作为图例。
# Cut the dendrogram into 5 clusters; the result is named by element.
categories <- cutree(dend, k = 5)
# Collect element id, cluster id and a colour placeholder (filled in later)
# in one data frame.
categories_df <- data.frame(
  elements = as.numeric(names(categories)),
  categories = categories,
  color = NA
)
# now here's a little function that extracts the color for each element
# from the 'dend' object. It uses the list.search() function from the
# 'rlist' package
library(rlist)
#' Look up the branch colour of one element of a dendrogram.
#'
#' Searches `dend_obj` with `list.search()` (from the 'rlist' package) for
#' entries whose values all equal `element_no`, then reads the `col` entry of
#' the "edgePar" attribute of the first match.
#'
#' @param element_no Value identifying the element to look for.
#' @param dend_obj The dendrogram (a nested list) to search.
#' @return The colour stored on the first match. `NA_character_`, with a
#'   warning, when nothing matches or the match carries no colour.
extract_color <- function(element_no, dend_obj) {
  matches <- list.search(dend_obj, all(. == element_no))
  # Indexing an empty result with [[1]] would stop with an opaque
  # "subscript out of bounds"; report the miss explicitly instead.
  if (length(matches) == 0L) {
    warning(
      "no element matching ", paste(element_no, collapse = ", "),
      " found in dendrogram",
      call. = FALSE
    )
    return(NA_character_)
  }
  color <- attr(matches[[1]], "edgePar")$col
  # A match without a colour would yield NULL, which cannot fill a column.
  if (is.null(color)) {
    warning(
      "element ", paste(element_no, collapse = ", "),
      " has no branch colour set",
      call. = FALSE
    )
    return(NA_character_)
  }
  color
}
# 'dplyr' does the data manipulation.
library(dplyr)
# Group by element so that extract_color() is called once per element, and
# store the colour it returns.
categories_df <- mutate(
  group_by(categories_df, elements),
  color = extract_color(elements, dend)
)
现在这给了我们以下数据框:
> categories_df
Source: local data frame [40 x 3]
Groups: elements [40]
elements categories color
(dbl) (int) (chr)
1 1 1 #CB410B
2 2 1 #CB410B
3 3 1 #CB410B
4 4 1 #CB410B
5 5 1 #CB410B
6 6 2 #009000
7 7 1 #CB410B
8 8 1 #CB410B
9 9 3 #007FFF
10 10 1 #CB410B
.. ... ... ...
我们可以将其汇总为仅包含类别颜色的数据框,例如
# Collapse to the distinct colour(s) found within each cluster.
legend_data <- summarise(
  group_by(categories_df, categories),
  color = unique(color)
)
> legend_data
Source: local data frame [5 x 2]
categories color
(int) (chr)
1 1 #CB410B
2 2 #009000
3 3 #007FFF
4 4 #FF033E
5 5 #3B444B
现在很容易生成图例了:
# Redraw the dendrogram, then add a legend pairing each cluster number with
# its colour.
circlize_dendrogram(dend)
legend(
  -1.05, 1.05,
  legend = legend_data$categories,
  fill = legend_data$color,
  cex = 0.7
)
这给你:
您可以使用 cutree(dend, k = 5)
来确认类别颜色的数字对应于每个元素的类别。
除了 Felix 的解决方案之外,我想发布我自己的答案:
library(magrittr)
library(grid)
library(gridExtra)
library(dendextend)
cols <- c("#009000", "#FF033E", "#CB410B", "#3B444B", "#007FFF")
dend <- iris[1:40,-5] %>% dist %>% hclust %>% as.dendrogram
dend <- color_branches(dend, k = 5, col = cols)
dend %<>% set("labels_col", value = cols, k= 5)
dend %<>% set("labels_cex", .8)
dend %<>% set("branches_lwd", 2)
# Cluster id of every element, named by element label.
clust <- cutree(dend, k = 5)
# Label colours re-ordered so that they line up with the sorted cluster ids.
clust_sorted <- sort(clust)
colors <- labels_colors(dend)[names(clust_sorted)]
# One colour per cluster id, in ascending id order. Taking the first element
# of each cluster (rather than unique(colors)) keeps the legend aligned even
# if two clusters happen to share a colour.
first_of_cluster <- !duplicated(clust_sorted)
cluster_ids <- unname(clust_sorted[first_of_cluster])
clust_labs <- unname(colors[first_of_cluster])
circlize_dendrogram(dend)
# Legend rows start at y = .9 and step down by .05 per cluster; the grid
# functions are vectorised, so one call draws every row regardless of how
# many clusters there are.
legend_y <- .9 - .05 * (seq_along(clust_labs) - 1)
grid.circle(x = .95, y = legend_y, r = .02, gp = gpar(fill = clust_labs))
grid.text(
  label = cluster_ids, x = .95, y = legend_y,
  gp = gpar(fontsize = 9, col = "white", fontface = "bold")
)
# Vertical title, centred on the column of legend rows.
grid.text(
  x = .91, y = mean(range(legend_y)), label = "CLUSTERS", rot = 90,
  gp = gpar(fontsize = 9)
)
我正在尝试提取 circlize_dendrogram
的聚类中使用的颜色。这是示例代码:
library(magrittr)
library(dendextend)
# Palette to use for the five clusters.
cols <- c("#009000", "#FF033E", "#CB410B", "#3B444B", "#007FFF")
# Cluster the first 40 iris rows (fifth column dropped) and turn the result
# into a dendrogram.
dend <- as.dendrogram(hclust(dist(iris[1:40, -5])))
# Colour branches and labels by the 5-cluster cut, then tweak label size and
# branch width.
dend <- color_branches(dend, k = 5, col = cols)
dend <- set(dend, "labels_col", value = cols, k = 5)
dend <- set(dend, "labels_cex", .8)
dend <- set(dend, "branches_lwd", 2)
# Draw the circular dendrogram.
circlize_dendrogram(dend)
我可以使用 cutree(dend, k = 5) 提取各元素所属的簇(表格化的聚类结果)。有没有办法根据给定的 cols 提取树状图中各个簇的颜色?我需要用它通过 grid 包在图中插入图例。
图例示例:簇 1 - #009000;簇 2 - #FF033E;簇 3 - #CB410B;簇 4 - #3B444B;簇 5 - #007FFF。circlize_dendrogram 的问题在于,各簇实际使用的颜色顺序与上述预期顺序不同。
虽然我可以手动执行此操作,但如果我可以自动执行则效率会更高。如果我可以提取簇的颜色,那是可能的。
好的,这是一个非常 hacky 的解决方案。我相信一定还有更好的办法,但这是我的第一次尝试,所以请多包涵。
想法是在 dend
对象(内部是一个列表)中搜索各个元素名称(在本例中只是数字)并提取相应的颜色,将其保存在数据框中,然后用这个作为图例。
# Cut the dendrogram into 5 clusters; the result is named by element.
categories <- cutree(dend, k = 5)
# Collect element id, cluster id and a colour placeholder (filled in later)
# in one data frame.
categories_df <- data.frame(
  elements = as.numeric(names(categories)),
  categories = categories,
  color = NA
)
# now here's a little function that extracts the color for each element
# from the 'dend' object. It uses the list.search() function from the
# 'rlist' package
library(rlist)
#' Look up the branch colour of one element of a dendrogram.
#'
#' Searches `dend_obj` with `list.search()` (from the 'rlist' package) for
#' entries whose values all equal `element_no`, then reads the `col` entry of
#' the "edgePar" attribute of the first match.
#'
#' @param element_no Value identifying the element to look for.
#' @param dend_obj The dendrogram (a nested list) to search.
#' @return The colour stored on the first match. `NA_character_`, with a
#'   warning, when nothing matches or the match carries no colour.
extract_color <- function(element_no, dend_obj) {
  matches <- list.search(dend_obj, all(. == element_no))
  # Indexing an empty result with [[1]] would stop with an opaque
  # "subscript out of bounds"; report the miss explicitly instead.
  if (length(matches) == 0L) {
    warning(
      "no element matching ", paste(element_no, collapse = ", "),
      " found in dendrogram",
      call. = FALSE
    )
    return(NA_character_)
  }
  color <- attr(matches[[1]], "edgePar")$col
  # A match without a colour would yield NULL, which cannot fill a column.
  if (is.null(color)) {
    warning(
      "element ", paste(element_no, collapse = ", "),
      " has no branch colour set",
      call. = FALSE
    )
    return(NA_character_)
  }
  color
}
# 'dplyr' does the data manipulation.
library(dplyr)
# Group by element so that extract_color() is called once per element, and
# store the colour it returns.
categories_df <- mutate(
  group_by(categories_df, elements),
  color = extract_color(elements, dend)
)
现在这给了我们以下数据框:
> categories_df
Source: local data frame [40 x 3]
Groups: elements [40]
elements categories color
(dbl) (int) (chr)
1 1 1 #CB410B
2 2 1 #CB410B
3 3 1 #CB410B
4 4 1 #CB410B
5 5 1 #CB410B
6 6 2 #009000
7 7 1 #CB410B
8 8 1 #CB410B
9 9 3 #007FFF
10 10 1 #CB410B
.. ... ... ...
我们可以将其汇总为仅包含类别颜色的数据框,例如
# Collapse to the distinct colour(s) found within each cluster.
legend_data <- summarise(
  group_by(categories_df, categories),
  color = unique(color)
)
> legend_data
Source: local data frame [5 x 2]
categories color
(int) (chr)
1 1 #CB410B
2 2 #009000
3 3 #007FFF
4 4 #FF033E
5 5 #3B444B
现在很容易生成图例了:
# Redraw the dendrogram, then add a legend pairing each cluster number with
# its colour.
circlize_dendrogram(dend)
legend(
  -1.05, 1.05,
  legend = legend_data$categories,
  fill = legend_data$color,
  cex = 0.7
)
这给你:
您可以使用 cutree(dend, k = 5)
来确认类别颜色的数字对应于每个元素的类别。
除了 Felix 的解决方案之外,我想发布我自己的答案:
library(magrittr)
library(grid)
library(gridExtra)
library(dendextend)
cols <- c("#009000", "#FF033E", "#CB410B", "#3B444B", "#007FFF")
dend <- iris[1:40,-5] %>% dist %>% hclust %>% as.dendrogram
dend <- color_branches(dend, k = 5, col = cols)
dend %<>% set("labels_col", value = cols, k= 5)
dend %<>% set("labels_cex", .8)
dend %<>% set("branches_lwd", 2)
# Cluster id of every element, named by element label.
clust <- cutree(dend, k = 5)
# Label colours re-ordered so that they line up with the sorted cluster ids.
clust_sorted <- sort(clust)
colors <- labels_colors(dend)[names(clust_sorted)]
# One colour per cluster id, in ascending id order. Taking the first element
# of each cluster (rather than unique(colors)) keeps the legend aligned even
# if two clusters happen to share a colour.
first_of_cluster <- !duplicated(clust_sorted)
cluster_ids <- unname(clust_sorted[first_of_cluster])
clust_labs <- unname(colors[first_of_cluster])
circlize_dendrogram(dend)
# Legend rows start at y = .9 and step down by .05 per cluster; the grid
# functions are vectorised, so one call draws every row regardless of how
# many clusters there are.
legend_y <- .9 - .05 * (seq_along(clust_labs) - 1)
grid.circle(x = .95, y = legend_y, r = .02, gp = gpar(fill = clust_labs))
grid.text(
  label = cluster_ids, x = .95, y = legend_y,
  gp = gpar(fontsize = 9, col = "white", fontface = "bold")
)
# Vertical title, centred on the column of legend rows.
grid.text(
  x = .91, y = mean(range(legend_y)), label = "CLUSTERS", rot = 90,
  gp = gpar(fontsize = 9)
)