ggridges 中出现 “Picking joint bandwidth of NaN”(联合带宽为 NaN)
Picking joint bandwidth of NaN within ggridges
我正在尝试生成类似这里所描述的脊线图(ridgeline plot),但总是出现提示信息 “Picking joint bandwidth of NaN”。问题出在哪里?
非常感谢任何指点或提示。
祝好
# Yearly counts (column `n`) for the genes DAO and IDH2, 2000-2021:
# one row per year and gene, stored with tibble classes.
toplot <- structure(
  list(
    Year = as.character(rep(2000:2021, each = 2L)),
    genes = rep(c("DAO", "IDH2"), times = 22L),
    # Values alternate DAO, IDH2 within each year.
    n = c(
      2L, 0L, 2L, 0L, 2L, 0L, 3L, 0L, 5L, 0L, 5L, 0L,       # 2000-2005
      4L, 0L, 6L, 0L, 2L, 0L, 4L, 0L, 13L, 0L, 7L, 0L,      # 2006-2011
      7L, 0L, 169L, 1L, 182L, 0L, 215L, 56L, 147L, 11L,     # 2012-2016
      165L, 115L, 10L, 62L, 13L, 74L, 14L, 59L, 67L, 44L    # 2017-2021
    )
  ),
  row.names = c(NA, -44L),
  class = c("tbl_df", "tbl", "data.frame")
)
# Ridgeline plot of `n` by year (years reversed along the y axis).
# The fill aesthetic pastes year and gene together, so every ridge is
# estimated only from the rows that share one Year/gene combination.
toplot %>%
  mutate(YearFct = fct_rev(as.factor(Year))) %>%
  ggplot(mapping = aes(x = n, y = YearFct, fill = paste(YearFct, genes))) +
  geom_density_ridges(alpha = 0.8) +
  labs(x = "No_Patent", y = "Year") +
  # Remove the padding around both axes and let ridges draw past the panel.
  scale_y_discrete(expand = c(0, 0)) +
  scale_x_continuous(expand = c(0, 0)) +
  coord_cartesian(clip = "off") +
  theme_ridges(grid = FALSE)
您的代码没有问题,只是数据量不足以绘制这类图。每年每个基因只有一个观测值,因此您实际上是在用单个数据点为每一年做密度估计,这是行不通的。要让带宽自动选择正常工作,每年至少需要两个数据点。
如果您模拟大量数据,您会发现您的代码运行良好。
library(tidyverse)
library(ggridges)

# Simulate 20 rows per year (genes alternate DAO / IDH2) with exponential
# counts whose rate alternates in step with the gene labels.
set.seed(1)
toplot <- data.frame(
  Year = rep(2000:2021, each = 20),
  genes = rep(c("DAO", "IDH2"), times = 220),
  n = round(rexp(n = 440, rate = rep(c(0.9, 0.05), times = 220)))
)

# Same ridgeline plot as before, now filled by gene only.
toplot %>%
  mutate(YearFct = fct_rev(as.factor(Year))) %>%
  ggplot(mapping = aes(x = n, y = YearFct, fill = genes)) +
  geom_density_ridges(alpha = 0.8) +
  labs(x = "No_Patent", y = "Year") +
  scale_y_discrete(expand = c(0, 0)) +
  scale_x_continuous(expand = c(0, 0)) +
  coord_cartesian(clip = "off") +
  theme_ridges(grid = FALSE)
#> Picking joint bandwidth of 3.71
一种可能的解决方案是使用 `dnorm` 自行构造密度曲线:以 `n` 作为均值,以按基因分组计算的标准差作为 `sd`。这样可以让您对其中的不确定性有一个大致的了解。这种做法至少能得到一张还算有信息量的图,不过它可能不如简单的并列(dodged)条形图来得诚实,而后者才是绘制这份数据最自然的方式。
# Hand-built "density" ridges: for every Year/gene pair, draw a normal curve
# centred on the observed count `n`, using the per-gene standard deviation of
# `n` as its spread, and rescale each curve so that it peaks at height 1.
# NOTE(review): presumably meant for the one-row-per-Year/gene `toplot`
# (so `n` is a single value per group) -- confirm which `toplot` is in scope.
toplot %>%
  group_by(genes) %>%
  mutate(sd = sd(n)) %>%
  group_by(Year, genes) %>%
  # reframe() (dplyr >= 1.1.0) is the supported verb for results with many
  # rows per group. It returns an ungrouped data frame, so fct_rev() below
  # sees all years at once and really reverses the axis order; after a
  # multi-row summarise() the data stayed grouped and the reversal was lost.
  reframe(
    x = seq(0, 300, length.out = 1000),
    dens = dnorm(x, mean = n, sd = sd),
    dens = dens / max(dens)
  ) %>%
  mutate(YearFct = fct_rev(as.factor(Year))) %>%
  ggplot(aes(y = YearFct, x = x)) +
  geom_ridgeline(
    aes(height = dens, fill = genes),
    alpha = 0.8
  ) +
  labs(
    x = "No_Patent",
    y = "Year"
  ) +
  scale_y_discrete(expand = c(0, 0)) +
  scale_x_continuous(expand = c(0, 0)) +
  coord_cartesian(clip = "off") +
  theme_ridges(grid = FALSE)
我认为上面这种图比简单的折线图更难解读、更不诚实,而且可以说也更不美观。
# Plain line chart of `n` per year, one coloured line per gene, with
# white-filled point markers on top of the lines.
toplot %>%
  ggplot(aes(factor(Year), n, color = genes, group = genes)) +
  # Line thickness is set with `linewidth`; using `size` for lines is
  # deprecated since ggplot2 3.4.0.
  geom_line(linewidth = 1.5) +
  geom_point(size = 4, shape = 21, fill = "white") +
  scale_color_manual(values = c("deepskyblue4", "orange")) +
  labs(x = "Year") +
  theme_light(base_size = 16)
由 reprex 包 (v2.0.1) 创建于 2022-04-24
我正在尝试生成类似这里所描述的脊线图(ridgeline plot),但总是出现提示信息 “Picking joint bandwidth of NaN”。问题出在哪里?非常感谢任何指点或提示。祝好
# Yearly counts (column `n`) for the genes DAO and IDH2, 2000-2021:
# one row per year and gene, stored with tibble classes.
toplot <- structure(
  list(
    Year = as.character(rep(2000:2021, each = 2L)),
    genes = rep(c("DAO", "IDH2"), times = 22L),
    # Values alternate DAO, IDH2 within each year.
    n = c(
      2L, 0L, 2L, 0L, 2L, 0L, 3L, 0L, 5L, 0L, 5L, 0L,       # 2000-2005
      4L, 0L, 6L, 0L, 2L, 0L, 4L, 0L, 13L, 0L, 7L, 0L,      # 2006-2011
      7L, 0L, 169L, 1L, 182L, 0L, 215L, 56L, 147L, 11L,     # 2012-2016
      165L, 115L, 10L, 62L, 13L, 74L, 14L, 59L, 67L, 44L    # 2017-2021
    )
  ),
  row.names = c(NA, -44L),
  class = c("tbl_df", "tbl", "data.frame")
)
# Ridgeline plot of `n` by year (years reversed along the y axis).
# The fill aesthetic pastes year and gene together, so every ridge is
# estimated only from the rows that share one Year/gene combination.
toplot %>%
  mutate(YearFct = fct_rev(as.factor(Year))) %>%
  ggplot(mapping = aes(x = n, y = YearFct, fill = paste(YearFct, genes))) +
  geom_density_ridges(alpha = 0.8) +
  labs(x = "No_Patent", y = "Year") +
  # Remove the padding around both axes and let ridges draw past the panel.
  scale_y_discrete(expand = c(0, 0)) +
  scale_x_continuous(expand = c(0, 0)) +
  coord_cartesian(clip = "off") +
  theme_ridges(grid = FALSE)
您的代码没有问题,只是数据量不足以绘制这类图。每年每个基因只有一个观测值,因此您实际上是在用单个数据点为每一年做密度估计,这是行不通的。要让带宽自动选择正常工作,每年至少需要两个数据点。
如果您模拟大量数据,您会发现您的代码运行良好。
library(tidyverse)
library(ggridges)

# Simulate 20 rows per year (genes alternate DAO / IDH2) with exponential
# counts whose rate alternates in step with the gene labels.
set.seed(1)
toplot <- data.frame(
  Year = rep(2000:2021, each = 20),
  genes = rep(c("DAO", "IDH2"), times = 220),
  n = round(rexp(n = 440, rate = rep(c(0.9, 0.05), times = 220)))
)

# Same ridgeline plot as before, now filled by gene only.
toplot %>%
  mutate(YearFct = fct_rev(as.factor(Year))) %>%
  ggplot(mapping = aes(x = n, y = YearFct, fill = genes)) +
  geom_density_ridges(alpha = 0.8) +
  labs(x = "No_Patent", y = "Year") +
  scale_y_discrete(expand = c(0, 0)) +
  scale_x_continuous(expand = c(0, 0)) +
  coord_cartesian(clip = "off") +
  theme_ridges(grid = FALSE)
#> Picking joint bandwidth of 3.71
一种可能的解决方案是使用 `dnorm` 自行构造密度曲线:以 `n` 作为均值,以按基因分组计算的标准差作为 `sd`。这样可以让您对其中的不确定性有一个大致的了解。这种做法至少能得到一张还算有信息量的图,不过它可能不如简单的并列(dodged)条形图来得诚实,而后者才是绘制这份数据最自然的方式。
# Hand-built "density" ridges: for every Year/gene pair, draw a normal curve
# centred on the observed count `n`, using the per-gene standard deviation of
# `n` as its spread, and rescale each curve so that it peaks at height 1.
# NOTE(review): presumably meant for the one-row-per-Year/gene `toplot`
# (so `n` is a single value per group) -- confirm which `toplot` is in scope.
toplot %>%
  group_by(genes) %>%
  mutate(sd = sd(n)) %>%
  group_by(Year, genes) %>%
  # reframe() (dplyr >= 1.1.0) is the supported verb for results with many
  # rows per group. It returns an ungrouped data frame, so fct_rev() below
  # sees all years at once and really reverses the axis order; after a
  # multi-row summarise() the data stayed grouped and the reversal was lost.
  reframe(
    x = seq(0, 300, length.out = 1000),
    dens = dnorm(x, mean = n, sd = sd),
    dens = dens / max(dens)
  ) %>%
  mutate(YearFct = fct_rev(as.factor(Year))) %>%
  ggplot(aes(y = YearFct, x = x)) +
  geom_ridgeline(
    aes(height = dens, fill = genes),
    alpha = 0.8
  ) +
  labs(
    x = "No_Patent",
    y = "Year"
  ) +
  scale_y_discrete(expand = c(0, 0)) +
  scale_x_continuous(expand = c(0, 0)) +
  coord_cartesian(clip = "off") +
  theme_ridges(grid = FALSE)
我认为上面这种图比简单的折线图更难解读、更不诚实,而且可以说也更不美观。
# Plain line chart of `n` per year, one coloured line per gene, with
# white-filled point markers on top of the lines.
toplot %>%
  ggplot(aes(factor(Year), n, color = genes, group = genes)) +
  # Line thickness is set with `linewidth`; using `size` for lines is
  # deprecated since ggplot2 3.4.0.
  geom_line(linewidth = 1.5) +
  geom_point(size = 4, shape = 21, fill = "white") +
  scale_color_manual(values = c("deepskyblue4", "orange")) +
  labs(x = "Year") +
  theme_light(base_size = 16)
由 reprex 包 (v2.0.1) 创建于 2022-04-24