使用 geom_histogram 函数设置 binwidth 会给出空白图
Setting binwidth with geom_histogram function gives a blank graph
我是 R 的新手,使用 ggplot2 为直方图编写了以下代码:
# Load ggplot2, which provides ggplot(), aes() and geom_histogram().
library(ggplot2)
# Base plot object: the Chr.position column of data1 is mapped to the x axis.
# NOTE(review): the column is referenced as data1$Chr.position inside aes();
# the usual ggplot2 form is the bare column name, aes(Chr.position).
graph1 <- ggplot(data = data1, aes(data1$Chr.position));
# Draw the histogram using geom_histogram()'s default binning.
graph1 + geom_histogram()
(Chr.position == 染色体位置并包含大约 273 个与心脏病相关的染色体 2 突变,范围从位置 179395822 到位置 179658211。)
此代码给出了以下直方图:
这太棒了!(我实际上用 R 做了一件事情!!),但是当我想使用以下代码更改 'binwidth' 时:
# Same histogram, but requesting bins that are only 0.04 x-axis units wide.
graph1 + geom_histogram(binwidth = 0.04)
RStudio 会卡在这条命令上:它并没有冻结,但需要半个多小时才能加载出直方图(如果能加载出来的话);最终加载出来的只是一张没有任何条形的空白图,并伴随以下警告:
In loop_apply(n, do.ply) :
position_stack requires constant width: output may be incorrect
structure(list(Chr.position = c(179604264L, 179591957L, 179558736L,
179498055L, 179506963L, 179506963L, 179497076L, 179478864L, 179472127L,
179458075L, 179456704L, 179455162L, 179454957L, 179444661L, 179442324L,
179433758L, 179433213L, 179428871L, 179425091L, 179424036L, 179412902L,
179412245L, 179410544L, 179406990L, 179406990L, 179410799L, 179485012L,
179477004L, 179471841L, 179457392L, 179457005L, 179444429L, 179441649L,
179441015L, 179440067L, 179424398L, 179422457L, 179417723L, 179413187L,
179408239L, 179404491L, 179404286L, 179401029L, 179456704L, 179456704L,
179456704L, 179456704L, 179456704L, 179456704L, 179456704L, 179456704L,
179456704L, 179456704L, 179456704L, 179456704L, 179456704L, 179456704L,
179456704L, 179456704L, 179456704L, 179456704L, 179456704L, 179456704L,
179456704L, 179452435L, 179477885L, 179477885L, 179477885L, 179454576L,
179454576L, 179438874L, 179438874L, 179438874L, 179438874L, 179438874L,
179438874L, 179438874L, 179438874L, 179438874L, 179438874L, 179438874L,
179438874L, 179438874L, 179438874L, 179403522L, 179403522L, 179403522L,
179428871L, 179428871L, 179428871L, 179428871L, 179424496L, 179424496L,
179424496L, 179424496L, 179424496L, 179424496L, 179424496L, 179424496L,
179443339L, 179443339L, 179477885L, 179477885L, 179477885L, 179477885L,
179477885L, 179477885L, 179434009L, 179434009L, 179419765L, 179419765L,
179419765L, 179658211L, 179433665L, 179433665L, 179433665L, 179455112L,
179455112L, 179455112L, 179455112L, 179455112L, 179455112L, 179413187L,
179413187L, 179453427L, 179453427L, 179463684L, 179429849L, 179430371L,
179429468L, 179442793L, 179497039L, 179497039L, 179424782L, 179424782L,
179422725L, 179422725L, 179422231L, 179422231L, 179658189L, 179658189L,
179422725L, 179422725L, 179414153L, 179472209L, 179472209L, 179440319L,
179432420L, 179469738L, 179469738L, 179632576L, 179632576L, 179632576L,
179458085L, 179458085L, 179458085L, 179458085L, 179458085L, 179403566L,
179403566L, 179403566L, 179403566L, 179470359L, 179470359L, 179470359L,
179470359L, 179466263L, 179428086L, 179462634L, 179462634L, 179400405L,
179433407L, 179433407L, 179433407L, 179433407L, 179478861L, 179478861L,
179478861L, 179478861L, 179456704L, 179456704L, 179456704L, 179456704L,
179477169L, 179477169L, 179477169L, 179422249L, 179422249L, 179481600L,
179481600L, 179452411L, 179452411L, 179442238L, 179442238L, 179442238L,
179427963L, 179427963L, 179427963L, 179427963L, 179427963L, 179416530L,
179416531L, 179456704L, 179456704L, 179456704L, 179418418L, 179418418L,
179418418L, 179418418L, 179456704L, 179456704L, 179469477L, 179469477L,
179469477L, 179469477L, 179469477L, 179426073L, 179426074L, 179452242L,
179430544L, 179456704L, 179456704L, 179435468L, 179435468L, 179485829L,
179605063L, 179441870L, 179423314L, 179423314L, 179416474L, 179416474L,
179395822L, 179605941L, 179605941L, 179634455L, 179442238L, 179442238L,
179411339L, 179414506L, 179456704L, 179605063L, 179487411L, 179487411L,
179487411L, 179487411L, 179487411L, 179487411L, 179487411L, 179487411L,
179644174L, 179644174L, 179472155L, 179472155L, 179472155L)), .Names = "Chr.position", row.names = c(NA,
254L), class = "data.frame")
由于以下几个原因它不起作用:
- 尺度太大(从起点到终点的范围约为 200 kb),而您却试图把数据划分成宽度为 0.04 的分箱。即使您的机器算出了结果,您也看不到它们,因为在 200 kb 的尺度上,它们只是几条宽度为 0.04 的细线。
- 这没有意义:如果你对基因组单位(即碱基对)进行操作,那么你怎么会有 0.04 个碱基对?
我会这样呈现此类数据:
geom_density
library(ggplot2)
# Smoothed density of mutation positions along the chromosome.
# geom_density() computes the y values itself, so only x is mapped;
# geom_point() would fail here because no y aesthetic is supplied.
ggplot(data1, aes(Chr.position)) +
  geom_density() +
  labs(x = "Position in chromosome2",
       y = "Mutation density")
geom_point
# Count how many mutations were reported at each distinct position.
# The column is tabulated under an explicit name ("position") so the result
# does not depend on how table() labels a data-frame argument.
data2 <- as.data.frame(table(position = data1$Chr.position),
                       stringsAsFactors = FALSE)
# table() keeps the positions as text labels; convert them back to numbers
# so they are plotted on a continuous x axis.
data2$position <- as.numeric(data2$position)
# Plot result: one point per distinct position, height = number of mutations
ggplot(data2, aes(position, Freq)) +
  geom_point() +
  labs(x = "Position in chromosome2",
       y = "Number of mutations")
我是 R 的新手,使用 ggplot2 为直方图编写了以下代码:
# Load ggplot2, which provides ggplot(), aes() and geom_histogram().
library(ggplot2)
# Base plot object: the Chr.position column of data1 is mapped to the x axis.
# NOTE(review): the column is referenced as data1$Chr.position inside aes();
# the usual ggplot2 form is the bare column name, aes(Chr.position).
graph1 <- ggplot(data = data1, aes(data1$Chr.position));
# Draw the histogram using geom_histogram()'s default binning.
graph1 + geom_histogram()
(Chr.position == 染色体位置并包含大约 273 个与心脏病相关的染色体 2 突变,范围从位置 179395822 到位置 179658211。)
此代码给出了以下直方图:
这太棒了!(我实际上用 R 做了一件事情!!),但是当我想使用以下代码更改 'binwidth' 时:
# Same histogram, but requesting bins that are only 0.04 x-axis units wide.
graph1 + geom_histogram(binwidth = 0.04)
RStudio 会卡在这条命令上:它并没有冻结,但需要半个多小时才能加载出直方图(如果能加载出来的话);最终加载出来的只是一张没有任何条形的空白图,并伴随以下警告:
In loop_apply(n, do.ply) : position_stack requires constant width: output may be incorrect
structure(list(Chr.position = c(179604264L, 179591957L, 179558736L, 179498055L, 179506963L, 179506963L, 179497076L, 179478864L, 179472127L, 179458075L, 179456704L, 179455162L, 179454957L, 179444661L, 179442324L, 179433758L, 179433213L, 179428871L, 179425091L, 179424036L, 179412902L, 179412245L, 179410544L, 179406990L, 179406990L, 179410799L, 179485012L, 179477004L, 179471841L, 179457392L, 179457005L, 179444429L, 179441649L, 179441015L, 179440067L, 179424398L, 179422457L, 179417723L, 179413187L, 179408239L, 179404491L, 179404286L, 179401029L, 179456704L, 179456704L, 179456704L, 179456704L, 179456704L, 179456704L, 179456704L, 179456704L, 179456704L, 179456704L, 179456704L, 179456704L, 179456704L, 179456704L, 179456704L, 179456704L, 179456704L, 179456704L, 179456704L, 179456704L, 179456704L, 179452435L, 179477885L, 179477885L, 179477885L, 179454576L, 179454576L, 179438874L, 179438874L, 179438874L, 179438874L, 179438874L, 179438874L, 179438874L, 179438874L, 179438874L, 179438874L, 179438874L, 179438874L, 179438874L, 179438874L, 179403522L, 179403522L, 179403522L, 179428871L, 179428871L, 179428871L, 179428871L, 179424496L, 179424496L, 179424496L, 179424496L, 179424496L, 179424496L, 179424496L, 179424496L, 179443339L, 179443339L, 179477885L, 179477885L, 179477885L, 179477885L, 179477885L, 179477885L, 179434009L, 179434009L, 179419765L, 179419765L, 179419765L, 179658211L, 179433665L, 179433665L, 179433665L, 179455112L, 179455112L, 179455112L, 179455112L, 179455112L, 179455112L, 179413187L, 179413187L, 179453427L, 179453427L, 179463684L, 179429849L, 179430371L, 179429468L, 179442793L, 179497039L, 179497039L, 179424782L, 179424782L, 179422725L, 179422725L, 179422231L, 179422231L, 179658189L, 179658189L, 179422725L, 179422725L, 179414153L, 179472209L, 179472209L, 179440319L, 179432420L, 179469738L, 179469738L, 179632576L, 179632576L, 179632576L, 179458085L, 179458085L, 179458085L, 179458085L, 179458085L, 179403566L, 179403566L, 179403566L, 179403566L, 179470359L, 179470359L, 
179470359L, 179470359L, 179466263L, 179428086L, 179462634L, 179462634L, 179400405L, 179433407L, 179433407L, 179433407L, 179433407L, 179478861L, 179478861L, 179478861L, 179478861L, 179456704L, 179456704L, 179456704L, 179456704L, 179477169L, 179477169L, 179477169L, 179422249L, 179422249L, 179481600L, 179481600L, 179452411L, 179452411L, 179442238L, 179442238L, 179442238L, 179427963L, 179427963L, 179427963L, 179427963L, 179427963L, 179416530L, 179416531L, 179456704L, 179456704L, 179456704L, 179418418L, 179418418L, 179418418L, 179418418L, 179456704L, 179456704L, 179469477L, 179469477L, 179469477L, 179469477L, 179469477L, 179426073L, 179426074L, 179452242L, 179430544L, 179456704L, 179456704L, 179435468L, 179435468L, 179485829L, 179605063L, 179441870L, 179423314L, 179423314L, 179416474L, 179416474L, 179395822L, 179605941L, 179605941L, 179634455L, 179442238L, 179442238L, 179411339L, 179414506L, 179456704L, 179605063L, 179487411L, 179487411L, 179487411L, 179487411L, 179487411L, 179487411L, 179487411L, 179487411L, 179644174L, 179644174L, 179472155L, 179472155L, 179472155L)), .Names = "Chr.position", row.names = c(NA, 254L), class = "data.frame")
由于以下几个原因它不起作用:
- 尺度太大(从起点到终点的范围约为 200 kb),而您却试图把数据划分成宽度为 0.04 的分箱。即使您的机器算出了结果,您也看不到它们,因为在 200 kb 的尺度上,它们只是几条宽度为 0.04 的细线。
- 这没有意义:如果你对基因组单位(即碱基对)进行操作,那么你怎么会有 0.04 个碱基对?
我会这样呈现此类数据:
geom_density
library(ggplot2)
# Smoothed density of mutation positions along the chromosome.
# geom_density() computes the y values itself, so only x is mapped;
# geom_point() would fail here because no y aesthetic is supplied.
ggplot(data1, aes(Chr.position)) +
  geom_density() +
  labs(x = "Position in chromosome2",
       y = "Mutation density")
geom_point
# Count how many mutations were reported at each distinct position.
# The column is tabulated under an explicit name ("position") so the result
# does not depend on how table() labels a data-frame argument.
data2 <- as.data.frame(table(position = data1$Chr.position),
                       stringsAsFactors = FALSE)
# table() keeps the positions as text labels; convert them back to numbers
# so they are plotted on a continuous x axis.
data2$position <- as.numeric(data2$position)
# Plot result: one point per distinct position, height = number of mutations
ggplot(data2, aes(position, Freq)) +
  geom_point() +
  labs(x = "Position in chromosome2",
       y = "Number of mutations")