如何使用 reshape2 包中的 melt() 堆叠数据并保留分类标签,以生成多个并排的箱线图 (boxplot)
How to use melt() from the reshape2 package to stack categorical labels of data to produce multiple side-by-side boxplots
我正在尝试使用 R 中 “reshape2” 包的 melt() 函数来堆叠数据框,同时为每个观测值保留分类标签。我的问题是:如何调整 Eric Cai 的代码,以便按 behaviours$Family(一个 2 水平的因子列)的水平生成多个并排的缺口箱线图,并按数据集 behaviours 中的每个行为变量分组(下面提供了虚拟数据的链接)?
我的目标是为每个家族着色(V4 = red,G8 = blue)并配上图例,对这些缺口箱线图进行颜色编码。但是,在尝试使用 melt() 函数重排数据框时,我遇到了维度 (dimension) 方面的问题,自己无法弄清原因。如果有人能提供帮助,我将非常感谢。
可重现的虚拟数据见 Stack Overflow 页面底部。
Here is an example:
I am trying to follow Eric Cai's instructions
(1) Stack the data:
(a) Retain the categorical (2 level factor column) for family [,1]
(b) Retain all behavioural variables [,2:13]
# Asker's first attempt at reshaping the data (this is the version that fails).
# Defines label vectors, converts `behaviours` from wide to long with
# tidyr::gather(), then tries to stack it again with melt().

# Twelve behaviour names intended as x-axis labels.
behaviours.label=c("Swimming",
"Not.Swimming",
"Running",
"Not.Running",
"Fighting",
"Not.Fighting",
"Resting",
"Not.Resting",
"Hunting",
"Not.Hunting",
"Grooming",
"Not.Grooming")
# 24 family labels, alternating "V4" / "G8" (one pair per behaviour).
family.labels=c("V4", "G8",
"V4", "G8",
"V4", "G8",
"V4", "G8",
"V4", "G8",
"V4", "G8",
"V4", "G8",
"V4", "G8",
"V4", "G8",
"V4", "G8",
"V4", "G8",
"V4", "G8")
library(tidyr)
# Wide -> long: columns Swimming..Not.Grooming are gathered into key column `x`
# and value column `Mean.Value`.
data_long <- gather(behaviours, x, Mean.Value, Swimming:Not.Grooming)
head(data_long)
# Stack the data while retaining the Family and behavioural variables.
# NOTE(review): only tidyr is loaded in this snippet; melt() presumably comes
# from reshape2, which would need library(reshape2) first -- confirm.
# NOTE(review): 'behaviours' is the name of the data frame, and nothing visible
# here shows it is also a column; if it is not, this id vector makes melt()
# fail -- confirm against the data.
stacked.data = melt(behaviours, id = c('Family', 'behaviours'))
# Drop the third column (the one holding the melted column names).
stacked.data = stacked.data[, -3]
#head(stacked.data)
# Rename the remaining columns; note the capitalised "Values".
colnames(stacked.data)<-c("Family", "Behaviours", "Values")
生成箱线图
生成一个名为 boxplots.double 的对象,它使用公式 `Mean.value ~ Family + Behaviours` 将图分成 12 组成对的箱线图(即在同一张图中,每个行为按 behaviours$Family 的水平分组)。在 Eric Cai 的代码中,“at = ” 选项用于指定各箱线图沿水平轴的位置;xaxt = 'n' 用于抑制默认的水平轴,随后再用 axis() 和 title() 重新绘制该轴。
# Asker's first attempt at the notched side-by-side boxplots (this call fails).
# One box is requested per Family x Behaviours combination, at positions 1..24,
# with the default x axis suppressed (xaxt = 'n') and a custom axis added after.
# ylim evaluates to c(-3, 7): min(0, min(-3)) is -3 and max(7, na.rm = T) is 7.
# NOTE(review): the formula uses lower-case `values`, while the colnames()
# assignment earlier in this script names the column "Values"; R is
# case-sensitive, so confirm which name exists in stacked.data.
# NOTE(review): `names` supplies 2 labels for the 24 positions given in `at`;
# this matches the "'at' and 'labels' lengths differ, 24 != 2" error quoted
# after this snippet.
# NOTE(review): `srt` is passed to boxplot(); whether it has any effect on the
# axis labels here is not shown by this code -- confirm.
boxplots.double = boxplot(values~Family + Behaviours,
data = stacked.data,
at = c(1:24),
xaxt='n',
ylim = c(min(0, min(-3)),
max(7, na.rm = T)),
notch=TRUE,
col = c("red", "blue"),
names = c("V4", "G8"),
cex.axis=1.0,
srt=45)
# Custom x axis with the behaviour names.
# NOTE(review): `at` has 2 positions but `labels` has 12 entries, so axis()
# would hit the same kind of length mismatch -- confirm.
axis(side=1, at=c(1.8, 6.8), labels=c("Swimming",
"Not.Swimming",
"Running",
"Not.Running",
"Fighting",
"Not.Fighting",
"Resting",
"Not.Resting",
"Hunting",
"Not.Hunting",
"Grooming",
"Not.Grooming"), line=0.5, lwd=0)
错误信息
Error in axis(side = 1, at = 1:24, labels = c("V4", "G8"), xaxt = "n", :
'at' and 'labels' lengths differ, 24 != 2
In addition: Warning message:
In bxp(list(stats = c(-1.20186549488911, -0.970033304559564, -0.465271399251147, :
some notches went outside hinges ('box'): maybe set notch=FALSE
在 Richard Telford(理查德·特尔福德)的热心帮助下,下面的代码使用 reshape2 包生成了多个并排的箱线图。
# Side-by-side notched boxplots of every behaviour, split by Family.
#
# Expects a data set `behaviours` with a `Family` column and the twelve
# behaviour columns Swimming ... Not.Grooming (assumed from the calls below;
# verify against the dummy data).
#
# The original script started with rm(list = ls()). That call is dropped on
# purpose: wiping the global environment destroys the caller's objects and is
# not needed for the plot.
library(tidyr)
library(reshape2)

data(behaviours)

# Behaviour names in the order they should appear along the x axis.
behaviours.labels <- c(
  "Swimming", "Not.Swimming",
  "Running", "Not.Running",
  "Fighting", "Not.Fighting",
  "Resting", "Not.Resting",
  "Hunting", "Not.Hunting",
  "Grooming", "Not.Grooming"
)

# Family levels in plotting/legend order and their colours (V4 = red,
# G8 = blue). Keeping both in one place stops the boxes and legend disagreeing.
family.levels <- c("V4", "G8")
family.colours <- c("red", "blue")

# One family label per box (Family varies fastest within each behaviour).
family.labels <- rep(family.levels, times = length(behaviours.labels))

# Structure the data from wide to long format.
data_long <- gather(behaviours, x, Mean.Value, Swimming:Not.Grooming)
head(data_long)

# Stack the data with melt(), keeping Family and the behaviour name as ids.
stacked.data <- melt(data_long, id.vars = c("Family", "x"))
head(stacked.data)

# Keep the id columns and the measured value; select by name rather than by
# position so an unexpected extra column cannot shift what gets dropped.
stacked.data <- stacked.data[, c("Family", "x", "value")]
colnames(stacked.data) <- c("Family", "Behaviours", "Values")

# Fix the level order explicitly. Left as character/default factors, boxplot()
# sorts the groups alphabetically, which puts the boxes in a different order
# from the axis labels and makes the colour mapping depend on level order.
# Assumes Family only contains "V4" and "G8"; other values would become NA.
stacked.data$Family <- factor(stacked.data$Family, levels = family.levels)
stacked.data$Behaviours <- factor(
  stacked.data$Behaviours,
  levels = behaviours.labels
)
head(stacked.data)

# Generate the side-by-side boxplots.
# dev.new() is used instead of windows() so the script also runs off Windows.
dev.new(height = 10, width = 14)
par(mar = c(9, 7, 4, 4) + 0.3, mgp = c(5, 1.5, 0))

n.boxes <- length(family.levels) * length(behaviours.labels)

boxplots.double <- boxplot(
  Values ~ Family + Behaviours,
  data = stacked.data,
  at = seq_len(n.boxes),
  ylim = c(0, 1.8),
  xaxt = "n",            # default axis suppressed; custom axis drawn below
  notch = TRUE,
  col = family.colours,  # recycled: one colour per Family within each pair
  cex.axis = 0.7,
  cex.lab = 0.7,         # `cex.labels` is not a graphical parameter
  ylab = "Values",
  xlab = "Behaviours"
)

# One tick and one rotated label per behaviour, centred between its two boxes.
group.centres <- seq(1.5, n.boxes, by = length(family.levels))
axis(side = 1, at = group.centres, labels = FALSE)
text(
  group.centres,
  par("usr")[3] - 0.2,
  labels = levels(stacked.data$Behaviours),
  srt = 45,
  pos = 1,
  xpd = TRUE,
  cex = 0.8
)

# Legend uses the same level/colour vectors as the boxes.
legend(
  "topright",
  title = "Family",
  cex = 1.0,
  legend = family.levels,
  fill = family.colours,
  lty = c(1, 1)
)
我正在尝试使用 R 中 “reshape2” 包的 melt() 函数来堆叠数据框,同时为每个观测值保留分类标签。我的问题是:如何调整 Eric Cai 的代码,以便按 behaviours$Family(一个 2 水平的因子列)的水平生成多个并排的缺口箱线图,并按数据集 behaviours 中的每个行为变量分组(下面提供了虚拟数据的链接)?
我的目标是为每个家族着色(V4 = red,G8 = blue)并配上图例,对这些缺口箱线图进行颜色编码。但是,在尝试使用 melt() 函数重排数据框时,我遇到了维度 (dimension) 方面的问题,自己无法弄清原因。如果有人能提供帮助,我将非常感谢。
可重现的虚拟数据见 Stack Overflow 页面底部。
Here is an example:
I am trying to follow Eric Cai's instructions
(1) Stack the data:
(a) Retain the categorical (2 level factor column) for family [,1]
(b) Retain all behavioural variables [,2:13]
# Asker's first attempt at reshaping the data (this is the version that fails).
# Defines label vectors, converts `behaviours` from wide to long with
# tidyr::gather(), then tries to stack it again with melt().

# Twelve behaviour names intended as x-axis labels.
behaviours.label=c("Swimming",
"Not.Swimming",
"Running",
"Not.Running",
"Fighting",
"Not.Fighting",
"Resting",
"Not.Resting",
"Hunting",
"Not.Hunting",
"Grooming",
"Not.Grooming")
# 24 family labels, alternating "V4" / "G8" (one pair per behaviour).
family.labels=c("V4", "G8",
"V4", "G8",
"V4", "G8",
"V4", "G8",
"V4", "G8",
"V4", "G8",
"V4", "G8",
"V4", "G8",
"V4", "G8",
"V4", "G8",
"V4", "G8",
"V4", "G8")
library(tidyr)
# Wide -> long: columns Swimming..Not.Grooming are gathered into key column `x`
# and value column `Mean.Value`.
data_long <- gather(behaviours, x, Mean.Value, Swimming:Not.Grooming)
head(data_long)
# Stack the data while retaining the Family and behavioural variables.
# NOTE(review): only tidyr is loaded in this snippet; melt() presumably comes
# from reshape2, which would need library(reshape2) first -- confirm.
# NOTE(review): 'behaviours' is the name of the data frame, and nothing visible
# here shows it is also a column; if it is not, this id vector makes melt()
# fail -- confirm against the data.
stacked.data = melt(behaviours, id = c('Family', 'behaviours'))
# Drop the third column (the one holding the melted column names).
stacked.data = stacked.data[, -3]
#head(stacked.data)
# Rename the remaining columns; note the capitalised "Values".
colnames(stacked.data)<-c("Family", "Behaviours", "Values")
生成箱线图
生成一个名为 boxplots.double 的对象,它使用公式 `Mean.value ~ Family + Behaviours` 将图分成 12 组成对的箱线图(即在同一张图中,每个行为按 behaviours$Family 的水平分组)。在 Eric Cai 的代码中,“at = ” 选项用于指定各箱线图沿水平轴的位置;xaxt = 'n' 用于抑制默认的水平轴,随后再用 axis() 和 title() 重新绘制该轴。
# Asker's first attempt at the notched side-by-side boxplots (this call fails).
# One box is requested per Family x Behaviours combination, at positions 1..24,
# with the default x axis suppressed (xaxt = 'n') and a custom axis added after.
# ylim evaluates to c(-3, 7): min(0, min(-3)) is -3 and max(7, na.rm = T) is 7.
# NOTE(review): the formula uses lower-case `values`, while the colnames()
# assignment earlier in this script names the column "Values"; R is
# case-sensitive, so confirm which name exists in stacked.data.
# NOTE(review): `names` supplies 2 labels for the 24 positions given in `at`;
# this matches the "'at' and 'labels' lengths differ, 24 != 2" error quoted
# after this snippet.
# NOTE(review): `srt` is passed to boxplot(); whether it has any effect on the
# axis labels here is not shown by this code -- confirm.
boxplots.double = boxplot(values~Family + Behaviours,
data = stacked.data,
at = c(1:24),
xaxt='n',
ylim = c(min(0, min(-3)),
max(7, na.rm = T)),
notch=TRUE,
col = c("red", "blue"),
names = c("V4", "G8"),
cex.axis=1.0,
srt=45)
# Custom x axis with the behaviour names.
# NOTE(review): `at` has 2 positions but `labels` has 12 entries, so axis()
# would hit the same kind of length mismatch -- confirm.
axis(side=1, at=c(1.8, 6.8), labels=c("Swimming",
"Not.Swimming",
"Running",
"Not.Running",
"Fighting",
"Not.Fighting",
"Resting",
"Not.Resting",
"Hunting",
"Not.Hunting",
"Grooming",
"Not.Grooming"), line=0.5, lwd=0)
错误信息
Error in axis(side = 1, at = 1:24, labels = c("V4", "G8"), xaxt = "n", :
'at' and 'labels' lengths differ, 24 != 2
In addition: Warning message:
In bxp(list(stats = c(-1.20186549488911, -0.970033304559564, -0.465271399251147, :
some notches went outside hinges ('box'): maybe set notch=FALSE
在 Richard Telford(理查德·特尔福德)的热心帮助下,下面的代码使用 reshape2 包生成了多个并排的箱线图。
# Side-by-side notched boxplots of every behaviour, split by Family.
#
# Expects a data set `behaviours` with a `Family` column and the twelve
# behaviour columns Swimming ... Not.Grooming (assumed from the calls below;
# verify against the dummy data).
#
# The original script started with rm(list = ls()). That call is dropped on
# purpose: wiping the global environment destroys the caller's objects and is
# not needed for the plot.
library(tidyr)
library(reshape2)

data(behaviours)

# Behaviour names in the order they should appear along the x axis.
behaviours.labels <- c(
  "Swimming", "Not.Swimming",
  "Running", "Not.Running",
  "Fighting", "Not.Fighting",
  "Resting", "Not.Resting",
  "Hunting", "Not.Hunting",
  "Grooming", "Not.Grooming"
)

# Family levels in plotting/legend order and their colours (V4 = red,
# G8 = blue). Keeping both in one place stops the boxes and legend disagreeing.
family.levels <- c("V4", "G8")
family.colours <- c("red", "blue")

# One family label per box (Family varies fastest within each behaviour).
family.labels <- rep(family.levels, times = length(behaviours.labels))

# Structure the data from wide to long format.
data_long <- gather(behaviours, x, Mean.Value, Swimming:Not.Grooming)
head(data_long)

# Stack the data with melt(), keeping Family and the behaviour name as ids.
stacked.data <- melt(data_long, id.vars = c("Family", "x"))
head(stacked.data)

# Keep the id columns and the measured value; select by name rather than by
# position so an unexpected extra column cannot shift what gets dropped.
stacked.data <- stacked.data[, c("Family", "x", "value")]
colnames(stacked.data) <- c("Family", "Behaviours", "Values")

# Fix the level order explicitly. Left as character/default factors, boxplot()
# sorts the groups alphabetically, which puts the boxes in a different order
# from the axis labels and makes the colour mapping depend on level order.
# Assumes Family only contains "V4" and "G8"; other values would become NA.
stacked.data$Family <- factor(stacked.data$Family, levels = family.levels)
stacked.data$Behaviours <- factor(
  stacked.data$Behaviours,
  levels = behaviours.labels
)
head(stacked.data)

# Generate the side-by-side boxplots.
# dev.new() is used instead of windows() so the script also runs off Windows.
dev.new(height = 10, width = 14)
par(mar = c(9, 7, 4, 4) + 0.3, mgp = c(5, 1.5, 0))

n.boxes <- length(family.levels) * length(behaviours.labels)

boxplots.double <- boxplot(
  Values ~ Family + Behaviours,
  data = stacked.data,
  at = seq_len(n.boxes),
  ylim = c(0, 1.8),
  xaxt = "n",            # default axis suppressed; custom axis drawn below
  notch = TRUE,
  col = family.colours,  # recycled: one colour per Family within each pair
  cex.axis = 0.7,
  cex.lab = 0.7,         # `cex.labels` is not a graphical parameter
  ylab = "Values",
  xlab = "Behaviours"
)

# One tick and one rotated label per behaviour, centred between its two boxes.
group.centres <- seq(1.5, n.boxes, by = length(family.levels))
axis(side = 1, at = group.centres, labels = FALSE)
text(
  group.centres,
  par("usr")[3] - 0.2,
  labels = levels(stacked.data$Behaviours),
  srt = 45,
  pos = 1,
  xpd = TRUE,
  cex = 0.8
)

# Legend uses the same level/colour vectors as the boxes.
legend(
  "topright",
  title = "Family",
  cex = 1.0,
  legend = family.levels,
  fill = family.colours,
  lty = c(1, 1)
)