ggplot 中的颜色编码和图例标签
Color coding and legend labels in ggplot
我有一些数据,想用 ggplot 的 geom_point 绘制:
# Reproducible toy data: 100 points with standard-normal x and y; val holds 90
# standard-normal draws followed by 10 missing values.
set.seed(1)
df <- local({
  n.total <- 100
  n.missing <- 10
  # Draw order (x, then y, then val) matters for reproducibility.
  x.draws <- rnorm(n.total)
  y.draws <- rnorm(n.total)
  val.draws <- c(rnorm(n.total - n.missing), rep(NA, n.missing))
  data.frame(x = x.draws, y = y.draws, val = val.draws)
})
我按照 df$val 所在的区间为各点添加颜色:
# Lookup table: one row per value interval plus a final catch-all row
# ("not expressed") that is used for missing values. The interval labels are
# left-open / right-closed, i.e. "(start,end]".
intervals.df <- data.frame(
  interval = c("(-3,-2]", "(-2,-0.999]", "(-0.999,0]", "(0,1.96]",
               "(1.96,3.91]", "(3.91,5.87]", "not expressed"),
  start = c(-3, -2, -0.999, 0, 1.96, 3.91, NA),
  end = c(-2, -0.999, 0, 1.96, 3.91, 5.87, NA),
  col = c("#2f3b61", "#436CE8", "#E0E0FF", "#7d4343", "#C74747", "#EBCCD6",
          "#D3D3D3"),
  stringsAsFactors = FALSE
)

# Row of intervals.df that each element of df$val belongs to.
#  * NA values map to the last row ("not expressed").
#  * The lower bound is exclusive, matching the "(start,end]" labels, so a
#    value sitting exactly on a shared boundary matches one interval only.
#  * A value outside every interval yields NA instead of a zero-row result,
#    which would otherwise break the row count when attaching to df.
interval.idx <- vapply(df$val, function(x) {
  if (is.na(x)) {
    return(nrow(intervals.df))
  }
  hit <- which(intervals.df$start < x & x <= intervals.df$end)
  if (length(hit) == 0L) NA_integer_ else hit[1L]
}, integer(1))

# Attach the colour and the interval label of the matched row to every point.
df$col <- intervals.df$col[interval.idx]
df$interval <- intervals.df$interval[interval.idx]
这里我将 df$col 转换为 factor,并将其标签设置为区间名称,以便在图例中显示它们:
# Recode the colour column as a factor: levels follow the colour order of
# intervals.df, but each level is displayed as the matching interval label.
# After this step levels(df$col) are interval labels, not colour codes.
df[["col"]] <- factor(
  df[["col"]],
  levels = intervals.df[["col"]],
  labels = intervals.df[["interval"]]
)
这样图例还会显示所有区间,包括 df$val 中没有出现的区间,而这正是我想要的。
下面是我尝试绘制它的方式:
library(ggplot2)

# First attempt. The points map the *colour* aesthetic, while the only manual
# scale supplied is a *fill* scale, so the custom values are never applied to
# the points and the plot comes out with the wrong colours.
ggplot(data = df, mapping = aes(x = x, y = y, colour = col)) +
  geom_point(cex = 2, shape = 1, stroke = 1) +
  labs(x = "X", y = "Y") +
  theme_bw() +
  theme(
    legend.key = element_blank(),
    panel.border = element_blank(),
    strip.background = element_blank()
  ) +
  scale_shape(solid = TRUE) +
  scale_fill_manual(drop = FALSE, values = levels(df$col), name = "DE")
这已经很接近了,但颜色不对。
所以我认为添加 scale_color_manual 可以修正这个绘图命令:
# Second attempt: same plot plus a manual colour scale. It fails because
# df$col was relabelled, so levels(df$col) are interval labels such as
# "(0,1.96]" rather than colour codes, and they are passed as colour values.
ggplot(data = df, mapping = aes(x = x, y = y, colour = col)) +
  geom_point(cex = 2, shape = 1, stroke = 1) +
  labs(x = "X", y = "Y") +
  theme_bw() +
  theme(
    legend.key = element_blank(),
    panel.border = element_blank(),
    strip.background = element_blank()
  ) +
  scale_shape(solid = TRUE) +
  scale_fill_manual(drop = FALSE, values = levels(df$col), name = "DE") +
  scale_color_manual(drop = FALSE, values = levels(df$col), name = "DE")
但这会引发错误:
Error in grDevices::col2rgb(colour, TRUE) : invalid color name '(0,1.96]'
那么,如何才能正确设置颜色(同时让图例标题 name 也正确)?
一个选项是:先通过 intervals.df 设置 interval 的因子级别,再将颜色映射到 interval,这样级别的顺序和数量都是正确的。然后使用 intervals.df 中的颜色创建一个命名的颜色向量,传递给 scale_color_manual。
# Set levels of interval via intervals.df, so the legend lists every interval
# in table order, including intervals that contain no points.
df$interval <- factor(df$interval, levels = intervals.df$interval)
# Named vector of the colors based on intervals.df: names are the interval
# labels (the factor levels), values are the colour codes.
colors <- setNames(intervals.df$col, intervals.df$interval)
ggplot(df, aes(x = x, y = y, colour = interval)) +
  # show.legend = TRUE: since ggplot2 3.5.0 a legend key is only drawn for
  # levels present in the layer's data; forcing the layer into the legend
  # (together with drop = FALSE below) keeps the keys of unused intervals.
  geom_point(size = 2, shape = 1, stroke = 1, show.legend = TRUE) +
  labs(x = "X", y = "Y") +
  theme_bw() +
  theme(
    legend.key = element_blank(),
    panel.border = element_blank(),
    strip.background = element_blank()
  ) +
  # Named values tie each colour to its level regardless of ordering.
  scale_color_manual(values = colors, name = "DE", drop = FALSE)
我有一些数据,想用 ggplot 的 geom_point 绘制:
# Reproducible toy data: 100 points with standard-normal x and y; val holds 90
# standard-normal draws followed by 10 missing values.
set.seed(1)
df <- local({
  n.total <- 100
  n.missing <- 10
  # Draw order (x, then y, then val) matters for reproducibility.
  x.draws <- rnorm(n.total)
  y.draws <- rnorm(n.total)
  val.draws <- c(rnorm(n.total - n.missing), rep(NA, n.missing))
  data.frame(x = x.draws, y = y.draws, val = val.draws)
})
我按照 df$val 所在的区间为各点添加颜色:
# Lookup table: one row per value interval plus a final catch-all row
# ("not expressed") that is used for missing values. The interval labels are
# left-open / right-closed, i.e. "(start,end]".
intervals.df <- data.frame(
  interval = c("(-3,-2]", "(-2,-0.999]", "(-0.999,0]", "(0,1.96]",
               "(1.96,3.91]", "(3.91,5.87]", "not expressed"),
  start = c(-3, -2, -0.999, 0, 1.96, 3.91, NA),
  end = c(-2, -0.999, 0, 1.96, 3.91, 5.87, NA),
  col = c("#2f3b61", "#436CE8", "#E0E0FF", "#7d4343", "#C74747", "#EBCCD6",
          "#D3D3D3"),
  stringsAsFactors = FALSE
)

# Row of intervals.df that each element of df$val belongs to.
#  * NA values map to the last row ("not expressed").
#  * The lower bound is exclusive, matching the "(start,end]" labels, so a
#    value sitting exactly on a shared boundary matches one interval only.
#  * A value outside every interval yields NA instead of a zero-row result,
#    which would otherwise break the row count when attaching to df.
interval.idx <- vapply(df$val, function(x) {
  if (is.na(x)) {
    return(nrow(intervals.df))
  }
  hit <- which(intervals.df$start < x & x <= intervals.df$end)
  if (length(hit) == 0L) NA_integer_ else hit[1L]
}, integer(1))

# Attach the colour and the interval label of the matched row to every point.
df$col <- intervals.df$col[interval.idx]
df$interval <- intervals.df$interval[interval.idx]
这里我将 df$col 转换为 factor,并将其标签设置为区间名称,以便在图例中显示它们:
# Recode the colour column as a factor: levels follow the colour order of
# intervals.df, but each level is displayed as the matching interval label.
# After this step levels(df$col) are interval labels, not colour codes.
df[["col"]] <- factor(
  df[["col"]],
  levels = intervals.df[["col"]],
  labels = intervals.df[["interval"]]
)
这样图例还会显示所有区间,包括 df$val 中没有出现的区间,而这正是我想要的。
下面是我尝试绘制它的方式:
library(ggplot2)

# First attempt. The points map the *colour* aesthetic, while the only manual
# scale supplied is a *fill* scale, so the custom values are never applied to
# the points and the plot comes out with the wrong colours.
ggplot(data = df, mapping = aes(x = x, y = y, colour = col)) +
  geom_point(cex = 2, shape = 1, stroke = 1) +
  labs(x = "X", y = "Y") +
  theme_bw() +
  theme(
    legend.key = element_blank(),
    panel.border = element_blank(),
    strip.background = element_blank()
  ) +
  scale_shape(solid = TRUE) +
  scale_fill_manual(drop = FALSE, values = levels(df$col), name = "DE")
这已经很接近了,但颜色不对。
所以我认为添加 scale_color_manual 可以修正这个绘图命令:
# Second attempt: same plot plus a manual colour scale. It fails because
# df$col was relabelled, so levels(df$col) are interval labels such as
# "(0,1.96]" rather than colour codes, and they are passed as colour values.
ggplot(data = df, mapping = aes(x = x, y = y, colour = col)) +
  geom_point(cex = 2, shape = 1, stroke = 1) +
  labs(x = "X", y = "Y") +
  theme_bw() +
  theme(
    legend.key = element_blank(),
    panel.border = element_blank(),
    strip.background = element_blank()
  ) +
  scale_shape(solid = TRUE) +
  scale_fill_manual(drop = FALSE, values = levels(df$col), name = "DE") +
  scale_color_manual(drop = FALSE, values = levels(df$col), name = "DE")
但这会引发错误:
Error in grDevices::col2rgb(colour, TRUE) : invalid color name '(0,1.96]'
那么,如何才能正确设置颜色(同时让图例标题 name 也正确)?
一个选项是:先通过 intervals.df 设置 interval 的因子级别,再将颜色映射到 interval,这样级别的顺序和数量都是正确的。然后使用 intervals.df 中的颜色创建一个命名的颜色向量,传递给 scale_color_manual。
# Set levels of interval via intervals.df, so the legend lists every interval
# in table order, including intervals that contain no points.
df$interval <- factor(df$interval, levels = intervals.df$interval)
# Named vector of the colors based on intervals.df: names are the interval
# labels (the factor levels), values are the colour codes.
colors <- setNames(intervals.df$col, intervals.df$interval)
ggplot(df, aes(x = x, y = y, colour = interval)) +
  # show.legend = TRUE: since ggplot2 3.5.0 a legend key is only drawn for
  # levels present in the layer's data; forcing the layer into the legend
  # (together with drop = FALSE below) keeps the keys of unused intervals.
  geom_point(size = 2, shape = 1, stroke = 1, show.legend = TRUE) +
  labs(x = "X", y = "Y") +
  theme_bw() +
  theme(
    legend.key = element_blank(),
    panel.border = element_blank(),
    strip.background = element_blank()
  ) +
  # Named values tie each colour to its level regardless of ordering.
  scale_color_manual(values = colors, name = "DE", drop = FALSE)