partykit - 修改箱线图的终端节点以在对数刻度中显示 y 轴
partykit - Modify the terminal node of a boxplot to display y axis in the log scale
我正在尝试使用 partykit
绘制由 rpart
生成的回归树。生成树的代码是这个:
library("rpart")
fit <- rpart(Price ~ Mileage + Type + Country, cu.summary)
library("partykit")
tree.2 <- as.party(fit)
plot(tree.2, type = "simple", terminal_panel = node_boxplot(tree.2,
col = "black", fill = "lightgray", width = 0.5, yscale = NULL,
ylines = 3, cex = 0.5, id = TRUE))
我正在尝试修改终端节点上的箱线图,以便 y 轴在对数刻度上。
我知道在尝试制作箱线图时,我们所要做的就是指定 boxplot(data, log="y")
。这就是为什么我试图仅在使用函数 boxplot
的单行中修改函数 node_boxplot
的原因。但是我一直得到相同的图表。有什么我想念的吗?任何反馈将不胜感激。
node_boxplot2<-function (obj, col = "black", fill = "lightgray", bg = "white",
width = 0.5, yscale = NULL, ylines = 3, cex = 0.5, id = TRUE,
mainlab = NULL, gp = gpar())
{
y <- log(obj$fitted[["(response)"]])
stopifnot(is.numeric(y))
if (is.null(yscale))
yscale <- range(y) +c(0,0.1)* diff(range(y))
rval <- function(node) {
nid <- id_node(node)
dat <- data_party(obj, nid)
yn <- dat[["(response)"]]
wn <- dat[["(weights)"]]
if (is.null(wn))
wn <- rep(1, length(yn))
x <- boxplot(rep.int(yn, wn),plot = FALSE)
top_vp <- viewport(layout = grid.layout(nrow = 2, ncol = 3,
widths = unit(c(ylines, 1, 1), c("lines", "null",
"lines")), heights = unit(c(1, 1), c("lines",
"null"))), width = unit(1, "npc"), height = unit(1,
"npc") - unit(2, "lines"), name = paste("node_boxplot",
nid, sep = ""), gp = gp)
pushViewport(top_vp)
grid.rect(gp = gpar(fill = bg, col = 0))
top <- viewport(layout.pos.col = 2, layout.pos.row = 1)
pushViewport(top)
if (is.null(mainlab)) {
mainlab <- if (id) {
function(id, nobs) sprintf("Node %s (n = %s)",
id, nobs)
}
else {
function(id, nobs) sprintf("n = %s", nobs)
}
}
if (is.function(mainlab)) {
mainlab <- mainlab(names(obj)[nid], sum(wn))
}
grid.text(mainlab)
popViewport()
plot <- viewport(layout.pos.col = 2, layout.pos.row = 2,
xscale = c(0, 1), yscale = yscale, name = paste0("node_boxplot",
nid, "plot"), clip = FALSE)
pushViewport(plot)
grid.yaxis()
grid.rect(gp = gpar(fill = "transparent"))
grid.clip()
xl <- 0.5 - width/4
xr <- 0.5 + width/4
grid.lines(unit(c(xl, xr), "npc"), unit(x$stats[1], "native"),
gp = gpar(col = col))
grid.lines(unit(0.5, "npc"), unit(x$stats[1:2], "native"),
gp = gpar(col = col, lty = 2))
grid.rect(unit(0.5, "npc"), unit(x$stats[2], "native"),
width = unit(width, "npc"), height = unit(diff(x$stats[c(2,
4)]), "native"), just = c("center", "bottom"),
gp = gpar(col = col, fill = fill))
grid.lines(unit(c(0.5 - width/2, 0.5 + width/2), "npc"),
unit(x$stats[3], "native"), gp = gpar(col = col,
lwd = 2))
grid.lines(unit(0.5, "npc"), unit(x$stats[4:5], "native"),
gp = gpar(col = col, lty = 2))
grid.lines(unit(c(xl, xr), "npc"), unit(x$stats[5], "native"),
gp = gpar(col = col))
n <- length(x$out)
if (n > 0) {
index <- 1:n
if (length(index) > 0)
grid.points(unit(rep.int(0.5, length(index)),
"npc"), unit(x$out[index], "native"), size = unit(cex,
"char"), gp = gpar(col = col))
}
upViewport(2)
}
return(rval)
}
(1) 如果在 log-scale 上绘图更合适,那么我通常希望在 log-scale 上更好地生长树。在这里,您可以简单地使用 rpart(log(Price) ~ ...)
.
(2) 如果您只想在节点箱线图中绘制不同的比例,则需要做更多的工作,因为箱线图是使用 grid.*()
函数绘制的 "by hand"。在下面的代码中,我转换了整体响应和节点中的响应,以通过获取日志来绘制。然后我根据需要修改 grid.yaxis()
。函数 node_logboxplot()
只是 node_boxplot()
的副本,并进行了一些简单的修改(由 #!!#
标记)。有了这个你可以做到
plot(tree.2, terminal_panel = node_logboxplot)
与
相比
plot(tree.2, terminal_panel = node_boxplot)
修改面板功能:
node_logboxplot <- function(obj,
col = "black",
fill = "lightgray",
bg = "white",
width = 0.5,
yscale = NULL,
ylines = 3,
cex = 0.5,
id = TRUE,
mainlab = NULL,
gp = gpar())
{
y <- log(obj$fitted[["(response)"]]) #!!# log-transform overall response
stopifnot(is.numeric(y))
if (is.null(yscale))
yscale <- range(y) + c(-0.1, 0.1) * diff(range(y))
#!!# compute yaxis labels on original scale
yaxis <- pretty(exp(y))
yaxis <- yaxis[yaxis > 0]
### panel function for boxplots in nodes
rval <- function(node) {
## extract data
nid <- id_node(node)
dat <- data_party(obj, nid)
yn <- log(dat[["(response)"]]) #!!# log-transform response in node
wn <- dat[["(weights)"]]
if(is.null(wn)) wn <- rep(1, length(yn))
## parameter setup
x <- boxplot(rep.int(yn, wn), plot = FALSE)
top_vp <- viewport(layout = grid.layout(nrow = 2, ncol = 3,
widths = unit(c(ylines, 1, 1),
c("lines", "null", "lines")),
heights = unit(c(1, 1), c("lines", "null"))),
width = unit(1, "npc"),
height = unit(1, "npc") - unit(2, "lines"),
name = paste("node_boxplot", nid, sep = ""),
gp = gp)
pushViewport(top_vp)
grid.rect(gp = gpar(fill = bg, col = 0))
## main title
top <- viewport(layout.pos.col=2, layout.pos.row=1)
pushViewport(top)
if (is.null(mainlab)) {
mainlab <- if(id) {
function(id, nobs) sprintf("Node %s (n = %s)", id, nobs)
} else {
function(id, nobs) sprintf("n = %s", nobs)
}
}
if (is.function(mainlab)) {
mainlab <- mainlab(names(obj)[nid], sum(wn))
}
grid.text(mainlab)
popViewport()
plot <- viewport(layout.pos.col = 2, layout.pos.row = 2,
xscale = c(0, 1), yscale = yscale,
name = paste0("node_boxplot", nid, "plot"),
clip = FALSE)
pushViewport(plot)
grid.yaxis(at = log(yaxis), label = yaxis) #!!# use pre-computed axis labels
grid.rect(gp = gpar(fill = "transparent"))
grid.clip()
xl <- 0.5 - width/4
xr <- 0.5 + width/4
## box & whiskers
grid.lines(unit(c(xl, xr), "npc"),
unit(x$stats[1], "native"), gp = gpar(col = col))
grid.lines(unit(0.5, "npc"),
unit(x$stats[1:2], "native"), gp = gpar(col = col, lty = 2))
grid.rect(unit(0.5, "npc"), unit(x$stats[2], "native"),
width = unit(width, "npc"), height = unit(diff(x$stats[c(2, 4)]), "native"),
just = c("center", "bottom"),
gp = gpar(col = col, fill = fill))
grid.lines(unit(c(0.5 - width/2, 0.5+width/2), "npc"),
unit(x$stats[3], "native"), gp = gpar(col = col, lwd = 2))
grid.lines(unit(0.5, "npc"), unit(x$stats[4:5], "native"),
gp = gpar(col = col, lty = 2))
grid.lines(unit(c(xl, xr), "npc"), unit(x$stats[5], "native"),
gp = gpar(col = col))
## outlier
n <- length(x$out)
if (n > 0) {
index <- 1:n ## which(x$out > yscale[1] & x$out < yscale[2])
if (length(index) > 0)
grid.points(unit(rep.int(0.5, length(index)), "npc"),
unit(x$out[index], "native"),
size = unit(cex, "char"), gp = gpar(col = col))
}
upViewport(2)
}
return(rval)
}
class(node_logboxplot) <- "grapcon_generator"
我正在尝试使用 partykit
绘制由 rpart
生成的回归树。生成树的代码是这个:
library("rpart")
fit <- rpart(Price ~ Mileage + Type + Country, cu.summary)
library("partykit")
tree.2 <- as.party(fit)
plot(tree.2, type = "simple", terminal_panel = node_boxplot(tree.2,
col = "black", fill = "lightgray", width = 0.5, yscale = NULL,
ylines = 3, cex = 0.5, id = TRUE))
我正在尝试修改终端节点上的箱线图,以便 y 轴在对数刻度上。
我知道在尝试制作箱线图时,我们所要做的就是指定 boxplot(data, log="y")
。这就是为什么我试图仅在使用函数 boxplot
的单行中修改函数 node_boxplot
的原因。但是我一直得到相同的图表。有什么我想念的吗?任何反馈将不胜感激。
node_boxplot2<-function (obj, col = "black", fill = "lightgray", bg = "white",
width = 0.5, yscale = NULL, ylines = 3, cex = 0.5, id = TRUE,
mainlab = NULL, gp = gpar())
{
y <- log(obj$fitted[["(response)"]])
stopifnot(is.numeric(y))
if (is.null(yscale))
yscale <- range(y) +c(0,0.1)* diff(range(y))
rval <- function(node) {
nid <- id_node(node)
dat <- data_party(obj, nid)
yn <- dat[["(response)"]]
wn <- dat[["(weights)"]]
if (is.null(wn))
wn <- rep(1, length(yn))
x <- boxplot(rep.int(yn, wn),plot = FALSE)
top_vp <- viewport(layout = grid.layout(nrow = 2, ncol = 3,
widths = unit(c(ylines, 1, 1), c("lines", "null",
"lines")), heights = unit(c(1, 1), c("lines",
"null"))), width = unit(1, "npc"), height = unit(1,
"npc") - unit(2, "lines"), name = paste("node_boxplot",
nid, sep = ""), gp = gp)
pushViewport(top_vp)
grid.rect(gp = gpar(fill = bg, col = 0))
top <- viewport(layout.pos.col = 2, layout.pos.row = 1)
pushViewport(top)
if (is.null(mainlab)) {
mainlab <- if (id) {
function(id, nobs) sprintf("Node %s (n = %s)",
id, nobs)
}
else {
function(id, nobs) sprintf("n = %s", nobs)
}
}
if (is.function(mainlab)) {
mainlab <- mainlab(names(obj)[nid], sum(wn))
}
grid.text(mainlab)
popViewport()
plot <- viewport(layout.pos.col = 2, layout.pos.row = 2,
xscale = c(0, 1), yscale = yscale, name = paste0("node_boxplot",
nid, "plot"), clip = FALSE)
pushViewport(plot)
grid.yaxis()
grid.rect(gp = gpar(fill = "transparent"))
grid.clip()
xl <- 0.5 - width/4
xr <- 0.5 + width/4
grid.lines(unit(c(xl, xr), "npc"), unit(x$stats[1], "native"),
gp = gpar(col = col))
grid.lines(unit(0.5, "npc"), unit(x$stats[1:2], "native"),
gp = gpar(col = col, lty = 2))
grid.rect(unit(0.5, "npc"), unit(x$stats[2], "native"),
width = unit(width, "npc"), height = unit(diff(x$stats[c(2,
4)]), "native"), just = c("center", "bottom"),
gp = gpar(col = col, fill = fill))
grid.lines(unit(c(0.5 - width/2, 0.5 + width/2), "npc"),
unit(x$stats[3], "native"), gp = gpar(col = col,
lwd = 2))
grid.lines(unit(0.5, "npc"), unit(x$stats[4:5], "native"),
gp = gpar(col = col, lty = 2))
grid.lines(unit(c(xl, xr), "npc"), unit(x$stats[5], "native"),
gp = gpar(col = col))
n <- length(x$out)
if (n > 0) {
index <- 1:n
if (length(index) > 0)
grid.points(unit(rep.int(0.5, length(index)),
"npc"), unit(x$out[index], "native"), size = unit(cex,
"char"), gp = gpar(col = col))
}
upViewport(2)
}
return(rval)
}
(1) 如果在 log-scale 上绘图更合适,那么我通常希望在 log-scale 上更好地生长树。在这里,您可以简单地使用 rpart(log(Price) ~ ...)
.
(2) 如果您只想在节点箱线图中绘制不同的比例,则需要做更多的工作,因为箱线图是使用 grid.*()
函数绘制的 "by hand"。在下面的代码中,我转换了整体响应和节点中的响应,以通过获取日志来绘制。然后我根据需要修改 grid.yaxis()
。函数 node_logboxplot()
只是 node_boxplot()
的副本,并进行了一些简单的修改(由 #!!#
标记)。有了这个你可以做到
plot(tree.2, terminal_panel = node_logboxplot)
与
相比plot(tree.2, terminal_panel = node_boxplot)
修改面板功能:
node_logboxplot <- function(obj,
col = "black",
fill = "lightgray",
bg = "white",
width = 0.5,
yscale = NULL,
ylines = 3,
cex = 0.5,
id = TRUE,
mainlab = NULL,
gp = gpar())
{
y <- log(obj$fitted[["(response)"]]) #!!# log-transform overall response
stopifnot(is.numeric(y))
if (is.null(yscale))
yscale <- range(y) + c(-0.1, 0.1) * diff(range(y))
#!!# compute yaxis labels on original scale
yaxis <- pretty(exp(y))
yaxis <- yaxis[yaxis > 0]
### panel function for boxplots in nodes
rval <- function(node) {
## extract data
nid <- id_node(node)
dat <- data_party(obj, nid)
yn <- log(dat[["(response)"]]) #!!# log-transform response in node
wn <- dat[["(weights)"]]
if(is.null(wn)) wn <- rep(1, length(yn))
## parameter setup
x <- boxplot(rep.int(yn, wn), plot = FALSE)
top_vp <- viewport(layout = grid.layout(nrow = 2, ncol = 3,
widths = unit(c(ylines, 1, 1),
c("lines", "null", "lines")),
heights = unit(c(1, 1), c("lines", "null"))),
width = unit(1, "npc"),
height = unit(1, "npc") - unit(2, "lines"),
name = paste("node_boxplot", nid, sep = ""),
gp = gp)
pushViewport(top_vp)
grid.rect(gp = gpar(fill = bg, col = 0))
## main title
top <- viewport(layout.pos.col=2, layout.pos.row=1)
pushViewport(top)
if (is.null(mainlab)) {
mainlab <- if(id) {
function(id, nobs) sprintf("Node %s (n = %s)", id, nobs)
} else {
function(id, nobs) sprintf("n = %s", nobs)
}
}
if (is.function(mainlab)) {
mainlab <- mainlab(names(obj)[nid], sum(wn))
}
grid.text(mainlab)
popViewport()
plot <- viewport(layout.pos.col = 2, layout.pos.row = 2,
xscale = c(0, 1), yscale = yscale,
name = paste0("node_boxplot", nid, "plot"),
clip = FALSE)
pushViewport(plot)
grid.yaxis(at = log(yaxis), label = yaxis) #!!# use pre-computed axis labels
grid.rect(gp = gpar(fill = "transparent"))
grid.clip()
xl <- 0.5 - width/4
xr <- 0.5 + width/4
## box & whiskers
grid.lines(unit(c(xl, xr), "npc"),
unit(x$stats[1], "native"), gp = gpar(col = col))
grid.lines(unit(0.5, "npc"),
unit(x$stats[1:2], "native"), gp = gpar(col = col, lty = 2))
grid.rect(unit(0.5, "npc"), unit(x$stats[2], "native"),
width = unit(width, "npc"), height = unit(diff(x$stats[c(2, 4)]), "native"),
just = c("center", "bottom"),
gp = gpar(col = col, fill = fill))
grid.lines(unit(c(0.5 - width/2, 0.5+width/2), "npc"),
unit(x$stats[3], "native"), gp = gpar(col = col, lwd = 2))
grid.lines(unit(0.5, "npc"), unit(x$stats[4:5], "native"),
gp = gpar(col = col, lty = 2))
grid.lines(unit(c(xl, xr), "npc"), unit(x$stats[5], "native"),
gp = gpar(col = col))
## outlier
n <- length(x$out)
if (n > 0) {
index <- 1:n ## which(x$out > yscale[1] & x$out < yscale[2])
if (length(index) > 0)
grid.points(unit(rep.int(0.5, length(index)), "npc"),
unit(x$out[index], "native"),
size = unit(cex, "char"), gp = gpar(col = col))
}
upViewport(2)
}
return(rval)
}
class(node_logboxplot) <- "grapcon_generator"