对 ggplot 使用 aes_ 而不是 aes_string
Using aes_ instead of aes_string for ggplot
我正在尝试遍历数据框 btest
的列子集(下面的数据)并绘制一些数据。我仍然不太了解 tidyeval 系统,尽管它似乎还没有在 ggplot2 中完全实现?
我可以使用以下代码执行此操作:
# Names of the columns to plot: "A" through "I".
bcells <- LETTERS[1:9]

# Build one scatter plot per column. aes_string() takes the mapping as
# strings, so the column name held in `cell` can be passed directly as y.
lapply(bcells, function(cell) {
  point_mapping <- aes_string(x = 'response', y = cell, color = 'response')
  ggplot(data = btest) +
    geom_point(mapping = point_mapping)
})
但是,aes_string
和 aes_
的文档指出 aes_
更可取。
aes_string
and aes_
are particularly useful when writing functions
that create plots because you can use strings or quoted names/calls to
define the aesthetic mappings, rather than having to use substitute to
generate a call to aes()
.
I recommend using aes_()
, because creating the equivalents of
aes(colour = "my colour")
or aes(x = `X`)
with aes_string()
is quite
clunky.
我的问题:我不明白为什么 aes_ 更可取,或者我如何在这里使用 aes_
而不是 aes_string
来实现相同的代码。
我的数据如下:
structure(list(A = c(0.982753626864792, 0.490259710510256, 0.454306062926931,
0.443880090600994, 0.992704372174903, 0.831160693384458, 0.740981059382211,
0.971324123908582, 0.589614636646592, 0.663016559532728, 0.0442355006911685,
0.758388810061299, 0.39969185826509, 0.96343122781417, 0.578266180477106,
0.935289565081073, 0.954414616390872, 0.476708144579943, 0.906912570353835,
0.860767420084079, 0.878055964477507, 0.717065411183562, 0.626679994549329,
0.00471648517899614), B = c(0, 0.0359489937221843, 0.0455022610469154,
0.0245991717594771, 0, 0.0150049906282552, 0.0357444181630181,
0.00409885247542489, 0.0153491030612378, 0.0116215258999156,
0.0971266806096337, 0.00523783384210994, 0.000104337240641022,
0.000475801870965878, 0.0388763858222573, 0, 0.00947456311935685,
0.091028293882243, 0, 0.00114639793715674, 0.01709111810953,
0.00642564686487351, 0.0122005596623948, 0.0735538728126912),
C = c(0.00396910361917928, 0.3726373775819, 0.389150409858995,
0.236004149077653, 0.00654522233764124, 0.0757762646142197,
0.064467293054645, 0.021714367242937, 0.0510773710590119,
0.279742249706993, 0.228435750343793, 0.0163968987432784,
0.0386501968648076, 0.0147706021018908, 0.117796354856799,
0.0262705486829676, 0.0116437209145554, 0.249652632820836,
0.0708357724821996, 0.115182751748712, 0.0485081551895102,
0.0430306406326062, 0.0185687667917195, 0.062321917083855
), D = c(0, 0, 0.00728600019514972, 0.00320524248329104,
0, 0.0063037888029564, 0.00654538187729239, 0, 0.0176038859003177,
0, 0.0181870750390433, 0.00152581718814669, 0.00977725964480791,
0, 0, 0.00447626637015039, 0, 0, 0, 0, 0, 0.0453332704320773,
0, 0), E = c(0, 0, 0, 0.000475498116547242, 0.000291416366767824,
0, 0, 0.000108185143509404, 0, 0, 0.000585484789620521, 0,
0.00119989502426795, 0.000562924764494004, 0, 0, 0.000232527879948303,
6.96708420418182e-05, 0.000472096790474276, 0.000545274075130702,
0.000572161953294472, 0, 0, 0.0111234621378363), F = c(0.0113619316667346,
0.0761221446319925, 0.0940043097282167, 0.181463421237771,
0.00045898912068803, 0.0379484560273567, 0.130661228056559,
0.00273248163097645, 0.27374951093064, 0.0456196648603633,
0.311899809955928, 0.200378764906006, 0.483217874497928,
0.0162868512293491, 0.187555044444225, 0.0336927109381938,
0.0179346325967824, 0.141906152617276, 0.0167439810037839,
0.0137012129908311, 0.0297632632518369, 0.170891255992311,
0.210301640776889, 0.0808642159093989), G = c(0.00140289433926378,
0.00867420181911554, 0, 0.0103474797609997, 0, 0.0217237781037489,
0, 2.19895985703425e-05, 0, 0, 0.113543191682212, 0.00442851495302812,
0, 0, 0.0176396645397039, 0.000270908927614937, 0.00629993909848545,
0.0406351052576609, 0.00503557936970754, 0.00865694316409033,
0.0191377054890488, 0, 0.0115149714931613, 0.243819008858108
), H = c(0, 0.00293624962565618, 0, 0.0109784338152019, 0,
0, 0, 0, 0.0368789010169724, 0, 0, 0.0104630777433289, 0.0618086409502145,
0, 0.0252902404777352, 0, 0, 0, 0, 0, 0, 0.0101281816458403,
0.051238858176748, 0), I = c(0.000512443510029838, 0.0134213221088962,
0.00975095624379213, 0.0890465131480651, 0, 0.0120820284390054,
0.0216006194662742, 0, 0.0157265913852278, 0, 0.185986506888601,
0.00318028256280335, 0.00554993751224351, 0.00447259221913014,
0.0345761293821743, 0, 0, 0, 0, 0, 0.00687163152927302, 0.00712559324872945,
0.0694952085497587, 0.523601038019114), `P-value` = c(9999,
9999, 9999, 9999, 9999, 9999, 9999, 9999, 9999, 9999, 9999,
9999, 9999, 9999, 9999, 9999, 9999, 9999, 9999, 9999, 9999,
9999, 9999, 9999), Correlation = c(0.787379117728473, 0.713767273835577,
0.432941448432532, 0.654688521787571, 0.690623129562749,
0.72269025999843, 0.535092134674879, 0.795288368310815, 0.754840745986047,
0.0872468087627683, 0.760738916041899, 0.875990453791969,
0.878637700077733, 0.851326230903871, 0.458259685017224,
0.815125101981778, 0.299231595131615, 0.613359452217542,
0.424264050686203, 0.691764490900993, 0.806704730396525,
0.602426815978143, 0.786361339790331, 0.871574807143838),
RMSE = c(0.698736121897212, 0.75020398425833, 0.901716663988092,
0.763690363629575, 0.758968447930353, 0.757909848657902,
0.8482750320726, 0.695776594753745, 0.668395739137566, 1.04995120161959,
0.654740332409367, 0.590052129521314, 0.484783647407576,
0.659059332792332, 0.890274043213301, 0.687194392331628,
0.966871968720401, 0.807189528281839, 0.906250907041538,
0.770762860306121, 0.704446496934398, 0.805908330153981,
0.652426738364919, 0.490900219800415), sample_id = c("x6494",
"x1867", "x5038", "x5118", "x4631", "x6126", "x2051", "x0346",
"x2056", "x4949", "x5784", "x7357", "x1509", "x9449", "x0167",
"x9521", "x1494", "x7623", "x9705", "x4810", "x3549", "x6336",
"x9699", "x8727"), patient_id = c("x6494", "x1867", "x5038",
"x5118", "x4631", "x6126", "x2051", "x0346", "x2056", "x4949",
"x5784", "x7357", "x1509", "x9449", "x0167", "x9521", "x1494",
"x7623", "x9705", "x4810", "x3549", "x6336", "x9699", "x8727"
), treated = c(TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE), timing = c("post",
"pre", "post", "post", "post", "pre", "pre", "post", "pre",
"post", "pre", "post", "post", "post", "pre", "post", "pre",
"post", "post", "post", "pre", "post", "post", "pre"), response = c("nonresp",
"nonresp", "nonresp", "nonresp", "nonresp", "resp", "nonresp",
"nonresp", "nonresp", "nonresp", "nonresp", "nonresp", "resp",
"nonresp", "nonresp", "nonresp", "nonresp", "resp", "nonresp",
"nonresp", "nonresp", "nonresp", "resp", "nonresp"), dataset = c("sny",
"sny", "sny", "sny", "sny", "sny", "sny", "sny", "sny", "sny",
"sny", "sny", "sny", "sny", "sny", "sny", "sny", "sny", "sny",
"sny", "sny", "sny", "sny", "sny"), OS_status = c(1, 1, 1,
1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0,
0, 1), OS_time = c(36.5, 78.2142857142857, 62.5714285714286,
140.785714285714, 26.0714285714286, 99.0714285714286, 41.7142857142857,
151.214285714286, 203.357142857143, 36.5, 26.0714285714286,
104.285714285714, 234.642857142857, 31.2857142857143, 140.785714285714,
140.785714285714, 104.285714285714, 208.571428571429, 62.5714285714286,
20.8571428571429, 26.0714285714286, 359.785714285714, 229.428571428571,
20.8571428571429), filtercol = structure(c(2L, 3L, 2L, 2L,
2L, 3L, 3L, 2L, 3L, 2L, 3L, 2L, 2L, 2L, 3L, 2L, 3L, 2L, 2L,
2L, 3L, 2L, 2L, 3L), .Label = c("on", "post", "pre"), class = "factor")), class = "data.frame", row.names = c(NA,
-24L), .Names = c("A", "B", "C", "D", "E", "F", "G", "H", "I",
"P-value", "Correlation", "RMSE", "sample_id", "patient_id",
"treated", "timing", "response", "dataset", "OS_status", "OS_time",
"filtercol"))
除了“将代码存储为字符串不好”这一反对理由之外,文档中的示例还解释了推荐使用 aes_
而不是 aes_string
的原因:
# You can't easily mimic these calls with aes_string
aes(`0`, colour = "smooth")
aes_(~ `0`, colour = "smooth")
# Ok, you can, but it requires a _lot_ of quotes
aes_string("`0`", colour = '"smooth"')
它还列出了如何让您的示例工作:
# Three ways of generating the same aesthetics
aes(mpg, wt, col = cyl)
aes_(quote(mpg), quote(wt), col = quote(cyl))
aes_(~mpg, ~wt, col = ~cyl)
aes_string("mpg", "wt", col = "cyl")
...
# Convert strings to names with as.name
var <- "cyl"
...
aes_(col = as.name(var))
那么,在上下文中,
library(ggplot2)
# Build one scatter plot per column "A".."I". as.name() converts the
# column-name string into a symbol, which aes_() accepts alongside formulas.
plots <- lapply(LETTERS[1:9], function(cell) {
  cell_mapping <- aes_(~response, as.name(cell), color = ~response)
  ggplot(data = btest, mapping = cell_mapping) +
    geom_point()
})
# Display the third plot (column "C").
plots[[3]]
但是,在未来的某个时候,ggplot2 的非标准求值(NSE)将会改变,很可能改用 rlang,从而与 tidyverse 的其余部分更加统一,所以现在不必花太多精力去弄清它的细微之处;如果你能让 aes_string
做你想做的事,那么目前就用它吧。
相反,我可能会建议一种完全不同的方法:将您的九个图合并为一个分面图(faceted plot)。您需要将数据重塑为长格式,但到那时就不复杂了:
library(tidyr)
# Reshape columns A-I to long format (one row per sample/cell pair), then
# draw a single plot with one facet per cell column.
# pivot_longer() replaces the superseded gather(); requires tidyr >= 1.0.0.
btest %>%
  pivot_longer(cols = A:I, names_to = "cell", values_to = "value") %>%
  ggplot(aes(response, value, color = response)) +
  geom_point() +
  facet_wrap(~cell)
我正在尝试遍历数据框 btest
的列子集(下面的数据)并绘制一些数据。我仍然不太了解 tidyeval 系统,尽管它似乎还没有在 ggplot2 中完全实现?
我可以使用以下代码执行此操作:
# Names of the columns to plot: "A" through "I".
bcells <- LETTERS[1:9]

# Build one scatter plot per column. aes_string() takes the mapping as
# strings, so the column name held in `cell` can be passed directly as y.
lapply(bcells, function(cell) {
  point_mapping <- aes_string(x = 'response', y = cell, color = 'response')
  ggplot(data = btest) +
    geom_point(mapping = point_mapping)
})
但是,aes_string
和 aes_
的文档指出 aes_
更可取。
aes_string
and aes_
are particularly useful when writing functions that create plots because you can use strings or quoted names/calls to define the aesthetic mappings, rather than having to use substitute to generate a call to aes()
. I recommend using
aes_()
, because creating the equivalents of aes(colour = "my colour")
or aes(x = `X`)
with aes_string()
is quite clunky.
我的问题:我不明白为什么 aes_ 更可取,或者我如何在这里使用 aes_
而不是 aes_string
来实现相同的代码。
我的数据如下:
structure(list(A = c(0.982753626864792, 0.490259710510256, 0.454306062926931,
0.443880090600994, 0.992704372174903, 0.831160693384458, 0.740981059382211,
0.971324123908582, 0.589614636646592, 0.663016559532728, 0.0442355006911685,
0.758388810061299, 0.39969185826509, 0.96343122781417, 0.578266180477106,
0.935289565081073, 0.954414616390872, 0.476708144579943, 0.906912570353835,
0.860767420084079, 0.878055964477507, 0.717065411183562, 0.626679994549329,
0.00471648517899614), B = c(0, 0.0359489937221843, 0.0455022610469154,
0.0245991717594771, 0, 0.0150049906282552, 0.0357444181630181,
0.00409885247542489, 0.0153491030612378, 0.0116215258999156,
0.0971266806096337, 0.00523783384210994, 0.000104337240641022,
0.000475801870965878, 0.0388763858222573, 0, 0.00947456311935685,
0.091028293882243, 0, 0.00114639793715674, 0.01709111810953,
0.00642564686487351, 0.0122005596623948, 0.0735538728126912),
C = c(0.00396910361917928, 0.3726373775819, 0.389150409858995,
0.236004149077653, 0.00654522233764124, 0.0757762646142197,
0.064467293054645, 0.021714367242937, 0.0510773710590119,
0.279742249706993, 0.228435750343793, 0.0163968987432784,
0.0386501968648076, 0.0147706021018908, 0.117796354856799,
0.0262705486829676, 0.0116437209145554, 0.249652632820836,
0.0708357724821996, 0.115182751748712, 0.0485081551895102,
0.0430306406326062, 0.0185687667917195, 0.062321917083855
), D = c(0, 0, 0.00728600019514972, 0.00320524248329104,
0, 0.0063037888029564, 0.00654538187729239, 0, 0.0176038859003177,
0, 0.0181870750390433, 0.00152581718814669, 0.00977725964480791,
0, 0, 0.00447626637015039, 0, 0, 0, 0, 0, 0.0453332704320773,
0, 0), E = c(0, 0, 0, 0.000475498116547242, 0.000291416366767824,
0, 0, 0.000108185143509404, 0, 0, 0.000585484789620521, 0,
0.00119989502426795, 0.000562924764494004, 0, 0, 0.000232527879948303,
6.96708420418182e-05, 0.000472096790474276, 0.000545274075130702,
0.000572161953294472, 0, 0, 0.0111234621378363), F = c(0.0113619316667346,
0.0761221446319925, 0.0940043097282167, 0.181463421237771,
0.00045898912068803, 0.0379484560273567, 0.130661228056559,
0.00273248163097645, 0.27374951093064, 0.0456196648603633,
0.311899809955928, 0.200378764906006, 0.483217874497928,
0.0162868512293491, 0.187555044444225, 0.0336927109381938,
0.0179346325967824, 0.141906152617276, 0.0167439810037839,
0.0137012129908311, 0.0297632632518369, 0.170891255992311,
0.210301640776889, 0.0808642159093989), G = c(0.00140289433926378,
0.00867420181911554, 0, 0.0103474797609997, 0, 0.0217237781037489,
0, 2.19895985703425e-05, 0, 0, 0.113543191682212, 0.00442851495302812,
0, 0, 0.0176396645397039, 0.000270908927614937, 0.00629993909848545,
0.0406351052576609, 0.00503557936970754, 0.00865694316409033,
0.0191377054890488, 0, 0.0115149714931613, 0.243819008858108
), H = c(0, 0.00293624962565618, 0, 0.0109784338152019, 0,
0, 0, 0, 0.0368789010169724, 0, 0, 0.0104630777433289, 0.0618086409502145,
0, 0.0252902404777352, 0, 0, 0, 0, 0, 0, 0.0101281816458403,
0.051238858176748, 0), I = c(0.000512443510029838, 0.0134213221088962,
0.00975095624379213, 0.0890465131480651, 0, 0.0120820284390054,
0.0216006194662742, 0, 0.0157265913852278, 0, 0.185986506888601,
0.00318028256280335, 0.00554993751224351, 0.00447259221913014,
0.0345761293821743, 0, 0, 0, 0, 0, 0.00687163152927302, 0.00712559324872945,
0.0694952085497587, 0.523601038019114), `P-value` = c(9999,
9999, 9999, 9999, 9999, 9999, 9999, 9999, 9999, 9999, 9999,
9999, 9999, 9999, 9999, 9999, 9999, 9999, 9999, 9999, 9999,
9999, 9999, 9999), Correlation = c(0.787379117728473, 0.713767273835577,
0.432941448432532, 0.654688521787571, 0.690623129562749,
0.72269025999843, 0.535092134674879, 0.795288368310815, 0.754840745986047,
0.0872468087627683, 0.760738916041899, 0.875990453791969,
0.878637700077733, 0.851326230903871, 0.458259685017224,
0.815125101981778, 0.299231595131615, 0.613359452217542,
0.424264050686203, 0.691764490900993, 0.806704730396525,
0.602426815978143, 0.786361339790331, 0.871574807143838),
RMSE = c(0.698736121897212, 0.75020398425833, 0.901716663988092,
0.763690363629575, 0.758968447930353, 0.757909848657902,
0.8482750320726, 0.695776594753745, 0.668395739137566, 1.04995120161959,
0.654740332409367, 0.590052129521314, 0.484783647407576,
0.659059332792332, 0.890274043213301, 0.687194392331628,
0.966871968720401, 0.807189528281839, 0.906250907041538,
0.770762860306121, 0.704446496934398, 0.805908330153981,
0.652426738364919, 0.490900219800415), sample_id = c("x6494",
"x1867", "x5038", "x5118", "x4631", "x6126", "x2051", "x0346",
"x2056", "x4949", "x5784", "x7357", "x1509", "x9449", "x0167",
"x9521", "x1494", "x7623", "x9705", "x4810", "x3549", "x6336",
"x9699", "x8727"), patient_id = c("x6494", "x1867", "x5038",
"x5118", "x4631", "x6126", "x2051", "x0346", "x2056", "x4949",
"x5784", "x7357", "x1509", "x9449", "x0167", "x9521", "x1494",
"x7623", "x9705", "x4810", "x3549", "x6336", "x9699", "x8727"
), treated = c(TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE), timing = c("post",
"pre", "post", "post", "post", "pre", "pre", "post", "pre",
"post", "pre", "post", "post", "post", "pre", "post", "pre",
"post", "post", "post", "pre", "post", "post", "pre"), response = c("nonresp",
"nonresp", "nonresp", "nonresp", "nonresp", "resp", "nonresp",
"nonresp", "nonresp", "nonresp", "nonresp", "nonresp", "resp",
"nonresp", "nonresp", "nonresp", "nonresp", "resp", "nonresp",
"nonresp", "nonresp", "nonresp", "resp", "nonresp"), dataset = c("sny",
"sny", "sny", "sny", "sny", "sny", "sny", "sny", "sny", "sny",
"sny", "sny", "sny", "sny", "sny", "sny", "sny", "sny", "sny",
"sny", "sny", "sny", "sny", "sny"), OS_status = c(1, 1, 1,
1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0,
0, 1), OS_time = c(36.5, 78.2142857142857, 62.5714285714286,
140.785714285714, 26.0714285714286, 99.0714285714286, 41.7142857142857,
151.214285714286, 203.357142857143, 36.5, 26.0714285714286,
104.285714285714, 234.642857142857, 31.2857142857143, 140.785714285714,
140.785714285714, 104.285714285714, 208.571428571429, 62.5714285714286,
20.8571428571429, 26.0714285714286, 359.785714285714, 229.428571428571,
20.8571428571429), filtercol = structure(c(2L, 3L, 2L, 2L,
2L, 3L, 3L, 2L, 3L, 2L, 3L, 2L, 2L, 2L, 3L, 2L, 3L, 2L, 2L,
2L, 3L, 2L, 2L, 3L), .Label = c("on", "post", "pre"), class = "factor")), class = "data.frame", row.names = c(NA,
-24L), .Names = c("A", "B", "C", "D", "E", "F", "G", "H", "I",
"P-value", "Correlation", "RMSE", "sample_id", "patient_id",
"treated", "timing", "response", "dataset", "OS_status", "OS_time",
"filtercol"))
除了“将代码存储为字符串不好”这一反对理由之外,文档中的示例还解释了推荐使用 aes_
而不是 aes_string
的原因:
# You can't easily mimic these calls with aes_string
aes(`0`, colour = "smooth")
aes_(~ `0`, colour = "smooth")
# Ok, you can, but it requires a _lot_ of quotes
aes_string("`0`", colour = '"smooth"')
它还列出了如何让您的示例工作:
# Three ways of generating the same aesthetics
aes(mpg, wt, col = cyl)
aes_(quote(mpg), quote(wt), col = quote(cyl))
aes_(~mpg, ~wt, col = ~cyl)
aes_string("mpg", "wt", col = "cyl")
...
# Convert strings to names with as.name
var <- "cyl"
...
aes_(col = as.name(var))
那么,在上下文中,
library(ggplot2)
# Build one scatter plot per column "A".."I". as.name() converts the
# column-name string into a symbol, which aes_() accepts alongside formulas.
plots <- lapply(LETTERS[1:9], function(cell) {
  cell_mapping <- aes_(~response, as.name(cell), color = ~response)
  ggplot(data = btest, mapping = cell_mapping) +
    geom_point()
})
# Display the third plot (column "C").
plots[[3]]
但是,在未来的某个时候,ggplot2 的非标准求值(NSE)将会改变,很可能改用 rlang,从而与 tidyverse 的其余部分更加统一,所以现在不必花太多精力去弄清它的细微之处;如果你能让 aes_string
做你想做的事,那么目前就用它吧。
相反,我可能会建议一种完全不同的方法:将您的九个图合并为一个分面图(faceted plot)。您需要将数据重塑为长格式,但到那时就不复杂了:
library(tidyr)
# Reshape columns A-I to long format (one row per sample/cell pair), then
# draw a single plot with one facet per cell column.
# pivot_longer() replaces the superseded gather(); requires tidyr >= 1.0.0.
btest %>%
  pivot_longer(cols = A:I, names_to = "cell", values_to = "value") %>%
  ggplot(aes(response, value, color = response)) +
  geom_point() +
  facet_wrap(~cell)