对 ggplot 使用 aes_ 而不是 aes_string

Using aes_ instead of aes_string for ggplot

我正在尝试遍历数据框 btest(数据见下文)的一部分列,并为每一列绘图。我对 tidyeval 体系还不太了解,而且它似乎尚未在 ggplot2 中完全实现。

我可以使用以下代码执行此操作:

# Names of the columns (A through I) to plot against `response`.
bcells <- LETTERS[1:9]

# Build one scatter plot per column. aes_string() takes column names as
# strings, so the loop variable can be passed straight in as the y aesthetic.
lapply(bcells, function(cell) {
  point_mapping <- aes_string(x = "response", y = cell, color = "response")
  ggplot(data = btest) +
    geom_point(mapping = point_mapping)
})

但是,aes_string 和 aes_ 的文档指出 aes_ 更可取。

aes_string and aes_ are particularly useful when writing functions that create plots because you can use strings or quoted names/calls to define the aesthetic mappings, rather than having to use substitute to generate a call to aes().

I recommend using aes_(), because creating the equivalents of aes(colour = "my colour") or aes(x = `X$1`) with aes_string() is quite clunky.

我的问题:我不明白为什么 aes_ 更可取,或者我如何在这里使用 aes_ 而不是 aes_string 来实现相同的代码。

我的数据如下:

structure(list(A = c(0.982753626864792, 0.490259710510256, 0.454306062926931, 
0.443880090600994, 0.992704372174903, 0.831160693384458, 0.740981059382211, 
0.971324123908582, 0.589614636646592, 0.663016559532728, 0.0442355006911685, 
0.758388810061299, 0.39969185826509, 0.96343122781417, 0.578266180477106, 
0.935289565081073, 0.954414616390872, 0.476708144579943, 0.906912570353835, 
0.860767420084079, 0.878055964477507, 0.717065411183562, 0.626679994549329, 
0.00471648517899614), B = c(0, 0.0359489937221843, 0.0455022610469154, 
0.0245991717594771, 0, 0.0150049906282552, 0.0357444181630181, 
0.00409885247542489, 0.0153491030612378, 0.0116215258999156, 
0.0971266806096337, 0.00523783384210994, 0.000104337240641022, 
0.000475801870965878, 0.0388763858222573, 0, 0.00947456311935685, 
0.091028293882243, 0, 0.00114639793715674, 0.01709111810953, 
0.00642564686487351, 0.0122005596623948, 0.0735538728126912), 
    C = c(0.00396910361917928, 0.3726373775819, 0.389150409858995, 
    0.236004149077653, 0.00654522233764124, 0.0757762646142197, 
    0.064467293054645, 0.021714367242937, 0.0510773710590119, 
    0.279742249706993, 0.228435750343793, 0.0163968987432784, 
    0.0386501968648076, 0.0147706021018908, 0.117796354856799, 
    0.0262705486829676, 0.0116437209145554, 0.249652632820836, 
    0.0708357724821996, 0.115182751748712, 0.0485081551895102, 
    0.0430306406326062, 0.0185687667917195, 0.062321917083855
    ), D = c(0, 0, 0.00728600019514972, 0.00320524248329104, 
    0, 0.0063037888029564, 0.00654538187729239, 0, 0.0176038859003177, 
    0, 0.0181870750390433, 0.00152581718814669, 0.00977725964480791, 
    0, 0, 0.00447626637015039, 0, 0, 0, 0, 0, 0.0453332704320773, 
    0, 0), E = c(0, 0, 0, 0.000475498116547242, 0.000291416366767824, 
    0, 0, 0.000108185143509404, 0, 0, 0.000585484789620521, 0, 
    0.00119989502426795, 0.000562924764494004, 0, 0, 0.000232527879948303, 
    6.96708420418182e-05, 0.000472096790474276, 0.000545274075130702, 
    0.000572161953294472, 0, 0, 0.0111234621378363), F = c(0.0113619316667346, 
    0.0761221446319925, 0.0940043097282167, 0.181463421237771, 
    0.00045898912068803, 0.0379484560273567, 0.130661228056559, 
    0.00273248163097645, 0.27374951093064, 0.0456196648603633, 
    0.311899809955928, 0.200378764906006, 0.483217874497928, 
    0.0162868512293491, 0.187555044444225, 0.0336927109381938, 
    0.0179346325967824, 0.141906152617276, 0.0167439810037839, 
    0.0137012129908311, 0.0297632632518369, 0.170891255992311, 
    0.210301640776889, 0.0808642159093989), G = c(0.00140289433926378, 
    0.00867420181911554, 0, 0.0103474797609997, 0, 0.0217237781037489, 
    0, 2.19895985703425e-05, 0, 0, 0.113543191682212, 0.00442851495302812, 
    0, 0, 0.0176396645397039, 0.000270908927614937, 0.00629993909848545, 
    0.0406351052576609, 0.00503557936970754, 0.00865694316409033, 
    0.0191377054890488, 0, 0.0115149714931613, 0.243819008858108
    ), H = c(0, 0.00293624962565618, 0, 0.0109784338152019, 0, 
    0, 0, 0, 0.0368789010169724, 0, 0, 0.0104630777433289, 0.0618086409502145, 
    0, 0.0252902404777352, 0, 0, 0, 0, 0, 0, 0.0101281816458403, 
    0.051238858176748, 0), I = c(0.000512443510029838, 0.0134213221088962, 
    0.00975095624379213, 0.0890465131480651, 0, 0.0120820284390054, 
    0.0216006194662742, 0, 0.0157265913852278, 0, 0.185986506888601, 
    0.00318028256280335, 0.00554993751224351, 0.00447259221913014, 
    0.0345761293821743, 0, 0, 0, 0, 0, 0.00687163152927302, 0.00712559324872945, 
    0.0694952085497587, 0.523601038019114), `P-value` = c(9999, 
    9999, 9999, 9999, 9999, 9999, 9999, 9999, 9999, 9999, 9999, 
    9999, 9999, 9999, 9999, 9999, 9999, 9999, 9999, 9999, 9999, 
    9999, 9999, 9999), Correlation = c(0.787379117728473, 0.713767273835577, 
    0.432941448432532, 0.654688521787571, 0.690623129562749, 
    0.72269025999843, 0.535092134674879, 0.795288368310815, 0.754840745986047, 
    0.0872468087627683, 0.760738916041899, 0.875990453791969, 
    0.878637700077733, 0.851326230903871, 0.458259685017224, 
    0.815125101981778, 0.299231595131615, 0.613359452217542, 
    0.424264050686203, 0.691764490900993, 0.806704730396525, 
    0.602426815978143, 0.786361339790331, 0.871574807143838), 
    RMSE = c(0.698736121897212, 0.75020398425833, 0.901716663988092, 
    0.763690363629575, 0.758968447930353, 0.757909848657902, 
    0.8482750320726, 0.695776594753745, 0.668395739137566, 1.04995120161959, 
    0.654740332409367, 0.590052129521314, 0.484783647407576, 
    0.659059332792332, 0.890274043213301, 0.687194392331628, 
    0.966871968720401, 0.807189528281839, 0.906250907041538, 
    0.770762860306121, 0.704446496934398, 0.805908330153981, 
    0.652426738364919, 0.490900219800415), sample_id = c("x6494", 
    "x1867", "x5038", "x5118", "x4631", "x6126", "x2051", "x0346", 
    "x2056", "x4949", "x5784", "x7357", "x1509", "x9449", "x0167", 
    "x9521", "x1494", "x7623", "x9705", "x4810", "x3549", "x6336", 
    "x9699", "x8727"), patient_id = c("x6494", "x1867", "x5038", 
    "x5118", "x4631", "x6126", "x2051", "x0346", "x2056", "x4949", 
    "x5784", "x7357", "x1509", "x9449", "x0167", "x9521", "x1494", 
    "x7623", "x9705", "x4810", "x3549", "x6336", "x9699", "x8727"
    ), treated = c(TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 
    TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 
    TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE), timing = c("post", 
    "pre", "post", "post", "post", "pre", "pre", "post", "pre", 
    "post", "pre", "post", "post", "post", "pre", "post", "pre", 
    "post", "post", "post", "pre", "post", "post", "pre"), response = c("nonresp", 
    "nonresp", "nonresp", "nonresp", "nonresp", "resp", "nonresp", 
    "nonresp", "nonresp", "nonresp", "nonresp", "nonresp", "resp", 
    "nonresp", "nonresp", "nonresp", "nonresp", "resp", "nonresp", 
    "nonresp", "nonresp", "nonresp", "resp", "nonresp"), dataset = c("sny", 
    "sny", "sny", "sny", "sny", "sny", "sny", "sny", "sny", "sny", 
    "sny", "sny", "sny", "sny", "sny", "sny", "sny", "sny", "sny", 
    "sny", "sny", "sny", "sny", "sny"), OS_status = c(1, 1, 1, 
    1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 
    0, 1), OS_time = c(36.5, 78.2142857142857, 62.5714285714286, 
    140.785714285714, 26.0714285714286, 99.0714285714286, 41.7142857142857, 
    151.214285714286, 203.357142857143, 36.5, 26.0714285714286, 
    104.285714285714, 234.642857142857, 31.2857142857143, 140.785714285714, 
    140.785714285714, 104.285714285714, 208.571428571429, 62.5714285714286, 
    20.8571428571429, 26.0714285714286, 359.785714285714, 229.428571428571, 
    20.8571428571429), filtercol = structure(c(2L, 3L, 2L, 2L, 
    2L, 3L, 3L, 2L, 3L, 2L, 3L, 2L, 2L, 2L, 3L, 2L, 3L, 2L, 2L, 
    2L, 3L, 2L, 2L, 3L), .Label = c("on", "post", "pre"), class = "factor")), class = "data.frame", row.names = c(NA, 
-24L), .Names = c("A", "B", "C", "D", "E", "F", "G", "H", "I", 
"P-value", "Correlation", "RMSE", "sample_id", "patient_id", 
"treated", "timing", "response", "dataset", "OS_status", "OS_time", 
"filtercol"))

除了“把代码存成字符串不是好做法”这一反对理由之外,文档中的示例也说明了为什么更推荐 aes_ 而不是 aes_string:

# Excerpt from the ggplot2 documentation: mapping a column whose name is not
# a syntactic R name (`0`) together with a constant string for colour.
# You can't easily mimic these calls with aes_string
aes(`0`, colour = "smooth")
# Same mapping with aes_(): the column is given as a one-sided formula and
# the constant stays an ordinary string.
aes_(~ `0`, colour = "smooth")
# Ok, you can, but it requires a _lot_ of quotes
# (the backticks and the inner double quotes must be embedded in the strings
# that are passed in).
aes_string("`0`", colour = '"smooth"')

它还列出了如何让您的示例工作:

# Excerpt from the ggplot2 documentation; the `...` lines mark text elided
# from the excerpt and are not R code.
# Three ways of generating the same aesthetics
aes(mpg, wt, col = cyl)
# aes_() with quoted names ...
aes_(quote(mpg), quote(wt), col = quote(cyl))
# ... or with one-sided formulas.
aes_(~mpg, ~wt, col = ~cyl)
# aes_string() with the column names given as strings.
aes_string("mpg", "wt", col = "cyl")
...
# Convert strings to names with as.name
var <- "cyl"
...
# `var` holds a column name as a string; as.name() turns it into a symbol
# that aes_() accepts.
aes_(col = as.name(var))

那么,放到您的场景中:

library(ggplot2)

# Build one scatter plot of `response` against each of the columns A to I.
# `cell` arrives as a string; as.name() converts it to a symbol so that
# aes_() can use it as the y aesthetic, while `response` is given as a
# one-sided formula.
plots <- lapply(LETTERS[1:9], function(cell) {
  cell_mapping <- aes_(x = ~response, y = as.name(cell), color = ~response)
  ggplot(data = btest, mapping = cell_mapping) +
    geom_point()
})

# Show the third plot (column "C").
plots[[3]]

不过,ggplot2 的非标准求值(NSE)在将来某个时候会发生变化,很可能改用 rlang,以便与 tidyverse 的其余部分更加统一,所以现在不必花太多精力去弄清其中的细微之处;如果 aes_string 能满足您的需求,那就暂且先用它。

不过,我更建议换一种完全不同的思路:把九张图合并成一张分面(facet)图。您需要先把数据重塑为长格式,但这一步并不复杂:

library(tidyr)

# Reshape columns A:I into long format (one row per sample/column pair, with
# the column name in `cell` and its value in `value`), then draw a single
# plot with one facet per original column instead of nine separate plots.
# pivot_longer() replaces the superseded gather(); it needs tidyr >= 1.0.0.
btest %>%
    pivot_longer(cols = A:I, names_to = "cell", values_to = "value") %>%
    ggplot(aes(response, value, color = response)) +
    geom_point() +
    facet_wrap(~cell)