按组提取 Loess 平滑值时出错

Question

我正在尝试提取按组平滑后的数据集的拟合值（类似于 geom_smooth() 所得到的结果，但我需要拿到实际的拟合值以便自行绘图）。我尝试了此线程中提供的所有解决方案，但下面每一种方案都会报错。

来自'neat tidyverse way'的回答：

library(dplyr)
library(tidyr)
library(purrr)

# Attempt 1: nest the rows of each `id`, fit a loess model to every nested
# data frame, then pull the fitted values out of each model object.
# NOTE(review): the unnamed selection in `nest(-id)` appears to be what
# triggers the "All elements of `...` must be named" warning quoted below,
# which suggests `data = c(x, y)` instead; confirm against the installed tidyr.
models <- test %>%
  tidyr::nest(-id) %>%
  dplyr::mutate(
    # Fit one loess model per group (one nested data frame per `id`)
    m = purrr::map(data, loess,
                   formula = y ~ x, span = .5),
    # Retrieve the fitted values from each model
    fitted = purrr::map(m, `[[`, "fitted")
  )

Warning message:
All elements of `...` must be named.
Did you want `data = c(x, y)`?

来自接受的答案：

# Attempt 2: per `id`, fit a loess model and predict on a grid running from
# min(x) to max(x) in steps of 1, storing the result as a new column.
# The prediction has one value per grid point, not one per row, so `mutate()`
# cannot recycle it to the group size (the error quoted below reports size 3
# where 60 or 1 is required for id = 2).
# NOTE(review): `data=.` is the pipe placeholder; presumably it refers to the
# whole piped data frame rather than the current group, so confirm.
test <- test %>% 
  group_by(id) %>% 
  arrange(id, x) %>% 
  mutate(Loess = predict(loess(y ~ x, span = .5, data=.),
                         data.frame(x = seq(min(x), max(x), 1))))

Error: Problem with `mutate()` input `Loess`.
x Input `Loess` can't be recycled to size 60.
ℹ Input `Loess` is `predict(...)`.
ℹ Input `Loess` must be size 60 or 1, not 3.
ℹ The error occurred in group 2: id = 2.

来自评论

# Attempt 3: nest by `id`, fit a loess model per nested data frame, then try to
# unnest the result of applying `augment` to each (model, data) pair.
# NOTE(review): `augment` is presumably broom::augment; no library(broom) call
# is visible in this snippet, so confirm it is attached.
# NOTE(review): the "object 'fit' not found" error quoted below suggests `fit`
# is not resolved as a column inside `unnest()` here; confirm against the
# installed tidyr version.
test2 <- test %>% nest(-id) %>% 
  mutate(fit = map(data, ~ loess(y ~ x, .))) %>% 
  unnest(map2(fit, data, augment))

Error: object 'fit' not found
Run `rlang::last_error()` to see where the error occurred.
In addition: Warning message:
All elements of `...` must be named.
Did you want `data = c(x, y)`?

我对 tidyverse 不是很有经验，所以不太确定我的做法与那些示例相比有什么不同。非常感谢任何帮助。

# Example data: 145 rows with numeric columns x and y and an integer grouping column id (1, 2, 3)
test<-structure(list(x = c(-8.09566199976713, -8.06147618447585, -8.00975809213924, 
-7.96200959687978, -7.91437069721383, -7.89794310600054, -7.90270753398918, 
-7.90758707202371, -7.91285423074951, -7.92478106752289, -7.93691825045689, 
-8.01874278066444, -7.963527427585, -7.93407938329611, -7.94323616248026, 
-7.94085332876942, -7.93203791319322, -7.94357702787401, -7.91377080575536, 
-7.88252433889846, -7.85499829071686, -7.86566601358475, -7.91275434062848, 
-7.96059415201768, -8.00264819208551, -8.02577207019623, -7.95146178490521, 
-7.86605403953235, -7.78476412041177, -7.88927033537369, -7.8876796916652, 
-7.95187190641719, -7.98401088593829, -8.00012692860024, -8.01433523159787, 
-8.00807092407049, -8.00178416213873, -8.00011043534128, -8.02862971973644, 
-8.14101616921556, -8.05197547777147, -8.06579012466988, -8.07977339594891, 
-9.91048749031936, -9.66560592747977, -10.6156432290275, -10.8015737085674, 
-11.1501986169795, -11.7159178445694, -11.852600007071, -11.9500158919174, 
-11.8099796968354, -11.5041369336859, -11.2139686794437, -10.9382871968791, 
-10.6564566029614, -9.79377558892992, -9.09630034541423, -10.9883787360288, 
-11.2269861445731, -11.4824854018762, -11.6079686487163, -10.428108852748, 
-9.6062104865303, -9.46949542587671, -9.85124539788883, -10.276205029234, 
-10.4776426742835, -10.6400079178346, -10.4147379554317, -10.2038339860429, 
-10.0934609736433, -10.0824486553681, -9.96530249613719, -9.94853909056431, 
-9.93151185565445, -9.91417478382807, -9.90180880165919, -9.91333691008172, 
-10.0373498803589, -10.1768025338422, -10.320572551138, -10.4688466820861, 
-10.6218220581835, -10.6343234090347, -10.3264187862842, -9.98775786739598, 
-9.58260323586509, -9.42043071710701, -9.43924036934577, -9.52799164804214, 
-9.62866487252844, -9.72887391315455, -9.81243902432872, -9.70821477146116, 
-10.349558979838, -10.1458405548694, -9.99415308822563, -9.84536020757233, 
-9.79599547035806, -9.75295135996101, -9.79904377903862, -9.25340041876441, 
-15.3471460066526, -15.8962098900031, -16.4807765780841, -16.3237393813877, 
-17.3021769512891, -16.3800352448021, -15.8070127149332, -15.3891912546897, 
-14.9758961247955, -14.3049193201036, -13.6951530131572, -13.6426403073008, 
-13.7192755520964, -13.317953401665, -12.9213325990039, -12.4594681682978, 
-12.0191583645286, -11.8053727831142, -11.6872352172752, -11.5737935404884, 
-11.4107390421452, -11.1892161252634, -10.993043829208, -10.9330292789471, 
-10.8749587447015, -10.839113401307, -10.812108561392, -10.9713463919414, 
-11.1680590680373, -11.2539207698049, -11.3405390697112, -11.4280103071663, 
-11.5164358909023, -11.6059223744961, -11.6965817015189, -11.6020622433579, 
-11.5436710566386, -11.5081631613201, -11.6534368262729, -11.808341217397, 
-11.9736843692008, -12.1503511448775), y = c(0.657500808303136, 
0.738124328538192, 0.846237100479524, 0.905780104908876, 0.964065611141255, 
0.97130448683885, 0.961952809454258, 0.952520035393847, 0.942792088995166, 
0.927852423620437, 0.912757550459065, 0.759400682943845, 0.816109383509902, 
0.87020033850233, 0.915815641213451, 0.967873386043728, 1.02174887297134, 
1.05888614752619, 1.09417181629346, 1.12927656453054, 1.15845597740891, 
1.14281223660865, 1.09951983390484, 1.05556252495082, 1.02150454110485, 
1.00407662866727, 1.0383078668771, 1.07743614470557, 1.1142850320069, 
0.849172703363773, 0.928570803393031, 1.03718890041412, 1.04033830574734, 
1.03347118859473, 1.02808408212257, 1.03805023835948, 1.04800826979005, 
1.05065203275594, 1.00517755278862, 0.816786352254452, 0.987099861643988, 
0.986294681427012, 0.985411697682411, 6.30490194156339, 6.23591979381374, 
6.54324631274467, 6.69393500338639, 6.89041829514468, 7.17507154906183, 
7.18466565037316, 7.17069075506272, 7.09749953887751, 6.99963625007418, 
6.90682321485407, 6.818675464992, 6.72319228655747, 6.35779144412249, 
6.07675755691038, 6.71116104273407, 6.84049060491686, 6.97924607466303, 
7.05864889503111, 6.59454969605345, 6.27016677264436, 6.19033669905758, 
6.37773429443436, 6.58723774958637, 6.68536624180541, 6.76713349215161, 
6.63576249672338, 6.51308895413708, 6.43262161144123, 6.42705851495013, 
6.37935821335848, 6.37095186987229, 6.36245251810731, 6.35384217814464, 
6.34823344504427, 6.3568189796202, 6.40093531258066, 6.45018168045593, 
6.50106303671575, 6.55364891437324, 6.60801264620102, 6.61714449186606, 
6.55356637339973, 6.47759629080218, 6.34852444038794, 6.26379776807968, 
6.23209002392533, 6.22130259164532, 6.21289851462945, 6.20309983882547, 
6.18744917387348, 6.10894055296196, 6.25148917581075, 6.17293529625129, 
6.15075529839679, 6.12848960212141, 6.1326318372733, 6.13833262010545, 
6.16395466665727, 6.05567879845981, 6.25186104520219, 6.44281768293323, 
6.65324901403219, 6.72950091493087, 7.03322640156021, 6.88309899890625, 
6.70207702789022, 6.54079770390583, 6.38549695194146, 6.18839711001894, 
6.01102800783953, 5.87133691513539, 5.77306022102593, 5.67740669749789, 
5.58788061886448, 5.50325665174288, 5.42363100637011, 5.35172803418452, 
5.28384694735869, 5.21822380115916, 5.14919582516762, 5.07712142295026, 
5.0106828132089, 4.95786444890208, 4.90608940527112, 4.85658988565748, 
4.80812641990368, 4.77955320604372, 4.75486746882052, 4.73784287455848, 
4.72038983462283, 4.70251299307059, 4.68421637386314, 4.66550330220186, 
4.64637629394034, 4.64370313837652, 4.65439969630172, 4.67177327419859, 
4.74790550644152, 4.82753161090928, 4.91100496765981, 4.9987159528997
), id = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L)), row.names = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 
9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 
22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 
35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 
48L, 49L, 50L, 51L, 52L, 53L, 54L, 55L, 56L, 57L, 58L, 59L, 60L, 
61L, 62L, 63L, 64L, 65L, 66L, 67L, 68L, 69L, 70L, 71L, 72L, 73L, 
74L, 75L, 76L, 77L, 78L, 79L, 80L, 81L, 82L, 83L, 84L, 85L, 86L, 
87L, 88L, 89L, 90L, 91L, 92L, 93L, 94L, 95L, 96L, 97L, 98L, 99L, 
100L, 101L, 102L, 103L, 131L, 132L, 133L, 134L, 135L, 136L, 137L, 
138L, 139L, 140L, 141L, 142L, 143L, 144L, 145L, 146L, 147L, 148L, 
149L, 150L, 151L, 152L, 153L, 154L, 155L, 156L, 157L, 158L, 159L, 
160L, 161L, 162L, 163L, 164L, 165L, 166L, 167L, 168L, 169L, 170L, 
171L, 172L), class = "data.frame")

Answer 1

传给 predict 的新数据与原始数据的长度不同，因此结果无法直接放回 mutate 的列中。我建议先把输出保存在 list 中，然后使用 unnest 得到一个长数据框。

library(dplyr)
library(tidyr)

# Fit a separate loess curve for every id and evaluate it on a grid spanning
# that group's x range in steps of 1. The grid length generally differs from
# the number of rows in the group, so each group's predictions are wrapped in
# a list column first and flattened into long format afterwards.
test %>%
  arrange(id, x) %>%
  group_by(id) %>%
  summarise(
    Loess = list(
      predict(
        loess(y ~ x, data = cur_data(), span = .5),
        newdata = data.frame(x = seq(from = min(x), to = max(x), by = 1))
      )
    )
  ) %>%
  unnest(Loess)

 #    id Loess
 #  <int> <dbl>
# 1     1 0.784
# 2     2 7.20 
# 3     2 6.78 
# 4     2 6.34 
# 5     3 7.01 
# 6     3 6.73 
# 7     3 6.43 
# 8     3 6.11 
# 9     3 5.73 
#10     3 5.31 
#11     3 4.82

Answer 2

首先按 id 分组，然后对每个组的数据拟合模型，并使用 broom::augment 提取拟合值；另外还能顺带得到残差。这种做法保留了输入的 x 和 y，因此（例如）绘制 x 与 .fitted 的关系会更容易。

library("tidyverse")

# Fit one loess model per id and return the augmented model frame for each
# group (observed y and x plus .fitted and .resid).
test %>%
  group_by(id) %>%
  group_modify(function(group_rows, group_key) {
    # `group_rows` holds only the rows of the current group; `group_key` is
    # the one-row key for that group and is not needed here.
    smooth_fit <- loess(y ~ x, data = group_rows, span = .5)
    broom::augment(smooth_fit)
  })
#> # A tibble: 145 × 5
#> # Groups:   id [3]
#>       id     y     x .fitted  .resid
#>    <int> <dbl> <dbl>   <dbl>   <dbl>
#>  1     1 0.658 -8.10   0.834 -0.176 
#>  2     1 0.738 -8.06   0.897 -0.159 
#>  3     1 0.846 -8.01   0.989 -0.142 
#>  4     1 0.906 -7.96   0.975 -0.0688
#>  5     1 0.964 -7.91   0.982 -0.0180
#>  6     1 0.971 -7.90   0.983 -0.0121
#>  7     1 0.962 -7.90   0.980 -0.0179
#>  8     1 0.953 -7.91   0.981 -0.0289
#>  9     1 0.943 -7.91   0.983 -0.0404
#> 10     1 0.928 -7.92   0.977 -0.0487
#> # … with 135 more rows

原始数据框中除了 x 和 y 之外，可能还有其他未用于 loess 拟合的列。要保留这些列，请将组数据 .x 也传递给 augment 函数。

# Same per-group loess fit, but the group's own rows are handed to
# `broom::augment` as well, so columns that play no part in the fit are kept.
test %>%
  mutate(
    # Two throwaway columns, unused by the loess fit, to show they are kept.
    z = rnorm(n()),
    w = rnorm(n())
  ) %>%
  group_by(id) %>%
  group_modify(function(group_rows, group_key) {
    smooth_fit <- loess(y ~ x, data = group_rows, span = .5)
    # Passing the group data makes augment append .fitted and .resid to the
    # original columns instead of to the model frame only.
    broom::augment(smooth_fit, group_rows)
  })
#> # A tibble: 145 × 7
#> # Groups:   id [3]
#>       id     x     y      z       w .fitted  .resid
#>    <int> <dbl> <dbl>  <dbl>   <dbl>   <dbl>   <dbl>
#>  1     1 -8.10 0.658  1.31  -0.199    0.834 -0.176 
#>  2     1 -8.06 0.738  0.395 -1.74     0.897 -0.159 
#>  3     1 -8.01 0.846 -1.34   0.382    0.989 -0.142 
#>  4     1 -7.96 0.906  0.376  0.231    0.975 -0.0688
#>  5     1 -7.91 0.964 -0.224  0.327    0.982 -0.0180
#>  6     1 -7.90 0.971  0.678 -0.504    0.983 -0.0121
#>  7     1 -7.90 0.962  0.736 -0.0186   0.980 -0.0179
#>  8     1 -7.91 0.953  0.368  0.0313   0.981 -0.0289
#>  9     1 -7.91 0.943 -1.35   1.02     0.983 -0.0404
#> 10     1 -7.92 0.928  0.280 -0.471    0.977 -0.0487
#> # … with 135 more rows

<sup>由 reprex package (v2.0.1) 于 2022-03-08 创建</sup>

按组提取 Loess 平滑值时出错

Loess smooth extracted values by group errors

r

loess

dplyr

tidyverse