使用 dplyr::group_by() 对每组进行 loess 回归后预测特定的 x 值

Predict specific x-value after loess regression on each group with dplyr::group_by()

我想结合 predict 函数,根据一个固定的 y 值来预测 x 值。在这个例子中,我有 cat 和 dog 两组随时间(time)变化的 distance 测量值。到目前为止,我已经为每个组拟合了一个模型,并提取了各观测时间点上的拟合值。我遇到的困难是:如何用这些模型针对恒定的 y 值(measurement == 70)进行预测,因为数据中没有恰好等于 70 的测量值。也就是说,我想知道 cat 和 dog 各自花了多长时间(time)才达到 measurement == 70。


到目前为止的代码:

library(dplyr)
library(tidyr)
library(purrr)

# Fit one loess curve (measurement as a function of time) per sample.
# Expects `df` to provide the columns `sample`, `time` and `measurement`.
model <- df %>%
    nest(data = -sample) %>%                           # Name the list-column explicitly; bare `nest(-sample)` is deprecated since tidyr 1.0.0
    drop_na() %>%
    group_by(sample) %>%
    mutate(m = purrr::map(data, loess,                 # One loess fit per nested per-sample data frame
           formula = measurement ~ time, span = 0.1),  # Small span so the smoother stays close to the observed points
           fitted = purrr::map(m, `[[`, "fitted"))     # Fitted values at the observed time points

# Drop the model list-column, then expand the nested data together with its
# fitted values so each observation sits next to its loess prediction.
results <- tidyr::unnest(
    dplyr::select(model, -m),
    cols = c(data, fitted)
)

可重现代码:

# Reproducible example data (looks like dput() output): a 268-row data.frame
# with the columns
#   time             - numeric time points
#   measurement_type - factor with the single level "distance"
#   measurement      - numeric measured values
#   sample           - factor with the levels "cat" and "dog"
df <- structure(list(time = c(5.4919, 5.9919, 6.4919, 6.9919, 7.4919, 
7.9919, 8.4919, 8.9919, 9.4919, 9.9919, 10.4919, 10.9919, 11.4919, 
11.9919, 12.4919, 12.9919, 13.4919, 13.9919, 14.4919, 14.9919, 
15.4919, 15.9919, 16.4919, 16.9919, 17.4919, 17.9919, 18.4919, 
18.9919, 19.4919, 19.9919, 20.4919, 20.9919, 21.4919, 21.9919, 
22.4919, 22.9919, 23.4919, 23.9919, 24.4919, 24.9919, 25.4919, 
25.9919, 26.4919, 26.9919, 27.4919, 27.9919, 28.4919, 28.9919, 
29.4919, 29.9919, 30.4919, 30.9919, 31.4919, 31.9919, 32.4919, 
32.9919, 33.4919, 33.9919, 34.4919, 34.9919, 35.4919, 35.9919, 
36.4919, 36.9919, 37.4919, 37.9919, 38.4919, 38.9919, 39.4919, 
39.9919, 40.4919, 40.9919, 41.4919, 41.9919, 42.4919, 42.9919, 
43.4919, 43.9919, 44.4919, 44.9919, 45.4919, 45.9919, 46.4919, 
46.9919, 47.4919, 47.9919, 48.4919, 48.9919, 49.4919, 49.9919, 
50.4919, 50.9919, 51.4919, 51.9919, 52.4919, 52.9919, 53.4919, 
53.9919, 54.4919, 54.9919, 55.4919, 55.9919, 56.4919, 56.9919, 
57.4919, 57.9919, 58.4919, 58.9919, 59.4919, 59.9919, 60.4919, 
60.9919, 61.4919, 61.9919, 62.4919, 62.9919, 63.4919, 63.9919, 
64.4919, 64.9919, 65.4919, 65.9919, 66.4919, 66.9919, 67.4919, 
67.9919, 68.4919, 68.9919, 69.4919, 69.9919, 70.4919, 70.9919, 
71.4919, 71.9919, 5.4919, 5.9919, 6.4919, 6.9919, 7.4919, 7.9919, 
8.4919, 8.9919, 9.4919, 9.9919, 10.4919, 10.9919, 11.4919, 11.9919, 
12.4919, 12.9919, 13.4919, 13.9919, 14.4919, 14.9919, 15.4919, 
15.9919, 16.4919, 16.9919, 17.4919, 17.9919, 18.4919, 18.9919, 
19.4919, 19.9919, 20.4919, 20.9919, 21.4919, 21.9919, 22.4919, 
22.9919, 23.4919, 23.9919, 24.4919, 24.9919, 25.4919, 25.9919, 
26.4919, 26.9919, 27.4919, 27.9919, 28.4919, 28.9919, 29.4919, 
29.9919, 30.4919, 30.9919, 31.4919, 31.9919, 32.4919, 32.9919, 
33.4919, 33.9919, 34.4919, 34.9919, 35.4919, 35.9919, 36.4919, 
36.9919, 37.4919, 37.9919, 38.4919, 38.9919, 39.4919, 39.9919, 
40.4919, 40.9919, 41.4919, 41.9919, 42.4919, 42.9919, 43.4919, 
43.9919, 44.4919, 44.9919, 45.4919, 45.9919, 46.4919, 46.9919, 
47.4919, 47.9919, 48.4919, 48.9919, 49.4919, 49.9919, 50.4919, 
50.9919, 51.4919, 51.9919, 52.4919, 52.9919, 53.4919, 53.9919, 
54.4919, 54.9919, 55.4919, 55.9919, 56.4919, 56.9919, 57.4919, 
57.9919, 58.4919, 58.9919, 59.4919, 59.9919, 60.4919, 60.9919, 
61.4919, 61.9919, 62.4919, 62.9919, 63.4919, 63.9919, 64.4919, 
64.9919, 65.4919, 65.9919, 66.4919, 66.9919, 67.4919, 67.9919, 
68.4919, 68.9919, 69.4919, 69.9919, 70.4919, 70.9919, 71.4919, 
71.9919), measurement_type = structure(c(1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "distance", class = "factor"), 
    measurement = c(27.3, 27.7, 28.3, 29.1, 30, 31.1, 32.3, 33.6, 
    34.8, 36.2, 37.6, 39.2, 40.9, 42.6, 44.5, 46.4, 48.6, 50.7, 
    53.1, 55.6, 58.2, 60.9, 63.5, 66.4, 69.1, 72, 74.7, 77.2, 
    79.5, 82.3, 85, 87.4, 89.6, 91.8, 91.7, 92.5, 92.5, 92.7, 
    92.5, 92.2, 91.9, 91.7, 91.5, 91.2, 91, 90.8, 90.7, 90.6, 
    90.4, 90.4, 90.3, 90.2, 90.2, 90.2, 90.1, 90.1, 90.1, 90.1, 
    90.1, 90.1, 90.1, 90.1, 90.1, 90.1, 90.1, 90.1, 90.1, 90.1, 
    90.2, 90.2, 90.2, 90.2, 90.2, 90.2, 90.2, 90.2, 90.3, 90.3, 
    90.3, 90.2, 90.3, 90.3, 90.3, 90.3, 90.3, 90.3, 90.3, 90.3, 
    90.3, 90.3, 90.3, 90.2, 90.2, 90.2, 90.2, 90.2, 90.2, 90.1, 
    90.1, 90.1, 90.1, 90.1, 90, 90, 90, 89.9, 89.9, 89.8, 89.8, 
    89.7, 89.7, 89.7, 89.6, 89.5, 89.5, 89.4, 89.4, 89.4, 89.3, 
    89.2, 89.2, 89.1, 89.1, 89, 88.9, 88.9, 88.9, 88.7, 88.7, 
    88.7, 88.6, 88.6, 88.5, 88.5, 29.6, 31.5, 33.5, 35.8, 38.3, 
    40.8, 43.2, 45.5, 47.8, 50, 52.1, 54.3, 56.3, 58.3, 60.3, 
    62.2, 64, 66, 67.8, 69.7, 71.4, 73.3, 74.9, 76.6, 78.3, 79.7, 
    81.2, 82.6, 83.9, 85.2, 86.4, 87.6, 88.7, 89.9, 90.7, 91.7, 
    92.5, 93.2, 93.9, 94.4, 94.9, 95.2, 95.5, 95.7, 95.7, 95.7, 
    95.7, 95.6, 95.6, 95.6, 95.5, 95.6, 95.5, 95.5, 95.5, 95.5, 
    95.6, 95.6, 95.6, 95.7, 95.7, 95.7, 95.8, 95.8, 95.8, 95.8, 
    95.8, 95.9, 95.9, 95.9, 95.9, 96, 96, 96, 96.1, 96, 96, 96, 
    96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 
    95.9, 96, 95.9, 95.9, 95.8, 95.8, 95.8, 95.8, 95.8, 95.9, 
    95.7, 95.7, 95.6, 95.6, 95.6, 95.5, 95.6, 95.4, 95.4, 95.4, 
    95.3, 95.2, 95.3, 95.2, 95.2, 95.1, 95.1, 95.1, 95, 95, 94.9, 
    94.9, 94.9, 94.9, 94.8, 94.7, 94.6, 94.6, 94.6, 94.5, 94.6
    ), sample = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L), .Label = c("cat", "dog"), class = "factor")), row.names = c(NA, 
-268L), class = "data.frame")

我是参照这个 SO 问题才做到这一步的:

也许是我把问题想得太简单了,但你想要的不就是……

# Evaluate every fitted model at newdata = 70 and return one prediction per
# model. For models fitted as measurement ~ time, 70 is interpreted as a time,
# so this yields the measurement at time 70, not the time at measurement 70.
purrr::map(model$m, function(fit) predict(fit, newdata = 70))
[[1]]
[1] 88.66499

[[2]]
[1] 94.66321

由于这是双变量关系,可以把预测的方向反过来(用 measurement 来预测 time):

library(dplyr)
library(purrr)
library(tidyr)
# Fit the inverse relationship (time as a function of measurement) per sample,
# so that predict() can be asked for the time at a given measurement value.
# Expects `df` to provide the columns `sample`, `time` and `measurement`.
# NOTE(review): this is only meaningful where measurement maps to a single
# time; confirm the target value is not reached more than once per sample.
model <- df %>%
  nest(data = -sample) %>%                           # Name the list-column explicitly; bare `nest(-sample)` is deprecated since tidyr 1.0.0
  drop_na() %>%
  group_by(sample) %>%
  mutate(m = purrr::map(data, loess,                 # One loess fit per nested per-sample data frame
                        formula = time ~ measurement, span = 0.25),  # Reversed formula: measurement is the predictor, time the response
         fitted = purrr::map(m, `[[`, "fitted"))     # Fitted time values at the observed measurements
#> Warning: Problem with `mutate()` input `m`.
#> x pseudoinverse used at 90.2
#> ℹ Input `m` is `purrr::map(data, loess, formula = time ~ measurement, span = 0.25)`.


# Label each fitted model with its sample so the predictions come back named,
# then evaluate every model at measurement = 70 to get the predicted time.
model$m <- setNames(model$m, model$sample)
purrr::map(model$m, function(fit) predict(fit, newdata = 70))
#> $cat
#> [1] 17.08772
#> 
#> $dog
#> [1] 15.03579