DPLYR

Question

我在单个数据框中有一系列行。我想为每个 ID 聚合前两行——即把 ID 1 的事件 1 和事件 2 合并为一行，把 ID 2 的事件 1 和事件 2 合并为一行，依此类推，但事件 3 保持原样、完全不受影响。

# tibble() is re-exported by dplyr; attach it so this snippet runs on its own.
library(dplyr)

# Example data: five ids with three events each. In this data `score` is NA
# at event 2 and `score2` is NA at event 1; event 3 has both values.
id <- c(1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5)
event <- c(1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3)
score <- c(3, NA, 1, 3, NA, 2, 6, NA, 1, 8, NA, 2, 4, NA, 1)
score2 <- c(NA, 4, 1, NA, 5, 2, NA, 0, 3, NA, 5, 6, NA, 8, 7)

df <- tibble(id, event, score, score2)
# A tibble: 15 x 4
      id event score score2
   <dbl> <dbl> <dbl>  <dbl>
 1     1     1     3     NA
 2     1     2    NA      4
 3     1     3     1      1
 4     2     1     3     NA
 5     2     2    NA      5
 6     2     3     2      2
 7     3     1     6     NA
 8     3     2    NA      0
 9     3     3     1      3
10     4     1     8     NA
11     4     2    NA      5
12     4     3     2      6
13     5     1     4     NA
14     5     2    NA      8
15     5     3     1      7

我试过了：

 # Collapse every non-grouping column to its per-id minimum. as.character()
 # turns all summarised columns into character, as in the original attempt.
 # across() replaces the deprecated funs() / superseded summarise_all().
 df_merged <- df %>%
   group_by(id) %>%
   summarise(across(everything(), ~ min(as.character(.x), na.rm = TRUE)))

这样聚合的结果很好，但之后我很难把它们合并回原始的 dataframe/tibble（整个数据集中实际上有大约 300 个不同的“分数”列，所以用 right_join 会产生一大堆 score.x、score.y、score2.x、score2.y，非常令人头疼……）

理想情况下，解决方案最好使用 dplyr，因为我其余的代码都是基于它编写的！

编辑：

理想情况下，我的预期输出是：

# A tibble: 10 x 4
      id event score score2
   <dbl> <dbl> <dbl>  <dbl>
 1     1     1     3      4
 3     1     3     1      1
 4     2     1     3      5
 6     2     3     2      2
 7     3     1     6      0
 9     3     3     1      3
10     4     1     8      5
12     4     3     2      6
13     5     1     4      8
15     5     3     1      7

Answer 1

我们可以用 replace 调整前两行中 NA 元素的顺序：

library(dplyr)

# Within each id, move the non-missing score of the first two rows into the
# first row, then drop the now-redundant second row.
# Assumes the first two rows of every id are events 1 and 2, in that order.
df %>%
  group_by(id) %>%
  mutate(across(
    starts_with("score"),
    function(x) {
      # Guard against ids with fewer than two rows: indexing 1:2 on a shorter
      # vector would lengthen it and make mutate() fail.
      head_idx <- seq_len(min(2L, length(x)))
      replace(x, head_idx, x[head_idx][order(is.na(x[head_idx]))])
    }
  )) %>%
  # Remove only the second row of each id. Filtering on "no NA in any score
  # column" would also discard event-3 rows that contain an NA.
  filter(row_number() != 2L) %>%
  ungroup()

-输出

# A tibble: 10 x 4
      id event score score2
   <dbl> <dbl> <dbl>  <dbl>
 1     1     1     3      4
 2     1     3     1      1
 3     2     1     3      5
 4     2     3     2      2
 5     3     1     6      0
 6     3     3     1      3
 7     4     1     8      5
 8     4     3     2      6
 9     5     1     4      8
10     5     3     1      7

Answer 2

这是使用 tidyr 包中的 fill 完成任务的另一种方法：

library(dplyr)
library(tidyr)

# Merge events 1 and 2 of each id into the event-1 row; leave event 3 as is.
# Assumes the event-1 row precedes the event-2 row within each id.
df %>%
  # Put events 1 and 2 of each id in their own group so that fill() cannot
  # copy values into or out of the event-3 row.
  group_by(id, is_merged = event %in% c(1, 2)) %>%
  # Pull the event-2 values up into the NA cells of the event-1 row.
  fill(starts_with("score"), .direction = "up") %>%
  ungroup() %>%
  # The event-1 row now carries the merged values; event 2 is redundant.
  filter(event != 2) %>%
  select(-is_merged)

      id event score score2
   <dbl> <dbl> <dbl>  <dbl>
 1     1     1     3      4
 2     1     3     1      1
 3     2     1     3      5
 4     2     3     2      2
 5     3     1     6      0
 6     3     3     1      3
 7     4     1     8      5
 8     4     3     2      6
 9     5     1     4      8
10     5     3     1      7

Answer 3

这个怎么样？

library(dplyr)

# Events 1 and 2: collapse to one row per id, keeping the first row of each id
# (event 1 in the example data) with the per-id minimum of each score column.
df_e12 <- df %>%
  filter(event %in% c(1, 2)) %>%
  group_by(id) %>%
  mutate(across(
    starts_with("score"),
    # min(na.rm = TRUE) on an all-NA vector returns Inf with a warning; keep
    # the (type-matched) NA instead.
    ~ if (all(is.na(.x))) .x[1] else min(.x, na.rm = TRUE)
  )) %>%
  ungroup() %>%
  distinct(id, .keep_all = TRUE)

# Event 3 rows are carried over unchanged.
df_e3 <- df %>%
  filter(event == 3)

# Recombine and restore the id / event ordering.
df <- bind_rows(df_e12, df_e3) %>%
  arrange(id, event)

df

> df
# A tibble: 10 x 4
      id event score score2
   <dbl> <dbl> <dbl>  <dbl>
 1     1     1     3      4
 2     1     3     1      1
 3     2     1     3      5
 4     2     3     2      2
 5     3     1     6      0
 6     3     3     1      3
 7     4     1     8      5
 8     4     3     2      6
 9     5     1     4      8
10     5     3     1      7

DPLYR - 使用列值作为条件将行合并在一起

DPLYR - merging rows together using a column value as a conditional

r

rows