用R dplyr中的一系列字符填充NA

Fill NA with a series of characters in R dplyr

我有一个看起来像这样的大数据框。每个玩家都被分配到一个组。

library(tidyverse)

# Example data: five players; the last two have no group assigned yet.
df <- tibble(
  player = c(1, 2, 3, 4, 5),
  groups = c("group1", "group2", "group2", NA, NA)
)
df
#> # A tibble: 5 × 2
#>   player groups
#>    <dbl> <chr> 
#> 1      1 group1
#> 2      2 group2
#> 3      3 group2
#> 4      4 <NA>  
#> 5      5 <NA>

由 reprex 包 (v2.0.1) 于 2022-04-12 创建

有些玩家没有被分配到任何组,我想按顺序依次为他们填充新的组,即得到如下结果:

#> # A tibble: 5 × 2
#>   player groups
#>    <dbl> <chr> 
#> 1      1 group1
#> 2      2 group2
#> 3      3 group2
#> 4      4 group3
#> 5      5 group4

这很棘手,最后我认为我们可以这样做:

library(dplyr)

# Give every player without a group a fresh, consecutively numbered group.
# Numbering continues after the highest group number already in use rather
# than assuming that number is 2.
df %>%
  mutate(
    # Highest existing group number plus a running count of missing rows; the
    # 0L floor makes numbering start at "group1" when no existing label
    # carries a number.
    x = max(0L, as.integer(gsub("[^0-9]", "", groups)), na.rm = TRUE) +
      cumsum(is.na(groups))
  ) %>%
  # .keep = "unused" drops the helper column x once it has been consumed.
  mutate(
    groups = if_else(is.na(groups), paste0("group", x), groups),
    .keep = "unused"
  )
  player groups
   <dbl> <chr> 
1      1 group1
2      2 group2
3      3 group2
4      4 group3
5      5 group4

dplyr

library(dplyr)
# Replace each missing group with "group<k>", where k continues counting from
# the largest group number already present in the column.
df %>%
  mutate(
    is_missing = is.na(groups),
    # Strip everything but digits to recover the numeric part of each label.
    highest = max(as.integer(gsub("[^0-9]", "", groups)), na.rm = TRUE),
    groups = if_else(
      is_missing,
      paste0("group", highest + cumsum(is_missing)),
      groups
    )
  ) %>%
  select(-c(is_missing, highest))
# # A tibble: 5 x 2
#   player groups
#    <dbl> <chr> 
# 1      1 group1
# 2      2 group2
# 3      3 group2
# 4      4 group3
# 5      5 group4

data.table

library(data.table)
# data.table variant: update `groups` by reference, numbering the missing
# entries after the largest existing group number.
DT <- as.data.table(df)
DT[, groups := {
  is_missing <- is.na(groups)
  # Strip everything but digits to recover the numeric part of each label.
  highest <- max(as.integer(gsub("[^0-9]", "", groups)), na.rm = TRUE)
  fifelse(is_missing, paste0("group", highest + cumsum(is_missing)), groups)
}]

你可以这样做:

# Keep existing groups and fill the gaps with "group<k>", where k is the
# largest existing group number plus a running count of missing rows.
df |>
  mutate(
    groups = coalesce(
      groups,
      paste0(
        "group",
        max(parse_number(groups), na.rm = TRUE) + cumsum(is.na(groups))
      )
    )
  )

换用一个略有不同的示例数据(缺失值之后还出现了另一个已有的组),上面的代码会得到如下结果:

输入:

library(tidyverse)
# Variant of the example data: a sixth player already in "group3" follows the
# two unassigned players.
df <- tibble(
  player = c(1, 2, 3, 4, 5, 6),
  groups = c("group1", "group2", "group2", NA, NA, "group3")
)
# A tibble: 6 x 2
  player groups
   <dbl> <chr> 
1      1 group1
2      2 group2
3      3 group2
4      4 NA    
5      5 NA    
6      6 group3

输出:

# A tibble: 6 x 2
  player groups
   <dbl> <chr> 
1      1 group1
2      2 group2
3      3 group2
4      4 group4
5      5 group5
6      6 group3