用R dplyr中的一系列字符填充NA
Fill NA with a series of characters in R dplyr
我有一个看起来像这样的大数据框。每个玩家都被分配到一个组。
library(tidyverse)
# Example data: five players; players 4 and 5 have no group assigned yet.
df <- tibble(
  player = c(1, 2, 3, 4, 5),
  groups = c("group1", "group2", "group2", NA_character_, NA_character_)
)
df
#> # A tibble: 5 × 2
#> player groups
#> <dbl> <chr>
#> 1 1 group1
#> 2 2 group2
#> 3 3 group2
#> 4 4 <NA>
#> 5 5 <NA>
由 reprex package (v2.0.1) 于 2022-04-12 创建
有些玩家还没有被分配到组,我想用连续编号的新组依次填充这些缺失值,即像这样:
#> # A tibble: 5 × 2
#> player groups
#> <dbl> <chr>
#> 1 1 group1
#> 2 2 group2
#> 3 3 group2
#> 4 4 group3
#> 5 5 group4
这很棘手,最后我认为我们可以这样做:
library(dplyr)
# Give every unassigned player a new, consecutively numbered group.
# Numbering continues from the highest group number already present in
# `groups` rather than from a hard-coded offset, so the result stays correct
# when the last existing group is not "group2".
df %>%
  mutate(
    # The 0L floor keeps max() finite when every entry of `groups` is NA.
    x = max(c(0L, as.integer(gsub("[^0-9]", "", groups))), na.rm = TRUE) +
      cumsum(is.na(groups))
  ) %>%
  mutate(
    groups = if_else(is.na(groups), paste0("group", x), groups),
    # Drops the helper column `x` once it has been consumed.
    .keep = "unused"
  )
player groups
<dbl> <chr>
1 1 group1
2 2 group2
3 3 group2
4 4 group3
5 5 group4
dplyr
library(dplyr)
# Step 1: store the highest group number currently in use in a helper column.
# Step 2: label each missing entry with that number plus its running count.
# Step 3: drop the helper column again.
df %>%
  mutate(maxgrp = max(as.integer(gsub("[^0-9]", "", groups)), na.rm = TRUE)) %>%
  mutate(
    groups = if_else(
      is.na(groups),
      paste0("group", maxgrp + cumsum(is.na(groups))),
      groups
    )
  ) %>%
  select(-maxgrp)
# # A tibble: 5 x 2
# player groups
# <dbl> <chr>
# 1 1 group1
# 2 2 group2
# 3 3 group2
# 4 4 group3
# 5 5 group4
data.table
library(data.table)
# Work on a data.table copy of the example data and update `groups` by
# reference.
DT <- as.data.table(df)
DT[, groups := {
  is_unassigned <- is.na(groups)
  # Highest group number that is already taken.
  highest <- max(as.integer(gsub("[^0-9]", "", groups)), na.rm = TRUE)
  fifelse(
    is_unassigned,
    paste0("group", cumsum(is_unassigned) + highest),
    groups
  )
}]
你可以这样做:
# Number each missing entry upward from the highest group number in use,
# then remove the helper column.
df |>
  mutate(
    new_group = max(parse_number(groups), na.rm = TRUE) +
      cumsum(is.na(groups))
  ) |>
  mutate(
    groups = if_else(is.na(groups), paste0("group", new_group), groups)
  ) |>
  select(-new_group)
如果数据略有不同——缺失值之后还出现了另一个已有的组(group3)——那么结果如下:
输入:
library(tidyverse)
# Variant of the example: an existing group (group3) appears after the gap.
df <- tibble(
  player = c(1, 2, 3, 4, 5, 6),
  groups = c(
    "group1", "group2", "group2",
    NA_character_, NA_character_,
    "group3"
  )
)
# A tibble: 6 x 2
player groups
<dbl> <chr>
1 1 group1
2 2 group2
3 3 group2
4 4 NA
5 5 NA
6 6 group3
输出:
# A tibble: 6 x 2
player groups
<dbl> <chr>
1 1 group1
2 2 group2
3 3 group2
4 4 group4
5 5 group5
6 6 group3
我有一个看起来像这样的大数据框。每个玩家都被分配到一个组。
library(tidyverse)
# Example data: five players; players 4 and 5 have no group assigned yet.
df <- tibble(
  player = c(1, 2, 3, 4, 5),
  groups = c("group1", "group2", "group2", NA_character_, NA_character_)
)
df
#> # A tibble: 5 × 2
#> player groups
#> <dbl> <chr>
#> 1 1 group1
#> 2 2 group2
#> 3 3 group2
#> 4 4 <NA>
#> 5 5 <NA>
由 reprex package (v2.0.1) 于 2022-04-12 创建
有些玩家还没有被分配到组,我想用连续编号的新组依次填充这些缺失值,即像这样:
#> # A tibble: 5 × 2
#> player groups
#> <dbl> <chr>
#> 1 1 group1
#> 2 2 group2
#> 3 3 group2
#> 4 4 group3
#> 5 5 group4
这很棘手,最后我认为我们可以这样做:
library(dplyr)
# Give every unassigned player a new, consecutively numbered group.
# Numbering continues from the highest group number already present in
# `groups` rather than from a hard-coded offset, so the result stays correct
# when the last existing group is not "group2".
df %>%
  mutate(
    # The 0L floor keeps max() finite when every entry of `groups` is NA.
    x = max(c(0L, as.integer(gsub("[^0-9]", "", groups))), na.rm = TRUE) +
      cumsum(is.na(groups))
  ) %>%
  mutate(
    groups = if_else(is.na(groups), paste0("group", x), groups),
    # Drops the helper column `x` once it has been consumed.
    .keep = "unused"
  )
player groups
<dbl> <chr>
1 1 group1
2 2 group2
3 3 group2
4 4 group3
5 5 group4
dplyr
library(dplyr)
# Step 1: store the highest group number currently in use in a helper column.
# Step 2: label each missing entry with that number plus its running count.
# Step 3: drop the helper column again.
df %>%
  mutate(maxgrp = max(as.integer(gsub("[^0-9]", "", groups)), na.rm = TRUE)) %>%
  mutate(
    groups = if_else(
      is.na(groups),
      paste0("group", maxgrp + cumsum(is.na(groups))),
      groups
    )
  ) %>%
  select(-maxgrp)
# # A tibble: 5 x 2
# player groups
# <dbl> <chr>
# 1 1 group1
# 2 2 group2
# 3 3 group2
# 4 4 group3
# 5 5 group4
data.table
library(data.table)
# Work on a data.table copy of the example data and update `groups` by
# reference.
DT <- as.data.table(df)
DT[, groups := {
  is_unassigned <- is.na(groups)
  # Highest group number that is already taken.
  highest <- max(as.integer(gsub("[^0-9]", "", groups)), na.rm = TRUE)
  fifelse(
    is_unassigned,
    paste0("group", cumsum(is_unassigned) + highest),
    groups
  )
}]
你可以这样做:
# Number each missing entry upward from the highest group number in use,
# then remove the helper column.
df |>
  mutate(
    new_group = max(parse_number(groups), na.rm = TRUE) +
      cumsum(is.na(groups))
  ) |>
  mutate(
    groups = if_else(is.na(groups), paste0("group", new_group), groups)
  ) |>
  select(-new_group)
如果数据略有不同——缺失值之后还出现了另一个已有的组(group3)——那么结果如下:
输入:
library(tidyverse)
# Variant of the example: an existing group (group3) appears after the gap.
df <- tibble(
  player = c(1, 2, 3, 4, 5, 6),
  groups = c(
    "group1", "group2", "group2",
    NA_character_, NA_character_,
    "group3"
  )
)
# A tibble: 6 x 2
player groups
<dbl> <chr>
1 1 group1
2 2 group2
3 3 group2
4 4 NA
5 5 NA
6 6 group3
输出:
# A tibble: 6 x 2
player groups
<dbl> <chr>
1 1 group1
2 2 group2
3 3 group2
4 4 group4
5 5 group5
6 6 group3