如何将数据框行中的字符串复制到共享相同 ID 的所有后续行?
How to copy a character string in a dataframe row to all subsequent rows sharing the same ID?
假设我们从如下所示的数据框 df 开始:
ID Flag
1 1 NULL
2 1 NULL
3 1 FRY
4 1 CRY
5 1 NULL
6 5 CRY
7 5 NULL
8 5 NULL
# Sample data: five rows for ID 1 followed by three rows for ID 5.
ID <- rep(c(1, 5), times = c(5, 3))
# "NULL" is a literal string placeholder here, not R's NULL object.
Flag <- c(
  "NULL", "NULL", "FRY", "CRY", "NULL",
  "CRY", "NULL", "NULL"
)
df <- data.frame(ID, Flag)
df
我想更改“Flag”列,使得对于给定的 ID,当首次出现 Flag 不为“NULL”的行时,将该非 NULL 值复制到同一 ID 的所有后续行。这样我们最终会得到以下数据框:
ID Flag [Explain]
1 1 NULL
2 1 NULL
3 1 FRY First row for ID 1 where Flag <> NULL, so apply row 3 FRY to all subsequent rows for ID 1
4 1 FRY Override original row 4 CRY since FRY came first
5 1 FRY FRY rules for all remaining ID = 1 rows
6 5 CRY First row for ID 5 where Flag <> NULL, so apply row 6 CRY to all subsequent rows for ID 5
7 5 CRY CRY rules for all remaining ID = 5 rows
8 5 CRY
如何使用 dplyr 实现这一点?我一直在尝试 group()、fill()、coalesce(),但始终没有成功。
使用 tidyr::fill 再加上一些额外的数据整理,你可以这样做:
library(dplyr)
library(tidyr)

df %>%
  group_by(ID) %>%
  # Rows holding the "NULL" placeholder become NA so fill() can overwrite
  # them; every other row takes the group's first non-"NULL" value.
  mutate(
    Flag = ifelse(
      Flag == "NULL",
      NA_character_,
      first(Flag[Flag != "NULL"])
    )
  ) %>%
  # Carry the last seen value downward within each ID group.
  fill(Flag) %>%
  # NAs still left (rows before the first flag) get the placeholder back.
  replace_na(list(Flag = "NULL")) %>%
  ungroup()
#> # A tibble: 8 × 2
#> ID Flag
#> <dbl> <chr>
#> 1 1 NULL
#> 2 1 NULL
#> 3 1 FRY
#> 4 1 FRY
#> 5 1 FRY
#> 6 5 CRY
#> 7 5 CRY
#> 8 5 CRY
下面是一个借助小辅助函数执行此操作的方法。
library(dplyr)
#' Propagate the first non-"NULL" value to the end of a vector
#'
#' @param x A character vector in which the string "NULL" marks an unset
#'   entry.
#' @return `x` with every element from the first non-"NULL" position onward
#'   replaced by the value at that position. When `x` is empty or contains
#'   no non-"NULL" value, `x` is returned unchanged.
fill_values <- function(x) {
  # Position of the first non-"NULL" value; match() yields NA if none exists.
  first_pos <- match(TRUE, x != "NULL")
  # Without this guard, `NA:length(x)` would raise "NA/NaN argument".
  if (is.na(first_pos)) {
    return(x)
  }
  # Overwrite everything from that position to the end with the first
  # non-"NULL" value.
  x[first_pos:length(x)] <- x[first_pos]
  x
}
# Run fill_values() on the Flag column separately within each ID group.
df %>%
  group_by(ID) %>%
  mutate(Flag = fill_values(Flag)) %>%
  ungroup()
# ID Flag
# <dbl> <chr>
#1 1 NULL
#2 1 NULL
#3 1 FRY
#4 1 FRY
#5 1 FRY
#6 5 CRY
#7 5 CRY
#8 5 CRY
# Base R solution:
transform(
  df,
  Flag = ave(
    Flag,
    ID,
    FUN = function(x) {
      # TRUE wherever the group holds a real flag rather than "NULL".
      is_set <- x != "NULL"
      # cumsum() > 0 is TRUE from the first real flag onward; those rows
      # take the first real flag, earlier rows keep "NULL".
      ifelse(cumsum(is_set) > 0, x[min(which(is_set))], "NULL")
    }
  )
)
# Dplyr solution:
library(dplyr)

df %>%
  group_by(ID) %>%
  mutate(
    # Rows before the group's first real flag (running count still 0) keep
    # "NULL"; all later rows take the first real flag.
    Flag = if_else(
      condition = cumsum(Flag != "NULL") == 0,
      true = "NULL",
      false = first(Flag[which(Flag != "NULL")])
    )
  ) %>%
  ungroup()
# data.table solution:
library(data.table)
dt <- data.table(df)
# Update Flag by reference, one ID group at a time.
dt[
  ,
  Flag := {
    # TRUE wherever the group holds a real flag rather than "NULL".
    is_set <- Flag != "NULL"
    # From the first real flag onward use that flag; before it keep "NULL".
    ifelse(cumsum(is_set) > 0, Flag[min(which(is_set))], "NULL")
  },
  by = ID
]
假设我们从如下所示的数据框 df 开始:
ID Flag
1 1 NULL
2 1 NULL
3 1 FRY
4 1 CRY
5 1 NULL
6 5 CRY
7 5 NULL
8 5 NULL
# Sample data: five rows for ID 1 followed by three rows for ID 5.
ID <- rep(c(1, 5), times = c(5, 3))
# "NULL" is a literal string placeholder here, not R's NULL object.
Flag <- c(
  "NULL", "NULL", "FRY", "CRY", "NULL",
  "CRY", "NULL", "NULL"
)
df <- data.frame(ID, Flag)
df
我想更改“Flag”列,使得对于给定的 ID,当首次出现 Flag 不为“NULL”的行时,将该非 NULL 值复制到同一 ID 的所有后续行。这样我们最终会得到以下数据框:
ID Flag [Explain]
1 1 NULL
2 1 NULL
3 1 FRY First row for ID 1 where Flag <> NULL, so apply row 3 FRY to all subsequent rows for ID 1
4 1 FRY Override original row 4 CRY since FRY came first
5 1 FRY FRY rules for all remaining ID = 1 rows
6 5 CRY First row for ID 5 where Flag <> NULL, so apply row 6 CRY to all subsequent rows for ID 5
7 5 CRY CRY rules for all remaining ID = 5 rows
8 5 CRY
如何使用 dplyr 实现这一点?我一直在尝试 group()、fill()、coalesce(),但始终没有成功。
使用 tidyr::fill 再加上一些额外的数据整理,你可以这样做:
library(dplyr)
library(tidyr)

df %>%
  group_by(ID) %>%
  # Rows holding the "NULL" placeholder become NA so fill() can overwrite
  # them; every other row takes the group's first non-"NULL" value.
  mutate(
    Flag = ifelse(
      Flag == "NULL",
      NA_character_,
      first(Flag[Flag != "NULL"])
    )
  ) %>%
  # Carry the last seen value downward within each ID group.
  fill(Flag) %>%
  # NAs still left (rows before the first flag) get the placeholder back.
  replace_na(list(Flag = "NULL")) %>%
  ungroup()
#> # A tibble: 8 × 2
#> ID Flag
#> <dbl> <chr>
#> 1 1 NULL
#> 2 1 NULL
#> 3 1 FRY
#> 4 1 FRY
#> 5 1 FRY
#> 6 5 CRY
#> 7 5 CRY
#> 8 5 CRY
下面是一个借助小辅助函数执行此操作的方法。
library(dplyr)
#' Propagate the first non-"NULL" value to the end of a vector
#'
#' @param x A character vector in which the string "NULL" marks an unset
#'   entry.
#' @return `x` with every element from the first non-"NULL" position onward
#'   replaced by the value at that position. When `x` is empty or contains
#'   no non-"NULL" value, `x` is returned unchanged.
fill_values <- function(x) {
  # Position of the first non-"NULL" value; match() yields NA if none exists.
  first_pos <- match(TRUE, x != "NULL")
  # Without this guard, `NA:length(x)` would raise "NA/NaN argument".
  if (is.na(first_pos)) {
    return(x)
  }
  # Overwrite everything from that position to the end with the first
  # non-"NULL" value.
  x[first_pos:length(x)] <- x[first_pos]
  x
}
# Run fill_values() on the Flag column separately within each ID group.
df %>%
  group_by(ID) %>%
  mutate(Flag = fill_values(Flag)) %>%
  ungroup()
# ID Flag
# <dbl> <chr>
#1 1 NULL
#2 1 NULL
#3 1 FRY
#4 1 FRY
#5 1 FRY
#6 5 CRY
#7 5 CRY
#8 5 CRY
# Base R solution:
transform(
  df,
  Flag = ave(
    Flag,
    ID,
    FUN = function(x) {
      # TRUE wherever the group holds a real flag rather than "NULL".
      is_set <- x != "NULL"
      # cumsum() > 0 is TRUE from the first real flag onward; those rows
      # take the first real flag, earlier rows keep "NULL".
      ifelse(cumsum(is_set) > 0, x[min(which(is_set))], "NULL")
    }
  )
)
# Dplyr solution:
library(dplyr)

df %>%
  group_by(ID) %>%
  mutate(
    # Rows before the group's first real flag (running count still 0) keep
    # "NULL"; all later rows take the first real flag.
    Flag = if_else(
      condition = cumsum(Flag != "NULL") == 0,
      true = "NULL",
      false = first(Flag[which(Flag != "NULL")])
    )
  ) %>%
  ungroup()
# data.table solution:
library(data.table)
dt <- data.table(df)
# Update Flag by reference, one ID group at a time.
dt[
  ,
  Flag := {
    # TRUE wherever the group holds a real flag rather than "NULL".
    is_set <- Flag != "NULL"
    # From the first real flag onward use that flag; before it keep "NULL".
    ifelse(cumsum(is_set) > 0, Flag[min(which(is_set))], "NULL")
  },
  by = ID
]