基于 r 中行值的条件 if 语句
Conditional if statement based on row values in r
我是 R 的新手,非常感谢您在这方面的帮助。
我有一个包含 11 个变量的数据框,其中的指标列只有 'Y' 和 'N' 两个水平。
我想要添加一个新列:对每一行,把值等于 'Y' 的那些列的列名连接起来。
在 base R 中,我们可以使用 which(配合 arr.ind = TRUE)得到一个行/列索引矩阵,标出所有值为 "Y" 的单元格。
然后使用 tapply 按行分组,用 paste 把每一行对应的列名连接起来。
# Columns that hold the "Y"/"N" indicators.
cols <- paste0("col", 1:9)
# Row/column positions of every "Y" cell; which() skips NA comparisons.
mat <- which(df[cols] == "Y", arr.ind = TRUE)
# Group over *all* row numbers so a row without any "Y" keeps its slot
# (tapply() returns NA for an empty group) instead of shortening the result
# and breaking the column assignment below.
row_id <- factor(mat[, 1], levels = seq_len(nrow(df)))
# mat[, 2] is a position within df[cols], so index `cols` rather than
# names(df); the two only coincide when `cols` are the leading columns.
new_col <- as.character(tapply(cols[mat[, 2]], row_id, paste, collapse = "_"))
# Rows with no "Y" get an empty string rather than NA.
new_col[is.na(new_col)] <- ""
df$new_col <- new_col
df
# col1 col2 col3 col4 col5 col6 col7 col8 col9 col10 col11 new_col
#1 N Y N Y Y Y N Y Y 1 624 col2_col4_col5_col6_col8_col9
#2 N Y N Y Y Y N Y N 7 548 col2_col4_col5_col6_col8
使用 tidyverse,我们可以先把数据转换为长格式,用 filter 只保留 value 为 "Y" 的行,然后按原始行分组,把对应的列名(name)拼接成一个字符串。
library(dplyr)

df %>%
  # Remember which record each value came from before reshaping.
  mutate(row = seq_len(n())) %>%
  # Stack every column except col10, col11 and row into name/value pairs.
  tidyr::pivot_longer(cols = -c(col10, col11, row)) %>%
  # Keep only the cells marked "Y".
  filter(value == "Y") %>%
  # One output row per original record, listing its "Y" column names.
  group_by(row, col10, col11) %>%
  summarise(newcol = paste(name, collapse = ", ")) %>%
  ungroup() %>%
  # The helper index has served its purpose.
  select(-row)
数据
# Example data: nine "Y"/"N" factor columns (col1-col8 are constant, col9
# differs between the two rows) followed by two numeric columns.
df <- data.frame(
  col1 = factor(c("N", "N"), levels = "N"),
  col2 = factor(c("Y", "Y"), levels = "Y"),
  col3 = factor(c("N", "N"), levels = "N"),
  col4 = factor(c("Y", "Y"), levels = "Y"),
  col5 = factor(c("Y", "Y"), levels = "Y"),
  col6 = factor(c("Y", "Y"), levels = "Y"),
  col7 = factor(c("N", "N"), levels = "N"),
  col8 = factor(c("Y", "Y"), levels = "Y"),
  col9 = factor(c("Y", "N"), levels = c("N", "Y")),
  col10 = c(1, 7),
  col11 = c(623.53, 548.028)
)
一个简单的基础 R 方法是
# Flag the "Y" cells once on the data frame itself; this avoids apply()'s
# coercion of df1 to a character matrix.
is_yes <- df1 == "Y"
# For each row, join the names of the flagged columns with "_".
# which() drops NA comparisons, so a missing cell never contributes an
# "NA" entry to the joined string; rows without any "Y" yield "".
df1$newcol <- vapply(
  seq_len(nrow(df1)),
  function(i) paste(colnames(is_yes)[which(is_yes[i, ])], collapse = "_"),
  character(1)
)
测试数据创建代码。
# Reproducible example data: two rows of ten random "N"/"Y" flags,
# followed by two integer columns, named col1 ... col12.
set.seed(1234)
df1 <- as.data.frame(
  # replicate() returns one draw per column, so transpose to one per row.
  t(replicate(2, sample(c("N", "Y"), size = 10, replace = TRUE)))
)
df1 <- cbind(df1, matrix(1:4, nrow = 2))
names(df1) <- paste0("col", seq_len(ncol(df1)))
我是 R 的新手,非常感谢您在这方面的帮助。
我有一个包含 11 个变量的数据框,其中的指标列只有 'Y' 和 'N' 两个水平。
我想要添加一个新列:对每一行,把值等于 'Y' 的那些列的列名连接起来。
在 base R 中,我们可以使用 which(配合 arr.ind = TRUE)得到一个行/列索引矩阵,标出所有值为 "Y" 的单元格。
然后使用 tapply 按行分组,用 paste 把每一行对应的列名连接起来。
# Columns that hold the "Y"/"N" indicators.
cols <- paste0("col", 1:9)
# Row/column positions of every "Y" cell; which() skips NA comparisons.
mat <- which(df[cols] == "Y", arr.ind = TRUE)
# Group over *all* row numbers so a row without any "Y" keeps its slot
# (tapply() returns NA for an empty group) instead of shortening the result
# and breaking the column assignment below.
row_id <- factor(mat[, 1], levels = seq_len(nrow(df)))
# mat[, 2] is a position within df[cols], so index `cols` rather than
# names(df); the two only coincide when `cols` are the leading columns.
new_col <- as.character(tapply(cols[mat[, 2]], row_id, paste, collapse = "_"))
# Rows with no "Y" get an empty string rather than NA.
new_col[is.na(new_col)] <- ""
df$new_col <- new_col
df
# col1 col2 col3 col4 col5 col6 col7 col8 col9 col10 col11 new_col
#1 N Y N Y Y Y N Y Y 1 624 col2_col4_col5_col6_col8_col9
#2 N Y N Y Y Y N Y N 7 548 col2_col4_col5_col6_col8
使用 tidyverse,我们可以先把数据转换为长格式,用 filter 只保留 value 为 "Y" 的行,然后按原始行分组,把对应的列名(name)拼接成一个字符串。
library(dplyr)

df %>%
  # Remember which record each value came from before reshaping.
  mutate(row = seq_len(n())) %>%
  # Stack every column except col10, col11 and row into name/value pairs.
  tidyr::pivot_longer(cols = -c(col10, col11, row)) %>%
  # Keep only the cells marked "Y".
  filter(value == "Y") %>%
  # One output row per original record, listing its "Y" column names.
  group_by(row, col10, col11) %>%
  summarise(newcol = paste(name, collapse = ", ")) %>%
  ungroup() %>%
  # The helper index has served its purpose.
  select(-row)
数据
# Example data: nine "Y"/"N" factor columns (col1-col8 are constant, col9
# differs between the two rows) followed by two numeric columns.
df <- data.frame(
  col1 = factor(c("N", "N"), levels = "N"),
  col2 = factor(c("Y", "Y"), levels = "Y"),
  col3 = factor(c("N", "N"), levels = "N"),
  col4 = factor(c("Y", "Y"), levels = "Y"),
  col5 = factor(c("Y", "Y"), levels = "Y"),
  col6 = factor(c("Y", "Y"), levels = "Y"),
  col7 = factor(c("N", "N"), levels = "N"),
  col8 = factor(c("Y", "Y"), levels = "Y"),
  col9 = factor(c("Y", "N"), levels = c("N", "Y")),
  col10 = c(1, 7),
  col11 = c(623.53, 548.028)
)
一个简单的基础 R 方法是
# Flag the "Y" cells once on the data frame itself; this avoids apply()'s
# coercion of df1 to a character matrix.
is_yes <- df1 == "Y"
# For each row, join the names of the flagged columns with "_".
# which() drops NA comparisons, so a missing cell never contributes an
# "NA" entry to the joined string; rows without any "Y" yield "".
df1$newcol <- vapply(
  seq_len(nrow(df1)),
  function(i) paste(colnames(is_yes)[which(is_yes[i, ])], collapse = "_"),
  character(1)
)
测试数据创建代码。
# Reproducible example data: two rows of ten random "N"/"Y" flags,
# followed by two integer columns, named col1 ... col12.
set.seed(1234)
df1 <- as.data.frame(
  # replicate() returns one draw per column, so transpose to one per row.
  t(replicate(2, sample(c("N", "Y"), size = 10, replace = TRUE)))
)
df1 <- cbind(df1, matrix(1:4, nrow = 2))
names(df1) <- paste0("col", seq_len(ncol(df1)))