条件检查 - 稀疏数据
condition check - sparse data
我想检查一个条件:如果某一列 == 'value',那么再检查一组列中是否有任意一列 == 'value2'。
# Build the dummy data set.
# Top-level questions.
pb <- c("1", "0", "0", "0", "0", "1", "Not_ans", "1", "0", "Not_ans")
qa <- c("1", "1", "0", "0", "1", "0", "Not_ans", "1", "Not_ans", "Not_ans")
# zy <- c("1", "Not_ans", "0", "1", "Not_ans", "0", "1", "1", "1", "Not_ans")

# Sub-questions for pb.
pb.abr <- c("1", "0", "0", "0", "0", "1", "0", "1", "0", "0")
pb.ras <- c("0", "0", "0", "0", "1", "0", "0", "1", "0", "0")
pb.sfg <- c("1", "0", "0", "0", "0", "0", "0", "1", "0", "0")

# Sub-questions for qa.
qa.fgs <- c("1", "0", "0", "0", "0", "0", "0", "1", "0", "0")
qa.sdf <- c("0", "1", "0", "0", "0", "0", "0", "0", "0", "0")
qa.tyu <- c("0", "0", "0", "0", "1", "0", "0", "1", "0", "0")

# Combine the answer vectors into one data frame and show it.
df <- data.frame(pb, qa, pb.abr, pb.ras, pb.sfg, qa.fgs, qa.sdf, qa.tyu)
df
pb qa pb.abr pb.ras pb.sfg qa.fgs qa.sdf qa.tyu
1 1 1 1 0 1 1 0 0
2 0 1 0 0 0 0 1 0
3 0 0 0 0 0 0 0 0
4 0 0 0 0 0 0 0 0
5 0 1 0 1 0 0 0 1
6 1 0 1 0 0 0 0 0
7 Not_ans Not_ans 0 0 0 0 0 0
8 1 1 1 1 1 1 0 1
9 0 Not_ans 0 0 0 0 0 0
10 Not_ans Not_ans 0 0 0 0 0 0
在上面的数据集中,我想检查的是:如果列 'pb' 为 0,那么 pb.abr、pb.ras、pb.sfg 中是否有任意一列 == 1
# Keep rows where pb is 0 and at least one of pb.abr, pb.ras, pb.sfg is 1.
subset_df <- subset(
  df,
  pb == 0 & (pb.abr == 1 | pb.ras == 1 | pb.sfg == 1)
)
难点在于我有 100 多个 pb.xxx 格式的列,在 subset 中手动写出所有列并不可行。
我该如何使用类似 contains("pb.") 的逻辑来完成上述检查,在这些列之间使用 OR 条件,
并最终返回符合条件的数据框?
示例数据
Don't forget to set stringsAsFactors to FALSE!
# Rebuild the data frame with string-to-factor conversion switched off.
df <- data.frame(
  pb, qa,
  pb.abr, pb.ras, pb.sfg,
  qa.fgs, qa.sdf, qa.tyu,
  stringsAsFactors = FALSE
)
代码
library(dplyr)

# Keep rows where pb is "0" and the pb.* sub-question columns sum to more
# than zero.
df %>%
  # Convert every column whose name starts with "pb." to numeric so that
  # rowSums() can add them. The function is passed directly because funs()
  # is deprecated.
  mutate_at(vars(starts_with("pb.")), as.numeric) %>%
  # First filter: pb itself must be "0".
  filter(pb == "0") %>%
  # Second filter: the row sum across the pb.* columns must be positive.
  # The backslash is doubled because "\." is not a valid escape in an R
  # string. "^" restricts the match to the columns converted above, and
  # names(.) reads the names from the piped data rather than the outer df.
  filter(rowSums(.[, grep("^pb\\.", names(.)), drop = FALSE]) > 0)
输出
pb qa pb.abr pb.ras pb.sfg qa.fgs qa.sdf qa.tyu
1 0 1 0 1 0 0 0 1
我们可以使用 filter_at
library(dplyr)

# Keep rows where pb is 0 and any column whose name starts with "pb." is 1.
# The regex backslash is doubled because "\." is not a valid escape in an R
# string; "^" keeps the match to names that begin with "pb.".
df %>%
  filter(pb == 0) %>%
  filter_at(vars(matches("^pb\\.")), any_vars(. == 1))
# pb qa pb.abr pb.ras pb.sfg qa.fgs qa.sdf qa.tyu
#1 0 1 0 1 0 0 0 1
或者使用 base R
# Base R version: keep rows where pb is 0 and at least one column whose name
# starts with "pb." equals 1. The regex backslash is doubled because "\." is
# not a valid escape in an R string.
df[
  df$pb == 0 &
    rowSums(df[grep("^pb\\.", names(df))] == 1) > 0,
]
# pb qa pb.abr pb.ras pb.sfg qa.fgs qa.sdf qa.tyu
#5 0 1 0 1 0 0 0 1
写成一行代码:
# Single-call version: keep rows where pb is 0 and at least one column whose
# name starts with "pb." equals 1.
# - Uses df throughout; the original referred to an undefined object `z`.
# - Compares with == 1 before rowSums() so it also works on character columns.
# - The regex backslash is doubled because "\." is not a valid R string escape.
# - drop = FALSE keeps a data frame even if only one column matches.
dplyr::filter(
  df,
  pb == 0 & rowSums(df[, grepl("^pb\\.", names(df)), drop = FALSE] == 1) > 0
)
我想检查一个条件:如果某一列 == 'value',那么再检查一组列中是否有任意一列 == 'value2'。
# Build the dummy data set.
# Top-level questions.
pb <- c("1", "0", "0", "0", "0", "1", "Not_ans", "1", "0", "Not_ans")
qa <- c("1", "1", "0", "0", "1", "0", "Not_ans", "1", "Not_ans", "Not_ans")
# zy <- c("1", "Not_ans", "0", "1", "Not_ans", "0", "1", "1", "1", "Not_ans")

# Sub-questions for pb.
pb.abr <- c("1", "0", "0", "0", "0", "1", "0", "1", "0", "0")
pb.ras <- c("0", "0", "0", "0", "1", "0", "0", "1", "0", "0")
pb.sfg <- c("1", "0", "0", "0", "0", "0", "0", "1", "0", "0")

# Sub-questions for qa.
qa.fgs <- c("1", "0", "0", "0", "0", "0", "0", "1", "0", "0")
qa.sdf <- c("0", "1", "0", "0", "0", "0", "0", "0", "0", "0")
qa.tyu <- c("0", "0", "0", "0", "1", "0", "0", "1", "0", "0")

# Combine the answer vectors into one data frame and show it.
df <- data.frame(pb, qa, pb.abr, pb.ras, pb.sfg, qa.fgs, qa.sdf, qa.tyu)
df
pb qa pb.abr pb.ras pb.sfg qa.fgs qa.sdf qa.tyu
1 1 1 1 0 1 1 0 0
2 0 1 0 0 0 0 1 0
3 0 0 0 0 0 0 0 0
4 0 0 0 0 0 0 0 0
5 0 1 0 1 0 0 0 1
6 1 0 1 0 0 0 0 0
7 Not_ans Not_ans 0 0 0 0 0 0
8 1 1 1 1 1 1 0 1
9 0 Not_ans 0 0 0 0 0 0
10 Not_ans Not_ans 0 0 0 0 0 0
在上面的数据集中,我想检查的是:如果列 'pb' 为 0,那么 pb.abr、pb.ras、pb.sfg 中是否有任意一列 == 1
# Keep rows where pb is 0 and at least one of pb.abr, pb.ras, pb.sfg is 1.
subset_df <- subset(
  df,
  pb == 0 & (pb.abr == 1 | pb.ras == 1 | pb.sfg == 1)
)
难点在于我有 100 多个 pb.xxx 格式的列,在 subset 中手动写出所有列并不可行。
我该如何使用类似 contains("pb.") 的逻辑来完成上述检查,在这些列之间使用 OR 条件,
并最终返回符合条件的数据框?
示例数据
Don't forget to set stringsAsFactors to FALSE!
# Rebuild the data frame with string-to-factor conversion switched off.
df <- data.frame(
  pb, qa,
  pb.abr, pb.ras, pb.sfg,
  qa.fgs, qa.sdf, qa.tyu,
  stringsAsFactors = FALSE
)
代码
library(dplyr)

# Keep rows where pb is "0" and the pb.* sub-question columns sum to more
# than zero.
df %>%
  # Convert every column whose name starts with "pb." to numeric so that
  # rowSums() can add them. The function is passed directly because funs()
  # is deprecated.
  mutate_at(vars(starts_with("pb.")), as.numeric) %>%
  # First filter: pb itself must be "0".
  filter(pb == "0") %>%
  # Second filter: the row sum across the pb.* columns must be positive.
  # The backslash is doubled because "\." is not a valid escape in an R
  # string. "^" restricts the match to the columns converted above, and
  # names(.) reads the names from the piped data rather than the outer df.
  filter(rowSums(.[, grep("^pb\\.", names(.)), drop = FALSE]) > 0)
输出
pb qa pb.abr pb.ras pb.sfg qa.fgs qa.sdf qa.tyu
1 0 1 0 1 0 0 0 1
我们可以使用 filter_at
library(dplyr)

# Keep rows where pb is 0 and any column whose name starts with "pb." is 1.
# The regex backslash is doubled because "\." is not a valid escape in an R
# string; "^" keeps the match to names that begin with "pb.".
df %>%
  filter(pb == 0) %>%
  filter_at(vars(matches("^pb\\.")), any_vars(. == 1))
# pb qa pb.abr pb.ras pb.sfg qa.fgs qa.sdf qa.tyu
#1 0 1 0 1 0 0 0 1
或者使用 base R
# Base R version: keep rows where pb is 0 and at least one column whose name
# starts with "pb." equals 1. The regex backslash is doubled because "\." is
# not a valid escape in an R string.
df[
  df$pb == 0 &
    rowSums(df[grep("^pb\\.", names(df))] == 1) > 0,
]
# pb qa pb.abr pb.ras pb.sfg qa.fgs qa.sdf qa.tyu
#5 0 1 0 1 0 0 0 1
写成一行代码:
# Single-call version: keep rows where pb is 0 and at least one column whose
# name starts with "pb." equals 1.
# - Uses df throughout; the original referred to an undefined object `z`.
# - Compares with == 1 before rowSums() so it also works on character columns.
# - The regex backslash is doubled because "\." is not a valid R string escape.
# - drop = FALSE keeps a data frame even if only one column matches.
dplyr::filter(
  df,
  pb == 0 & rowSums(df[, grepl("^pb\\.", names(df)), drop = FALSE] == 1) > 0
)