基于R数据帧中静态函数的逻辑函数
logical functions based on static functions in R dataframe
我有一个非常大的 Excel 电子表格,其中包含许多针对观测值的 'checks'(检查,300 多列)。这些检查由布尔运算符(大于、等于)以及一些加法/减法运算组成:
df <-data.frame(checkID = c(1,2,3,4), checkpart1 = c(50, 70, 111, 320),
checkpart2 = c("+", "==", "*", ">"), checkpart3 = c(18, 17, 6, 3), checkpart4 = c("==", NA, "-", NA), checkpart5 = c(80, NA,76,NA), checkpart6 = c(NA, NA, "==", NA), checkpart7 = c(NA,NA,590, NA))
head(df) ##this is the input
#checkID checkpart1 checkpart2 checkpart3 checkpart4 checkpart5 checkpart6 checkpart7
#1 50 + 18 == 80 <NA> NA
#2 70 == 17 <NA> NA <NA> NA
#3 111 * 6 - 76 == 590
#4 320 > 3 <NA> NA <NA> NA
INSERT CODE THAT MAKES THE EXCEL FUNCTIONS COME TO LIFE HERE.
Mind you that some rows have much longer checks than others, so you can't rely on the column names.
#outcome data frame should look like this, where the checks have been conducted:
View(outputchecks)
#checkID
#1 FALSE
#2 FALSE
#3 TRUE
#4 TRUE
有人知道 R 中的某些 tidyr/dplyr/other 应用程序可以在数据帧中执行这些 'static functions' 吗?
谢谢!
这是 eval/parse
的一种方式。首先用操作形成一个字符串,然后计算表达式。
# Build one expression string per row: drop the checkID column, discard the NA
# padding cells and glue the remaining tokens together. apply() works on a
# character matrix here, so trimws() strips any padding whitespace that the
# coercion may introduce.
txt <- apply(df[-1], 1, function(x) paste(trimws(x[!is.na(x)]), collapse = ""))
# Parse and evaluate every string. vapply() (instead of sapply()) pins the
# result type: one logical per check, named by its expression text, and an
# error if a check does not evaluate to a single TRUE/FALSE/NA.
# NOTE(review): eval(parse()) executes whatever the cells contain -- only run
# this on spreadsheets you trust.
vapply(txt, function(x) eval(parse(text = x)), logical(1))
# 50+18==80 70==17 111*6-76==590 320>3
# FALSE FALSE TRUE TRUE
这里有一个 tidyr
和 dplyr
的可能性:
library(tidyr)
library(dplyr)
# Collapse every checkpart* column into one space-separated formula string
# (NA cells are dropped by na.rm = TRUE), then evaluate that string row by row
# and replace it with the logical result.
df %>%
  tibble() %>%
  unite("check", starts_with("checkpart"), sep = " ", na.rm = TRUE) %>%
  rowwise() %>%
  # parse(text = , keep.source = FALSE) is what str2expression() does.
  mutate(check = eval(parse(text = check, keep.source = FALSE))) %>%
  ungroup()
returns
# A tibble: 4 x 2
checkID check
<dbl> <lgl>
1 1 FALSE
2 2 FALSE
3 3 TRUE
4 4 TRUE
使用pmap
df <-data.frame(checkID = c(1,2,3,4), checkpart1 = c(50, 70, 111, 320),
checkpart2 = c("+", "==", "*", ">"), checkpart3 = c(18, 17, 6, 3), checkpart4 = c("==", NA, "-", NA), checkpart5 = c(80, NA,76,NA), checkpart6 = c(NA, NA, "==", NA), checkpart7 = c(NA,NA,590, NA))
library(tidyverse)
# Row-wise evaluation with purrr: pmap_lgl() hands the cells of one row (all
# columns except the first, checkID) to the function, which drops the NA cells,
# concatenates the remaining tokens and evaluates the resulting expression to
# a single logical stored in the new column `exp`.
df %>%
  mutate(
    exp = pmap_lgl(df[-1], function(...) {
      cells <- c(...)
      formula_text <- paste(cells[!is.na(cells)], collapse = '')
      eval(parse(text = formula_text))
    })
  )
#> checkID checkpart1 checkpart2 checkpart3 checkpart4 checkpart5 checkpart6
#> 1 1 50 + 18 == 80 <NA>
#> 2 2 70 == 17 <NA> NA <NA>
#> 3 3 111 * 6 - 76 ==
#> 4 4 320 > 3 <NA> NA <NA>
#> checkpart7 exp
#> 1 NA FALSE
#> 2 NA FALSE
#> 3 590 TRUE
#> 4 NA TRUE
由 reprex package (v2.0.0)
于 2021-07-04 创建
df <-data.frame(checkID = c(1,2,3,4), checkpart1 = c(50, 70, 111, 320),
checkpart2 = c("+", "==", "*", ">"), checkpart3 = c(18, 17, 6, 3), checkpart4 = c("==", NA, "-", NA), checkpart5 = c(80, NA,76,NA), checkpart6 = c(NA, NA, "==", NA), checkpart7 = c(NA,NA,590, NA))
library(tidyverse)
# Evaluate each row's check and append the logical result as column `exp`.
# Grouping by checkID keeps that column out of across(everything()) and
# c_across(everything()), so only the checkpart* cells are converted and pasted.
df %>% group_by(checkID) %>%
# Turn every checkpart* cell into text; NA becomes an empty string so it
# contributes nothing when the cells are pasted together.
mutate(across(everything(), ~ifelse(is.na(.), '', as.character(.)))) %>%
# Switch to one-row-at-a-time evaluation (checkID stays as the row identifier).
rowwise() %>%
# Glue the row's cells into one expression string, then parse and evaluate it.
mutate(exp = eval(parse(text = paste(c_across(everything()), collapse = ''))))
# A tibble: 4 x 9
# Rowwise: checkID
checkID checkpart1 checkpart2 checkpart3 checkpart4 checkpart5 checkpart6 checkpart7 exp
<dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <lgl>
1 1 50 + 18 "==" "80" "" "" FALSE
2 2 70 == 17 "" "" "" "" FALSE
3 3 111 * 6 "-" "76" "==" "590" TRUE
4 4 320 > 3 "" "" "" "" TRUE
或者transmute
会得到
# Same evaluation as the mutate() variant, but transmute() keeps only the new
# `exp` column next to the checkID identifier.
df %>% group_by(checkID) %>%
# Turn every checkpart* cell into text; NA becomes an empty string.
mutate(across(everything(), ~ifelse(is.na(.), '', as.character(.)))) %>%
# Evaluate one row at a time (checkID stays as the row identifier).
rowwise() %>%
# Glue the row's cells into one expression string, then parse and evaluate it.
transmute(exp = eval(parse(text = paste(c_across(everything()), collapse = ''))))
# A tibble: 4 x 2
# Rowwise: checkID
checkID exp
<dbl> <lgl>
1 1 FALSE
2 2 FALSE
3 3 TRUE
4 4 TRUE
使用 summarise
也会删除组
# Same evaluation again, using summarise(); .groups = 'drop' makes the result a
# plain (no longer rowwise/grouped) tibble with checkID and `exp`.
df %>% group_by(checkID) %>%
# Turn every checkpart* cell into text; NA becomes an empty string.
mutate(across(everything(), ~ifelse(is.na(.), '', as.character(.)))) %>%
# Evaluate one row at a time (checkID stays as the row identifier).
rowwise() %>%
# Glue the row's cells into one expression string, then parse and evaluate it.
summarise(exp = eval(parse(text = paste(c_across(everything()), collapse = ''))), .groups = 'drop')
# A tibble: 4 x 2
checkID exp
<dbl> <lgl>
1 1 FALSE
2 2 FALSE
3 3 TRUE
4 4 TRUE
您也可以使用以下解决方案:
- 我使用
pmap
函数将数据集中的每一行捕获为字符串,省略了第一个变量 (checkID)
- 然后我只保留了每一行中所有非
NA
的值
- 之后,为了对我们的公式(现在以字符串形式)进行评估,我们首先需要将它们折叠成长度为
1
的字符串
- 然后我使用
rlang
中的 parse_expr
(相当于基础 R 中的 parse)
将字符串转换为表达式
- 最后我使用了
eval_tidy
(同样相当于基础 R 中的 eval)
来对我们的表达式求值
这里没有必要使用 rlang
,因为您可以很容易地使用基本 R 函数,但我的意思是向您展示替代方案。
library(purrr)
library(rlang)
# Same idea with rlang: for each row (checkID excluded) keep the non-NA cells,
# join them with spaces, parse the text into an expression and evaluate it to
# a single logical stored in the new column `output`.
# NOTE(review): cur_data() is deprecated as of dplyr 1.1.0; pick() is its
# replacement there.
df %>%
  mutate(
    output = pmap_lgl(select(cur_data(), !checkID), function(...) {
      cells <- c(...)
      formula_text <- paste(cells[!is.na(cells)], collapse = " ")
      eval_tidy(parse_expr(formula_text))
    })
  )
checkID checkpart1 checkpart2 checkpart3 checkpart4 checkpart5 checkpart6 checkpart7 output
1 1 50 + 18 == 80 <NA> NA FALSE
2 2 70 == 17 <NA> NA <NA> NA FALSE
3 3 111 * 6 - 76 == 590 TRUE
4 4 320 > 3 <NA> NA <NA> NA TRUE
我有一个非常大的 Excel 电子表格,其中包含许多针对观测值的 'checks'(检查,300 多列)。这些检查由布尔运算符(大于、等于)以及一些加法/减法运算组成:
df <-data.frame(checkID = c(1,2,3,4), checkpart1 = c(50, 70, 111, 320),
checkpart2 = c("+", "==", "*", ">"), checkpart3 = c(18, 17, 6, 3), checkpart4 = c("==", NA, "-", NA), checkpart5 = c(80, NA,76,NA), checkpart6 = c(NA, NA, "==", NA), checkpart7 = c(NA,NA,590, NA))
head(df) ##this is the input
#checkID checkpart1 checkpart2 checkpart3 checkpart4 checkpart5 checkpart6 checkpart7
#1 50 + 18 == 80 <NA> NA
#2 70 == 17 <NA> NA <NA> NA
#3 111 * 6 - 76 == 590
#4 320 > 3 <NA> NA <NA> NA
INSERT CODE THAT MAKES THE EXCEL FUNCTIONS COME TO LIFE HERE.
Mind you that some rows have much longer checks than others, so you can't rely on the column names.
#outcome data frame should look like this, where the checks have been conducted:
View(outputchecks)
#checkID
#1 FALSE
#2 FALSE
#3 TRUE
#4 TRUE
有人知道 R 中的某些 tidyr/dplyr/other 应用程序可以在数据帧中执行这些 'static functions' 吗?
谢谢!
这是 eval/parse
的一种方式。首先用操作形成一个字符串,然后计算表达式。
# Build one expression string per row: drop the checkID column, discard the NA
# padding cells and glue the remaining tokens together. apply() works on a
# character matrix here, so trimws() strips any padding whitespace that the
# coercion may introduce.
txt <- apply(df[-1], 1, function(x) paste(trimws(x[!is.na(x)]), collapse = ""))
# Parse and evaluate every string. vapply() (instead of sapply()) pins the
# result type: one logical per check, named by its expression text, and an
# error if a check does not evaluate to a single TRUE/FALSE/NA.
# NOTE(review): eval(parse()) executes whatever the cells contain -- only run
# this on spreadsheets you trust.
vapply(txt, function(x) eval(parse(text = x)), logical(1))
# 50+18==80 70==17 111*6-76==590 320>3
# FALSE FALSE TRUE TRUE
这里有一个 tidyr
和 dplyr
的可能性:
library(tidyr)
library(dplyr)
# Collapse every checkpart* column into one space-separated formula string
# (NA cells are dropped by na.rm = TRUE), then evaluate that string row by row
# and replace it with the logical result.
df %>%
  tibble() %>%
  unite("check", starts_with("checkpart"), sep = " ", na.rm = TRUE) %>%
  rowwise() %>%
  # parse(text = , keep.source = FALSE) is what str2expression() does.
  mutate(check = eval(parse(text = check, keep.source = FALSE))) %>%
  ungroup()
returns
# A tibble: 4 x 2
checkID check
<dbl> <lgl>
1 1 FALSE
2 2 FALSE
3 3 TRUE
4 4 TRUE
使用pmap
df <-data.frame(checkID = c(1,2,3,4), checkpart1 = c(50, 70, 111, 320),
checkpart2 = c("+", "==", "*", ">"), checkpart3 = c(18, 17, 6, 3), checkpart4 = c("==", NA, "-", NA), checkpart5 = c(80, NA,76,NA), checkpart6 = c(NA, NA, "==", NA), checkpart7 = c(NA,NA,590, NA))
library(tidyverse)
# Row-wise evaluation with purrr: pmap_lgl() hands the cells of one row (all
# columns except the first, checkID) to the function, which drops the NA cells,
# concatenates the remaining tokens and evaluates the resulting expression to
# a single logical stored in the new column `exp`.
df %>%
  mutate(
    exp = pmap_lgl(df[-1], function(...) {
      cells <- c(...)
      formula_text <- paste(cells[!is.na(cells)], collapse = '')
      eval(parse(text = formula_text))
    })
  )
#> checkID checkpart1 checkpart2 checkpart3 checkpart4 checkpart5 checkpart6
#> 1 1 50 + 18 == 80 <NA>
#> 2 2 70 == 17 <NA> NA <NA>
#> 3 3 111 * 6 - 76 ==
#> 4 4 320 > 3 <NA> NA <NA>
#> checkpart7 exp
#> 1 NA FALSE
#> 2 NA FALSE
#> 3 590 TRUE
#> 4 NA TRUE
由 reprex package (v2.0.0) 于 2021-07-04 创建
df <-data.frame(checkID = c(1,2,3,4), checkpart1 = c(50, 70, 111, 320),
checkpart2 = c("+", "==", "*", ">"), checkpart3 = c(18, 17, 6, 3), checkpart4 = c("==", NA, "-", NA), checkpart5 = c(80, NA,76,NA), checkpart6 = c(NA, NA, "==", NA), checkpart7 = c(NA,NA,590, NA))
library(tidyverse)
# Evaluate each row's check and append the logical result as column `exp`.
# Grouping by checkID keeps that column out of across(everything()) and
# c_across(everything()), so only the checkpart* cells are converted and pasted.
df %>% group_by(checkID) %>%
# Turn every checkpart* cell into text; NA becomes an empty string so it
# contributes nothing when the cells are pasted together.
mutate(across(everything(), ~ifelse(is.na(.), '', as.character(.)))) %>%
# Switch to one-row-at-a-time evaluation (checkID stays as the row identifier).
rowwise() %>%
# Glue the row's cells into one expression string, then parse and evaluate it.
mutate(exp = eval(parse(text = paste(c_across(everything()), collapse = ''))))
# A tibble: 4 x 9
# Rowwise: checkID
checkID checkpart1 checkpart2 checkpart3 checkpart4 checkpart5 checkpart6 checkpart7 exp
<dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <lgl>
1 1 50 + 18 "==" "80" "" "" FALSE
2 2 70 == 17 "" "" "" "" FALSE
3 3 111 * 6 "-" "76" "==" "590" TRUE
4 4 320 > 3 "" "" "" "" TRUE
或者transmute
会得到
# Same evaluation as the mutate() variant, but transmute() keeps only the new
# `exp` column next to the checkID identifier.
df %>% group_by(checkID) %>%
# Turn every checkpart* cell into text; NA becomes an empty string.
mutate(across(everything(), ~ifelse(is.na(.), '', as.character(.)))) %>%
# Evaluate one row at a time (checkID stays as the row identifier).
rowwise() %>%
# Glue the row's cells into one expression string, then parse and evaluate it.
transmute(exp = eval(parse(text = paste(c_across(everything()), collapse = ''))))
# A tibble: 4 x 2
# Rowwise: checkID
checkID exp
<dbl> <lgl>
1 1 FALSE
2 2 FALSE
3 3 TRUE
4 4 TRUE
使用 summarise
也会删除组
# Same evaluation again, using summarise(); .groups = 'drop' makes the result a
# plain (no longer rowwise/grouped) tibble with checkID and `exp`.
df %>% group_by(checkID) %>%
# Turn every checkpart* cell into text; NA becomes an empty string.
mutate(across(everything(), ~ifelse(is.na(.), '', as.character(.)))) %>%
# Evaluate one row at a time (checkID stays as the row identifier).
rowwise() %>%
# Glue the row's cells into one expression string, then parse and evaluate it.
summarise(exp = eval(parse(text = paste(c_across(everything()), collapse = ''))), .groups = 'drop')
# A tibble: 4 x 2
checkID exp
<dbl> <lgl>
1 1 FALSE
2 2 FALSE
3 3 TRUE
4 4 TRUE
您也可以使用以下解决方案:
- 我使用
pmap
函数将数据集中的每一行捕获为字符串,省略了第一个变量 (checkID)
- 然后我只保留了每一行中所有非
NA
的值
- 之后,为了对我们的公式(现在以字符串形式)进行评估,我们首先需要将它们折叠成长度为
1
的字符串
- 然后我使用
rlang
中的 parse_expr
(相当于基础 R 中的 parse)
将字符串转换为表达式
- 最后我使用了
eval_tidy
(同样相当于基础 R 中的 eval)
来对我们的表达式求值
这里没有必要使用 rlang
,因为您可以很容易地使用基本 R 函数,但我的意思是向您展示替代方案。
library(purrr)
library(rlang)
# Same idea with rlang: for each row (checkID excluded) keep the non-NA cells,
# join them with spaces, parse the text into an expression and evaluate it to
# a single logical stored in the new column `output`.
# NOTE(review): cur_data() is deprecated as of dplyr 1.1.0; pick() is its
# replacement there.
df %>%
  mutate(
    output = pmap_lgl(select(cur_data(), !checkID), function(...) {
      cells <- c(...)
      formula_text <- paste(cells[!is.na(cells)], collapse = " ")
      eval_tidy(parse_expr(formula_text))
    })
  )
checkID checkpart1 checkpart2 checkpart3 checkpart4 checkpart5 checkpart6 checkpart7 output
1 1 50 + 18 == 80 <NA> NA FALSE
2 2 70 == 17 <NA> NA <NA> NA FALSE
3 3 111 * 6 - 76 == 590 TRUE
4 4 320 > 3 <NA> NA <NA> NA TRUE