join 与 group_by 中的 tidy eval 问题
Join and group_by tidy eval issue
我编写了下面这个函数。它在最后一部分之前都能正常运行(已在代码注释中标出),而最后一部分需要把几个对象连接(join)在一起,我不知道该如何让它工作。我认为主要问题在于如何把 colName 参数转换成字符串,以传给连接函数的 "by =" 参数。至于 group_by 函数,我不确定放在双花括号中的写法是否有效。如果有人能帮忙就太好了!
#' Turnover percentage per group for one calendar year (non-working draft)
#'
#' @param data Data frame; the body reads the columns `DateofHire` and
#'   `DateofTermination` plus the grouping column.
#' @param colName Grouping column; captured with `ensym()`.
#' @param year Year used for the termination filter and for building the
#'   headcount cut-off dates. Defaults to "2015".
#' @return Intended: one row per group with a `Turnover` column. The join
#'   section at the end does not work as written (see the NOTE there).
emp_turnover_fun <- function(data, colName, year = "2015") {
# Capture colName as a symbol so it can be unquoted with !! in the verbs below
colName <- ensym(colName)
# Terminations per group: rows whose termination year equals `year`
# NOTE(review): year() is presumably lubridate::year() - confirm
# NOTE(review): clean_names() is presumably janitor::clean_names(), which may
# rename the grouping column - confirm it still matches the join key used later
term_test <- data %>%
filter(year(DateofTermination) == year) %>%
count(!!(colName)) %>%
clean_names()
# Headcount per group at the start of `year`: hired on or before 1 January
# and either terminated after that date or not terminated at all
fun_year_job <- paste(year, "-01-01", sep = "")
start_test <- data %>%
select(DateofHire, DateofTermination, !!(colName)) %>%
filter(
DateofHire <= fun_year_job,
DateofTermination > fun_year_job | is.na(DateofTermination)
) %>%
count(!!(colName))
# Headcount per group at the end of `year`: same rule, evaluated at
# 1 January of the following year
year_pos <- year %>% as.character()
year_num_plus_pos <- as.character(as.numeric(year_pos) + 1)
fun_year2_pos <- paste(year_num_plus_pos, "-01-01", sep = "")
end_test <- data %>%
select(DateofHire, DateofTermination, !!(colName)) %>%
filter(
DateofHire <= fun_year2_pos,
DateofTermination > fun_year2_pos | is.na(DateofTermination)
) %>%
count(!!(colName))
#### PROBLEM BEGINS HERE
# NOTE: str() displays the structure of an object; it does not convert the
# symbol to a string, so `by =` and setNames() do not receive the column name.
# NOTE(review): colName is already a symbol at this point, whereas {{ }} is
# meant for function arguments - every other verb here uses !!colName
join_turnover_year <- full_join(start_test, end_test, by = str(colName)) %>%
full_join(y = term_test, by = str(colName)) %>%
setNames(c(str(colName), "Start_Headcount", "End_Headcount", "Terminations")) %>%
group_by({{colName}}) %>%
summarise(Turnover = ((Terminations) / (Start_Headcount + End_Headcount)) * 100)
return(join_turnover_year)
}
问题在于使用了 str:它的作用是显示对象的结构,而不是把对象转换成字符串。假设 colName 是以字符串形式传入的,那么本来不需要任何包装;但在函数内部,它已被 ensym 转换成了符号。因此,要么在把输入转换成符号之前先取得它(假设它是字符串),要么使用 rlang 中的 as_string 把符号转换回字符串:
#' Employee turnover (%) per group for one calendar year
#'
#' Counts, per value of `colName`, the employees present on 1 January of
#' `year`, the employees present on 1 January of the following year, and the
#' terminations dated within `year`, then combines them as
#' `Terminations / (Start_Headcount + End_Headcount) * 100`.
#'
#' @param data Data frame with `DateofHire`, `DateofTermination` and the
#'   grouping column.
#' @param colName Grouping column, quoted or unquoted.
#' @param year Calendar year (string or number). Defaults to "2015".
#' @return One row per group: the grouping column and `Turnover`. `Turnover`
#'   is `NA` for a group that is missing from any of the three counts.
emp_turnover_fun <- function(data, colName, year = "2015") {
  # Capture the column as a symbol (works for quoted and unquoted input) and
  # keep a string copy for arguments that need a name, such as `by =`.
  colName <- ensym(colName)
  colName_str <- rlang::as_string(colName)

  # Cut-off dates: 1 January of `year` and of the following year
  year_start <- paste0(year, "-01-01")
  next_year_start <- paste0(as.numeric(as.character(year)) + 1, "-01-01")

  # Employees per group who were hired on or before `cutoff` and were still
  # employed after it (terminated later, or never terminated).
  headcount_on <- function(cutoff, count_name) {
    data %>%
      filter(
        DateofHire <= cutoff,
        DateofTermination > cutoff | is.na(DateofTermination)
      ) %>%
      count(!!colName, name = count_name)
  }

  start_counts <- headcount_on(year_start, "Start_Headcount")
  end_counts <- headcount_on(next_year_start, "End_Headcount")

  # Terminations per group within `year`. The grouping column keeps its
  # original name (no clean_names()), so it still matches the join key.
  term_counts <- data %>%
    filter(year(DateofTermination) == year) %>%
    count(!!colName, name = "Terminations")

  # The count columns are already named, so no positional renaming is needed.
  start_counts %>%
    full_join(end_counts, by = colName_str) %>%
    full_join(term_counts, by = colName_str) %>%
    group_by(!!colName) %>%
    summarise(
      Turnover = Terminations / (Start_Headcount + End_Headcount) * 100
    )
}
使用 as_string 比直接把输入当作字符串更安全:ensym 同时接受带引号和不带引号的值,所以如果传入的是不带引号的列名,直接取输入就行不通了(那样可能需要 deparse(substitute(colName)))。更稳妥的做法是先转换为符号,再用 as_string 转换回字符串。
我编写了下面这个函数。它在最后一部分之前都能正常运行(已在代码注释中标出),而最后一部分需要把几个对象连接(join)在一起,我不知道该如何让它工作。我认为主要问题在于如何把 colName 参数转换成字符串,以传给连接函数的 "by =" 参数。至于 group_by 函数,我不确定放在双花括号中的写法是否有效。如果有人能帮忙就太好了!
#' Turnover percentage per group for one calendar year (non-working draft)
#'
#' @param data Data frame; the body reads the columns `DateofHire` and
#'   `DateofTermination` plus the grouping column.
#' @param colName Grouping column; captured with `ensym()`.
#' @param year Year used for the termination filter and for building the
#'   headcount cut-off dates. Defaults to "2015".
#' @return Intended: one row per group with a `Turnover` column. The join
#'   section at the end does not work as written (see the NOTE there).
emp_turnover_fun <- function(data, colName, year = "2015") {
# Capture colName as a symbol so it can be unquoted with !! in the verbs below
colName <- ensym(colName)
# Terminations per group: rows whose termination year equals `year`
# NOTE(review): year() is presumably lubridate::year() - confirm
# NOTE(review): clean_names() is presumably janitor::clean_names(), which may
# rename the grouping column - confirm it still matches the join key used later
term_test <- data %>%
filter(year(DateofTermination) == year) %>%
count(!!(colName)) %>%
clean_names()
# Headcount per group at the start of `year`: hired on or before 1 January
# and either terminated after that date or not terminated at all
fun_year_job <- paste(year, "-01-01", sep = "")
start_test <- data %>%
select(DateofHire, DateofTermination, !!(colName)) %>%
filter(
DateofHire <= fun_year_job,
DateofTermination > fun_year_job | is.na(DateofTermination)
) %>%
count(!!(colName))
# Headcount per group at the end of `year`: same rule, evaluated at
# 1 January of the following year
year_pos <- year %>% as.character()
year_num_plus_pos <- as.character(as.numeric(year_pos) + 1)
fun_year2_pos <- paste(year_num_plus_pos, "-01-01", sep = "")
end_test <- data %>%
select(DateofHire, DateofTermination, !!(colName)) %>%
filter(
DateofHire <= fun_year2_pos,
DateofTermination > fun_year2_pos | is.na(DateofTermination)
) %>%
count(!!(colName))
#### PROBLEM BEGINS HERE
# NOTE: str() displays the structure of an object; it does not convert the
# symbol to a string, so `by =` and setNames() do not receive the column name.
# NOTE(review): colName is already a symbol at this point, whereas {{ }} is
# meant for function arguments - every other verb here uses !!colName
join_turnover_year <- full_join(start_test, end_test, by = str(colName)) %>%
full_join(y = term_test, by = str(colName)) %>%
setNames(c(str(colName), "Start_Headcount", "End_Headcount", "Terminations")) %>%
group_by({{colName}}) %>%
summarise(Turnover = ((Terminations) / (Start_Headcount + End_Headcount)) * 100)
return(join_turnover_year)
}
问题在于使用了 str:它的作用是显示对象的结构,而不是把对象转换成字符串。假设 colName 是以字符串形式传入的,那么本来不需要任何包装;但在函数内部,它已被 ensym 转换成了符号。因此,要么在把输入转换成符号之前先取得它(假设它是字符串),要么使用 rlang 中的 as_string 把符号转换回字符串:
#' Employee turnover (%) per group for one calendar year
#'
#' Counts, per value of `colName`, the employees present on 1 January of
#' `year`, the employees present on 1 January of the following year, and the
#' terminations dated within `year`, then combines them as
#' `Terminations / (Start_Headcount + End_Headcount) * 100`.
#'
#' @param data Data frame with `DateofHire`, `DateofTermination` and the
#'   grouping column.
#' @param colName Grouping column, quoted or unquoted.
#' @param year Calendar year (string or number). Defaults to "2015".
#' @return One row per group: the grouping column and `Turnover`. `Turnover`
#'   is `NA` for a group that is missing from any of the three counts.
emp_turnover_fun <- function(data, colName, year = "2015") {
  # Capture the column as a symbol (works for quoted and unquoted input) and
  # keep a string copy for arguments that need a name, such as `by =`.
  colName <- ensym(colName)
  colName_str <- rlang::as_string(colName)

  # Cut-off dates: 1 January of `year` and of the following year
  year_start <- paste0(year, "-01-01")
  next_year_start <- paste0(as.numeric(as.character(year)) + 1, "-01-01")

  # Employees per group who were hired on or before `cutoff` and were still
  # employed after it (terminated later, or never terminated).
  headcount_on <- function(cutoff, count_name) {
    data %>%
      filter(
        DateofHire <= cutoff,
        DateofTermination > cutoff | is.na(DateofTermination)
      ) %>%
      count(!!colName, name = count_name)
  }

  start_counts <- headcount_on(year_start, "Start_Headcount")
  end_counts <- headcount_on(next_year_start, "End_Headcount")

  # Terminations per group within `year`. The grouping column keeps its
  # original name (no clean_names()), so it still matches the join key.
  term_counts <- data %>%
    filter(year(DateofTermination) == year) %>%
    count(!!colName, name = "Terminations")

  # The count columns are already named, so no positional renaming is needed.
  start_counts %>%
    full_join(end_counts, by = colName_str) %>%
    full_join(term_counts, by = colName_str) %>%
    group_by(!!colName) %>%
    summarise(
      Turnover = Terminations / (Start_Headcount + End_Headcount) * 100
    )
}
使用 as_string 比直接把输入当作字符串更安全:ensym 同时接受带引号和不带引号的值,所以如果传入的是不带引号的列名,直接取输入就行不通了,那样可能需要 deparse(substitute(colName))
。相反,首先转换为符号,然后使用 as_string