使用 SE mutate 函数的动态变量名称
Dynamic variable names using SE mutate function
我不知道如何在 for 循环中将动态变量分组。
df
是包含 clm_april
- clm_sept
变量的数据框 double
。我也想为新列设置不同的名称。
每个月不同的组如下所示:
> groups_april
[1] "0" "500" "1000" "1500" "2000" "3500"
[7] "4500" "5000" "9500" "2000000"
以下无效:
vector <- c("april", "may", "june", "july", "aug", "sept")
for (i in vector) {
varname <- paste0("clm_", i)
df <- df %>%
mutate_(.dots =
cut(list(varname),
breaks = groups[i],
include.lowest = T,
dig.lab = 10))
}
我也试过:
for (i in vector) {
varname <- paste0("clm_", i)
df <- df %>%
mutate_(.dots = interp(~cut(list(varname),
breaks = groups[i],
include.lowest = T,
dig.lab = 10, varname=as.name(varname))))
}
R 抛出以下错误:
Error in mutate_impl(.data, dots) :
Evaluation error: 'x' must be numeric.
你能帮我解决这个问题吗?
编辑:
# A tibble: 10 x 6
clm_april clm_may clm_june clm_july clm_aug clm_sept
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 0 0 0 0 689 0
2 2000 0 1000 1000 1000 0
3 5000 1000 1000 1000 1500 1518
4 1000 1069 1100 1200 2019 2000
5 679 689 9000 10000 36681 2000
6 800 1000 1000 657 1815 2500
一个选项可能是使用 purrr
包中的 map2_df
。 map2_df
将允许使用传递实际 data.frame
的 column-wise 数据。并同时传递列的名称。自定义函数 find_breaks
使用列名查找对应的 breaks
然后调用 cut
。
Suppose different Groups are defined for each month is defined as:
groups_april <- c("0", "500", "1000", "1500", "2000", "3500", "4500", "5000", "9500", "2000000")
groups_may <- c("0", "500", "1000", "1500", "2000", "3500", "4500", "5000", "9500", "1000000")
groups_june <- c("0", "500", "1000", "1500", "2000", "3500", "4500", "5000", "20000", "2000000")
groups_july <- c("0", "500", "1000", "1500", "2000", "3500", "4500", "7000", "9500", "2000000")
groups_aug <- c("0", "500", "1000", "1500", "2000", "3500", "4500", "6000", "9500", "2000000")
groups_sept <- c("0", "500", "1000", "1500", "2000", "3500", "4500", "5000", "9500", "20000")
#Lets create a data.frame for Groups:
groups_df <- data.frame(groups_april, groups_may, groups_june, groups_july,
groups_aug, groups_sept, stringsAsFactors = FALSE)
# Create a function to find breaks and apply cut
find_breaks <- function(x, y){
#get the corresponding column name
breaks_group_col <- grep(gsub("clm_", "", y), names(groups_df), value = TRUE)
#apply cut using corresponding column from groups_df
cut(x, breaks = groups_df[,breaks_group_col], include.lowest = TRUE, dig.lab = 10)
}
library(purrr)
# Pass df and column names of df to map2.
map2_df(df, grep("clm_*", names(df), value = TRUE), function(.x, .y) find_breaks(.x, .y))
#Result
clm_april clm_may clm_june clm_july clm_aug clm_sept
<fctr> <fctr> <fctr> <fctr> <fctr> <fctr>
1 [0,500] [0,500] [0,500] [0,500] (500,1000] [0,500]
2 (1500,2000] [0,500] (500,1000] (500,1000] (500,1000] [0,500]
3 (4500,5000] (500,1000] (500,1000] (500,1000] (1000,1500] (1500,2000]
4 (500,1000] (1000,1500] (1000,1500] (1000,1500] (2000,3500] (1500,2000]
5 (500,1000] (500,1000] (5000,20000] (9500,2000000] (9500,2000000] (1500,2000]
6 (500,1000] (500,1000] (500,1000] (500,1000] (1500,2000] (2000,3500]
数据
df <- read.table(text = "clm_april clm_may clm_june clm_july clm_aug clm_sept
1 0 0 0 0 689 0
2 2000 0 1000 1000 1000 0
3 5000 1000 1000 1000 1500 1518
4 1000 1069 1100 1200 2019 2000
5 679 689 9000 10000 36681 2000
6 800 1000 1000 657 1815 2500", header = T, stringsAsFactors = F)
我不知道如何在 for 循环中将动态变量分组。
df
是包含 clm_april
- clm_sept
变量的数据框 double
。我也想为新列设置不同的名称。
每个月不同的组如下所示:
> groups_april
[1] "0" "500" "1000" "1500" "2000" "3500"
[7] "4500" "5000" "9500" "2000000"
以下无效:
vector <- c("april", "may", "june", "july", "aug", "sept")
for (i in vector) {
varname <- paste0("clm_", i)
df <- df %>%
mutate_(.dots =
cut(list(varname),
breaks = groups[i],
include.lowest = T,
dig.lab = 10))
}
我也试过:
for (i in vector) {
varname <- paste0("clm_", i)
df <- df %>%
mutate_(.dots = interp(~cut(list(varname),
breaks = groups[i],
include.lowest = T,
dig.lab = 10, varname=as.name(varname))))
}
R 抛出以下错误:
Error in mutate_impl(.data, dots) :
Evaluation error: 'x' must be numeric.
你能帮我解决这个问题吗?
编辑:
# A tibble: 10 x 6
clm_april clm_may clm_june clm_july clm_aug clm_sept
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 0 0 0 0 689 0
2 2000 0 1000 1000 1000 0
3 5000 1000 1000 1000 1500 1518
4 1000 1069 1100 1200 2019 2000
5 679 689 9000 10000 36681 2000
6 800 1000 1000 657 1815 2500
一个选项可能是使用 purrr
包中的 map2_df
。 map2_df
将允许使用传递实际 data.frame
的 column-wise 数据。并同时传递列的名称。自定义函数 find_breaks
使用列名查找对应的 breaks
然后调用 cut
。
Suppose different Groups are defined for each month is defined as:
groups_april <- c("0", "500", "1000", "1500", "2000", "3500", "4500", "5000", "9500", "2000000")
groups_may <- c("0", "500", "1000", "1500", "2000", "3500", "4500", "5000", "9500", "1000000")
groups_june <- c("0", "500", "1000", "1500", "2000", "3500", "4500", "5000", "20000", "2000000")
groups_july <- c("0", "500", "1000", "1500", "2000", "3500", "4500", "7000", "9500", "2000000")
groups_aug <- c("0", "500", "1000", "1500", "2000", "3500", "4500", "6000", "9500", "2000000")
groups_sept <- c("0", "500", "1000", "1500", "2000", "3500", "4500", "5000", "9500", "20000")
#Lets create a data.frame for Groups:
groups_df <- data.frame(groups_april, groups_may, groups_june, groups_july,
groups_aug, groups_sept, stringsAsFactors = FALSE)
# Create a function to find breaks and apply cut
find_breaks <- function(x, y){
#get the corresponding column name
breaks_group_col <- grep(gsub("clm_", "", y), names(groups_df), value = TRUE)
#apply cut using corresponding column from groups_df
cut(x, breaks = groups_df[,breaks_group_col], include.lowest = TRUE, dig.lab = 10)
}
library(purrr)
# Pass df and column names of df to map2.
map2_df(df, grep("clm_*", names(df), value = TRUE), function(.x, .y) find_breaks(.x, .y))
#Result
clm_april clm_may clm_june clm_july clm_aug clm_sept
<fctr> <fctr> <fctr> <fctr> <fctr> <fctr>
1 [0,500] [0,500] [0,500] [0,500] (500,1000] [0,500]
2 (1500,2000] [0,500] (500,1000] (500,1000] (500,1000] [0,500]
3 (4500,5000] (500,1000] (500,1000] (500,1000] (1000,1500] (1500,2000]
4 (500,1000] (1000,1500] (1000,1500] (1000,1500] (2000,3500] (1500,2000]
5 (500,1000] (500,1000] (5000,20000] (9500,2000000] (9500,2000000] (1500,2000]
6 (500,1000] (500,1000] (500,1000] (500,1000] (1500,2000] (2000,3500]
数据
df <- read.table(text = "clm_april clm_may clm_june clm_july clm_aug clm_sept
1 0 0 0 0 689 0
2 2000 0 1000 1000 1000 0
3 5000 1000 1000 1000 1500 1518
4 1000 1069 1100 1200 2019 2000
5 679 689 9000 10000 36681 2000
6 800 1000 1000 657 1815 2500", header = T, stringsAsFactors = F)