在用户创建的函数上使用列表和 mapply 在 r 中进行求和编码对比
Using lists and mapply on user created function for sum coding contrasts in r
我想在用户创建的函数上使用列表和映射来在 r 中进行总和编码对比。但是当我尝试时它不起作用。任何帮助将不胜感激。
具体来说,我想对 am
和 vs
应用求和对比,以在 mtcars
中创建求和编码变量 am_c
和 vs_c
数据。我可以长期执行此操作,但是当我尝试创建一个用户创建的函数来生成具有这些结果的数据框时,调用 function_data_frame__sum_contrast()
来完成此任务,但它不起作用。
当我输入单独的输入时该函数起作用:
### applied function to individually inputted ivs and datasets
head(function_data_frame__sum_contrast(vs, mtcars_short_way_df))
mpg cyl disp hp drat wt qsec vs am gear carb vs_c
1 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4 0
2 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4 0
3 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4 0
4 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3 0
5 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2 0
6 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3 0
使用列表形式时无效
### applied function to first value in list
function_data_frame__sum_contrast(IV_info_short_way$IV_analyses[1], IV_info_short_way$dataset_analyses[1])
Error in get(nm1) :
object 'IV_info_short_way$dataset_analyses[1]' not found
3.
get(nm1)
2.
data.frame(get(nm1))
1.
function_data_frame__sum_contrast(IV_info_short_way$IV_analyses[1],
IV_info_short_way$dataset_analyses[1])
它也不适用于 mapply()。
### attempts to mapply for all parts of relevant lists
mtcars_short_way_df <-
mapply(function_data_frame__sum_contrast,
(IV_info$IV_original[IV_info$IV_nature == "nominal"]),
(IV_info$dataset_analyses[IV_info$IV_nature == "nominal"]),
SIMPLIFY = FALSE)
Error in mapply(function_data_frame__sum_contrast, (IV_info$IV_original[IV_info$IV_nature == :
zero-length inputs cannot be mixed with those of non-zero length
如果可能请帮忙。
练习代码如下:
# practice script
## loads packages for analyses
# ---- NOTE: data wrangling
if(!require(tidyverse)){install.packages("tidyverse")}
## gives information about datasets
### mtcars
# ---- NOTE: displays head of data
head(mtcars)
# ---- NOTE: gives structure of data
str(mtcars, list.len=ncol(mtcars))
# ---- NOTE: gives colnames of data
colnames(mtcars)
## produces IV_info chart
IV_info <-
data.frame(
cbind(
IV = c("vs", "am"),
IV_analyses = c("vs", "am"),
IV_nature = c("nominal", "nominal"),
dataset_name = c("mtcars"),
dataset_analyses = c("mtcars")
))
## produces datasets for practice
# ---- NOTE: creates long way dataset
mtcars_long_way_df <- mtcars
# ---- NOTE: creates long way dataset
mtcars_short_way_df <- mtcars
## long way
### changes IV_info to IV_info_short_way
# ---- NOTE: creates dataset
IV_info_short_way <- IV_info
# ---- NOTE: changes dataset_analyses variable
IV_info_short_way$dataset_analyses <- paste(IV_info_short_way$dataset_analysis, "_short_way_df", sep="")
### creates individual contrast variables
# ---- NOTE: based on IV_info_long_way$IV_analyses list
#### variable: vs
# ---- NOTE: gives levels of variable
unique(mtcars_long_way_df$vs)
# ---- NOTE: gives length of levels of variable
length(unique(mtcars_long_way_df$vs))
# ---- NOTE: creates contrast variable
mtcars_long_way_df$vs_c <- mtcars_long_way_df$vs
# ---- NOTE: turns contrast variable to factor
mtcars_long_way_df$vs_c <- as.factor(as.character(mtcars_long_way_df$vs_c))
# ---- NOTE: creates contrast variable
contrasts(mtcars_long_way_df$vs_c) <-
contr.sum(as.numeric(as.character(length(unique(mtcars_long_way_df$vs)))))
#### variable: am
# ---- NOTE: gives levels of variable
unique(mtcars_long_way_df$am)
# ---- NOTE: gives length of levels of variable
length(unique(mtcars_long_way_df$am))
# ---- NOTE: creates contrast variable
mtcars_long_way_df$am_c <- mtcars_long_way_df$am
# ---- NOTE: turns contrast variable to factor
mtcars_long_way_df$am_c <- as.factor(as.character(mtcars_long_way_df$am_c))
# ---- NOTE: creates contrast variable
contrasts(mtcars_long_way_df$am_c) <-
contr.sum(as.numeric(as.character(length(unique(mtcars_long_way_df$am)))))
## short way way
### changes IV_info to IV_info_short_way
# ---- NOTE: creates dataset
IV_info_short_way <- IV_info
# ---- NOTE: changes dataset_analyses variable
IV_info_short_way$dataset_analyses <- paste(IV_info_short_way$dataset_analyses, "_short_way_df", sep="")
### creates function function_data_frame__sum_contrast
# ---- NOTE: creates function
function_data_frame__sum_contrast <-
# ---- NOTE: turns variable into sum contrasted version of variable
# ---- NOTE: variable_name == variable to be turned to sum contrast
# ---- NOTE: dataset_name == dataset that contains variable name
# ---- NOTE: generally speaking, procedure is to create new variable with "_c" as suffix for corresponding sum contrasted variable
function(variable_name, dataset_name)
{
# ---- NOTE: # changes variable_name and dataset_name to strings
colnm <- deparse(substitute(variable_name))
nm1 <- deparse(substitute(dataset_name))
# ---- NOTE: # base data frame
dataset_funct_object_A <-
data.frame(get(nm1))
# ---- NOTE: adds merging column to base data frame
dataset_funct_object_A$merging_column <- dataset_funct_object_A[[colnm]]
# ---- NOTE: ## turns data into data frame
dataset_funct_object_A <- data.frame(dataset_funct_object_A)
# ---- NOTE: # sets up unique values part of data
# ---- NOTE: ## creates object with unique variable values
dataset_funct_object_B <- unique(dataset_funct_object_A[[colnm]])
# ---- NOTE: ### turns object to data frame
dataset_funct_object_B <- data.frame(dataset_funct_object_B)
# ---- NOTE: ### changes colnames
colnames(dataset_funct_object_B) <- c("variable_levels")
# ---- NOTE: ## gives info on whether a given variable_level has a value of NA
dataset_funct_object_B$isNA <- is.na(dataset_funct_object_B$variable_levels)
length(which(dataset_funct_object_B$isNA=="TRUE"))
# ---- NOTE: ## gives length of data frame column
dataset_funct_object_B$variable_level_number <- (as.numeric(as.character(length(dataset_funct_object_B$variable_levels))) - as.numeric(as.character(length(which(dataset_funct_object_B$isNA=="TRUE")))))
# ---- NOTE: ## displays distinct number of levels of variariable of interest, from variable_level_number
as.numeric(distinct(dataset_funct_object_B, variable_level_number))
# ---- NOTE: ## the contrast matrix for categorical variable with a given number of levels
contr.sum(as.numeric(distinct(dataset_funct_object_B, variable_level_number)))
# ---- NOTE: ## creates variable_levels_c variable, which will be used to hold contrast matrix
dataset_funct_object_B$variable_levels_c <- as.factor(as.character(dataset_funct_object_B$variable_levels))
# ---- NOTE: ## inserts the contrast matrix for categorical variable with a given number of levels transfers into appropriate variable
contrasts(dataset_funct_object_B$variable_levels_c) = contr.sum(as.numeric(distinct(dataset_funct_object_B, variable_level_number)))
# ---- NOTE: adds merging column to data frame
dataset_funct_object_B$merging_column <- dataset_funct_object_B$variable_levels
# ---- NOTE: merges original dataset with dataset of interest
dataset_funct_object_C <-
merge(dataset_funct_object_A,
dataset_funct_object_B,
by.x = "merging_column",
by.y = "merging_column",
all.x = TRUE,
all.y = FALSE,
no.dups = TRUE)
# ---- NOTE: # removes merging column from appropriate object
dataset_funct_object_D <-
dataset_funct_object_C %>%
select(
-c(merging_column,
variable_levels,
isNA,
variable_level_number)
)
# ---- NOTE: turns data into data frame
dataset_funct_object_D <- data.frame(dataset_funct_object_D)
# ---- NOTE: ## changes colname
names(dataset_funct_object_D)[names(dataset_funct_object_D) == "variable_levels_c"] <- paste(colnm, "_c", sep = "")
# ---- NOTE: turns data into data frame
dataset_funct_object_D <- data.frame(dataset_funct_object_D)
# ---- NOTE: # returns appropriate object/variable
return(dataset_funct_object_D)
}
### applied function to individually inputted ivs and datasets
head(function_data_frame__sum_contrast(vs, mtcars_short_way_df))
### applied function to first value in list
function_data_frame__sum_contrast(IV_info_short_way$IV_analyses[1], IV_info_short_way$dataset_analyses[1])
# ---- NOTE: does not work
### attempts to mapply for all parts of relevant lists
mtcars_short_way_df <-
mapply(function_data_frame__sum_contrast,
(IV_info$IV_original[IV_info$IV_nature == "nominal"]),
(IV_info$dataset_analyses[IV_info$IV_nature == "nominal"]),
SIMPLIFY = FALSE)
# ---- NOTE: does not work
deparse/substitute
在输入参数未被引号括起并且想要作为字符串检索时起作用。在循环中,我们直接传递一个字符串。所以,我们可以将该行更改为
colnm <- variable_name
nm1 <- dataset_name
-全功能
function_data_frame__sum_contrast <-
# ---- NOTE: turns variable into sum contrasted version of variable
# ---- NOTE: variable_name == variable to be turned to sum contrast
# ---- NOTE: dataset_name == dataset that contains variable name
# ---- NOTE: generally speaking, procedure is to create new variable with "_c" as suffix for corresponding sum contrasted variable
function(variable_name, dataset_name)
{
# ---- NOTE: # changes variable_name and dataset_name to strings
#colnm <- deparse(substitute(variable_name))
colnm <- variable_name
nm1 <- dataset_name
# ---- NOTE: # base data frame
dataset_funct_object_A <-
data.frame(get(nm1))
# ---- NOTE: adds merging column to base data frame
dataset_funct_object_A$merging_column <- dataset_funct_object_A[[colnm]]
# ---- NOTE: ## turns data into data frame
dataset_funct_object_A <- data.frame(dataset_funct_object_A)
# ---- NOTE: # sets up unique values part of data
# ---- NOTE: ## creates object with unique variable values
dataset_funct_object_B <- unique(dataset_funct_object_A[[colnm]])
# ---- NOTE: ### turns object to data frame
dataset_funct_object_B <- data.frame(dataset_funct_object_B)
# ---- NOTE: ### changes colnames
colnames(dataset_funct_object_B) <- c("variable_levels")
# ---- NOTE: ## gives info on whether a given variable_level has a value of NA
dataset_funct_object_B$isNA <- is.na(dataset_funct_object_B$variable_levels)
length(which(dataset_funct_object_B$isNA=="TRUE"))
# ---- NOTE: ## gives length of data frame column
dataset_funct_object_B$variable_level_number <- (as.numeric(as.character(length(dataset_funct_object_B$variable_levels))) - as.numeric(as.character(length(which(dataset_funct_object_B$isNA=="TRUE")))))
# ---- NOTE: ## displays distinct number of levels of variariable of interest, from variable_level_number
as.numeric(distinct(dataset_funct_object_B, variable_level_number))
# ---- NOTE: ## the contrast matrix for categorical variable with a given number of levels
contr.sum(as.numeric(distinct(dataset_funct_object_B, variable_level_number)))
# ---- NOTE: ## creates variable_levels_c variable, which will be used to hold contrast matrix
dataset_funct_object_B$variable_levels_c <- as.factor(as.character(dataset_funct_object_B$variable_levels))
# ---- NOTE: ## inserts the contrast matrix for categorical variable with a given number of levels transfers into appropriate variable
contrasts(dataset_funct_object_B$variable_levels_c) = contr.sum(as.numeric(distinct(dataset_funct_object_B, variable_level_number)))
# ---- NOTE: adds merging column to data frame
dataset_funct_object_B$merging_column <- dataset_funct_object_B$variable_levels
# ---- NOTE: merges original dataset with dataset of interest
dataset_funct_object_C <-
merge(dataset_funct_object_A,
dataset_funct_object_B,
by.x = "merging_column",
by.y = "merging_column",
all.x = TRUE,
all.y = FALSE,
no.dups = TRUE)
# ---- NOTE: # removes merging column from appropriate object
dataset_funct_object_D <-
dataset_funct_object_C %>%
select(
-c(merging_column,
variable_levels,
isNA,
variable_level_number)
)
# ---- NOTE: turns data into data frame
dataset_funct_object_D <- data.frame(dataset_funct_object_D)
# ---- NOTE: ## changes colname
names(dataset_funct_object_D)[names(dataset_funct_object_D) == "variable_levels_c"] <- paste(colnm, "_c", sep = "")
# ---- NOTE: turns data into data frame
dataset_funct_object_D <- data.frame(dataset_funct_object_D)
# ---- NOTE: # returns appropriate object/variable
return(dataset_funct_object_D)
}
-测试
假设 IV_original
是 IV
(因为在 OP 的输入示例中找不到该列)
mtcars_short_way_df <-
mapply(function_data_frame__sum_contrast,
(IV_info$IV[IV_info$IV_nature == "nominal"]),
(IV_info$dataset_analyses[IV_info$IV_nature == "nominal"]),
SIMPLIFY = FALSE)
lapply(mtcars_short_way_df, head, 3)
$vs
mpg cyl disp hp drat wt qsec vs am gear carb vs_c
1 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4 0
2 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4 0
3 14.3 8 360 245 3.21 3.570 15.84 0 0 3 4 0
$am
mpg cyl disp hp drat wt qsec vs am gear carb am_c
1 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2 0
2 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2 0
3 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1 0
我想在用户创建的函数上使用列表和映射来在 r 中进行总和编码对比。但是当我尝试时它不起作用。任何帮助将不胜感激。
具体来说,我想对 am
和 vs
应用求和对比,以在 mtcars
中创建求和编码变量 am_c
和 vs_c
数据。我可以长期执行此操作,但是当我尝试创建一个用户创建的函数来生成具有这些结果的数据框时,调用 function_data_frame__sum_contrast()
来完成此任务,但它不起作用。
当我输入单独的输入时该函数起作用:
### applied function to individually inputted ivs and datasets
head(function_data_frame__sum_contrast(vs, mtcars_short_way_df))
mpg cyl disp hp drat wt qsec vs am gear carb vs_c
1 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4 0
2 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4 0
3 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4 0
4 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3 0
5 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2 0
6 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3 0
使用列表形式时无效
### applied function to first value in list
function_data_frame__sum_contrast(IV_info_short_way$IV_analyses[1], IV_info_short_way$dataset_analyses[1])
Error in get(nm1) :
object 'IV_info_short_way$dataset_analyses[1]' not found
3.
get(nm1)
2.
data.frame(get(nm1))
1.
function_data_frame__sum_contrast(IV_info_short_way$IV_analyses[1],
IV_info_short_way$dataset_analyses[1])
它也不适用于 mapply()。
### attempts to mapply for all parts of relevant lists
mtcars_short_way_df <-
mapply(function_data_frame__sum_contrast,
(IV_info$IV_original[IV_info$IV_nature == "nominal"]),
(IV_info$dataset_analyses[IV_info$IV_nature == "nominal"]),
SIMPLIFY = FALSE)
Error in mapply(function_data_frame__sum_contrast, (IV_info$IV_original[IV_info$IV_nature == :
zero-length inputs cannot be mixed with those of non-zero length
如果可能请帮忙。
练习代码如下:
# practice script
## loads packages for analyses
# ---- NOTE: data wrangling
if(!require(tidyverse)){install.packages("tidyverse")}
## gives information about datasets
### mtcars
# ---- NOTE: displays head of data
head(mtcars)
# ---- NOTE: gives structure of data
str(mtcars, list.len=ncol(mtcars))
# ---- NOTE: gives colnames of data
colnames(mtcars)
## produces IV_info chart
IV_info <-
data.frame(
cbind(
IV = c("vs", "am"),
IV_analyses = c("vs", "am"),
IV_nature = c("nominal", "nominal"),
dataset_name = c("mtcars"),
dataset_analyses = c("mtcars")
))
## produces datasets for practice
# ---- NOTE: creates long way dataset
mtcars_long_way_df <- mtcars
# ---- NOTE: creates long way dataset
mtcars_short_way_df <- mtcars
## long way
### changes IV_info to IV_info_short_way
# ---- NOTE: creates dataset
IV_info_short_way <- IV_info
# ---- NOTE: changes dataset_analyses variable
IV_info_short_way$dataset_analyses <- paste(IV_info_short_way$dataset_analysis, "_short_way_df", sep="")
### creates individual contrast variables
# ---- NOTE: based on IV_info_long_way$IV_analyses list
#### variable: vs
# ---- NOTE: gives levels of variable
unique(mtcars_long_way_df$vs)
# ---- NOTE: gives length of levels of variable
length(unique(mtcars_long_way_df$vs))
# ---- NOTE: creates contrast variable
mtcars_long_way_df$vs_c <- mtcars_long_way_df$vs
# ---- NOTE: turns contrast variable to factor
mtcars_long_way_df$vs_c <- as.factor(as.character(mtcars_long_way_df$vs_c))
# ---- NOTE: creates contrast variable
contrasts(mtcars_long_way_df$vs_c) <-
contr.sum(as.numeric(as.character(length(unique(mtcars_long_way_df$vs)))))
#### variable: am
# ---- NOTE: gives levels of variable
unique(mtcars_long_way_df$am)
# ---- NOTE: gives length of levels of variable
length(unique(mtcars_long_way_df$am))
# ---- NOTE: creates contrast variable
mtcars_long_way_df$am_c <- mtcars_long_way_df$am
# ---- NOTE: turns contrast variable to factor
mtcars_long_way_df$am_c <- as.factor(as.character(mtcars_long_way_df$am_c))
# ---- NOTE: creates contrast variable
contrasts(mtcars_long_way_df$am_c) <-
contr.sum(as.numeric(as.character(length(unique(mtcars_long_way_df$am)))))
## short way way
### changes IV_info to IV_info_short_way
# ---- NOTE: creates dataset
IV_info_short_way <- IV_info
# ---- NOTE: changes dataset_analyses variable
IV_info_short_way$dataset_analyses <- paste(IV_info_short_way$dataset_analyses, "_short_way_df", sep="")
### creates function function_data_frame__sum_contrast
# ---- NOTE: creates function
function_data_frame__sum_contrast <-
# ---- NOTE: turns variable into sum contrasted version of variable
# ---- NOTE: variable_name == variable to be turned to sum contrast
# ---- NOTE: dataset_name == dataset that contains variable name
# ---- NOTE: generally speaking, procedure is to create new variable with "_c" as suffix for corresponding sum contrasted variable
function(variable_name, dataset_name)
{
# ---- NOTE: # changes variable_name and dataset_name to strings
colnm <- deparse(substitute(variable_name))
nm1 <- deparse(substitute(dataset_name))
# ---- NOTE: # base data frame
dataset_funct_object_A <-
data.frame(get(nm1))
# ---- NOTE: adds merging column to base data frame
dataset_funct_object_A$merging_column <- dataset_funct_object_A[[colnm]]
# ---- NOTE: ## turns data into data frame
dataset_funct_object_A <- data.frame(dataset_funct_object_A)
# ---- NOTE: # sets up unique values part of data
# ---- NOTE: ## creates object with unique variable values
dataset_funct_object_B <- unique(dataset_funct_object_A[[colnm]])
# ---- NOTE: ### turns object to data frame
dataset_funct_object_B <- data.frame(dataset_funct_object_B)
# ---- NOTE: ### changes colnames
colnames(dataset_funct_object_B) <- c("variable_levels")
# ---- NOTE: ## gives info on whether a given variable_level has a value of NA
dataset_funct_object_B$isNA <- is.na(dataset_funct_object_B$variable_levels)
length(which(dataset_funct_object_B$isNA=="TRUE"))
# ---- NOTE: ## gives length of data frame column
dataset_funct_object_B$variable_level_number <- (as.numeric(as.character(length(dataset_funct_object_B$variable_levels))) - as.numeric(as.character(length(which(dataset_funct_object_B$isNA=="TRUE")))))
# ---- NOTE: ## displays distinct number of levels of variariable of interest, from variable_level_number
as.numeric(distinct(dataset_funct_object_B, variable_level_number))
# ---- NOTE: ## the contrast matrix for categorical variable with a given number of levels
contr.sum(as.numeric(distinct(dataset_funct_object_B, variable_level_number)))
# ---- NOTE: ## creates variable_levels_c variable, which will be used to hold contrast matrix
dataset_funct_object_B$variable_levels_c <- as.factor(as.character(dataset_funct_object_B$variable_levels))
# ---- NOTE: ## inserts the contrast matrix for categorical variable with a given number of levels transfers into appropriate variable
contrasts(dataset_funct_object_B$variable_levels_c) = contr.sum(as.numeric(distinct(dataset_funct_object_B, variable_level_number)))
# ---- NOTE: adds merging column to data frame
dataset_funct_object_B$merging_column <- dataset_funct_object_B$variable_levels
# ---- NOTE: merges original dataset with dataset of interest
dataset_funct_object_C <-
merge(dataset_funct_object_A,
dataset_funct_object_B,
by.x = "merging_column",
by.y = "merging_column",
all.x = TRUE,
all.y = FALSE,
no.dups = TRUE)
# ---- NOTE: # removes merging column from appropriate object
dataset_funct_object_D <-
dataset_funct_object_C %>%
select(
-c(merging_column,
variable_levels,
isNA,
variable_level_number)
)
# ---- NOTE: turns data into data frame
dataset_funct_object_D <- data.frame(dataset_funct_object_D)
# ---- NOTE: ## changes colname
names(dataset_funct_object_D)[names(dataset_funct_object_D) == "variable_levels_c"] <- paste(colnm, "_c", sep = "")
# ---- NOTE: turns data into data frame
dataset_funct_object_D <- data.frame(dataset_funct_object_D)
# ---- NOTE: # returns appropriate object/variable
return(dataset_funct_object_D)
}
### applied function to individually inputted ivs and datasets
head(function_data_frame__sum_contrast(vs, mtcars_short_way_df))
### applied function to first value in list
function_data_frame__sum_contrast(IV_info_short_way$IV_analyses[1], IV_info_short_way$dataset_analyses[1])
# ---- NOTE: does not work
### attempts to mapply for all parts of relevant lists
mtcars_short_way_df <-
mapply(function_data_frame__sum_contrast,
(IV_info$IV_original[IV_info$IV_nature == "nominal"]),
(IV_info$dataset_analyses[IV_info$IV_nature == "nominal"]),
SIMPLIFY = FALSE)
# ---- NOTE: does not work
deparse/substitute
在输入参数未被引号括起并且想要作为字符串检索时起作用。在循环中,我们直接传递一个字符串。所以,我们可以将该行更改为
colnm <- variable_name
nm1 <- dataset_name
-全功能
function_data_frame__sum_contrast <-
# ---- NOTE: turns variable into sum contrasted version of variable
# ---- NOTE: variable_name == variable to be turned to sum contrast
# ---- NOTE: dataset_name == dataset that contains variable name
# ---- NOTE: generally speaking, procedure is to create new variable with "_c" as suffix for corresponding sum contrasted variable
function(variable_name, dataset_name)
{
# ---- NOTE: # changes variable_name and dataset_name to strings
#colnm <- deparse(substitute(variable_name))
colnm <- variable_name
nm1 <- dataset_name
# ---- NOTE: # base data frame
dataset_funct_object_A <-
data.frame(get(nm1))
# ---- NOTE: adds merging column to base data frame
dataset_funct_object_A$merging_column <- dataset_funct_object_A[[colnm]]
# ---- NOTE: ## turns data into data frame
dataset_funct_object_A <- data.frame(dataset_funct_object_A)
# ---- NOTE: # sets up unique values part of data
# ---- NOTE: ## creates object with unique variable values
dataset_funct_object_B <- unique(dataset_funct_object_A[[colnm]])
# ---- NOTE: ### turns object to data frame
dataset_funct_object_B <- data.frame(dataset_funct_object_B)
# ---- NOTE: ### changes colnames
colnames(dataset_funct_object_B) <- c("variable_levels")
# ---- NOTE: ## gives info on whether a given variable_level has a value of NA
dataset_funct_object_B$isNA <- is.na(dataset_funct_object_B$variable_levels)
length(which(dataset_funct_object_B$isNA=="TRUE"))
# ---- NOTE: ## gives length of data frame column
dataset_funct_object_B$variable_level_number <- (as.numeric(as.character(length(dataset_funct_object_B$variable_levels))) - as.numeric(as.character(length(which(dataset_funct_object_B$isNA=="TRUE")))))
# ---- NOTE: ## displays distinct number of levels of variariable of interest, from variable_level_number
as.numeric(distinct(dataset_funct_object_B, variable_level_number))
# ---- NOTE: ## the contrast matrix for categorical variable with a given number of levels
contr.sum(as.numeric(distinct(dataset_funct_object_B, variable_level_number)))
# ---- NOTE: ## creates variable_levels_c variable, which will be used to hold contrast matrix
dataset_funct_object_B$variable_levels_c <- as.factor(as.character(dataset_funct_object_B$variable_levels))
# ---- NOTE: ## inserts the contrast matrix for categorical variable with a given number of levels transfers into appropriate variable
contrasts(dataset_funct_object_B$variable_levels_c) = contr.sum(as.numeric(distinct(dataset_funct_object_B, variable_level_number)))
# ---- NOTE: adds merging column to data frame
dataset_funct_object_B$merging_column <- dataset_funct_object_B$variable_levels
# ---- NOTE: merges original dataset with dataset of interest
dataset_funct_object_C <-
merge(dataset_funct_object_A,
dataset_funct_object_B,
by.x = "merging_column",
by.y = "merging_column",
all.x = TRUE,
all.y = FALSE,
no.dups = TRUE)
# ---- NOTE: # removes merging column from appropriate object
dataset_funct_object_D <-
dataset_funct_object_C %>%
select(
-c(merging_column,
variable_levels,
isNA,
variable_level_number)
)
# ---- NOTE: turns data into data frame
dataset_funct_object_D <- data.frame(dataset_funct_object_D)
# ---- NOTE: ## changes colname
names(dataset_funct_object_D)[names(dataset_funct_object_D) == "variable_levels_c"] <- paste(colnm, "_c", sep = "")
# ---- NOTE: turns data into data frame
dataset_funct_object_D <- data.frame(dataset_funct_object_D)
# ---- NOTE: # returns appropriate object/variable
return(dataset_funct_object_D)
}
-测试
假设 IV_original
是 IV
(因为在 OP 的输入示例中找不到该列)
mtcars_short_way_df <-
mapply(function_data_frame__sum_contrast,
(IV_info$IV[IV_info$IV_nature == "nominal"]),
(IV_info$dataset_analyses[IV_info$IV_nature == "nominal"]),
SIMPLIFY = FALSE)
lapply(mtcars_short_way_df, head, 3)
$vs
mpg cyl disp hp drat wt qsec vs am gear carb vs_c
1 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4 0
2 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4 0
3 14.3 8 360 245 3.21 3.570 15.84 0 0 3 4 0
$am
mpg cyl disp hp drat wt qsec vs am gear carb am_c
1 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2 0
2 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2 0
3 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1 0