如何使用 apply 创建多个多项式变量?
How to use apply to create multiple polynomial variables?
我有几个变量,
# Names of the columns that hold the grade codes.
qual_cols <- c(
  "ExterQual", "ExterCond", "BsmtQual", "BsmtCond", "HeatingQC",
  "KitchenQual", "FireplaceQu", "GarageQual", "GarageCond", "PoolQC"
)
每列由相同的五个字符值组成:
# The five grade codes that appear in every column listed in qual_cols.
grades <- c("Po", "Fa", "TA", "Gd", "Ex")
我想将其数值化
"Po" = 1; "Fa" = 2; "TA" = 4; "Gd" = 6; "Ex" = 11
为了相乘创建新的变量,像这样:
# Desired derived columns: each is the element-wise product of two columns.
combi[["GarageGrade"]] <- combi[["GarageQual"]] * combi[["GarageCond"]]
combi[["ExterGrade"]] <- combi[["ExterQual"]] * combi[["ExterCond"]]
combi[["KitchenScore"]] <- combi[["KitchenAbvGr"]] * combi[["KitchenQual"]]
有哪些方法可以做到这一点?
注意:我是编程初学者,如能提供任何帮助,不胜感激。
示例:
head(combi$ExterQual) # (Gd, TA, Gd, TA, Gd, TA)
等效值将是 (6, 4, 6, 4, 6, 4)
head(combi$ExterCond) # (TA, TA, TA, TA, TA, TA)
等效值将是 (4, 4, 4, 4, 4, 4)
# Multiply the two grade columns element-wise, then preview the first rows.
combi[["ExterGrade"]] <- combi[["ExterQual"]] * combi[["ExterCond"]]
head(combi[["ExterGrade"]]) # expected output: (24, 16, 24, 16, 24, 16)
我们可以使用命名向量将值更改为数字
# Recode the grade columns to numbers, then add one "<prefix>Grade" column per
# Qual/Cond prefix holding the product of that prefix's columns.

# Lookup table: grade code -> numeric score.
grade_scores <- setNames(c(1, 2, 4, 6, 11), grades)

newdata <- combi[qual_cols]
# as.character() keeps the lookup name-based even if a column is a factor
# (indexing by a factor would use its integer codes); unname() drops the grade
# labels that named-vector indexing attaches to every element.
newdata[] <- lapply(combi[qual_cols], function(x) {
  unname(grade_scores[as.character(x)])
})

# Columns whose name ends in "Cond" or "Qual", and the prefix left after
# removing that trailing capitalised word.
nm1 <- grep("(Cond|Qual)$", names(newdata), value = TRUE)
nm2 <- sub("[A-Z][a-z]+$", "", nm1)
nm3 <- paste0(unique(nm2), "Grade")

# split.default() returns its groups in factor-level order, which for a plain
# character vector is sorted order. The assignment below is positional, so the
# levels are pinned to unique(nm2) to keep every product lined up with its
# name in nm3.
grade_groups <- split.default(newdata[nm1], factor(nm2, levels = unique(nm2)))
newdata[nm3] <- lapply(grade_groups, function(x) Reduce(`*`, x))
数据
# Reproducible sample data: 50 grade codes drawn with replacement and laid out
# as 10 character columns named after qual_cols.
set.seed(24)
grade_draws <- sample(grades, 10 * 5, replace = TRUE)
grade_matrix <- matrix(grade_draws, ncol = 10, dimnames = list(NULL, qual_cols))
combi <- as.data.frame(grade_matrix, stringsAsFactors = FALSE)
如果变量名称不一致(即不仅仅是 Qual 和 Cond),这是一种更灵活的方法:
用 Po、Fa、TA、Gd、Ex 值识别变量
# Columns whose values are the Po / Fa / TA / Gd / Ex grade codes.
qual_cols <- c(
  "ExterQual", "ExterCond", "BsmtQual", "BsmtCond", "HeatingQC",
  "KitchenQual", "FireplaceQu", "GarageQual", "GarageCond", "PoolQC"
)
用数值替换等级类别。
# Convert every grade column to its numeric score, one column at a time.
numeric_quals <- sapply(combi[qual_cols], function(col) {
  # Grade code -> score lookup.
  score_of <- c(Po = 1, Fa = 2, TA = 4, Gd = 6, Ex = 11)
  scores <- unname(score_of[as.character(col)])
  # A non-missing value outside the five codes scores 0; missing stays NA.
  scores[is.na(scores) & !is.na(col)] <- 0
  scores
})
替换(更改)数据框中的变量。
# Swap the original grade columns for their numeric versions: drop them, then
# append the converted columns at the end.
# all_of() marks qual_cols as an external character vector of column names, so
# it cannot be confused with a column that happens to be called "qual_cols"
# and the selection errors clearly if any listed column is missing.
combi <- combi %>%
  select(-all_of(qual_cols)) %>%
  cbind(numeric_quals)
执行乘法
房屋整体质量
# Product of the OverallQual and OverallCond columns, then a preview.
combi[["OverallGrade"]] <- combi[["OverallQual"]] * combi[["OverallCond"]]
head(combi[["OverallGrade"]])
泳池评分
# Product of the PoolArea and PoolQC columns.
combi[["PoolScore"]] <- combi[["PoolArea"]] * combi[["PoolQC"]]
车库总面积
# Product of the GarageCars and GarageArea columns.
combi[["AllGarage"]] <- combi[["GarageCars"]] * combi[["GarageArea"]]
卫生间总数
# Bathroom total: the two "full" columns count 1 each, the two "half" columns
# count 0.5 each.
combi[["TotalBath"]] <- combi[["BsmtFullBath"]] +
  (0.5 * combi[["BsmtHalfBath"]]) +
  combi[["FullBath"]] +
  (0.5 * combi[["HalfBath"]])
房屋总面积(包括地下室)
# Sum of the GrLivArea and TotalBsmtSF columns.
combi[["AllSF"]] <- combi[["GrLivArea"]] + combi[["TotalBsmtSF"]]
门廊总面积(SF,平方英尺)
# Sum of the four porch columns.
combi[["AllPorchSF"]] <- combi[["OpenPorchSF"]] +
  combi[["EnclosedPorch"]] +
  combi[["X3SsnPorch"]] +
  combi[["ScreenPorch"]]
合并加法
# Sum of YearRemodAdd and GarageYrBlt.
# as.numeric() on a factor returns the internal level codes, not the values
# printed as labels, so convert through as.character() first. For character or
# numeric year values this gives the same result as a direct as.numeric().
combi$Additions <- combi$YearRemodAdd +
  as.numeric(as.character(combi$GarageYrBlt))
我有几个变量,
# Names of the columns that hold the grade codes.
qual_cols <- c(
  "ExterQual", "ExterCond", "BsmtQual", "BsmtCond", "HeatingQC",
  "KitchenQual", "FireplaceQu", "GarageQual", "GarageCond", "PoolQC"
)
每列由相同的五个字符值组成:
# The five grade codes that appear in every column listed in qual_cols.
grades <- c("Po", "Fa", "TA", "Gd", "Ex")
我想将其数值化
"Po" = 1; "Fa" = 2; "TA" = 4; "Gd" = 6; "Ex" = 11
为了相乘创建新的变量,像这样:
# Desired derived columns: each is the element-wise product of two columns.
combi[["GarageGrade"]] <- combi[["GarageQual"]] * combi[["GarageCond"]]
combi[["ExterGrade"]] <- combi[["ExterQual"]] * combi[["ExterCond"]]
combi[["KitchenScore"]] <- combi[["KitchenAbvGr"]] * combi[["KitchenQual"]]
有哪些方法可以做到这一点?
注意:我是编程初学者,如能提供任何帮助,不胜感激。
示例:
head(combi$ExterQual) # (Gd, TA, Gd, TA, Gd, TA)
等效值将是 (6, 4, 6, 4, 6, 4)
head(combi$ExterCond) # (TA, TA, TA, TA, TA, TA)
等效值将是 (4, 4, 4, 4, 4, 4)
# Multiply the two grade columns element-wise, then preview the first rows.
combi[["ExterGrade"]] <- combi[["ExterQual"]] * combi[["ExterCond"]]
head(combi[["ExterGrade"]]) # expected output: (24, 16, 24, 16, 24, 16)
我们可以使用命名向量将值更改为数字
# Recode the grade columns to numbers, then add one "<prefix>Grade" column per
# Qual/Cond prefix holding the product of that prefix's columns.

# Lookup table: grade code -> numeric score.
grade_scores <- setNames(c(1, 2, 4, 6, 11), grades)

newdata <- combi[qual_cols]
# as.character() keeps the lookup name-based even if a column is a factor
# (indexing by a factor would use its integer codes); unname() drops the grade
# labels that named-vector indexing attaches to every element.
newdata[] <- lapply(combi[qual_cols], function(x) {
  unname(grade_scores[as.character(x)])
})

# Columns whose name ends in "Cond" or "Qual", and the prefix left after
# removing that trailing capitalised word.
nm1 <- grep("(Cond|Qual)$", names(newdata), value = TRUE)
nm2 <- sub("[A-Z][a-z]+$", "", nm1)
nm3 <- paste0(unique(nm2), "Grade")

# split.default() returns its groups in factor-level order, which for a plain
# character vector is sorted order. The assignment below is positional, so the
# levels are pinned to unique(nm2) to keep every product lined up with its
# name in nm3.
grade_groups <- split.default(newdata[nm1], factor(nm2, levels = unique(nm2)))
newdata[nm3] <- lapply(grade_groups, function(x) Reduce(`*`, x))
数据
# Reproducible sample data: 50 grade codes drawn with replacement and laid out
# as 10 character columns named after qual_cols.
set.seed(24)
grade_draws <- sample(grades, 10 * 5, replace = TRUE)
grade_matrix <- matrix(grade_draws, ncol = 10, dimnames = list(NULL, qual_cols))
combi <- as.data.frame(grade_matrix, stringsAsFactors = FALSE)
如果变量名称不一致(即不仅仅是 Qual 和 Cond),这是一种更灵活的方法:
用 Po、Fa、TA、Gd、Ex 值识别变量
# Columns whose values are the Po / Fa / TA / Gd / Ex grade codes.
qual_cols <- c(
  "ExterQual", "ExterCond", "BsmtQual", "BsmtCond", "HeatingQC",
  "KitchenQual", "FireplaceQu", "GarageQual", "GarageCond", "PoolQC"
)
用数值替换等级类别。
# Convert every grade column to its numeric score, one column at a time.
numeric_quals <- sapply(combi[qual_cols], function(col) {
  # Grade code -> score lookup.
  score_of <- c(Po = 1, Fa = 2, TA = 4, Gd = 6, Ex = 11)
  scores <- unname(score_of[as.character(col)])
  # A non-missing value outside the five codes scores 0; missing stays NA.
  scores[is.na(scores) & !is.na(col)] <- 0
  scores
})
替换(更改)数据框中的变量。
# Swap the original grade columns for their numeric versions: drop them, then
# append the converted columns at the end.
# all_of() marks qual_cols as an external character vector of column names, so
# it cannot be confused with a column that happens to be called "qual_cols"
# and the selection errors clearly if any listed column is missing.
combi <- combi %>%
  select(-all_of(qual_cols)) %>%
  cbind(numeric_quals)
执行乘法
房屋整体质量
# Product of the OverallQual and OverallCond columns, then a preview.
combi[["OverallGrade"]] <- combi[["OverallQual"]] * combi[["OverallCond"]]
head(combi[["OverallGrade"]])
泳池评分
# Product of the PoolArea and PoolQC columns.
combi[["PoolScore"]] <- combi[["PoolArea"]] * combi[["PoolQC"]]
车库总面积
# Product of the GarageCars and GarageArea columns.
combi[["AllGarage"]] <- combi[["GarageCars"]] * combi[["GarageArea"]]
卫生间总数
# Bathroom total: the two "full" columns count 1 each, the two "half" columns
# count 0.5 each.
combi[["TotalBath"]] <- combi[["BsmtFullBath"]] +
  (0.5 * combi[["BsmtHalfBath"]]) +
  combi[["FullBath"]] +
  (0.5 * combi[["HalfBath"]])
房屋总面积(包括地下室)
# Sum of the GrLivArea and TotalBsmtSF columns.
combi[["AllSF"]] <- combi[["GrLivArea"]] + combi[["TotalBsmtSF"]]
门廊总面积(SF,平方英尺)
# Sum of the four porch columns.
combi[["AllPorchSF"]] <- combi[["OpenPorchSF"]] +
  combi[["EnclosedPorch"]] +
  combi[["X3SsnPorch"]] +
  combi[["ScreenPorch"]]
合并加法
# Sum of YearRemodAdd and GarageYrBlt.
# as.numeric() on a factor returns the internal level codes, not the values
# printed as labels, so convert through as.character() first. For character or
# numeric year values this gives the same result as a direct as.numeric().
combi$Additions <- combi$YearRemodAdd +
  as.numeric(as.character(combi$GarageYrBlt))