使用 fastDummies 创建年份/地区(Year/Region)虚拟变量
Creating Year/Region Dummy Variables with fastDummies
我编写了代码,为年份(2014、2015、2016、2017 或 2018)和地区(编码为地区 1、2、3 或 4)创建虚拟变量,并将它们作为新列添加到我的数据框 `NHIS1` 中。但是,当我尝试把这份工作编织(knit)成 R Markdown 文件时,收到了“找不到对象 'YEAR'”的错误。我该如何重写这行代码,才能得到相同的虚拟变量列而不出现错误消息?下面还附有一张结果列的截图。
# Question snippet: build dummy (indicator) columns for YEAR and REGION and
# attach them to NHIS1.
# Convert the YEAR and REGION columns to character strings.
NHIS1 <- NHIS1 %>% mutate(YEAR = as.character(YEAR), REGION = as.character(REGION)) # integers -> strings
# Keep only the two variables that are to be dummy-coded.
df_test <- NHIS1 %>% select(YEAR, REGION) # subset to include 2 variables
# Dummy-code the two-column subset; this value of `results` is overwritten by
# the next statement.
results <- fastDummies::dummy_cols(df_test) # adds the dummy columns (9 new columns, per the author)
# This is the call reported to fail with "object 'YEAR' not found": YEAR and
# REGION are passed to c() as bare names instead of quoted column names.
results <- NHIS1 %>%
fastDummies::dummy_cols(select_columns = c(YEAR, REGION))
# Drop the original YEAR and REGION columns from the dummy-coded result.
results <- results %>% select(-c(YEAR, REGION))
# NOTE(review): `results` was built from the full NHIS1 and only YEAR/REGION
# were dropped, so this bind presumably re-appends every other NHIS1 column as
# a duplicate alongside the dummy columns - confirm this is intended.
NHIS1 <-NHIS1 %>% bind_cols(results) # merge the new variables into the old data frame
你需要使用引号,写成 `c("YEAR", "REGION")`。
library(dplyr)

# Build the dummy (indicator) columns for YEAR and REGION and append them to
# NHIS1.fake.
#
# `select_columns` must be a character vector of column names: unquoted names
# are looked up as ordinary objects, which fails with
# "object 'YEAR' not found".
#
# Only YEAR and REGION are passed to dummy_cols(), so after dropping those two
# key columns `results` holds nothing but the new indicator columns. Binding a
# result that still carried X and Y would append duplicate copies of them.
results <- NHIS1.fake %>%
  select(YEAR, REGION) %>%
  fastDummies::dummy_cols(select_columns = c("YEAR", "REGION")) %>%
  select(-c(YEAR, REGION))

# Attach the indicator columns to the original data frame.
NHIS1.fake <- NHIS1.fake %>% bind_cols(results)

# Resulting NHIS1.fake:
#   YEAR REGION           X          Y YEAR_2018 YEAR_2019 YEAR_2020 REGION_a
# 1 2018      a  1.37095845 -0.0627141         1         0         0        1
# 2 2019      a -0.56469817  1.3048697         0         1         0        1
# 3 2020      a  0.36312841  2.2866454         0         0         1        1
# 4 2018      b  0.63286260 -1.3888607         1         0         0        0
# 5 2019      b  0.40426832 -0.2787888         0         1         0        0
# 6 2020      b -0.10612452 -0.1333213         0         0         1        0
# 7 2018      c  1.51152200  0.6359504         1         0         0        0
# 8 2019      c -0.09465904 -0.2842529         0         1         0        0
# 9 2020      c  2.01842371 -2.6564554         0         0         1        0
#   REGION_b REGION_c
# 1        0        0
# 2        0        0
# 3        0        0
# 4        1        0
# 5        1        0
# 6        1        0
# 7        0        1
# 8        0        1
# 9        0        1
示例(玩具)数据(需要在运行上面的代码之前先创建):
# Toy data set: 9 rows pairing each of three years with each of three regions,
# plus two numeric columns X and Y.
NHIS1.fake <- data.frame(
  YEAR = rep(2018:2020, times = 3L),
  REGION = factor(rep(c("a", "b", "c"), each = 3L), levels = c("a", "b", "c")),
  X = c(
    1.37095844714667, -0.564698171396089, 0.363128411337339,
    0.63286260496104, 0.404268323140999, -0.106124516091484,
    1.51152199743894, -0.0946590384130976, 2.01842371387704
  ),
  Y = c(
    -0.062714099052421, 1.30486965422349, 2.28664539270111,
    -1.38886070111234, -0.278788766817371, -0.133321336393658,
    0.635950398070074, -0.284252921416072, -2.65645542090478
  )
)
我编写了代码,为年份(2014、2015、2016、2017 或 2018)和地区(编码为地区 1、2、3 或 4)创建虚拟变量,并将它们作为新列添加到我的数据框 `NHIS1` 中。但是,当我尝试把这份工作编织(knit)成 R Markdown 文件时,收到了“找不到对象 'YEAR'”的错误。我该如何重写这行代码,才能得到相同的虚拟变量列而不出现错误消息?下面还附有一张结果列的截图。
# Question snippet: build dummy (indicator) columns for YEAR and REGION and
# attach them to NHIS1.
# Convert the YEAR and REGION columns to character strings.
NHIS1 <- NHIS1 %>% mutate(YEAR = as.character(YEAR), REGION = as.character(REGION)) # integers -> strings
# Keep only the two variables that are to be dummy-coded.
df_test <- NHIS1 %>% select(YEAR, REGION) # subset to include 2 variables
# Dummy-code the two-column subset; this value of `results` is overwritten by
# the next statement.
results <- fastDummies::dummy_cols(df_test) # adds the dummy columns (9 new columns, per the author)
# This is the call reported to fail with "object 'YEAR' not found": YEAR and
# REGION are passed to c() as bare names instead of quoted column names.
results <- NHIS1 %>%
fastDummies::dummy_cols(select_columns = c(YEAR, REGION))
# Drop the original YEAR and REGION columns from the dummy-coded result.
results <- results %>% select(-c(YEAR, REGION))
# NOTE(review): `results` was built from the full NHIS1 and only YEAR/REGION
# were dropped, so this bind presumably re-appends every other NHIS1 column as
# a duplicate alongside the dummy columns - confirm this is intended.
NHIS1 <-NHIS1 %>% bind_cols(results) # merge the new variables into the old data frame
你需要使用引号,写成 `c("YEAR", "REGION")`。
library(dplyr)

# Build the dummy (indicator) columns for YEAR and REGION and append them to
# NHIS1.fake.
#
# `select_columns` must be a character vector of column names: unquoted names
# are looked up as ordinary objects, which fails with
# "object 'YEAR' not found".
#
# Only YEAR and REGION are passed to dummy_cols(), so after dropping those two
# key columns `results` holds nothing but the new indicator columns. Binding a
# result that still carried X and Y would append duplicate copies of them.
results <- NHIS1.fake %>%
  select(YEAR, REGION) %>%
  fastDummies::dummy_cols(select_columns = c("YEAR", "REGION")) %>%
  select(-c(YEAR, REGION))

# Attach the indicator columns to the original data frame.
NHIS1.fake <- NHIS1.fake %>% bind_cols(results)

# Resulting NHIS1.fake:
#   YEAR REGION           X          Y YEAR_2018 YEAR_2019 YEAR_2020 REGION_a
# 1 2018      a  1.37095845 -0.0627141         1         0         0        1
# 2 2019      a -0.56469817  1.3048697         0         1         0        1
# 3 2020      a  0.36312841  2.2866454         0         0         1        1
# 4 2018      b  0.63286260 -1.3888607         1         0         0        0
# 5 2019      b  0.40426832 -0.2787888         0         1         0        0
# 6 2020      b -0.10612452 -0.1333213         0         0         1        0
# 7 2018      c  1.51152200  0.6359504         1         0         0        0
# 8 2019      c -0.09465904 -0.2842529         0         1         0        0
# 9 2020      c  2.01842371 -2.6564554         0         0         1        0
#   REGION_b REGION_c
# 1        0        0
# 2        0        0
# 3        0        0
# 4        1        0
# 5        1        0
# 6        1        0
# 7        0        1
# 8        0        1
# 9        0        1
示例(玩具)数据(需要在运行上面的代码之前先创建):
# Toy data set: 9 rows pairing each of three years with each of three regions,
# plus two numeric columns X and Y.
NHIS1.fake <- data.frame(
  YEAR = rep(2018:2020, times = 3L),
  REGION = factor(rep(c("a", "b", "c"), each = 3L), levels = c("a", "b", "c")),
  X = c(
    1.37095844714667, -0.564698171396089, 0.363128411337339,
    0.63286260496104, 0.404268323140999, -0.106124516091484,
    1.51152199743894, -0.0946590384130976, 2.01842371387704
  ),
  Y = c(
    -0.062714099052421, 1.30486965422349, 2.28664539270111,
    -1.38886070111234, -0.278788766817371, -0.133321336393658,
    0.635950398070074, -0.284252921416072, -2.65645542090478
  )
)