将二进制 "multiple response" 数据重组为分类数据
Restructure binary "multiple response" data to categorical
我想将一些“多重响应”调查数据从二元类别重组为名义类别。
该调查请受访者从一份包含 50 个名字的名单中选出与自己互动最多的十个人。返回的数据有 50 列,每个名字一列:被选中的名字所在的单元格填有该名字,未被选中的则为空白。我想将这五十列转换为十列(name1 到 name10)。
下面是一个简化的示例:共有 5 个名字和 5 位受访者,每位受访者必须选择两个名字。
# Toy input: five respondents (rows) and five candidate names (columns).
# A cell holds the name itself when the respondent picked it, "" otherwise.
id <- seq_len(5)
mike <- c("", "mike", "", "", "mike")
tim <- c("tim", "", "tim", "", "")
mary <- c("mary", "mary", "mary", "", "")
jane <- c("", "", "", "jane", "jane")
liz <- c("", "", "", "liz", "")
surveyData <- data.frame(
  id = id, mike = mike, tim = tim, mary = mary, jane = jane, liz = liz
)
# Desired result: one row per respondent, one column per pick.
Name1 <- c("tim", "mike", "tim", "jane", "mike")
Name2 <- c("mary", "mary", "mary", "liz", "jane")
restructuredSurveyData <- data.frame(id = id, Name1 = Name1, Name2 = Name2)
用 `replace` 将 `''` 替换为 `NA`,然后按行 `apply` `na.omit`。
# Turn blank cells into NA so na.omit() can drop them row by row.
blank_as_na <- replace(surveyData[-1], surveyData[-1] == "", NA)
# apply() returns one column per respondent, so transpose to get one row each.
picked <- t(apply(blank_as_na, 1, na.omit))
colnames(picked) <- paste0("name_", 1:2)
# Re-attach the id column in front of the picks.
cbind(surveyData[1], picked)
# id name_1 name_2
# 1 1 tim mary
# 2 2 mike mary
# 3 3 tim mary
# 4 4 jane liz
# 5 5 mike jane
习惯了新式管道写法的读者可能更喜欢这个版本:
# Same approach as the nested call, written as a native-pipe chain.
answers <- surveyData[-1]
answers[answers == ""] <- NA  # blank cells become NA so na.omit() drops them
answers |>
  apply(1, na.omit) |>
  t() |>
  `colnames<-`(paste0("name_", 1:2)) |>
  cbind(surveyData[1]) |>
  # cbind() put the id column last; select the columns back into id-first order.
  subset(select = c("id", "name_1", "name_2"))
# id name_1 name_2
# 1 1 tim mary
# 2 2 mike mary
# 3 3 tim mary
# 4 4 jane liz
# 5 5 mike jane
注意:需要 R >= 4.1(原生管道 `|>` 自该版本起可用)。
另一种可能的解决方案,基于 `tidyverse`:
library(tidyverse)
# Reshape to one row per (respondent, candidate name), keep only the picks,
# number the picks within each respondent, then spread them back out to one
# column per pick.
surveyData %>%
  pivot_longer(-id) %>%
  filter(value != "") %>%
  group_by(id) %>%
  # Number picks per respondent rather than alternating over the whole table,
  # so this works for any number of picks per person, not only exactly two.
  mutate(nam = str_c("names", row_number())) %>%
  ungroup() %>%
  # Name every argument: passing id_cols by position is deprecated in recent
  # tidyr, and without id_cols the leftover `name` column would act as an id.
  pivot_wider(id_cols = id, names_from = nam, values_from = value)
#> # A tibble: 5 × 3
#> id names1 names2
#> <int> <chr> <chr>
#> 1 1 tim mary
#> 2 2 mike mary
#> 3 3 tim mary
#> 4 4 jane liz
#> 5 5 mike jane
或使用 `purrr::pmap_df`:
library(tidyverse)
# Collapse each respondent's non-blank picks into one comma-separated string,
# split that string into one column per pick, then re-attach the ids.
collapse_picks <- function(...) {
  row_values <- c(...)
  set_names(str_c(row_values[row_values != ""], collapse = ","), "names")
}
surveyData[-1] %>%
  pmap_df(collapse_picks) %>%
  separate(names, into = str_c("names", 1:2), sep = ",") %>%
  bind_cols(select(surveyData, id), .)
#> id names1 names2
#> 1 1 tim mary
#> 2 2 mike mary
#> 3 3 tim mary
#> 4 4 jane liz
#> 5 5 mike jane
我想将一些“多重响应”调查数据从二元类别重组为名义类别。
该调查请受访者从一份包含 50 个名字的名单中选出与自己互动最多的十个人。返回的数据有 50 列,每个名字一列:被选中的名字所在的单元格填有该名字,未被选中的则为空白。我想将这五十列转换为十列(name1 到 name10)。
下面是一个简化的示例:共有 5 个名字和 5 位受访者,每位受访者必须选择两个名字。
# Toy input: five respondents (rows) and five candidate names (columns).
# A cell holds the name itself when the respondent picked it, "" otherwise.
id <- seq_len(5)
mike <- c("", "mike", "", "", "mike")
tim <- c("tim", "", "tim", "", "")
mary <- c("mary", "mary", "mary", "", "")
jane <- c("", "", "", "jane", "jane")
liz <- c("", "", "", "liz", "")
surveyData <- data.frame(
  id = id, mike = mike, tim = tim, mary = mary, jane = jane, liz = liz
)
# Desired result: one row per respondent, one column per pick.
Name1 <- c("tim", "mike", "tim", "jane", "mike")
Name2 <- c("mary", "mary", "mary", "liz", "jane")
restructuredSurveyData <- data.frame(id = id, Name1 = Name1, Name2 = Name2)
用 `replace` 将 `''` 替换为 `NA`,然后按行 `apply` `na.omit`。
# Turn blank cells into NA so na.omit() can drop them row by row.
blank_as_na <- replace(surveyData[-1], surveyData[-1] == "", NA)
# apply() returns one column per respondent, so transpose to get one row each.
picked <- t(apply(blank_as_na, 1, na.omit))
colnames(picked) <- paste0("name_", 1:2)
# Re-attach the id column in front of the picks.
cbind(surveyData[1], picked)
# id name_1 name_2
# 1 1 tim mary
# 2 2 mike mary
# 3 3 tim mary
# 4 4 jane liz
# 5 5 mike jane
习惯了新式管道写法的读者可能更喜欢这个版本:
# Same approach as the nested call, written as a native-pipe chain.
answers <- surveyData[-1]
answers[answers == ""] <- NA  # blank cells become NA so na.omit() drops them
answers |>
  apply(1, na.omit) |>
  t() |>
  `colnames<-`(paste0("name_", 1:2)) |>
  cbind(surveyData[1]) |>
  # cbind() put the id column last; select the columns back into id-first order.
  subset(select = c("id", "name_1", "name_2"))
# id name_1 name_2
# 1 1 tim mary
# 2 2 mike mary
# 3 3 tim mary
# 4 4 jane liz
# 5 5 mike jane
注意:需要 R >= 4.1(原生管道 `|>` 自该版本起可用)。
另一种可能的解决方案,基于 `tidyverse`:
library(tidyverse)
# Reshape to one row per (respondent, candidate name), keep only the picks,
# number the picks within each respondent, then spread them back out to one
# column per pick.
surveyData %>%
  pivot_longer(-id) %>%
  filter(value != "") %>%
  group_by(id) %>%
  # Number picks per respondent rather than alternating over the whole table,
  # so this works for any number of picks per person, not only exactly two.
  mutate(nam = str_c("names", row_number())) %>%
  ungroup() %>%
  # Name every argument: passing id_cols by position is deprecated in recent
  # tidyr, and without id_cols the leftover `name` column would act as an id.
  pivot_wider(id_cols = id, names_from = nam, values_from = value)
#> # A tibble: 5 × 3
#> id names1 names2
#> <int> <chr> <chr>
#> 1 1 tim mary
#> 2 2 mike mary
#> 3 3 tim mary
#> 4 4 jane liz
#> 5 5 mike jane
或使用 `purrr::pmap_df`:
library(tidyverse)
# Collapse each respondent's non-blank picks into one comma-separated string,
# split that string into one column per pick, then re-attach the ids.
collapse_picks <- function(...) {
  row_values <- c(...)
  set_names(str_c(row_values[row_values != ""], collapse = ","), "names")
}
surveyData[-1] %>%
  pmap_df(collapse_picks) %>%
  separate(names, into = str_c("names", 1:2), sep = ",") %>%
  bind_cols(select(surveyData, id), .)
#> id names1 names2
#> 1 1 tim mary
#> 2 2 mike mary
#> 3 3 tim mary
#> 4 4 jane liz
#> 5 5 mike jane