在没有定量数据的情况下将数据重塑为宽格式
Reshaping data to wide without quantitative data
我想我大体上了解数据重塑(reshaping)。不过,我有一份需要重塑为宽格式的数据,但我并不想展示由另一个变量索引的分数或定量数据。
相反,我想根据某个变量的取值,把这一个变量拆分成五个变量,而不需要用它去索引其他变量。也就是说,该变量的每个取值各自形成一个新变量(共五个),并且这些新变量中的值应与各自的变量名相同。
我已经包括了前后对比的例子。
数据:
> dput(ansscales3)
structure(list(ATID = c(33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
33, 33, 33), AnswerTypeDesc = c("VS|S|SD|VD", "VS|S|SD|VD", "VS|S|SD|VD",
"VS|S|SD|VD", "VS|S|SD|VD", "VS|S|SD|VD", "VS|S|SD|VD", "VS|S|SD|VD",
"VS|S|SD|VD", "VS|S|SD|VD", "VS|S|SD|VD", "VS|S|SD|VD", "VS|S|SD|VD"
), AValue = c(4, 3, 2, 1, 4, 3, 2, 1, 2, 1, 4, 3, 4), ScaleValue = c(1,
2, 3, 4, 1, 2, 3, 4, 3, 4, 1, 2, 1), ADesc = c("Very Satisfied",
"Satisfied", "Somewhat Dissatisfied", "Very Dissatisfied", "Very Satisfied",
"Satisfied", "Somewhat Dissatisfied", "Very Dissatisfied", "Somewhat Dissatisfied",
"Very Dissatisfied", "Very Satisfied", "Satisfied", "Very Satisfied"
), AOrder = c(1, 2, 3, 4, 1, 2, 3, 4, 3, 4, 1, 2, 1), StatGroup = c("AdjN",
"AdjN", "AdjN", "AdjN", "N", "N", "N", "N", "PctNeg", "PctNeg",
"PctPos", "PctPos", "TopBox"), Cycles = c(11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11)), .Names = c("ATID", "AnswerTypeDesc",
"AValue", "ScaleValue", "ADesc", "AOrder", "StatGroup", "Cycles"
), row.names = c(NA, -13L), class = "data.frame")
重塑为:
> dput(atids1)
structure(list(ATID = c(33, 33, 33, 33), AnswerTypeDesc = structure(c(1L,
1L, 1L, 1L), .Label = "VS|S|SD|VD", class = "factor"), AValue = c(4,
3, 2, 1), ScaleValue = c(1, 2, 3, 4), ADesc = c("Very Satisfied",
"Satisfied", "Somewhat Dissatisfied", "Very Dissatisfied"), AOrder = c(1,
2, 3, 4), Cycles = c(11, 11, 11, 11), N = c("N", "N", "N", "N"
), AdjN = c("AdjN", "AdjN", "AdjN", "AdjN"), PctPos = c("PctPos",
"PctPos", "", ""), PctNeg = c("", "", "PctNeg", "PctNeg"), TopBox = c("TopBox",
"", "", "")), .Names = c("ATID", "AnswerTypeDesc", "AValue",
"ScaleValue", "ADesc", "AOrder", "Cycles", "N", "AdjN", "PctPos",
"PctNeg", "TopBox"), row.names = c(NA, -4L), class = "data.frame")
我确定这很简单,但遗憾的是,我没能用 reshape 系列方法解决这个问题。
这是一个使用 reshape2 的解决方案(注意:我加载了 dplyr 只是为了使用 %>% 管道运算符,这纯属个人的代码风格偏好):
# Cast to wide format with reshape2: every distinct StatGroup value becomes its
# own column, and each cell holds that same label ("" where the combination is
# absent).
library(reshape2)
library(dplyr)  # loaded only to provide the %>% pipe operator

# `...` on the left-hand side of the formula stands for all remaining columns,
# which together identify the output rows. value.var = "StatGroup" fills the
# new columns with the group label itself; fill = "" blanks the missing cells.
# The input is the question's data frame, `ansscales3`.
ansscales3 %>% dcast(... ~ StatGroup, value.var = "StatGroup", fill = "")
ATID AnswerTypeDesc AValue ScaleValue ADesc AOrder Cycles AdjN N PctNeg PctPos TopBox
1 33 VS|S|SD|VD 1 4 Very Dissatisfied 4 11 AdjN N PctNeg
2 33 VS|S|SD|VD 2 3 Somewhat Dissatisfied 3 11 AdjN N PctNeg
3 33 VS|S|SD|VD 3 2 Satisfied 2 11 AdjN N PctPos
4 33 VS|S|SD|VD 4 1 Very Satisfied 1 11 AdjN N PctPos TopBox
另一个使用 tidyr 的解决方案:
# Same reshaping with tidyr: StatGroup is used as both the key (new column
# names) and the value (cell contents); fill = "" blanks missing combinations.
# NOTE: spread() is superseded in current tidyr and pivot_wider() is the
# suggested replacement for new code; spread() is kept here so the snippet
# matches the printed output that follows.
library(tidyr)

# The input is the question's data frame, `ansscales3`.
ansscales3 %>% spread(StatGroup, StatGroup, fill = "")
ATID AnswerTypeDesc AValue ScaleValue ADesc AOrder Cycles AdjN N PctNeg PctPos TopBox
1 33 VS|S|SD|VD 1 4 Very Dissatisfied 4 11 AdjN N PctNeg
2 33 VS|S|SD|VD 2 3 Somewhat Dissatisfied 3 11 AdjN N PctNeg
3 33 VS|S|SD|VD 3 2 Satisfied 2 11 AdjN N PctPos
4 33 VS|S|SD|VD 4 1 Very Satisfied 1 11 AdjN N PctPos TopBox
我想我大体上了解数据重塑(reshaping)。不过,我有一份需要重塑为宽格式的数据,但我并不想展示由另一个变量索引的分数或定量数据。
相反,我想根据某个变量的取值,把这一个变量拆分成五个变量,而不需要用它去索引其他变量。也就是说,该变量的每个取值各自形成一个新变量(共五个),并且这些新变量中的值应与各自的变量名相同。
我已经包括了前后对比的例子。
数据:
> dput(ansscales3)
structure(list(ATID = c(33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
33, 33, 33), AnswerTypeDesc = c("VS|S|SD|VD", "VS|S|SD|VD", "VS|S|SD|VD",
"VS|S|SD|VD", "VS|S|SD|VD", "VS|S|SD|VD", "VS|S|SD|VD", "VS|S|SD|VD",
"VS|S|SD|VD", "VS|S|SD|VD", "VS|S|SD|VD", "VS|S|SD|VD", "VS|S|SD|VD"
), AValue = c(4, 3, 2, 1, 4, 3, 2, 1, 2, 1, 4, 3, 4), ScaleValue = c(1,
2, 3, 4, 1, 2, 3, 4, 3, 4, 1, 2, 1), ADesc = c("Very Satisfied",
"Satisfied", "Somewhat Dissatisfied", "Very Dissatisfied", "Very Satisfied",
"Satisfied", "Somewhat Dissatisfied", "Very Dissatisfied", "Somewhat Dissatisfied",
"Very Dissatisfied", "Very Satisfied", "Satisfied", "Very Satisfied"
), AOrder = c(1, 2, 3, 4, 1, 2, 3, 4, 3, 4, 1, 2, 1), StatGroup = c("AdjN",
"AdjN", "AdjN", "AdjN", "N", "N", "N", "N", "PctNeg", "PctNeg",
"PctPos", "PctPos", "TopBox"), Cycles = c(11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11)), .Names = c("ATID", "AnswerTypeDesc",
"AValue", "ScaleValue", "ADesc", "AOrder", "StatGroup", "Cycles"
), row.names = c(NA, -13L), class = "data.frame")
重塑为:
> dput(atids1)
structure(list(ATID = c(33, 33, 33, 33), AnswerTypeDesc = structure(c(1L,
1L, 1L, 1L), .Label = "VS|S|SD|VD", class = "factor"), AValue = c(4,
3, 2, 1), ScaleValue = c(1, 2, 3, 4), ADesc = c("Very Satisfied",
"Satisfied", "Somewhat Dissatisfied", "Very Dissatisfied"), AOrder = c(1,
2, 3, 4), Cycles = c(11, 11, 11, 11), N = c("N", "N", "N", "N"
), AdjN = c("AdjN", "AdjN", "AdjN", "AdjN"), PctPos = c("PctPos",
"PctPos", "", ""), PctNeg = c("", "", "PctNeg", "PctNeg"), TopBox = c("TopBox",
"", "", "")), .Names = c("ATID", "AnswerTypeDesc", "AValue",
"ScaleValue", "ADesc", "AOrder", "Cycles", "N", "AdjN", "PctPos",
"PctNeg", "TopBox"), row.names = c(NA, -4L), class = "data.frame")
我确定这很简单,但遗憾的是,我没能用 reshape 系列方法解决这个问题。
这是一个使用 reshape2 的解决方案(注意:我加载了 dplyr 只是为了使用 %>% 管道运算符,这纯属个人的代码风格偏好):
# Cast to wide format with reshape2: every distinct StatGroup value becomes its
# own column, and each cell holds that same label ("" where the combination is
# absent).
library(reshape2)
library(dplyr)  # loaded only to provide the %>% pipe operator

# `...` on the left-hand side of the formula stands for all remaining columns,
# which together identify the output rows. value.var = "StatGroup" fills the
# new columns with the group label itself; fill = "" blanks the missing cells.
# The input is the question's data frame, `ansscales3`.
ansscales3 %>% dcast(... ~ StatGroup, value.var = "StatGroup", fill = "")
ATID AnswerTypeDesc AValue ScaleValue ADesc AOrder Cycles AdjN N PctNeg PctPos TopBox
1 33 VS|S|SD|VD 1 4 Very Dissatisfied 4 11 AdjN N PctNeg
2 33 VS|S|SD|VD 2 3 Somewhat Dissatisfied 3 11 AdjN N PctNeg
3 33 VS|S|SD|VD 3 2 Satisfied 2 11 AdjN N PctPos
4 33 VS|S|SD|VD 4 1 Very Satisfied 1 11 AdjN N PctPos TopBox
另一个使用 tidyr 的解决方案:
# Same reshaping with tidyr: StatGroup is used as both the key (new column
# names) and the value (cell contents); fill = "" blanks missing combinations.
# NOTE: spread() is superseded in current tidyr and pivot_wider() is the
# suggested replacement for new code; spread() is kept here so the snippet
# matches the printed output that follows.
library(tidyr)

# The input is the question's data frame, `ansscales3`.
ansscales3 %>% spread(StatGroup, StatGroup, fill = "")
ATID AnswerTypeDesc AValue ScaleValue ADesc AOrder Cycles AdjN N PctNeg PctPos TopBox
1 33 VS|S|SD|VD 1 4 Very Dissatisfied 4 11 AdjN N PctNeg
2 33 VS|S|SD|VD 2 3 Somewhat Dissatisfied 3 11 AdjN N PctNeg
3 33 VS|S|SD|VD 3 2 Satisfied 2 11 AdjN N PctPos
4 33 VS|S|SD|VD 4 1 Very Satisfied 1 11 AdjN N PctPos TopBox