将 4 列融合(melt)为 3 列,同时合并两个变量列
Melt 4 columns into 3 while combining the two variable columns
有如下示例 data.frame:
# Build a ten-day example data set: two randomly sampled series (A, B) and
# one constant limit column for each of them.
Date <- seq(from = as.Date("2016/9/1"), to = as.Date("2016/9/10"), by = "days")
A <- sample(0:200, size = 10)
B <- sample(0:400, size = 10)
A_limit <- rep(200, times = 10)
B_limit <- rep(400, times = 10)
data_sample <- data.frame(
  Date = Date,
  A = A,
  B = B,
  A_limit = A_limit,
  B_limit = B_limit
)
> Date A B A_limit B_limit
1 2016-09-01 175 270 200 400
2 2016-09-02 160 50 200 400
3 2016-09-03 173 25 200 400
...
我想将其重塑为以下形式:
> Date limit variable value
1 2016-09-01 200 A 175
2 2016-09-02 200 A 160
3 2016-09-03 200 A 173
...
31 2016-09-01 400 B 270
32 2016-09-02 400 B 50
33 2016-09-03 400 B 25
....
我设法完成了它,但在我看来我的方法太复杂了:
library("reshape2")

# Step 1: melt the two limit columns into long form, keeping Date, A and B as
# identifier columns. melt() is called directly (instead of through %>%)
# because reshape2 does not provide the pipe operator, and the argument is
# spelled out as id.vars rather than relying on partial matching of `id`.
data_sample_2 <- melt(data_sample, id.vars = c("Date", "A", "B"))

# Step 2: relabel "A_limit"/"B_limit" with the name of the series each limit
# belongs to. The new labels must exist as factor levels before assignment.
levels(data_sample_2$variable) <- c(levels(data_sample_2$variable), "A", "B")
data_sample_2$variable[data_sample_2$variable == "A_limit"] <- "A"
data_sample_2$variable[data_sample_2$variable == "B_limit"] <- "B"

# Step 3: rename the melted columns so that the default output names of the
# second melt() ("variable" and "value") are free again.
names(data_sample_2)[names(data_sample_2) == "value"] <- "limit"
names(data_sample_2)[names(data_sample_2) == "variable"] <- "variable_1"

# Step 4: melt the measured series A and B.
data_sample_3 <- melt(data_sample_2, id.vars = c("Date", "variable_1", "limit"))
# Drop the unused "A_limit"/"B_limit" levels so that variable_1 and variable
# share the same level set and can be compared with ==.
data_sample_3 <- droplevels(data_sample_3)

# Step 5: keep only the rows where the limit label matches the series label,
# then remove the helper column.
data_sample_4 <- data_sample_3[data_sample_3$variable_1 == data_sample_3$variable, ]
data_sample_4$variable_1 <- NULL
我刚开始使用 reshape2 包,所以请告诉我任何可以改进此 data.frame 转换的方法(无论它看起来多么显而易见)。
这是你想要的吗?
# Melt only A and B; both limit columns are carried along as identifiers.
data_sample_2 <- melt(
  data_sample,
  id.vars = c("Date", "A_limit", "B_limit")
)
# Choose, row by row, the limit column that matches the melted series name.
data_sample_2$limit <- with(
  data_sample_2,
  ifelse(variable == "A", A_limit, B_limit)
)
# Display the columns in the requested order.
data_sample_2[, c("Date", "limit", "variable", "value")]
使用基础 R,只需对各列调用 stack() 进行堆叠即可做到这一点,即:
# Stack the limit columns and the measured columns separately, then bind the
# pieces together. stack() returns the stacked numbers in `values` and the
# originating column name in `ind`; Date (10 rows) is recycled by data.frame()
# to match the 20 stacked rows.
df1 <- data.frame(
  Date = data_sample$Date,
  limit = stack(data_sample[c("A_limit", "B_limit")])[["values"]],
  variable = stack(data_sample[c("A", "B")])[["ind"]],
  value = stack(data_sample[c("A", "B")])[["values"]],
  stringsAsFactors = FALSE
)
head(df1)
# Date limit variable value
#1 2016-09-01 200 A 67
#2 2016-09-02 200 A 100
#3 2016-09-03 200 A 166
#4 2016-09-04 200 A 116
#5 2016-09-05 200 A 89
#6 2016-09-06 200 A 138
tail(df1)
# Date limit variable value
#15 2016-09-05 400 B 208
#16 2016-09-06 400 B 387
#17 2016-09-07 400 B 125
#18 2016-09-08 400 B 116
#19 2016-09-09 400 B 120
#20 2016-09-10 400 B 241
由于您在示例中使用了 reshape2,您可能会对如何在(较新的)tidyverse 体系中处理这个问题感兴趣。
我先重复一遍您的数据生成代码:
# Build a ten-day example data set: two randomly sampled series (A, B) and
# one constant limit column for each of them.
Date <- seq(from = as.Date("2016/9/1"), to = as.Date("2016/9/10"), by = "days")
A <- sample(0:200, size = 10)
B <- sample(0:400, size = 10)
A_limit <- rep(200, times = 10)
B_limit <- rep(400, times = 10)
data_sample <- data.frame(
  Date = Date,
  A = A,
  B = B,
  A_limit = A_limit,
  B_limit = B_limit
)
# Preview
head(data_sample)
#> Date A B A_limit B_limit
#> 1 2016-09-01 39 53 200 400
#> 2 2016-09-02 96 193 200 400
#> 3 2016-09-03 143 75 200 400
#> 4 2016-09-04 60 241 200 400
#> 5 2016-09-05 126 225 200 400
#> 6 2016-09-06 184 349 200 400
现在我们可以使用 dplyr 和 tidyr(它们接替了 reshape2 的大部分职责)以“清晰”的方式操作数据。
library(dplyr)
library(tidyr)

# Stack A and B into key/value pairs, attach the limit that matches each key,
# and keep only the four requested columns in the requested order.
# NOTE: tidyr now marks gather() as superseded in favour of pivot_longer();
# it is kept here so that the printed output below stays valid.
data_clean <- data_sample %>%
  gather(key = "variable", value = "value", A, B) %>%
  mutate(limit = if_else(variable == "A", A_limit, B_limit)) %>%
  select(Date, limit, variable, value)
# Inspect results
head(data_clean)
#> Date limit variable value
#> 1 2016-09-01 200 A 39
#> 2 2016-09-02 200 A 96
#> 3 2016-09-03 200 A 143
#> 4 2016-09-04 200 A 60
#> 5 2016-09-05 200 A 126
#> 6 2016-09-06 200 A 184
有如下示例 data.frame:
# Build a ten-day example data set: two randomly sampled series (A, B) and
# one constant limit column for each of them.
Date <- seq(from = as.Date("2016/9/1"), to = as.Date("2016/9/10"), by = "days")
A <- sample(0:200, size = 10)
B <- sample(0:400, size = 10)
A_limit <- rep(200, times = 10)
B_limit <- rep(400, times = 10)
data_sample <- data.frame(
  Date = Date,
  A = A,
  B = B,
  A_limit = A_limit,
  B_limit = B_limit
)
> Date A B A_limit B_limit
1 2016-09-01 175 270 200 400
2 2016-09-02 160 50 200 400
3 2016-09-03 173 25 200 400
...
我想将其重塑为以下形式:
> Date limit variable value
1 2016-09-01 200 A 175
2 2016-09-02 200 A 160
3 2016-09-03 200 A 173
...
31 2016-09-01 400 B 270
32 2016-09-02 400 B 50
33 2016-09-03 400 B 25
....
我设法完成了它,但在我看来我的方法太复杂了:
library("reshape2")

# Step 1: melt the two limit columns into long form, keeping Date, A and B as
# identifier columns. melt() is called directly (instead of through %>%)
# because reshape2 does not provide the pipe operator, and the argument is
# spelled out as id.vars rather than relying on partial matching of `id`.
data_sample_2 <- melt(data_sample, id.vars = c("Date", "A", "B"))

# Step 2: relabel "A_limit"/"B_limit" with the name of the series each limit
# belongs to. The new labels must exist as factor levels before assignment.
levels(data_sample_2$variable) <- c(levels(data_sample_2$variable), "A", "B")
data_sample_2$variable[data_sample_2$variable == "A_limit"] <- "A"
data_sample_2$variable[data_sample_2$variable == "B_limit"] <- "B"

# Step 3: rename the melted columns so that the default output names of the
# second melt() ("variable" and "value") are free again.
names(data_sample_2)[names(data_sample_2) == "value"] <- "limit"
names(data_sample_2)[names(data_sample_2) == "variable"] <- "variable_1"

# Step 4: melt the measured series A and B.
data_sample_3 <- melt(data_sample_2, id.vars = c("Date", "variable_1", "limit"))
# Drop the unused "A_limit"/"B_limit" levels so that variable_1 and variable
# share the same level set and can be compared with ==.
data_sample_3 <- droplevels(data_sample_3)

# Step 5: keep only the rows where the limit label matches the series label,
# then remove the helper column.
data_sample_4 <- data_sample_3[data_sample_3$variable_1 == data_sample_3$variable, ]
data_sample_4$variable_1 <- NULL
我刚开始使用 reshape2 包,所以请告诉我任何可以改进此 data.frame 转换的方法(无论它看起来多么显而易见)。
这是你想要的吗?
# Melt only A and B; both limit columns are carried along as identifiers.
data_sample_2 <- melt(
  data_sample,
  id.vars = c("Date", "A_limit", "B_limit")
)
# Choose, row by row, the limit column that matches the melted series name.
data_sample_2$limit <- with(
  data_sample_2,
  ifelse(variable == "A", A_limit, B_limit)
)
# Display the columns in the requested order.
data_sample_2[, c("Date", "limit", "variable", "value")]
使用基础 R,只需对各列调用 stack() 进行堆叠即可做到这一点,即:
# Stack the limit columns and the measured columns separately, then bind the
# pieces together. stack() returns the stacked numbers in `values` and the
# originating column name in `ind`; Date (10 rows) is recycled by data.frame()
# to match the 20 stacked rows.
df1 <- data.frame(
  Date = data_sample$Date,
  limit = stack(data_sample[c("A_limit", "B_limit")])[["values"]],
  variable = stack(data_sample[c("A", "B")])[["ind"]],
  value = stack(data_sample[c("A", "B")])[["values"]],
  stringsAsFactors = FALSE
)
head(df1)
# Date limit variable value
#1 2016-09-01 200 A 67
#2 2016-09-02 200 A 100
#3 2016-09-03 200 A 166
#4 2016-09-04 200 A 116
#5 2016-09-05 200 A 89
#6 2016-09-06 200 A 138
tail(df1)
# Date limit variable value
#15 2016-09-05 400 B 208
#16 2016-09-06 400 B 387
#17 2016-09-07 400 B 125
#18 2016-09-08 400 B 116
#19 2016-09-09 400 B 120
#20 2016-09-10 400 B 241
由于您在示例中使用了 reshape2,您可能会对如何在(较新的)tidyverse 体系中处理这个问题感兴趣。
我先重复一遍您的数据生成代码:
# Build a ten-day example data set: two randomly sampled series (A, B) and
# one constant limit column for each of them.
Date <- seq(from = as.Date("2016/9/1"), to = as.Date("2016/9/10"), by = "days")
A <- sample(0:200, size = 10)
B <- sample(0:400, size = 10)
A_limit <- rep(200, times = 10)
B_limit <- rep(400, times = 10)
data_sample <- data.frame(
  Date = Date,
  A = A,
  B = B,
  A_limit = A_limit,
  B_limit = B_limit
)
# Preview
head(data_sample)
#> Date A B A_limit B_limit
#> 1 2016-09-01 39 53 200 400
#> 2 2016-09-02 96 193 200 400
#> 3 2016-09-03 143 75 200 400
#> 4 2016-09-04 60 241 200 400
#> 5 2016-09-05 126 225 200 400
#> 6 2016-09-06 184 349 200 400
现在我们可以使用 dplyr 和 tidyr(它们接替了 reshape2 的大部分职责)以“清晰”的方式操作数据。
library(dplyr)
library(tidyr)

# Stack A and B into key/value pairs, attach the limit that matches each key,
# and keep only the four requested columns in the requested order.
# NOTE: tidyr now marks gather() as superseded in favour of pivot_longer();
# it is kept here so that the printed output below stays valid.
data_clean <- data_sample %>%
  gather(key = "variable", value = "value", A, B) %>%
  mutate(limit = if_else(variable == "A", A_limit, B_limit)) %>%
  select(Date, limit, variable, value)
# Inspect results
head(data_clean)
#> Date limit variable value
#> 1 2016-09-01 200 A 39
#> 2 2016-09-02 200 A 96
#> 3 2016-09-03 200 A 143
#> 4 2016-09-04 200 A 60
#> 5 2016-09-05 200 A 126
#> 6 2016-09-06 200 A 184