以编程方式重新排序数据框而不是对其进行子集化
Reorder dataframe programmatically instead of subseting it
我正在尝试将数据帧转换为特定格式,以便将其从 R 中导出并在 AMPL 中使用。
我的初始数据框如下
test <- structure(list(from = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1),
to = c(1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3), beta = c(0.0214674078064637,
0.0205966237172006, 0.0197611613089226, 0.0214674078064637,
0.0205966237172006, 0.0197611613089226, 0.0214674078064637,
0.0205966237172006, 0.0197611613089226, 0.0214674078064637,
0.0205966237172006, 0.0197611613089226), Time = c(0L, 0L,
0L, 1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L)), class = "data.frame", row.names = c(NA,
-12L), .Names = c("from", "to", "beta", "Time"))
这是测试data.frame
from to beta Time
1 1 1 0.02146741 0
2 1 2 0.02059662 0
3 1 3 0.01976116 0
4 1 1 0.02146741 1
5 1 2 0.02059662 1
6 1 3 0.01976116 1
7 1 1 0.02146741 2
8 1 2 0.02059662 2
9 1 3 0.01976116 2
10 1 1 0.02146741 3
11 1 2 0.02059662 3
12 1 3 0.01976116 3
我想要的最终结果是这个:
V T1 0 T2 1 T3 2 T4 3 line
1 [1,1,*] 0 0.02146741 1 0.02146741 2 0.02146741 3 0.02146741 \n
2 [1,2,*] 0 0.02059662 1 0.02059662 2 0.02059662 3 0.02059662 \n
3 [1,3,*] 0 0.01976116 1 0.01976116 2 0.01976116 3 0.01976116 \n
我现在的做法是这样的:
Betas <- unite_(test, col = "V", sep = ",", from = c("from", "to"))
Betas <- spread(Betas, key = Time, value = beta)
Betas$V <- paste("[", Betas$V, ",*]", sep = "")
Betas$T1 <- 0
Betas$T2 <- 1
Betas$T3 <- 2
Betas$T4 <- 3
Betas <- Betas[,c(1,6,2,7,3,8,4,9,5)]
Betas$line <- "\n"
我喜欢前 3 行代码的过程
Betas <- unite_(test, col = "V", sep = ",", from = c("from", "to"))
Betas <- spread(Betas, key = Time, value = beta)
Betas$V <- paste("[", Betas$V, ",*]", sep = "")
但我想替换下面的行,原因是时间列可以重复 X 次。现在时间总是从 0 到 X,我怎么能以编程方式知道 X 的值呢?
Betas$T1 <- 0
Betas$T2 <- 1
Betas$T3 <- 2
Betas$T4 <- 3
Betas <- Betas[,c(1,6,2,7,3,8,4,9,5)]
Betas$line <- "\n"
基础 R 解决方案
temp = split(test, test$Time)
do.call(cbind, lapply(1:length(temp), function(i){
if (i == 1){
setNames(data.frame(paste(temp[[i]][["from"]], ",", temp[[i]][["to"]], ",*", sep = ""), temp[[i]]["Time"], temp[[i]]["beta"]),
c("V", paste("T", i, sep = ""), i-1))
} else if (i == length(temp)){
setNames(data.frame(temp[[i]]["Time"], temp[[i]]["beta"], rep("\n", NROW(temp[[i]]))),
c(paste("T", i, sep = ""), i-1, "line"))
} else {
setNames(data.frame(temp[[i]]["Time"], temp[[i]]["beta"]),
c(paste("T", i, sep = ""), i-1))
}
}))
# V T1 0 T2 1 T3 2 T4 3 line
#1 1,1,* 0 0.02146741 1 0.02146741 2 0.02146741 3 0.02146741 \n
#2 1,2,* 0 0.02059662 1 0.02059662 2 0.02059662 3 0.02059662 \n
#3 1,3,* 0 0.01976116 1 0.01976116 2 0.01976116 3 0.01976116 \n
来自 tidyverse
的解决方案。 Betas2
是最终输出。
# Load package
library(tidyverse)
# Create example data frame
Betas <- unite_(test, col = "V", sep = ",", from = c("from", "to"))
Betas <- spread(Betas, key = Time, value = beta)
Betas$V <- paste("[", Betas$V, ",*]", sep = "")
# A function to split the data frame
split_df <- function(Begin, End, dt){
dt2 <- dt %>% select(Begin, End)
return(dt2)
}
# A function to Add Time and values
add_time <- function(dt) {
# Extract column names and value
Colname <- colnames(dt)
Value <- as.numeric(Colname[2])
Value2 <- Value + 1
Value <- enquo(Value)
Value2 <- enquo(Value2)
Col <- paste0("T", quo_name(Value2))
# Add column based on the Colname
dt2 <- dt %>%
mutate(!!Col := !!Value) %>%
select(1, 3, 2)
return(dt2)
}
# Apply functions
dt_list <- map2(1, 2:ncol(Betas), .f = split_df, dt = Betas)
dt_list2 <- map(dt_list, .f = add_time)
# Merge all data frames
Betas2 <- reduce(dt_list2, left_join, by = "V") %>%
# Create the line column
mutate(line = "\n")
这是我的看法:
library(dplyr)
NR <- 3
df <- as.data.frame(cbind(paste0("[1,", 1:NR , ",*]"),
matrix(test$beta,nrow=NR ),
matrix(test$Time,nrow=NR ),
rep("\n",NR ))) %>%
select(V1,V6,V2,V7,V3,V8,V4,V9,V5,V10)
newnames <- c("V","T1","0","T2","1","T3","2","T4","3","line")
colnames(df) <- newnames
我正在尝试将数据帧转换为特定格式,以便将其从 R 中导出并在 AMPL 中使用。
我的初始数据框如下
test <- structure(list(from = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1),
to = c(1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3), beta = c(0.0214674078064637,
0.0205966237172006, 0.0197611613089226, 0.0214674078064637,
0.0205966237172006, 0.0197611613089226, 0.0214674078064637,
0.0205966237172006, 0.0197611613089226, 0.0214674078064637,
0.0205966237172006, 0.0197611613089226), Time = c(0L, 0L,
0L, 1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L)), class = "data.frame", row.names = c(NA,
-12L), .Names = c("from", "to", "beta", "Time"))
这是测试data.frame
from to beta Time
1 1 1 0.02146741 0
2 1 2 0.02059662 0
3 1 3 0.01976116 0
4 1 1 0.02146741 1
5 1 2 0.02059662 1
6 1 3 0.01976116 1
7 1 1 0.02146741 2
8 1 2 0.02059662 2
9 1 3 0.01976116 2
10 1 1 0.02146741 3
11 1 2 0.02059662 3
12 1 3 0.01976116 3
我想要的最终结果是这个:
V T1 0 T2 1 T3 2 T4 3 line
1 [1,1,*] 0 0.02146741 1 0.02146741 2 0.02146741 3 0.02146741 \n
2 [1,2,*] 0 0.02059662 1 0.02059662 2 0.02059662 3 0.02059662 \n
3 [1,3,*] 0 0.01976116 1 0.01976116 2 0.01976116 3 0.01976116 \n
我现在的做法是这样的:
Betas <- unite_(test, col = "V", sep = ",", from = c("from", "to"))
Betas <- spread(Betas, key = Time, value = beta)
Betas$V <- paste("[", Betas$V, ",*]", sep = "")
Betas$T1 <- 0
Betas$T2 <- 1
Betas$T3 <- 2
Betas$T4 <- 3
Betas <- Betas[,c(1,6,2,7,3,8,4,9,5)]
Betas$line <- "\n"
我喜欢前 3 行代码的过程
Betas <- unite_(test, col = "V", sep = ",", from = c("from", "to"))
Betas <- spread(Betas, key = Time, value = beta)
Betas$V <- paste("[", Betas$V, ",*]", sep = "")
但我想替换下面的行,原因是时间列可以重复 X 次。现在时间总是从 0 到 X,我怎么能以编程方式知道 X 的值呢?
Betas$T1 <- 0
Betas$T2 <- 1
Betas$T3 <- 2
Betas$T4 <- 3
Betas <- Betas[,c(1,6,2,7,3,8,4,9,5)]
Betas$line <- "\n"
基础 R 解决方案
temp = split(test, test$Time)
do.call(cbind, lapply(1:length(temp), function(i){
if (i == 1){
setNames(data.frame(paste(temp[[i]][["from"]], ",", temp[[i]][["to"]], ",*", sep = ""), temp[[i]]["Time"], temp[[i]]["beta"]),
c("V", paste("T", i, sep = ""), i-1))
} else if (i == length(temp)){
setNames(data.frame(temp[[i]]["Time"], temp[[i]]["beta"], rep("\n", NROW(temp[[i]]))),
c(paste("T", i, sep = ""), i-1, "line"))
} else {
setNames(data.frame(temp[[i]]["Time"], temp[[i]]["beta"]),
c(paste("T", i, sep = ""), i-1))
}
}))
# V T1 0 T2 1 T3 2 T4 3 line
#1 1,1,* 0 0.02146741 1 0.02146741 2 0.02146741 3 0.02146741 \n
#2 1,2,* 0 0.02059662 1 0.02059662 2 0.02059662 3 0.02059662 \n
#3 1,3,* 0 0.01976116 1 0.01976116 2 0.01976116 3 0.01976116 \n
来自 tidyverse
的解决方案。 Betas2
是最终输出。
# Load package
library(tidyverse)
# Create example data frame
Betas <- unite_(test, col = "V", sep = ",", from = c("from", "to"))
Betas <- spread(Betas, key = Time, value = beta)
Betas$V <- paste("[", Betas$V, ",*]", sep = "")
# A function to split the data frame
split_df <- function(Begin, End, dt){
dt2 <- dt %>% select(Begin, End)
return(dt2)
}
# A function to Add Time and values
add_time <- function(dt) {
# Extract column names and value
Colname <- colnames(dt)
Value <- as.numeric(Colname[2])
Value2 <- Value + 1
Value <- enquo(Value)
Value2 <- enquo(Value2)
Col <- paste0("T", quo_name(Value2))
# Add column based on the Colname
dt2 <- dt %>%
mutate(!!Col := !!Value) %>%
select(1, 3, 2)
return(dt2)
}
# Apply functions
dt_list <- map2(1, 2:ncol(Betas), .f = split_df, dt = Betas)
dt_list2 <- map(dt_list, .f = add_time)
# Merge all data frames
Betas2 <- reduce(dt_list2, left_join, by = "V") %>%
# Create the line column
mutate(line = "\n")
这是我的看法:
library(dplyr)
NR <- 3
df <- as.data.frame(cbind(paste0("[1,", 1:NR , ",*]"),
matrix(test$beta,nrow=NR ),
matrix(test$Time,nrow=NR ),
rep("\n",NR ))) %>%
select(V1,V6,V2,V7,V3,V8,V4,V9,V5,V10)
newnames <- c("V","T1","0","T2","1","T3","2","T4","3","line")
colnames(df) <- newnames