如何用几行代码将字符数组转换为 data.frame?
How can I transform an array of characters with a few lines of code to a data.frame?
我有以下数组
# Sample input: each element is "<start date>--<end date>|<start time>--<end time>".
my_list <- c(
  "Jan-01--Dec-31|00:00--24:00",
  "Jan-01--Jun-30|12:00--18:00",
  "Jul-06--Dec-31|09:00--19:00"
)
导致以下结果的最短代码是什么:
x1 x2 x3
1 Jan-01 Jan-01 Jul-06
2 Dec-31 Jun-30 Dec-31
和
x1 x2 x3
1 00:00 12:00 09:00
2 24:00 18:00 19:00
目前我有(不是很好)代码
# Split each schedule string at the literal "|" so that row 1 of `df` holds
# the date ranges and row 2 the time ranges (one column per input string).
df <- as.data.frame(strsplit(my_list, split = "|", fixed = TRUE),
                    stringsAsFactors = FALSE)
# Split the date ranges at "--": one column per input string, rows = start/end.
date_list <- strsplit(as.character(df[1, ]), split = "--", fixed = TRUE)
date_df <- as.data.frame(date_list, col.names = seq_along(date_list),
                         stringsAsFactors = FALSE)
# Same for the time ranges; the column names are derived from `time_list`
# itself rather than from `date_list`.
time_list <- strsplit(as.character(df[2, ]), split = "--", fixed = TRUE)
time_df <- as.data.frame(time_list, col.names = seq_along(time_list),
                         stringsAsFactors = FALSE)
我现在最好的是
# Keep the part before the literal "|" of every element of `schedule$schedule`.
date_list <- sapply(strsplit(schedule$schedule, split = "|", fixed = T), "[", 1)
# Split at "--", collect the first and second pieces as x1/x2, then transpose
# so that x1 and x2 become the rows.
# NOTE(review): `df1` is not defined in this snippet; presumably `date_list`
# was intended -- confirm.
date_df <- t(data.frame(x1=sapply(strsplit(df1, split = "--", fixed = T), "[", 1),
x2=sapply(strsplit(df1, split = "--", fixed = T), "[", 2),
stringsAsFactors = F))
# and similarly for time_list and time_df.
还有更优雅的吗?
data.table
包中的 tstrsplit
和 stringr
包中的 str_split_fixed
是在拆分字符串向量时获取正确形状数据的非常有用的函数;前者返回转置(transpose)
后的拆分结果,无需使用 apply
函数即可分别提取日期和时间,后者将字符串拆分为指定列数的矩阵:
library(data.table); library(stringr)
# The split pattern is a regex, so the literal "|" must be escaped; inside an
# R string the backslash itself is doubled ("\|" alone is an invalid escape).
# tstrsplit() returns the date parts and the time parts as separate vectors;
# str_split_fixed() splits each at "--" into a two-column matrix, and t()
# turns that into one column per input string.
lapply(tstrsplit(my_list, "\\|"), function(s) t(str_split_fixed(s, "--", 2)))
#[[1]]
# [,1] [,2] [,3]
#[1,] "Jan-01" "Jan-01" "Jul-06"
#[2,] "Dec-31" "Jun-30" "Dec-31"
#[[2]]
# [,1] [,2] [,3]
#[1,] "00:00" "12:00" "09:00"
#[2,] "24:00" "18:00" "19:00"
# Split every string at the literal "|" and then each half at "--". Each
# element yields a list of two vectors, so sapply() returns a 2 x n list-matrix:
# row 1 holds the date pairs and row 2 the time pairs.
my_results <- sapply(strsplit(my_list, "|", fixed = TRUE),
                     function(x) strsplit(x, "--", fixed = TRUE))
# Stack the pairs row-wise and transpose so each input string becomes a column.
# (The object is `my_results`; the earlier spelling `myresults` was undefined.)
my_dates <- t(Reduce("rbind", my_results[1, ]))
my_times <- t(Reduce("rbind", my_results[2, ]))
使用 stringr
的另一种选择:
library(stringr)
# Split at either separator in one pass. "|" is regex alternation, so the
# literal bar is escaped, and the backslash is doubled inside the R string
# ("\|" alone is an invalid escape sequence). t() gives one column per input.
a <- t(str_split_fixed(my_list, "\\||--", 4))
# [,1] [,2] [,3]
#[1,] "Jan-01" "Jan-01" "Jul-06"
#[2,] "Dec-31" "Jun-30" "Dec-31"
#[3,] "00:00" "12:00" "09:00"
#[4,] "24:00" "18:00" "19:00"
要获得最终输出,data.frame(a[1:2,])
和 data.frame(a[3:4,])
更新
# Single-element input: the same approach still yields a 4 x 1 matrix.
my_list <- "Jan-01--Dec-31|00:00--24:00"
# Escape the literal "|" for the regex and double the backslash for the R
# string ("\|" alone is an invalid escape sequence).
a <- t(str_split_fixed(my_list, "\\||--", 4))
# [,1]
#[1,] "Jan-01"
#[2,] "Dec-31"
#[3,] "00:00"
#[4,] "24:00"
data.frame(a[1:2,])
# a.1.2...
#1 Jan-01
#2 Dec-31
data.frame(a[3:4,])
# a.3.4...
#1 00:00
#2 24:00
strsplit
接受可以在一次通过中进行拆分的 greppish 模式。然后可以使用 lapply
(或 sapply
)并以 setNames
结束:
# One regex splits at both "--" and the literal "|". Backslashes are doubled
# because "\-" and "\|" are invalid escapes in an R string. The input is the
# question's `my_list` (`my_vec` was never defined). Elements 1:2 of each
# split are the two dates.
setNames(data.frame(lapply(strsplit(my_list, split = "\\-\\-|\\|"), "[", 1:2)),
         paste0("x", seq_along(my_list)))
# x1 x2 x3
#1 Jan-01 Jan-01 Jul-06
#2 Dec-31 Jun-30 Dec-31
显然可以通过在上面的代码中用 3:4 代替 1:2 来处理时间。
这里有一个base R
选项
# Base R: scan() cuts every string at "|", producing alternating date and time
# pieces; each piece is then split at "--".
lst <- strsplit(
  scan(text = my_list, what = "", sep = "|", quiet = TRUE),
  split = "--"
)
# Odd positions hold the date pairs.
do.call("cbind", lst[c(TRUE, FALSE)])
# [,1] [,2] [,3]
#[1,] "Jan-01" "Jan-01" "Jul-06"
#[2,] "Dec-31" "Jun-30" "Dec-31"
# Even positions hold the time pairs.
do.call("cbind", lst[c(FALSE, TRUE)])
# [,1] [,2] [,3]
#[1,] "00:00" "12:00" "09:00"
#[2,] "24:00" "18:00" "19:00"
或单行base R
选项
# Single expression: split() with the recycled grouping 1:2 separates the
# date pieces (group 1) from the time pieces (group 2); each group is then
# split at "--" and bound column-wise.
lapply(
  X = split(scan(text = my_list, what = "", sep = "|", quiet = TRUE), f = 1:2),
  FUN = function(part) do.call(cbind, strsplit(part, split = "--"))
)
#$`1`
# [,1] [,2] [,3]
#[1,] "Jan-01" "Jan-01" "Jul-06"
#[2,] "Dec-31" "Jun-30" "Dec-31"
#$`2`
# [,1] [,2] [,3]
#[1,] "00:00" "12:00" "09:00"
#[2,] "24:00" "18:00" "19:00"
我有以下数组
# Sample input: each element is "<start date>--<end date>|<start time>--<end time>".
my_list <- c(
  "Jan-01--Dec-31|00:00--24:00",
  "Jan-01--Jun-30|12:00--18:00",
  "Jul-06--Dec-31|09:00--19:00"
)
导致以下结果的最短代码是什么:
x1 x2 x3
1 Jan-01 Jan-01 Jul-06
2 Dec-31 Jun-30 Dec-31
和
x1 x2 x3
1 00:00 12:00 09:00
2 24:00 18:00 19:00
目前我有(不是很好)代码
# Split each schedule string at the literal "|" so that row 1 of `df` holds
# the date ranges and row 2 the time ranges (one column per input string).
df <- as.data.frame(strsplit(my_list, split = "|", fixed = TRUE),
                    stringsAsFactors = FALSE)
# Split the date ranges at "--": one column per input string, rows = start/end.
date_list <- strsplit(as.character(df[1, ]), split = "--", fixed = TRUE)
date_df <- as.data.frame(date_list, col.names = seq_along(date_list),
                         stringsAsFactors = FALSE)
# Same for the time ranges; the column names are derived from `time_list`
# itself rather than from `date_list`.
time_list <- strsplit(as.character(df[2, ]), split = "--", fixed = TRUE)
time_df <- as.data.frame(time_list, col.names = seq_along(time_list),
                         stringsAsFactors = FALSE)
我现在最好的是
# Keep the part before the literal "|" of every element of `schedule$schedule`.
date_list <- sapply(strsplit(schedule$schedule, split = "|", fixed = T), "[", 1)
# Split at "--", collect the first and second pieces as x1/x2, then transpose
# so that x1 and x2 become the rows.
# NOTE(review): `df1` is not defined in this snippet; presumably `date_list`
# was intended -- confirm.
date_df <- t(data.frame(x1=sapply(strsplit(df1, split = "--", fixed = T), "[", 1),
x2=sapply(strsplit(df1, split = "--", fixed = T), "[", 2),
stringsAsFactors = F))
# and similarly for time_list and time_df.
还有更优雅的吗?
data.table
包中的 tstrsplit
和 stringr
包中的 str_split_fixed
是在拆分字符串向量时获取正确形状数据的非常有用的函数;前者返回转置(transpose)
后的拆分结果,无需使用 apply
函数即可分别提取日期和时间,后者将字符串拆分为指定列数的矩阵:
library(data.table); library(stringr)
# The split pattern is a regex, so the literal "|" must be escaped; inside an
# R string the backslash itself is doubled ("\|" alone is an invalid escape).
# tstrsplit() returns the date parts and the time parts as separate vectors;
# str_split_fixed() splits each at "--" into a two-column matrix, and t()
# turns that into one column per input string.
lapply(tstrsplit(my_list, "\\|"), function(s) t(str_split_fixed(s, "--", 2)))
#[[1]]
# [,1] [,2] [,3]
#[1,] "Jan-01" "Jan-01" "Jul-06"
#[2,] "Dec-31" "Jun-30" "Dec-31"
#[[2]]
# [,1] [,2] [,3]
#[1,] "00:00" "12:00" "09:00"
#[2,] "24:00" "18:00" "19:00"
# Split every string at the literal "|" and then each half at "--". Each
# element yields a list of two vectors, so sapply() returns a 2 x n list-matrix:
# row 1 holds the date pairs and row 2 the time pairs.
my_results <- sapply(strsplit(my_list, "|", fixed = TRUE),
                     function(x) strsplit(x, "--", fixed = TRUE))
# Stack the pairs row-wise and transpose so each input string becomes a column.
# (The object is `my_results`; the earlier spelling `myresults` was undefined.)
my_dates <- t(Reduce("rbind", my_results[1, ]))
my_times <- t(Reduce("rbind", my_results[2, ]))
使用 stringr
的另一种选择:
library(stringr)
# Split at either separator in one pass. "|" is regex alternation, so the
# literal bar is escaped, and the backslash is doubled inside the R string
# ("\|" alone is an invalid escape sequence). t() gives one column per input.
a <- t(str_split_fixed(my_list, "\\||--", 4))
# [,1] [,2] [,3]
#[1,] "Jan-01" "Jan-01" "Jul-06"
#[2,] "Dec-31" "Jun-30" "Dec-31"
#[3,] "00:00" "12:00" "09:00"
#[4,] "24:00" "18:00" "19:00"
要获得最终输出,data.frame(a[1:2,])
和 data.frame(a[3:4,])
更新
# Single-element input: the same approach still yields a 4 x 1 matrix.
my_list <- "Jan-01--Dec-31|00:00--24:00"
# Escape the literal "|" for the regex and double the backslash for the R
# string ("\|" alone is an invalid escape sequence).
a <- t(str_split_fixed(my_list, "\\||--", 4))
# [,1]
#[1,] "Jan-01"
#[2,] "Dec-31"
#[3,] "00:00"
#[4,] "24:00"
data.frame(a[1:2,])
# a.1.2...
#1 Jan-01
#2 Dec-31
data.frame(a[3:4,])
# a.3.4...
#1 00:00
#2 24:00
strsplit
接受可以在一次通过中进行拆分的 greppish 模式。然后可以使用 lapply
(或 sapply
)并以 setNames
结束:
# One regex splits at both "--" and the literal "|". Backslashes are doubled
# because "\-" and "\|" are invalid escapes in an R string. The input is the
# question's `my_list` (`my_vec` was never defined). Elements 1:2 of each
# split are the two dates.
setNames(data.frame(lapply(strsplit(my_list, split = "\\-\\-|\\|"), "[", 1:2)),
         paste0("x", seq_along(my_list)))
# x1 x2 x3
#1 Jan-01 Jan-01 Jul-06
#2 Dec-31 Jun-30 Dec-31
显然可以通过在上面的代码中用 3:4 代替 1:2 来处理时间。
这里有一个base R
选项
# Base R: scan() cuts every string at "|", producing alternating date and time
# pieces; each piece is then split at "--".
lst <- strsplit(
  scan(text = my_list, what = "", sep = "|", quiet = TRUE),
  split = "--"
)
# Odd positions hold the date pairs.
do.call("cbind", lst[c(TRUE, FALSE)])
# [,1] [,2] [,3]
#[1,] "Jan-01" "Jan-01" "Jul-06"
#[2,] "Dec-31" "Jun-30" "Dec-31"
# Even positions hold the time pairs.
do.call("cbind", lst[c(FALSE, TRUE)])
# [,1] [,2] [,3]
#[1,] "00:00" "12:00" "09:00"
#[2,] "24:00" "18:00" "19:00"
或单行base R
选项
# Single expression: split() with the recycled grouping 1:2 separates the
# date pieces (group 1) from the time pieces (group 2); each group is then
# split at "--" and bound column-wise.
lapply(
  X = split(scan(text = my_list, what = "", sep = "|", quiet = TRUE), f = 1:2),
  FUN = function(part) do.call(cbind, strsplit(part, split = "--"))
)
#$`1`
# [,1] [,2] [,3]
#[1,] "Jan-01" "Jan-01" "Jul-06"
#[2,] "Dec-31" "Jun-30" "Dec-31"
#$`2`
# [,1] [,2] [,3]
#[1,] "00:00" "12:00" "09:00"
#[2,] "24:00" "18:00" "19:00"