拆分重新排序
Strsplit reorder
**给定的是一个 df**
# Sample input: one unnamed column of "/"-separated strings, where the codes
# come first and the matching times follow in the same order.
df <- data.frame(
  c(
    "28A/38A/28C/00:05/00:05/00:05",
    "93/00:20",
    "93B/06:26",
    "23A/87E/00:04/00:05",
    "1A/38A/28C/28/00:05/00:10/01:05/00:20"
  )
)
**我想重新排序字符串并创建 4 个新列**
示例 1 有 1 个代码和 1 个时间
"93/00:20"
结果
Col_Code1 = 93
Col_Time1 = 00:20
示例 2 有 4 个代码和 4 个时间
"1A/38A/28C/28/00:05/00:10/01:05/00:20"
结果
Col_Code1 = 1A
Col_Time1 = 00:05
Col_Code2 = 38A
Col_Time2 = 00:10
Col_Code3 = 28C
Col_Time3 = 01:05
Col_Code4 = 28
Col_Time4 = 00:20
知道如何拆分字符串并根据上述模式创建新列吗?
这是我目前所拥有的,不幸的是它不区分字符串的长度,因此我在一列中有时间值和代码值。
未排序结果
# Raw strings: all codes first, then all times.
df <- c(
  "28A/38A/28C/00:05/00:05/00:05",
  "93/00:20",
  "93B/06:26",
  "23A/87E/00:04/00:05",
  "1A/38A/28C/28/00:05/00:10/01:05/00:20"
)

# Split on "/" into a character matrix (simplify = TRUE) and wrap it as a
# tibble; pieces stay in their original order, so codes and times end up
# mixed within the same columns.
current_df <- bind_cols(
  as_tibble(
    str_split(df, pattern = "/", simplify = TRUE)
  )
)
想要的结果
# Hand-reordered strings: each code is immediately followed by its time.
df <- c(
  "28A/00:05/38A/00:05/28C/00:05",
  "93/00:20",
  "93B/06:26",
  "23A/00:04/87E/00:05",
  "1A/00:05/38A/00:10/28C/01:05/28/00:20"
)

# Same split as before; because the input is already interleaved, the
# resulting columns alternate between code and time.
desired_df <- bind_cols(
  as_tibble(
    str_split(df, pattern = "/", simplify = TRUE)
  )
)
提前致谢
先把结果拆分成长格式,再合并。
# Split every input string on "/" into its individual pieces.
ss <- strsplit(as.character(df[[1]]), split = "/")

# Turn each piece vector into a small data frame with one row per
# code/time pair.
ss <- lapply(ss, function(pieces) {
  # matrix() fills column-wise, so with two columns the first half of the
  # pieces (the codes) lands in column 1 and the second half (the times)
  # in column 2.
  pairs <- data.frame(matrix(pieces, ncol = 2), stringsAsFactors = FALSE)
  pairs <- setNames(pairs, c("Col_Code", "Col_Time"))
  # seq_len() yields an empty index for a zero-row frame, whereas
  # 1:nrow() would give c(1, 0) and make this assignment fail.
  pairs$index <- seq_len(nrow(pairs))
  pairs
})

# Stack the per-string frames; `id` records which list element (input
# string) each row came from.
dlong <- dplyr::bind_rows(ss, .id = "id")
head(dlong)
# id Col_Code Col_Time index
# 1 1 28A 00:05 1
# 2 1 38A 00:05 2
# 3 1 28C 00:05 3
# 4 2 93 00:20 1
# 5 3 93B 06:26 1
# 6 4 23A 00:04 1
我认为这是一个很好的整洁格式,对于很多用途,你最好停在那里。但是,如果需要,这里是继续的一般方法:
library(reshape2)

# Build one wide table per value column, stored under that column's name.
ww <- list()
for (value_col in c("Col_Code", "Col_Time")) {
  # One row per id, one column per index position.
  wide <- dcast(dlong, id ~ index, value.var = value_col)
  # Prefix every column except `id` with the value column's name,
  # e.g. "1" becomes "Col_Code1".
  names(wide)[-1] <- paste0(value_col, names(wide)[-1])
  ww[[value_col]] <- wide
}

# merge() joins on the columns the tables have in common, which after the
# renaming above is only `id`.
result <- Reduce(f = merge, ww)
head(result)
# id Col_Code1 Col_Code2 Col_Code3 Col_Code4 Col_Time1 Col_Time2 Col_Time3 Col_Time4
# 1 1 28A 38A 28C <NA> 00:05 00:05 00:05 <NA>
# 2 2 93 <NA> <NA> <NA> 00:20 <NA> <NA> <NA>
# 3 3 93B <NA> <NA> <NA> 06:26 <NA> <NA> <NA>
# 4 4 23A 87E <NA> <NA> 00:04 00:05 <NA> <NA>
# 5 5 1A 38A 28C 28 00:05 00:10 01:05 00:20
已使用您的数据进行测试,并借助了 stringr 和 magrittr 这两个软件包:
# Split each string on "/", cut the pieces into a first half (codes) and a
# second half (times), and stack all code/time pairs into one matrix.
lapply(df, str_split, pattern = "/") %>%
  # Flatten one level so each element is the piece vector of one string.
  # FALSE is spelled out because `F` is an ordinary, reassignable variable.
  unlist(., recursive = FALSE) %>%
  # Group "0" receives the first half of the pieces, group "1" the second.
  lapply(., function(pieces) {
    split(pieces, rep(0:1, each = length(pieces) / 2))
  }) %>%
  # Put the two halves side by side: one row per code/time pair.
  lapply(., function(halves) do.call(cbind, halves)) %>%
  Reduce(rbind, .)
输出
0 1
[1,] "28A" "00:05"
[2,] "38A" "00:05"
[3,] "28C" "00:05"
[4,] "93" "00:20"
[5,] "93B" "06:26"
[6,] "23A" "00:04"
[7,] "87E" "00:05"
[8,] "1A" "00:05"
[9,] "38A" "00:10"
[10,] "28C" "01:05"
[11,] "28" "00:20"
下面使用 tidyverse 将您的数据转换为长格式:
library(tidyverse)

# Separate each string into its code part and its time part, then expand
# both parts to one row per "/"-separated element.
df %>%
  # Backslashes must be doubled inside an R string literal; a single "\w"
  # or "\d" is rejected by the parser as an unrecognized escape.
  extract(
    col1,
    c("col1", "col2"),
    regex = "((?:\\w+/?)+)/((?:\\d{2}:\\d{2}/?)+)"
  ) %>%
  # Each extracted column is processed on its own: tag rows with the
  # original row number, then split the values on "/".
  # as_tibble() replaces the deprecated as.tibble() alias.
  # NOTE(review): the ID / value / value1 names selected below depend on how
  # map_dfc() de-duplicates repeated column names - verify against the
  # installed dplyr/purrr versions.
  map_dfc(
    ~ as_tibble(.) %>%
      mutate(ID = row_number()) %>%
      separate_rows(1, sep = "/")
  ) %>%
  select(ID, starts_with("value"))
结果:
# A tibble: 11 x 3
ID value value1
<int> <chr> <chr>
1 1 28A 00:05
2 1 38A 00:05
3 1 28C 00:05
4 2 93 00:20
5 3 93B 06:26
6 4 23A 00:04
7 4 87E 00:05
8 5 1A 00:05
9 5 38A 00:10
10 5 28C 01:05
11 5 28 00:20
数据:
# Sample data: a single character column `col1` of "/"-separated strings,
# codes first and the matching times after them.
df <- data.frame(
  col1 = c(
    "28A/38A/28C/00:05/00:05/00:05",
    "93/00:20",
    "93B/06:26",
    "23A/87E/00:04/00:05",
    "1A/38A/28C/28/00:05/00:10/01:05/00:20"
  ),
  stringsAsFactors = FALSE
)
**给定的是一个 df**
# Sample input: one unnamed column of "/"-separated strings, where the codes
# come first and the matching times follow in the same order.
df <- data.frame(
  c(
    "28A/38A/28C/00:05/00:05/00:05",
    "93/00:20",
    "93B/06:26",
    "23A/87E/00:04/00:05",
    "1A/38A/28C/28/00:05/00:10/01:05/00:20"
  )
)
**我想重新排序字符串并创建 4 个新列**
示例 1 有 1 个代码和 1 个时间
"93/00:20"
结果
Col_Code1 = 93
Col_Time1 = 00:20
示例 2 有 4 个代码和 4 个时间
"1A/38A/28C/28/00:05/00:10/01:05/00:20"
结果
Col_Code1 = 1A
Col_Time1 = 00:05
Col_Code2 = 38A
Col_Time2 = 00:10
Col_Code3 = 28C
Col_Time3 = 01:05
Col_Code4 = 28
Col_Time4 = 00:20
知道如何拆分字符串并根据上述模式创建新列吗?
这是我目前所拥有的,不幸的是它不区分字符串的长度,因此我在一列中有时间值和代码值。
未排序结果
# Raw strings: all codes first, then all times.
df <- c(
  "28A/38A/28C/00:05/00:05/00:05",
  "93/00:20",
  "93B/06:26",
  "23A/87E/00:04/00:05",
  "1A/38A/28C/28/00:05/00:10/01:05/00:20"
)

# Split on "/" into a character matrix (simplify = TRUE) and wrap it as a
# tibble; pieces stay in their original order, so codes and times end up
# mixed within the same columns.
current_df <- bind_cols(
  as_tibble(
    str_split(df, pattern = "/", simplify = TRUE)
  )
)
想要的结果
# Hand-reordered strings: each code is immediately followed by its time.
df <- c(
  "28A/00:05/38A/00:05/28C/00:05",
  "93/00:20",
  "93B/06:26",
  "23A/00:04/87E/00:05",
  "1A/00:05/38A/00:10/28C/01:05/28/00:20"
)

# Same split as before; because the input is already interleaved, the
# resulting columns alternate between code and time.
desired_df <- bind_cols(
  as_tibble(
    str_split(df, pattern = "/", simplify = TRUE)
  )
)
提前致谢
先把结果拆分成长格式,再合并。
# Split every input string on "/" into its individual pieces.
ss <- strsplit(as.character(df[[1]]), split = "/")

# Turn each piece vector into a small data frame with one row per
# code/time pair.
ss <- lapply(ss, function(pieces) {
  # matrix() fills column-wise, so with two columns the first half of the
  # pieces (the codes) lands in column 1 and the second half (the times)
  # in column 2.
  pairs <- data.frame(matrix(pieces, ncol = 2), stringsAsFactors = FALSE)
  pairs <- setNames(pairs, c("Col_Code", "Col_Time"))
  # seq_len() yields an empty index for a zero-row frame, whereas
  # 1:nrow() would give c(1, 0) and make this assignment fail.
  pairs$index <- seq_len(nrow(pairs))
  pairs
})

# Stack the per-string frames; `id` records which list element (input
# string) each row came from.
dlong <- dplyr::bind_rows(ss, .id = "id")
head(dlong)
# id Col_Code Col_Time index
# 1 1 28A 00:05 1
# 2 1 38A 00:05 2
# 3 1 28C 00:05 3
# 4 2 93 00:20 1
# 5 3 93B 06:26 1
# 6 4 23A 00:04 1
我认为这是一个很好的整洁格式,对于很多用途,你最好停在那里。但是,如果需要,这里是继续的一般方法:
library(reshape2)

# Build one wide table per value column, stored under that column's name.
ww <- list()
for (value_col in c("Col_Code", "Col_Time")) {
  # One row per id, one column per index position.
  wide <- dcast(dlong, id ~ index, value.var = value_col)
  # Prefix every column except `id` with the value column's name,
  # e.g. "1" becomes "Col_Code1".
  names(wide)[-1] <- paste0(value_col, names(wide)[-1])
  ww[[value_col]] <- wide
}

# merge() joins on the columns the tables have in common, which after the
# renaming above is only `id`.
result <- Reduce(f = merge, ww)
head(result)
# id Col_Code1 Col_Code2 Col_Code3 Col_Code4 Col_Time1 Col_Time2 Col_Time3 Col_Time4
# 1 1 28A 38A 28C <NA> 00:05 00:05 00:05 <NA>
# 2 2 93 <NA> <NA> <NA> 00:20 <NA> <NA> <NA>
# 3 3 93B <NA> <NA> <NA> 06:26 <NA> <NA> <NA>
# 4 4 23A 87E <NA> <NA> 00:04 00:05 <NA> <NA>
# 5 5 1A 38A 28C 28 00:05 00:10 01:05 00:20
使用您的数据以及 stringr、magrittr 这两个软件包:
# Split each string on "/", cut the pieces into a first half (codes) and a
# second half (times), and stack all code/time pairs into one matrix.
lapply(df, str_split, pattern = "/") %>%
  # Flatten one level so each element is the piece vector of one string.
  # FALSE is spelled out because `F` is an ordinary, reassignable variable.
  unlist(., recursive = FALSE) %>%
  # Group "0" receives the first half of the pieces, group "1" the second.
  lapply(., function(pieces) {
    split(pieces, rep(0:1, each = length(pieces) / 2))
  }) %>%
  # Put the two halves side by side: one row per code/time pair.
  lapply(., function(halves) do.call(cbind, halves)) %>%
  Reduce(rbind, .)
输出
0 1
[1,] "28A" "00:05"
[2,] "38A" "00:05"
[3,] "28C" "00:05"
[4,] "93" "00:20"
[5,] "93B" "06:26"
[6,] "23A" "00:04"
[7,] "87E" "00:05"
[8,] "1A" "00:05"
[9,] "38A" "00:10"
[10,] "28C" "01:05"
[11,] "28" "00:20"
下面使用 tidyverse 将您的数据转换为长格式:
library(tidyverse)

# Separate each string into its code part and its time part, then expand
# both parts to one row per "/"-separated element.
df %>%
  # Backslashes must be doubled inside an R string literal; a single "\w"
  # or "\d" is rejected by the parser as an unrecognized escape.
  extract(
    col1,
    c("col1", "col2"),
    regex = "((?:\\w+/?)+)/((?:\\d{2}:\\d{2}/?)+)"
  ) %>%
  # Each extracted column is processed on its own: tag rows with the
  # original row number, then split the values on "/".
  # as_tibble() replaces the deprecated as.tibble() alias.
  # NOTE(review): the ID / value / value1 names selected below depend on how
  # map_dfc() de-duplicates repeated column names - verify against the
  # installed dplyr/purrr versions.
  map_dfc(
    ~ as_tibble(.) %>%
      mutate(ID = row_number()) %>%
      separate_rows(1, sep = "/")
  ) %>%
  select(ID, starts_with("value"))
结果:
# A tibble: 11 x 3
ID value value1
<int> <chr> <chr>
1 1 28A 00:05
2 1 38A 00:05
3 1 28C 00:05
4 2 93 00:20
5 3 93B 06:26
6 4 23A 00:04
7 4 87E 00:05
8 5 1A 00:05
9 5 38A 00:10
10 5 28C 01:05
11 5 28 00:20
数据:
# Sample data: a single character column `col1` of "/"-separated strings,
# codes first and the matching times after them.
df <- data.frame(
  col1 = c(
    "28A/38A/28C/00:05/00:05/00:05",
    "93/00:20",
    "93B/06:26",
    "23A/87E/00:04/00:05",
    "1A/38A/28C/28/00:05/00:10/01:05/00:20"
  ),
  stringsAsFactors = FALSE
)