将 ID 分配给连续的组列 r
Assign ID to consecutive groups column r
我想在 data.frame 中生成一个列,用于计算组的连续 ID(虚拟 df 中的 s 列)
# Example data: `s` is the grouping column; `desired_output` is the target
# result, i.e. for each value of `s`, the index of its current run (1 for the
# first run of that value, 2 for the second, ...).
dummy_df <- data.frame(
  s = c("a", "a", "b", "b", "b", "c", "c", "a", "a", "c", "c", "a", "a"),
  desired_output = c(1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3)
)
# rleid() comes from data.table; qualify it so the snippet runs without a
# prior library(data.table) call. It numbers runs globally across the column,
# which is close to, but not the same as, `desired_output`.
dummy_df$rleid_output <- data.table::rleid(dummy_df$s)
dummy_df
s desired_output rleid_output
1 a 1 1
2 a 1 1
3 b 1 2
4 b 1 2
5 b 1 2
6 c 1 3
7 c 1 3
8 a 2 4
9 a 2 4
10 c 2 5
11 c 2 5
12 a 3 6
13 a 3 6
我想要的效果类似于 `rleid()` 所做的,但每当某个组再次出现时,只针对该组自己的计数加一(即按组分别对连续段编号)。但是,我找不到如此直接的实现方式。谢谢
您可以尝试将 tidyverse 与基础 R 的 `rle` 函数结合使用:
library(tidyverse)

# Run-length encode `s`: one entry per run of identical consecutive values.
runs <- rle(dummy_df$s)

# For each run, count how many runs of the same value have occurred so far.
# The rle components are spelled out in full (`lengths`, `values`) instead of
# relying on `$` partial matching (`$length`, `$value`).
run_index <- tibble(len = runs$lengths, value = runs$values) %>%
  group_by(value) %>%
  mutate(n = row_number()) %>%
  ungroup()

# Expand the per-run counter back to one entry per row and attach it as `res`.
bind_cols(dummy_df, res = rep(run_index$n, times = run_index$len))
s desired_output res
1 a 1 1
2 a 1 1
3 b 1 1
4 b 1 1
5 b 1 1
6 c 1 1
7 c 1 1
8 a 2 2
9 a 2 2
10 c 2 2
11 c 2 2
12 a 3 3
13 a 3 3
你可以这样做:
# For every run of identical consecutive values in `s`, number the runs of
# that same value in order of appearance, then repeat each number over the
# rows of its run.
dummy_df$out <- local({
  runs <- rle(dummy_df$s)
  run_index <- ave(runs$lengths, runs$values, FUN = seq_along)
  rep(run_index, times = runs$lengths)
})
结果:
s desired_output out
1 a 1 1
2 a 1 1
3 b 1 1
4 b 1 1
5 b 1 1
6 c 1 1
7 c 1 1
8 a 2 2
9 a 2 2
10 c 2 2
11 c 2 2
12 a 3 3
13 a 3 3
如果您愿意使用 data.table(`rleid` 就是该软件包的一部分),您可以分两步完成:
library(data.table)

# Sample data, converted to a data.table by reference.
dummy_df <- setDT(data.frame(
  s = c("a", "a", "b", "b", "b", "c", "c", "a", "a", "c", "c", "a", "a")
))

# Step 1: number every run of identical consecutive values across the column.
dummy_df[, rleid_output := rleid(s)]

# Step 2: within each value of `s`, renumber its runs starting from 1.
dummy_df[, desired_output := rleid(rleid_output), by = s]

# Show the final table.
dummy_df
#> s rleid_output desired_output
#> 1: a 1 1
#> 2: a 1 1
#> 3: b 2 1
#> 4: b 2 1
#> 5: b 2 1
#> 6: c 3 1
#> 7: c 3 1
#> 8: a 4 2
#> 9: a 4 2
#> 10: c 5 2
#> 11: c 5 2
#> 12: a 6 3
#> 13: a 6 3
由 reprex package (v0.3.0) 于 2020-10-16 创建
我想在 data.frame 中生成一个列,用于计算组的连续 ID(虚拟 df 中的 s 列)
# Example data: `s` is the grouping column; `desired_output` is the target
# result, i.e. for each value of `s`, the index of its current run (1 for the
# first run of that value, 2 for the second, ...).
dummy_df <- data.frame(
  s = c("a", "a", "b", "b", "b", "c", "c", "a", "a", "c", "c", "a", "a"),
  desired_output = c(1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3)
)
# rleid() comes from data.table; qualify it so the snippet runs without a
# prior library(data.table) call. It numbers runs globally across the column,
# which is close to, but not the same as, `desired_output`.
dummy_df$rleid_output <- data.table::rleid(dummy_df$s)
dummy_df
s desired_output rleid_output
1 a 1 1
2 a 1 1
3 b 1 2
4 b 1 2
5 b 1 2
6 c 1 3
7 c 1 3
8 a 2 4
9 a 2 4
10 c 2 5
11 c 2 5
12 a 3 6
13 a 3 6
我想要的效果类似于 `rleid()` 所做的,但每当某个组再次出现时,只针对该组自己的计数加一(即按组分别对连续段编号)。但是,我找不到如此直接的实现方式。谢谢
您可以尝试将 tidyverse 与基础 R 的 `rle` 函数结合使用:
library(tidyverse)

# Run-length encode `s`: one entry per run of identical consecutive values.
runs <- rle(dummy_df$s)

# For each run, count how many runs of the same value have occurred so far.
# The rle components are spelled out in full (`lengths`, `values`) instead of
# relying on `$` partial matching (`$length`, `$value`).
run_index <- tibble(len = runs$lengths, value = runs$values) %>%
  group_by(value) %>%
  mutate(n = row_number()) %>%
  ungroup()

# Expand the per-run counter back to one entry per row and attach it as `res`.
bind_cols(dummy_df, res = rep(run_index$n, times = run_index$len))
s desired_output res
1 a 1 1
2 a 1 1
3 b 1 1
4 b 1 1
5 b 1 1
6 c 1 1
7 c 1 1
8 a 2 2
9 a 2 2
10 c 2 2
11 c 2 2
12 a 3 3
13 a 3 3
你可以这样做:
# For every run of identical consecutive values in `s`, number the runs of
# that same value in order of appearance, then repeat each number over the
# rows of its run.
dummy_df$out <- local({
  runs <- rle(dummy_df$s)
  run_index <- ave(runs$lengths, runs$values, FUN = seq_along)
  rep(run_index, times = runs$lengths)
})
结果:
s desired_output out
1 a 1 1
2 a 1 1
3 b 1 1
4 b 1 1
5 b 1 1
6 c 1 1
7 c 1 1
8 a 2 2
9 a 2 2
10 c 2 2
11 c 2 2
12 a 3 3
13 a 3 3
如果您愿意使用 data.table(`rleid` 就是该软件包的一部分),您可以分两步完成:
library(data.table)

# Sample data, converted to a data.table by reference.
dummy_df <- setDT(data.frame(
  s = c("a", "a", "b", "b", "b", "c", "c", "a", "a", "c", "c", "a", "a")
))

# Step 1: number every run of identical consecutive values across the column.
dummy_df[, rleid_output := rleid(s)]

# Step 2: within each value of `s`, renumber its runs starting from 1.
dummy_df[, desired_output := rleid(rleid_output), by = s]

# Show the final table.
dummy_df
#> s rleid_output desired_output
#> 1: a 1 1
#> 2: a 1 1
#> 3: b 2 1
#> 4: b 2 1
#> 5: b 2 1
#> 6: c 3 1
#> 7: c 3 1
#> 8: a 4 2
#> 9: a 4 2
#> 10: c 5 2
#> 11: c 5 2
#> 12: a 6 3
#> 13: a 6 3
由 reprex package (v0.3.0) 于 2020-10-16 创建