如何增加每一行的变量值
how to increase the value of a variable for each row
我有这个数据框,
library(dplyr)

# Example data: 18 rows in three `tat` groups of six rows each.
# `dat` is written out at its full length (A/B/C repeated 6 times) instead of
# relying on data.frame() silently recycling a length-9 vector to 18 rows.
dat <- rep(c("A", "B", "C"), times = 6)
tat <- rep(c("ttt", "aaa", "ddd"), each = 6)
pct <- c(14, 7, 12, 8, 11, 13, 19, 6, 9, 11, 13, 20, 11, 18, 6, 9, 10, 13)

# Add each group's maximum `pct` as column `max`; the result stays grouped by
# `tat`, as shown in the printed tibble.
data <- data.frame(dat, tat, pct) %>%
  group_by(tat) %>%
  mutate(max = max(pct))
> data
# A tibble: 18 x 4
# Groups: tat [3]
dat tat pct max
<chr> <chr> <dbl> <dbl>
1 A ttt 14 14
2 B ttt 7 14
3 C ttt 12 14
4 A ttt 8 14
5 B ttt 11 14
6 C ttt 13 14
7 A aaa 19 20
8 B aaa 6 20
9 C aaa 9 20
10 A aaa 11 20
11 B aaa 13 20
12 C aaa 20 20
13 A ddd 11 18
14 B ddd 18 18
15 C ddd 6 18
16 A ddd 9 18
17 B ddd 10 18
18 C ddd 13 18
我想创建另一个变量:在每个 tat 分组内,从该组的 max 值开始,每往下一行递增 1。我想要的结果示例见下方。
有人有想法吗?
> data2
# A tibble: 18 x 5
# Groups: tat [3]
dat tat pct max ...5
<chr> <chr> <dbl> <dbl> <dbl>
1 A ttt 14 14 14
2 B ttt 7 14 15
3 C ttt 12 14 16
4 A ttt 8 14 17
5 B ttt 11 14 18
6 C ttt 13 14 19
7 A aaa 19 20 20
8 B aaa 6 20 21
9 C aaa 9 20 22
10 A aaa 11 20 23
11 B aaa 13 20 24
12 C aaa 20 20 25
13 A ddd 11 18 18
14 B ddd 18 18 19
15 C ddd 6 18 20
16 A ddd 9 18 21
17 B ddd 10 18 22
18 C ddd 13 18 23
这个有用吗:
library(dplyr)

# Within each `tat` group, c5 is the group's `max` plus the zero-based
# position of the row inside that group.
data %>%
  group_by(tat) %>%
  mutate(c5 = max + (row_number() - 1))
# A tibble: 18 x 5
# Groups: tat [3]
dat tat pct max c5
<chr> <chr> <dbl> <dbl> <dbl>
1 A ttt 14 14 14
2 B ttt 7 14 15
3 C ttt 12 14 16
4 A ttt 8 14 17
5 B ttt 11 14 18
6 C ttt 13 14 19
7 A aaa 19 20 20
8 B aaa 6 20 21
9 C aaa 9 20 22
10 A aaa 11 20 23
11 B aaa 13 20 24
12 C aaa 20 20 25
13 A ddd 11 18 18
14 B ddd 18 18 19
15 C ddd 6 18 20
16 A ddd 9 18 21
17 B ddd 10 18 22
18 C ddd 13 18 23
当分组变量(此处为 tat)已经排好序(同组的行彼此相邻)时,可以在 base R 4.1.0 或更高版本中这样做:
# Split `pct` by `tat` (levels kept in order of first appearance), compute
# max + 0, 1, 2, ... within each group, then flatten the per-group pieces back
# into one column. The flattened order only lines up with the rows when each
# group's rows are contiguous.
datf |>
  within(
    max_plus <- unlist(
      tapply(
        pct,
        factor(tat, levels = unique(tat)),
        \(grp_pct) max(grp_pct) + seq_along(grp_pct) - 1
      )
    )
  )
#R> dat tat pct max_plus
#R> 1 A ttt 14 14
#R> 2 B ttt 7 15
#R> 3 C ttt 12 16
#R> 4 A ttt 8 17
#R> 5 B ttt 11 18
#R> 6 C ttt 13 19
#R> 7 A aaa 19 20
#R> 8 B aaa 6 21
#R> 9 C aaa 9 22
#R> 10 A aaa 11 23
#R> 11 B aaa 13 24
#R> 12 C aaa 20 25
#R> 13 A ddd 11 18
#R> 14 B ddd 18 19
#R> 15 C ddd 6 20
#R> 16 A ddd 9 21
#R> 17 B ddd 10 22
#R> 18 C ddd 13 23
按照评论中 onyambu 的回答,您可以使用 ave:
# Rebuild the example data as a plain data.frame (no dplyr needed).
# `dat` is written out at its full length of 18 instead of relying on
# data.frame() silently recycling a length-9 vector.
dat <- rep(c("A", "B", "C"), times = 6)
tat <- rep(c("ttt", "aaa", "ddd"), each = 6)
pct <- c(14, 7, 12, 8, 11, 13, 19, 6, 9, 11, 13, 20, 11, 18, 6, 9, 10, 13)
datf <- data.frame(dat, tat, pct)

# ave() applies the function to the `pct` values of each `tat` group and puts
# the results back in the original row positions: the group maximum plus a
# zero-based counter over the group's rows.
transform(
  datf,
  max_plus = ave(pct, tat, FUN = function(x) max(x) + seq_along(x) - 1)
)
#R> dat tat pct max_plus
#R> 1 A ttt 14 14
#R> 2 B ttt 7 15
#R> 3 C ttt 12 16
#R> 4 A ttt 8 17
#R> 5 B ttt 11 18
#R> 6 C ttt 13 19
#R> 7 A aaa 19 20
#R> 8 B aaa 6 21
#R> 9 C aaa 9 22
#R> 10 A aaa 11 23
#R> 11 B aaa 13 24
#R> 12 C aaa 20 25
#R> 13 A ddd 11 18
#R> 14 B ddd 18 19
#R> 15 C ddd 6 20
#R> 16 A ddd 9 21
#R> 17 B ddd 10 22
#R> 18 C ddd 13 23
使用data.table
library(data.table)

# Convert `data` to a data.table by reference, then add `c5` per `tat` group:
# the group's `max` plus a zero-based counter over the group's rows.
setDT(data)[, c5 := max + (seq_len(.N) - 1L), by = tat]
> data
dat tat pct max c5
1: A ttt 14 14 14
2: B ttt 7 14 15
3: C ttt 12 14 16
4: A ttt 8 14 17
5: B ttt 11 14 18
6: C ttt 13 14 19
7: A aaa 19 20 20
8: B aaa 6 20 21
9: C aaa 9 20 22
10: A aaa 11 20 23
11: B aaa 13 20 24
12: C aaa 20 20 25
13: A ddd 11 18 18
14: B ddd 18 18 19
15: C ddd 6 18 20
16: A ddd 9 18 21
17: B ddd 10 18 22
18: C ddd 13 18 23
使用 ave + seq_along 的基础 R 方案:
> # c5 = `max` plus the zero-based row position within each `tat` group
> transform(
+   data,
+   c5 = max + ave(max, tat, FUN = seq_along) - 1
+ )
dat tat pct max c5
1 A ttt 14 14 14
2 B ttt 7 14 15
3 C ttt 12 14 16
4 A ttt 8 14 17
5 B ttt 11 14 18
6 C ttt 13 14 19
7 A aaa 19 20 20
8 B aaa 6 20 21
9 C aaa 9 20 22
10 A aaa 11 20 23
11 B aaa 13 20 24
12 C aaa 20 20 25
13 A ddd 11 18 18
14 B ddd 18 18 19
15 C ddd 6 18 20
16 A ddd 9 18 21
17 B ddd 10 18 22
18 C ddd 13 18 23
我有这个数据框,
library(dplyr)

# Example data: 18 rows in three `tat` groups of six rows each.
# `dat` is written out at its full length (A/B/C repeated 6 times) instead of
# relying on data.frame() silently recycling a length-9 vector to 18 rows.
dat <- rep(c("A", "B", "C"), times = 6)
tat <- rep(c("ttt", "aaa", "ddd"), each = 6)
pct <- c(14, 7, 12, 8, 11, 13, 19, 6, 9, 11, 13, 20, 11, 18, 6, 9, 10, 13)

# Add each group's maximum `pct` as column `max`; the result stays grouped by
# `tat`, as shown in the printed tibble.
data <- data.frame(dat, tat, pct) %>%
  group_by(tat) %>%
  mutate(max = max(pct))
> data
# A tibble: 18 x 4
# Groups: tat [3]
dat tat pct max
<chr> <chr> <dbl> <dbl>
1 A ttt 14 14
2 B ttt 7 14
3 C ttt 12 14
4 A ttt 8 14
5 B ttt 11 14
6 C ttt 13 14
7 A aaa 19 20
8 B aaa 6 20
9 C aaa 9 20
10 A aaa 11 20
11 B aaa 13 20
12 C aaa 20 20
13 A ddd 11 18
14 B ddd 18 18
15 C ddd 6 18
16 A ddd 9 18
17 B ddd 10 18
18 C ddd 13 18
我想创建另一个变量:在每个 tat 分组内,从该组的 max 值开始,每往下一行递增 1。我想要的结果示例见下方。 有人有想法吗?
> data2
# A tibble: 18 x 5
# Groups: tat [3]
dat tat pct max ...5
<chr> <chr> <dbl> <dbl> <dbl>
1 A ttt 14 14 14
2 B ttt 7 14 15
3 C ttt 12 14 16
4 A ttt 8 14 17
5 B ttt 11 14 18
6 C ttt 13 14 19
7 A aaa 19 20 20
8 B aaa 6 20 21
9 C aaa 9 20 22
10 A aaa 11 20 23
11 B aaa 13 20 24
12 C aaa 20 20 25
13 A ddd 11 18 18
14 B ddd 18 18 19
15 C ddd 6 18 20
16 A ddd 9 18 21
17 B ddd 10 18 22
18 C ddd 13 18 23
这个有用吗:
library(dplyr)

# Within each `tat` group, c5 is the group's `max` plus the zero-based
# position of the row inside that group.
data %>%
  group_by(tat) %>%
  mutate(c5 = max + (row_number() - 1))
# A tibble: 18 x 5
# Groups: tat [3]
dat tat pct max c5
<chr> <chr> <dbl> <dbl> <dbl>
1 A ttt 14 14 14
2 B ttt 7 14 15
3 C ttt 12 14 16
4 A ttt 8 14 17
5 B ttt 11 14 18
6 C ttt 13 14 19
7 A aaa 19 20 20
8 B aaa 6 20 21
9 C aaa 9 20 22
10 A aaa 11 20 23
11 B aaa 13 20 24
12 C aaa 20 20 25
13 A ddd 11 18 18
14 B ddd 18 18 19
15 C ddd 6 18 20
16 A ddd 9 18 21
17 B ddd 10 18 22
18 C ddd 13 18 23
当分组变量(此处为 tat)已经排好序(同组的行彼此相邻)时,可以在 base R 4.1.0 或更高版本中这样做:
# Split `pct` by `tat` (levels kept in order of first appearance), compute
# max + 0, 1, 2, ... within each group, then flatten the per-group pieces back
# into one column. The flattened order only lines up with the rows when each
# group's rows are contiguous.
datf |>
  within(
    max_plus <- unlist(
      tapply(
        pct,
        factor(tat, levels = unique(tat)),
        \(grp_pct) max(grp_pct) + seq_along(grp_pct) - 1
      )
    )
  )
#R> dat tat pct max_plus
#R> 1 A ttt 14 14
#R> 2 B ttt 7 15
#R> 3 C ttt 12 16
#R> 4 A ttt 8 17
#R> 5 B ttt 11 18
#R> 6 C ttt 13 19
#R> 7 A aaa 19 20
#R> 8 B aaa 6 21
#R> 9 C aaa 9 22
#R> 10 A aaa 11 23
#R> 11 B aaa 13 24
#R> 12 C aaa 20 25
#R> 13 A ddd 11 18
#R> 14 B ddd 18 19
#R> 15 C ddd 6 20
#R> 16 A ddd 9 21
#R> 17 B ddd 10 22
#R> 18 C ddd 13 23
按照评论中 onyambu 的回答,您可以使用 ave:
# Rebuild the example data as a plain data.frame (no dplyr needed).
# `dat` is written out at its full length of 18 instead of relying on
# data.frame() silently recycling a length-9 vector.
dat <- rep(c("A", "B", "C"), times = 6)
tat <- rep(c("ttt", "aaa", "ddd"), each = 6)
pct <- c(14, 7, 12, 8, 11, 13, 19, 6, 9, 11, 13, 20, 11, 18, 6, 9, 10, 13)
datf <- data.frame(dat, tat, pct)

# ave() applies the function to the `pct` values of each `tat` group and puts
# the results back in the original row positions: the group maximum plus a
# zero-based counter over the group's rows.
transform(
  datf,
  max_plus = ave(pct, tat, FUN = function(x) max(x) + seq_along(x) - 1)
)
#R> dat tat pct max_plus
#R> 1 A ttt 14 14
#R> 2 B ttt 7 15
#R> 3 C ttt 12 16
#R> 4 A ttt 8 17
#R> 5 B ttt 11 18
#R> 6 C ttt 13 19
#R> 7 A aaa 19 20
#R> 8 B aaa 6 21
#R> 9 C aaa 9 22
#R> 10 A aaa 11 23
#R> 11 B aaa 13 24
#R> 12 C aaa 20 25
#R> 13 A ddd 11 18
#R> 14 B ddd 18 19
#R> 15 C ddd 6 20
#R> 16 A ddd 9 21
#R> 17 B ddd 10 22
#R> 18 C ddd 13 23
使用data.table
library(data.table)

# Convert `data` to a data.table by reference, then add `c5` per `tat` group:
# the group's `max` plus a zero-based counter over the group's rows.
setDT(data)[, c5 := max + (seq_len(.N) - 1L), by = tat]
> data
dat tat pct max c5
1: A ttt 14 14 14
2: B ttt 7 14 15
3: C ttt 12 14 16
4: A ttt 8 14 17
5: B ttt 11 14 18
6: C ttt 13 14 19
7: A aaa 19 20 20
8: B aaa 6 20 21
9: C aaa 9 20 22
10: A aaa 11 20 23
11: B aaa 13 20 24
12: C aaa 20 20 25
13: A ddd 11 18 18
14: B ddd 18 18 19
15: C ddd 6 18 20
16: A ddd 9 18 21
17: B ddd 10 18 22
18: C ddd 13 18 23
使用 ave + seq_along:
> # c5 = `max` plus the zero-based row position within each `tat` group
> transform(
+   data,
+   c5 = max + ave(max, tat, FUN = seq_along) - 1
+ )
dat tat pct max c5
1 A ttt 14 14 14
2 B ttt 7 14 15
3 C ttt 12 14 16
4 A ttt 8 14 17
5 B ttt 11 14 18
6 C ttt 13 14 19
7 A aaa 19 20 20
8 B aaa 6 20 21
9 C aaa 9 20 22
10 A aaa 11 20 23
11 B aaa 13 20 24
12 C aaa 20 20 25
13 A ddd 11 18 18
14 B ddd 18 18 19
15 C ddd 6 18 20
16 A ddd 9 18 21
17 B ddd 10 18 22
18 C ddd 13 18 23