如何增加每一行的变量值

how to increase the value of a variable for each row

我有如下数据框：

# Build the example data: 18 rows in three `tat` groups of six rows each.
library(dplyr)  # provides %>%, group_by() and mutate() used below

# `dat` has only 9 elements; data.frame() recycles it to the 18 rows of
# `tat` and `pct`.
dat <- rep(c("A", "B", "C"), 3)
tat <- c(rep("ttt", 6), rep("aaa", 6), rep("ddd", 6))
pct <- c(14, 7, 12, 8, 11, 13, 19, 6, 9, 11, 13, 20, 11, 18, 6, 9, 10, 13)

# Attach each group's maximum `pct` to every row of that group; the result
# stays grouped by `tat`.
data <- data.frame(dat, tat, pct) %>%
  group_by(tat) %>%
  mutate(max = max(pct))

> data
# A tibble: 18 x 4
# Groups:   tat [3]
   dat   tat     pct   max
   <chr> <chr> <dbl> <dbl>
 1 A     ttt      14    14
 2 B     ttt       7    14
 3 C     ttt      12    14
 4 A     ttt       8    14
 5 B     ttt      11    14
 6 C     ttt      13    14
 7 A     aaa      19    20
 8 B     aaa       6    20
 9 C     aaa       9    20
10 A     aaa      11    20
11 B     aaa      13    20
12 C     aaa      20    20
13 A     ddd      11    18
14 B     ddd      18    18
15 C     ddd       6    18
16 A     ddd       9    18
17 B     ddd      10    18
18 C     ddd      13    18

我想新建一个变量：在按 tat 分组的每一组内，从该组的 max 值开始逐行加 1（组内第一行等于 max，第二行为 max + 1，依此类推）。我期望的结果如下所示。有人有思路吗？

> data2
# A tibble: 18 x 5
# Groups:   tat [3]
   dat   tat     pct   max  ...5
   <chr> <chr> <dbl> <dbl> <dbl>
 1 A     ttt      14    14    14
 2 B     ttt       7    14    15
 3 C     ttt      12    14    16
 4 A     ttt       8    14    17
 5 B     ttt      11    14    18
 6 C     ttt      13    14    19
 7 A     aaa      19    20    20
 8 B     aaa       6    20    21
 9 C     aaa       9    20    22
10 A     aaa      11    20    23
11 B     aaa      13    20    24
12 C     aaa      20    20    25
13 A     ddd      11    18    18
14 B     ddd      18    18    19
15 C     ddd       6    18    20
16 A     ddd       9    18    21
17 B     ddd      10    18    22
18 C     ddd      13    18    23


这样可以满足你的需求吗：

library(dplyr)

# Add `c5`: within each `tat` group, the group's `max` plus the zero-based
# position of the row inside that group (the first row keeps `max` itself).
data %>%
  group_by(tat) %>%
  mutate(c5 = max + seq_len(n()) - 1)
# A tibble: 18 x 5
# Groups:   tat [3]
   dat   tat     pct   max    c5
   <chr> <chr> <dbl> <dbl> <dbl>
 1 A     ttt      14    14    14
 2 B     ttt       7    14    15
 3 C     ttt      12    14    16
 4 A     ttt       8    14    17
 5 B     ttt      11    14    18
 6 C     ttt      13    14    19
 7 A     aaa      19    20    20
 8 B     aaa       6    20    21
 9 C     aaa       9    20    22
10 A     aaa      11    20    23
11 B     aaa      13    20    24
12 C     aaa      20    20    25
13 A     ddd      11    18    18
14 B     ddd      18    18    19
15 C     ddd       6    18    20
16 A     ddd       9    18    21
17 B     ddd      10    18    22
18 C     ddd      13    18    23

如果分组变量已经排好序（即同一组的行在数据中连续排列），那么在 R 4.1.0 或更高版本中可以只用 base R 这样写（`datf` 的定义见下文的 ave 示例）：

# Per-group running offset with base R only (the native pipe `|>` needs
# R >= 4.1.0). tapply() returns its pieces in factor-level order, so the levels
# are set to order of first appearance; the unlisted result then lines up with
# the rows only when each `tat` group occupies one contiguous run of rows.
datf |>
  within(
    max_plus <- unlist(
      tapply(
        pct,
        factor(tat, levels = unique(tat)),
        function(x) max(x) + seq_along(x) - 1
      ),
      # Drop the "ttt1", "ttt2", ... names unlist() would otherwise attach to
      # the new column.
      use.names = FALSE
    )
  )
#R>    dat tat pct max_plus
#R> 1    A ttt  14       14
#R> 2    B ttt   7       15
#R> 3    C ttt  12       16
#R> 4    A ttt   8       17
#R> 5    B ttt  11       18
#R> 6    C ttt  13       19
#R> 7    A aaa  19       20
#R> 8    B aaa   6       21
#R> 9    C aaa   9       22
#R> 10   A aaa  11       23
#R> 11   B aaa  13       24
#R> 12   C aaa  20       25
#R> 13   A ddd  11       18
#R> 14   B ddd  18       19
#R> 15   C ddd   6       20
#R> 16   A ddd   9       21
#R> 17   B ddd  10       22
#R> 18   C ddd  13       23

按照 onyambu 在评论中给出的做法，可以使用 ave：

# Example data: three `tat` groups of six rows; `dat` (9 values) is recycled
# by data.frame() to match the 18 rows.
dat <- rep(c("A", "B", "C"), times = 3)
tat <- rep(c("ttt", "aaa", "ddd"), each = 6)
pct <- c(14, 7, 12, 8, 11, 13, 19, 6, 9, 11, 13, 20, 11, 18, 6, 9, 10, 13)
datf <- data.frame(dat, tat, pct)

# ave() applies the function to `pct` within each `tat` group and puts the
# results back in the original row positions: group maximum plus the
# zero-based position inside the group.
transform(
  datf,
  max_plus = ave(pct, tat, FUN = function(v) max(v) + seq_along(v) - 1)
)
#R>    dat tat pct max_plus
#R> 1    A ttt  14       14
#R> 2    B ttt   7       15
#R> 3    C ttt  12       16
#R> 4    A ttt   8       17
#R> 5    B ttt  11       18
#R> 6    C ttt  13       19
#R> 7    A aaa  19       20
#R> 8    B aaa   6       21
#R> 9    C aaa   9       22
#R> 10   A aaa  11       23
#R> 11   B aaa  13       24
#R> 12   C aaa  20       25
#R> 13   A ddd  11       18
#R> 14   B ddd  18       19
#R> 15   C ddd   6       20
#R> 16   A ddd   9       21
#R> 17   B ddd  10       22
#R> 18   C ddd  13       23

使用 data.table：

library(data.table)

# Convert `data` to a data.table in place, then add `c5` by reference:
# per `tat` group, `max` plus the zero-based row position within the group.
setDT(data)
data[, c5 := max + (seq_len(.N) - 1), by = tat]
> data
    dat tat pct max c5
 1:   A ttt  14  14 14
 2:   B ttt   7  14 15
 3:   C ttt  12  14 16
 4:   A ttt   8  14 17
 5:   B ttt  11  14 18
 6:   C ttt  13  14 19
 7:   A aaa  19  20 20
 8:   B aaa   6  20 21
 9:   C aaa   9  20 22
10:   A aaa  11  20 23
11:   B aaa  13  20 24
12:   C aaa  20  20 25
13:   A ddd  11  18 18
14:   B ddd  18  18 19
15:   C ddd   6  18 20
16:   A ddd   9  18 21
17:   B ddd  10  18 22
18:   C ddd  13  18 23

使用 ave + seq_along 的 base R 方案：
# Base R: ave() with seq_along numbers the rows 1, 2, ... inside each `tat`
# group; subtracting 1 makes that a zero-based offset, which is added to `max`.
transform(
  data,
  c5 = ave(max, tat, FUN = seq_along) - 1 + max
)
   dat tat pct max c5
1    A ttt  14  14 14
2    B ttt   7  14 15
3    C ttt  12  14 16
4    A ttt   8  14 17
5    B ttt  11  14 18
6    C ttt  13  14 19
7    A aaa  19  20 20
8    B aaa   6  20 21
9    C aaa   9  20 22
10   A aaa  11  20 23
11   B aaa  13  20 24
12   C aaa  20  20 25
13   A ddd  11  18 18
14   B ddd  18  18 19
15   C ddd   6  18 20
16   A ddd   9  18 21
17   B ddd  10  18 22
18   C ddd  13  18 23