计算 R 中每一行的所有时间序列列之间的减少

Calculating the reduction between all time-series columns for each row in R

我尝试创建一个 data.frame,它显示 R 中每一行的所有时间序列列之间的减少。我只想捕获负差异(减少)并将正差异设置为零。

为了说明这一点,我创建了 df1 作为起点,并尝试实现 df2

library(data.table)
df1 = data.table(
  ID = c("a1", "a2", "a3", "a4", "a5", "a6", "a7"),
  "string1" = c("x2", "g3", "n2", "m3", "2w", "ps2", "kg2"),
  "2018" = c(3,5,11,3,9,22,6),
  "2019" = c(3,5,6,21,1,4,0),
  "2020" = c(0,4,13,9,16,7,9),
  "2021" = c(4,0,3,8,5,4,6),
  "string2" = c("si", "q2", "oq", "mx", "ix", "p2", "2q"))

   ID string1 2018 2019 2020 2021 string2
1: a1      x2    3    3    0    4      si
2: a2      g3    5    5    4    0      q2
3: a3      n2   11    6   13    3      oq
4: a4      m3    3   21    9    8      mx
5: a5      2w    9    1   16    5      ix
6: a6     ps2   22    4    7    4      p2
7: a7     kg2    6    0    9    6      2q
df2 = data.table(
  ID = c("a1", "a2", "a3", "a4", "a5", "a6", "a7"),
  "string1" = c("x2", "g3", "n2", "m3", "2w", "ps2", "kg2"),
  "2018" = c(0,0,0,0,0,0,0),
  "2019" = c(0,0,5,0,8,18,6),
  "2020" = c(3,1,0,12,0,0,0),
  "2021" = c(0,4,10,1,11,3,3),
  "string2" = c("si", "q2", "oq", "mx", "ix", "p2", "2q"))

   ID string1 2018 2019 2020 2021 string2
1: a1      x2    0    0    3    0      si
2: a2      g3    0    0    1    4      q2
3: a3      n2    0    5    0   10      oq
4: a4      m3    0    0   12    1      mx
5: a5      2w    0    8    0   11      ix
6: a6     ps2    0   18    0    3      p2
7: a7     kg2    0    6    0    3      2q

有没有比使用 for 循环迭代更有效的方法?

非常感谢!

df1.melt <- melt(df1, measure.vars = patterns("^[0-9]{4}$"), 
                 variable.factor = FALSE, variable.name = "year")
setkey(df1.melt, ID, year)
df1.melt[, val.diff := value - shift(value, type = "lag", fill = value[1]), by = .(ID)]
df1.melt[val.diff > 0, val.diff := 0]
df1.melt[val.diff < 0, val.diff := abs(val.diff)]
dcast(df1.melt, ID + string1 + string2 ~ year, value.var = "val.diff")
#    ID string1 string2 2018 2019 2020 2021
# 1: a1      x2      si    0    0    3    0
# 2: a2      g3      q2    0    0    1    4
# 3: a3      n2      oq    0    5    0   10
# 4: a4      m3      mx    0    0   12    1
# 5: a5      2w      ix    0    8    0   11
# 6: a6     ps2      p2    0   18    0    3
# 7: a7     kg2      2q    0    6    0    3
library(tidyr)
library(dplyr)

spec <- 
  df1 %>% 
  build_longer_spec(matches("\d{4}"),
                    names_to = "year", values_to = "value")

df1 %>% 
  pivot_longer_spec(spec) %>% 
  group_by(ID) %>% 
  arrange(ID, year) %>% 
  mutate(value = {
    diff = c(0, - diff(value))
    diff * (diff > 0)
  }) %>% 
  pivot_wider_spec(spec)

#> # A tibble: 7 x 7
#> # Groups:   ID [7]
#>   ID    string1 string2 `2018` `2019` `2020` `2021`
#>   <chr> <chr>   <chr>    <dbl>  <dbl>  <dbl>  <dbl>
#> 1 a1    x2      si           0      0      3      0
#> 2 a2    g3      q2           0      0      1      4
#> 3 a3    n2      oq           0      5      0     10
#> 4 a4    m3      mx           0      0     12      1
#> 5 a5    2w      ix           0      8      0     11
#> 6 a6    ps2     p2           0     18      0      3
#> 7 a7    kg2     2q           0      6      0      3

data.table 的更短解决方案:

df1[
    , 
    c(4:6) := df1[, 4:6] - df1[, 3:5]
  ][
    ,
    c(3:6) := lapply(.SD, \(x) { ifelse(x > 0, 0, abs(x)) })
    ,
    .SDcols = c(3:6)
  ][]

#    ID string1 2018 2019 2020 2021 string2
# 1: a1      x2    0    0    3    0      si
# 2: a2      g3    0    0    1    4      q2
# 3: a3      n2    0    5    0   10      oq
# 4: a4      m3    0    0   12    1      mx
# 5: a5      2w    0    8    0   11      ix
# 6: a6     ps2    0   18    0    3      p2
# 7: a7     kg2    0    6    0    3      2q