在 R 中,当累计值变为负数时将 cumsum 重置为零

resetting cumsum if value goes to negative in r

# Sample input: a numeric vector of positive, negative and zero values whose
# last element is NA.
ve <- c(17, -9, 9, -17, 17, -17, 11, -9, 16, -18, 17, 0, 0, -18, 17, 0, 0, -17, 14, -14, 17, -2, 0, -15, 9, -9, 17, -16, 16, -17, 17, -17, 17, -17, 17, -17, 17, -8, 7, -16, 17, -14, 14, -10, 10, -16, 16, -10, 10, -12, 12, -11, 11, -17, 17, -17, 17, -9, 8, -17, 17, -17, 17, -16, 16, -17, 17, -8, 8, -9, 9, -17, 17, -17, 17, -13, 13, -10, 7, -10, 13, -16, 17, -13, 13, -13, 13, -9, 8, -17, 17, -10, 9, -17, 17, -17, 17, -16, 16, -10, 10, -15, 15, -14, 14, -14, 15, -13, 13, -9, 9, -13, 13, -12, 12, -10, 9, -11, 12, -8, 7, -10, 10, -9, 9, -11, 11, -9, 9, -7, 7, -12, 11, -11, 12, -11, 11, -14, 14, -13, 13, -10, 10, -13, 13, -17, 17, -7, 7, -17, 17, -17, 17, -14, 14, NA)

# Data frame holding the input next to a `calc` column that starts as all
# zeros (the scalar 0 is recycled to every row).
df <- data.frame(ve = ve, calc = 0)

我需要在 calc 列中计算 cumsum,但当累计值变为负数时,它需要重置为零并重新开始累加。我已经尝试了几种条件,但都没有真正起作用……

另外,这可以用 dplyr 实现吗?我是 dplyr 的新手,每当需要用到前一行的相关值时,我都觉得有点困难。

感谢您的帮助!

结果应该像这样:

     ve calc
1    17    17
2    -9    8
3     9    17
4   -17    0
5    17    17
6   -17    0
7    11    11
8    -9    2
9    16    18
10  -18    0
11   17    17
12    0    17
13    0    17
14  -18    0
15   17    17

请看第 14 行和第 15 行:按普通的 cumsum,它们将是 -1 和 16;但我希望它重置为 0 而不是 -1,然后继续累加,因此下一个值将是 17。

我们可以用 replace 将 NA 值替换为 0,然后使用 cumsum:

library(dplyr)
# Running total of `ve` that is floored at zero and restarts from there.
# NA values are treated as 0, so the total is carried across missing rows.
#
# With `running` the plain cumulative sum, the floored total equals
#   running - pmin(cummin(running), 0)
# i.e. the plain sum lifted by the deepest negative level reached so far.
# Grouping on `cumsum(...) < 0` is not equivalent: once the plain sum has gone
# negative it can stay negative, which would start a new group on every row
# and lose the carried total (c(-5, 3, -2, 4) must give 0, 3, 1, 5).
# Note: for non-integer data the subtraction can leave tiny rounding residue
# where an exact 0 is expected.
df1 <- df %>%
  as_tibble() %>%
  mutate(
    running = cumsum(replace(ve, is.na(ve), 0)),
    calc = running - pmin(cummin(running), 0)
  ) %>%
  select(-running)
head(df1, 15)
# A tibble: 15 x 2
#      ve  calc
#   <dbl> <dbl>
# 1    17    17
# 2    -9     8
# 3     9    17
# 4   -17     0
# 5    17    17
# 6   -17     0
# 7    11    11
# 8    -9     2
# 9    16    18
#10   -18     0
#11    17    17
#12     0    17
#13     0    17
#14   -18     0
#15    17    17

未使用 dplyr,但这应该有效:

# Base R loop: running total of `ve` that never drops below zero.
# Rows with NA are dropped first, so `cumS` has one entry per remaining value.
ve <- as.data.frame(ve)
ve <- na.omit(ve)
ve$cumS <- numeric(nrow(ve))

running <- 0
# seq_len() keeps the loop correct for zero or one rows (2:length(x) would
# count backwards), and starting the total at 0 floors a negative first value
# the same way as every later one.
for (i in seq_len(nrow(ve))) {
  running <- max(running + ve$ve[i], 0)
  ve$cumS[i] <- running
}

这是一个迭代解决方案。我想不出如何在不多次遍历数据的情况下,用向量化或 dplyr 的方式做到这一点,但我相信其他人能想到:

# Single-pass running total of `ve`, floored at zero.
# `ve_csum[i]` is the total after element i; `result` pairs it with the input.
ve_csum <- numeric(length(ve))

current_total <- 0
# seq_along() yields no iterations for an empty vector, where 1:length(ve)
# would iterate over 1 and 0.
for (i in seq_along(ve)) {
  # A missing value contributes nothing: the previous total is carried forward.
  if (!is.na(ve[i])) {
    current_total <- max(current_total + ve[i], 0)
  }
  ve_csum[i] <- current_total
}

result <- data.frame(ve, ve_csum)
# Vectorised running total of `df$ve` that resets to zero when it would go
# negative. Clipping the plain cumsum at 0 is not enough, because the plain
# sum keeps the deficit after a reset (it gives 16 instead of 17 on row 15 of
# the sample). Subtracting the lowest negative level reached so far restores
# the restarted total. An NA in `df$ve` makes the result NA from that row on.
df$calc <- local({
  total <- cumsum(df$ve)
  total - pmin(cummin(total), 0)
})

base R 的 Reduce 和 purrr::accumulate 专为这类场景设计

# Fold over `df$ve`, carrying the running total and flooring it at zero.
# Seeding with init = 0 (and dropping the seed with [-1]) sends the first
# element through the same floor as all the others; without a seed, Reduce()
# returns the first value untouched even when it is negative.
# An NA in `df$ve` makes the total NA from that row onward.
df$calc <- Reduce(
  \(total, x) max(total + x, 0),
  df$ve,
  init = 0,
  accumulate = TRUE
)[-1]
df
#>      ve calc
#> 1    17   17
#> 2    -9    8
#> 3     9   17
#> 4   -17    0
#> 5    17   17
#> 6   -17    0
#> 7    11   11
#> 8    -9    2
#> 9    16   18
#> 10  -18    0
#> 11   17   17
#> 12    0   17
#> 13    0   17
#> 14  -18    0
#> 15   17   17
#> .
#> .
#> .

或者使用 purrr:

library(purrr)
library(dplyr)

# .init = 0 seeds the running total so the first value is floored at zero like
# every later one; accumulate() then returns one extra leading element (the
# seed), which [-1] removes.
df %>%
  mutate(calc = accumulate(ve, \(total, x) max(total + x, 0), .init = 0)[-1])