当 cumsum 达到一定值时重置它
Reset cumsum as it reaches certain value
我想在向量达到特定值时重置 cumsum
。
例如对于以下向量:
v <- c(3, 5, 2, 5, 3, 4, 5, 3, 1, 4)
预期输出为:
c(0, 0, 10, 0, 0, 22, 0, 30, 0, 0)
使用 reset <- 10
我可以将任务减少到标记完整整数后的第一个值:
res <- cumsum(v)
resd <- res/reset
resd
# [1] 0.3 0.8 1.0 1.5 1.8 2.2 2.7 3.0 3.1 3.5
预期输出是这样的:
c(F, F, T, F, F, T, F, T, F, F) # or
c(0, 0, 1.0, 0, 0, 2.2, 0, 3.0, 0, 0)
我需要一种快速计算其中之一的方法。
v <- c(3, 5, 2, 5, 3, 4, 5, 3, 1, 4)
a <- cumsum(v)
c(a[1] >= 10, a[-1] %/% 10 > a[-length(a)] %/% 10)
输出:
[1] FALSE FALSE TRUE FALSE FALSE TRUE FALSE TRUE FALSE FALSE
我的(改进的)解决方案:
v <- c(3, 5, 2, 5, 3, 4, 5, 3, 1, 4)
res <- cumsum(v)
reset <- 10
resd <- res/reset
res[diff(c(0, floor(resd))) == 0] <- 0
print(res) #gives 0 0 10 0 0 22 0 30 0 0
编辑:现在 v
中的第一个元素可以大于 10。
另一种可能的方法:
v <- c(3, 5, 2, 5, 3, 4, 5, 3, 1, 4)
reset <- 10
s <- cumsum(v)
idx <- as.integer(s / reset)
logic <- idx >= 1 & !duplicated(idx)
> logic
[1] FALSE FALSE TRUE FALSE FALSE TRUE FALSE TRUE FALSE FALSE
# corresponding one-liner
logic <- with(list(idx=as.integer(cumsum(v) / reset)),idx >= 1 & !duplicated(idx))
为了好玩,我还创建了该函数的 Rcpp 版本:
library(Rcpp)
library(inline)
cumsumResetRcpp <- cxxfunction(signature(values='numeric',reset='integer'),
'
Rcpp::IntegerVector r(reset);
int resetVal = r[0];
Rcpp::NumericVector v(values);
int n = v.size();
Rcpp::NumericVector result(n);
double cumsum = 0;
for(int i = 0; i < n; i++){
int prevCumSumFloor = (int)(cumsum / resetVal);
cumsum += v[i];
int currCumSumFloor = (int)(cumsum / resetVal);
if(currCumSumFloor > prevCumSumFloor)
result[i] = cumsum;
}
return( result ) ;
', plugin="Rcpp", verbose=FALSE,includes='')
与我之前版本的比较:
library(microbenchmark)
baseRVersion <- function(v,reset){
a <- cumsum(v)
a[!with(list(idx=as.integer(a / reset)),idx >= 1 & !duplicated(idx))] <- 0
a
}
RcppVersion <- function(v,reset){
cumsumResetRcpp(v,reset)
}
set.seed(1234)
v <- sample(5,1e6,replace=TRUE)
microbenchmark(baseRVersion(v,10), RcppVersion(v,10),times=20)
# Result :
Unit: milliseconds
expr min lq mean median uq max neval
baseRVersion(v, 10) 69.78914 74.34717 91.67828 102.95764 103.6911 105.4055 20
RcppVersion(v, 10) 17.28785 17.58432 18.89449 19.25759 19.8595 20.5627 20
因为我永远无法抗拒...
qaswed <-function(v) {
res <- cumsum(v)
reset <- 10
resd <- res/reset
res[diff(c(0, floor(resd))) == 0] <- 0
}
digemall <-function(v){
reset <- 10
with(list(idx=as.integer(cumsum(v) / reset)),idx >= 1 & !duplicated(idx))
}
colonel <-function(v){
ifelse(c(0, diff(cumsum(v) %/% 10)), cumsum(v), 0)
}
userx <- function(v){
a <- cumsum(v)
c(a[1] >= 10, a[-1] %/% 10 > a[-length(a)] %/% 10)
}
set.seed(5)
v <- sample(5,1e6,replace=TRUE)
microbenchmark(qaswed(v),digemall(v),colonel(v),userx(v),times=10)
Unit: milliseconds
expr min lq mean median uq max neval
qaswed(v) 45.97558 50.29943 86.54772 85.52356 88.60232 200.89699 10
digemall(v) 54.12038 58.85200 67.15433 60.51172 64.40194 99.32623 10
colonel(v) 200.80942 233.56203 254.33662 252.65635 275.16588 306.76971 10
userx(v) 53.87098 56.55786 71.38571 57.98169 92.94224 96.69956 10
这会将所有小于 10 的总和或模除以 10 的值重复为零的总和设置为零:
a <- cumsum(v)
a %/% 10
[1] 0 0 1 1 1 2 2 3 3 3
a[ duplicated(a %/% 10) | a<10 ] <- 0
a
[1] 0 0 10 0 0 22 0 30 0 0
我想在向量达到特定值时重置 cumsum
。
例如对于以下向量:
v <- c(3, 5, 2, 5, 3, 4, 5, 3, 1, 4)
预期输出为:
c(0, 0, 10, 0, 0, 22, 0, 30, 0, 0)
使用 reset <- 10
我可以将任务减少到标记完整整数后的第一个值:
res <- cumsum(v)
resd <- res/reset
resd
# [1] 0.3 0.8 1.0 1.5 1.8 2.2 2.7 3.0 3.1 3.5
预期输出是这样的:
c(F, F, T, F, F, T, F, T, F, F) # or
c(0, 0, 1.0, 0, 0, 2.2, 0, 3.0, 0, 0)
我需要一种快速计算其中之一的方法。
v <- c(3, 5, 2, 5, 3, 4, 5, 3, 1, 4)
a <- cumsum(v)
c(a[1] >= 10, a[-1] %/% 10 > a[-length(a)] %/% 10)
输出:
[1] FALSE FALSE TRUE FALSE FALSE TRUE FALSE TRUE FALSE FALSE
我的(改进的)解决方案:
v <- c(3, 5, 2, 5, 3, 4, 5, 3, 1, 4)
res <- cumsum(v)
reset <- 10
resd <- res/reset
res[diff(c(0, floor(resd))) == 0] <- 0
print(res) #gives 0 0 10 0 0 22 0 30 0 0
编辑:现在 v
中的第一个元素可以大于 10。
另一种可能的方法:
v <- c(3, 5, 2, 5, 3, 4, 5, 3, 1, 4)
reset <- 10
s <- cumsum(v)
idx <- as.integer(s / reset)
logic <- idx >= 1 & !duplicated(idx)
> logic
[1] FALSE FALSE TRUE FALSE FALSE TRUE FALSE TRUE FALSE FALSE
# corresponding one-liner
logic <- with(list(idx=as.integer(cumsum(v) / reset)),idx >= 1 & !duplicated(idx))
为了好玩,我还创建了该函数的 Rcpp 版本:
library(Rcpp)
library(inline)
cumsumResetRcpp <- cxxfunction(signature(values='numeric',reset='integer'),
'
Rcpp::IntegerVector r(reset);
int resetVal = r[0];
Rcpp::NumericVector v(values);
int n = v.size();
Rcpp::NumericVector result(n);
double cumsum = 0;
for(int i = 0; i < n; i++){
int prevCumSumFloor = (int)(cumsum / resetVal);
cumsum += v[i];
int currCumSumFloor = (int)(cumsum / resetVal);
if(currCumSumFloor > prevCumSumFloor)
result[i] = cumsum;
}
return( result ) ;
', plugin="Rcpp", verbose=FALSE,includes='')
与我之前版本的比较:
library(microbenchmark)
baseRVersion <- function(v,reset){
a <- cumsum(v)
a[!with(list(idx=as.integer(a / reset)),idx >= 1 & !duplicated(idx))] <- 0
a
}
RcppVersion <- function(v,reset){
cumsumResetRcpp(v,reset)
}
set.seed(1234)
v <- sample(5,1e6,replace=TRUE)
microbenchmark(baseRVersion(v,10), RcppVersion(v,10),times=20)
# Result :
Unit: milliseconds
expr min lq mean median uq max neval
baseRVersion(v, 10) 69.78914 74.34717 91.67828 102.95764 103.6911 105.4055 20
RcppVersion(v, 10) 17.28785 17.58432 18.89449 19.25759 19.8595 20.5627 20
因为我永远无法抗拒...
qaswed <-function(v) {
res <- cumsum(v)
reset <- 10
resd <- res/reset
res[diff(c(0, floor(resd))) == 0] <- 0
}
digemall <-function(v){
reset <- 10
with(list(idx=as.integer(cumsum(v) / reset)),idx >= 1 & !duplicated(idx))
}
colonel <-function(v){
ifelse(c(0, diff(cumsum(v) %/% 10)), cumsum(v), 0)
}
userx <- function(v){
a <- cumsum(v)
c(a[1] >= 10, a[-1] %/% 10 > a[-length(a)] %/% 10)
}
set.seed(5)
v <- sample(5,1e6,replace=TRUE)
microbenchmark(qaswed(v),digemall(v),colonel(v),userx(v),times=10)
Unit: milliseconds
expr min lq mean median uq max neval
qaswed(v) 45.97558 50.29943 86.54772 85.52356 88.60232 200.89699 10
digemall(v) 54.12038 58.85200 67.15433 60.51172 64.40194 99.32623 10
colonel(v) 200.80942 233.56203 254.33662 252.65635 275.16588 306.76971 10
userx(v) 53.87098 56.55786 71.38571 57.98169 92.94224 96.69956 10
这会将所有小于 10 的总和或模除以 10 的值重复为零的总和设置为零:
a <- cumsum(v)
a %/% 10
[1] 0 0 1 1 1 2 2 3 3 3
a[ duplicated(a %/% 10) | a<10 ] <- 0
a
[1] 0 0 10 0 0 22 0 30 0 0