计算系列中的数字
Counting the number in series
假设我有向量 z
z<-c(6,7,10,11,12,13,17,20,31,32,33,40,56,57,58,59)
我想要一些输出,只要有连续的系列,就会将值存储为系列的计数,否则如果没有系列,则简单地计算该数字
[1] 2 4 1 1 3 1 4
这里
2 表示 6,7
4 代表 10,11,12,13
1 对 17
1 对 20
3 代表 31,32,33
1 对 40
4 代表 56,57,58,59
希望你理解这个问题。我在 Whosebug 上看到过一个例子,其中可以计算出类似数字的总数没有出现。问题中的 1 和 2,但没有找到与此类似的内容。请提供解决方案
Akrun 和 bgoltst 为我的 post 提供了准确的答案。
现在我想在不使用 loops 预构建函数的情况下解决它。伙计们有什么想法吗?
尝试
unname(tapply(z, cumsum(c(TRUE, diff(z)!=1)), FUN= length))
#[1] 2 4 1 1 3 1 4
或者一个紧凑的版本是
lengths(split(z, cumsum(c(0,diff(z)!=1))))
或者我们可以做
tabulate(cumsum(c(TRUE,z[-1]-z[-length(z)] !=1)))
rle(c(0,cumsum(diff(z)!=1)))$lengths;
## [1] 2 4 1 1 3 1 4
基准测试
library(microbenchmark);
bgoldst <- function(z) rle(c(0,cumsum(diff(z)!=1)))$lengths;
akrun1 <- function(z) unname(tapply(z,cumsum(c(TRUE,diff(z)!=1)),length));
akrun2 <- function(z) unname(lengths(split(z, cumsum(c(0,diff(z)!=1)))));
akrun3 <- function(z) tabulate(cumsum(c(TRUE,z[-1L]-z[-length(z)]!=1)));
loop <- function(z) { res <- integer(); if (length(z)==0L) return(res); ri <- 1L; res[ri] <- 1L; for (zi in seq(2L,len=length(z)-1L)) if (z[zi]==z[zi-1L]+1L) res[ri] <- res[ri]+1L else { ri <- ri+1L; res[ri] <- 1L; }; res; };
expected <- c(2L,4L,1L,1L,3L,1L,4L);
identical(expected,bgoldst(z));
## [1] TRUE
identical(expected,structure(akrun1(z),dim=NULL));
## [1] TRUE
identical(expected,akrun2(z));
## [1] TRUE
identical(expected,akrun3(z));
## [1] TRUE
identical(expected,loop(z));
## [1] TRUE
microbenchmark(bgoldst(z),akrun1(z),akrun2(z),akrun3(z),loop(z));
## Unit: microseconds
## expr min lq mean median uq max neval
## bgoldst(z) 29.081 35.9240 41.52996 40.2000 43.6215 112.901 100
## akrun1(z) 139.416 152.2450 163.97971 161.6535 169.7790 301.068 100
## akrun2(z) 94.940 103.0640 110.23655 107.7685 116.3220 168.924 100
## akrun3(z) 4.277 6.4150 7.37772 7.6980 8.1260 18.817 100
## loop(z) 42.338 50.4635 58.54198 54.7400 64.3625 136.422 100
规模测试:
set.seed(1L);
N <- 1e5L; z <- sort(sample(seq(1L,N*3L),N));
expected <- bgoldst(z);
identical(expected,structure(akrun1(z),dim=NULL));
## [1] TRUE
identical(expected,akrun2(z));
## [1] TRUE
identical(expected,akrun3(z));
## [1] TRUE
identical(expected,loop(z));
## [1] TRUE
microbenchmark(bgoldst(z),akrun1(z),akrun2(z),akrun3(z),loop(z),times=10L);
## Unit: milliseconds
## expr min lq mean median uq max neval
## bgoldst(z) 7.260254 8.391395 11.045106 9.945911 11.704845 17.99818 10
## akrun1(z) 193.259087 198.464899 215.520530 203.924951 220.394912 300.59812 10
## akrun2(z) 217.264925 228.902627 263.216340 250.776189 285.028577 400.09614 10
## akrun3(z) 2.322153 3.299338 6.076256 3.928843 7.569875 18.12305 10
## loop(z) 2392.752491 2463.121401 2545.236552 2494.690056 2527.570319 2882.29511 10
假设我有向量 z
z<-c(6,7,10,11,12,13,17,20,31,32,33,40,56,57,58,59)
我想要一些输出,只要有连续的系列,就会将值存储为系列的计数,否则如果没有系列,则简单地计算该数字
[1] 2 4 1 1 3 1 4
这里
2 表示 6,7
4 代表 10,11,12,13
1 对 17
1 对 20
3 代表 31,32,33
1 对 40
4 代表 56,57,58,59
希望你理解这个问题。我在 Whosebug 上看到过一个例子,其中可以计算出类似数字的总数没有出现。问题中的 1 和 2,但没有找到与此类似的内容。请提供解决方案
Akrun 和 bgoltst 为我的 post 提供了准确的答案。 现在我想在不使用 loops 预构建函数的情况下解决它。伙计们有什么想法吗?
尝试
unname(tapply(z, cumsum(c(TRUE, diff(z)!=1)), FUN= length))
#[1] 2 4 1 1 3 1 4
或者一个紧凑的版本是
lengths(split(z, cumsum(c(0,diff(z)!=1))))
或者我们可以做
tabulate(cumsum(c(TRUE,z[-1]-z[-length(z)] !=1)))
rle(c(0,cumsum(diff(z)!=1)))$lengths;
## [1] 2 4 1 1 3 1 4
基准测试
library(microbenchmark);
bgoldst <- function(z) rle(c(0,cumsum(diff(z)!=1)))$lengths;
akrun1 <- function(z) unname(tapply(z,cumsum(c(TRUE,diff(z)!=1)),length));
akrun2 <- function(z) unname(lengths(split(z, cumsum(c(0,diff(z)!=1)))));
akrun3 <- function(z) tabulate(cumsum(c(TRUE,z[-1L]-z[-length(z)]!=1)));
loop <- function(z) { res <- integer(); if (length(z)==0L) return(res); ri <- 1L; res[ri] <- 1L; for (zi in seq(2L,len=length(z)-1L)) if (z[zi]==z[zi-1L]+1L) res[ri] <- res[ri]+1L else { ri <- ri+1L; res[ri] <- 1L; }; res; };
expected <- c(2L,4L,1L,1L,3L,1L,4L);
identical(expected,bgoldst(z));
## [1] TRUE
identical(expected,structure(akrun1(z),dim=NULL));
## [1] TRUE
identical(expected,akrun2(z));
## [1] TRUE
identical(expected,akrun3(z));
## [1] TRUE
identical(expected,loop(z));
## [1] TRUE
microbenchmark(bgoldst(z),akrun1(z),akrun2(z),akrun3(z),loop(z));
## Unit: microseconds
## expr min lq mean median uq max neval
## bgoldst(z) 29.081 35.9240 41.52996 40.2000 43.6215 112.901 100
## akrun1(z) 139.416 152.2450 163.97971 161.6535 169.7790 301.068 100
## akrun2(z) 94.940 103.0640 110.23655 107.7685 116.3220 168.924 100
## akrun3(z) 4.277 6.4150 7.37772 7.6980 8.1260 18.817 100
## loop(z) 42.338 50.4635 58.54198 54.7400 64.3625 136.422 100
规模测试:
set.seed(1L);
N <- 1e5L; z <- sort(sample(seq(1L,N*3L),N));
expected <- bgoldst(z);
identical(expected,structure(akrun1(z),dim=NULL));
## [1] TRUE
identical(expected,akrun2(z));
## [1] TRUE
identical(expected,akrun3(z));
## [1] TRUE
identical(expected,loop(z));
## [1] TRUE
microbenchmark(bgoldst(z),akrun1(z),akrun2(z),akrun3(z),loop(z),times=10L);
## Unit: milliseconds
## expr min lq mean median uq max neval
## bgoldst(z) 7.260254 8.391395 11.045106 9.945911 11.704845 17.99818 10
## akrun1(z) 193.259087 198.464899 215.520530 203.924951 220.394912 300.59812 10
## akrun2(z) 217.264925 228.902627 263.216340 250.776189 285.028577 400.09614 10
## akrun3(z) 2.322153 3.299338 6.076256 3.928843 7.569875 18.12305 10
## loop(z) 2392.752491 2463.121401 2545.236552 2494.690056 2527.570319 2882.29511 10