哪一个(向量 1 < 向量 2)
which(vector1 < vector2)
先做个小例子,用R计算:
x<- c(1,3,1,4,2)
max(which(x<2))
[1] 3
现在,我不仅要对一个值 2 执行此操作,而且要同时对多个值执行此操作。它应该给我这样的东西:
max(which(x<c(1,2,3,4,5,6)))
[1] NA 3 5 5 5 5
当然我可以 运行 一个 for
循环,但是那很慢:
for(i in c(1,2,3,4,5,6)){
test[i]<-max(which(x<i))
}
有快速的方法吗?
你在找这个吗?
y<-1:6
max.col(outer(y,x,">"),ties.method="last")*NA^(y<=min(x))
#[1] NA 3 5 5 5 5
找到在x
中看到的每个值的最大索引:
xvals <- unique(x)
xmaxindx <- length(x) - match(xvals,rev(x)) + 1L
重新排列
xvals <- xvals[order(xmaxindx,decreasing=TRUE)]
xmaxindx <- xmaxindx[order(xmaxindx,decreasing=TRUE)]
# 2 4 1 3
# 5 4 3 2
Select 来自那些:
xmaxindx[vapply(1:6,function(z){
ok <- xvals < z
if(length(ok)) which(ok)[1] else NA_integer_
},integer(1))]
# <NA> 1 2 2 2 2
# NA 3 5 5 5 5
它可以方便地报告值(第一行)和索引(第二行)。
sapply
方法更简单,而且可能不会更慢:
xmaxindx[sapply(1:6,function(z) which(xvals < z)[1])]
基准。 OP 的案例没有得到完整描述,但这里有一些基准:
# setup
nicola <- function() max.col(outer(y,x,">"),ties.method="last")*NA^(y<=min(x))
frank <- function(){
xvals <- unique(x)
xmaxindx <- length(x) - match(xvals,rev(x)) + 1L
xvals <- xvals[order(xmaxindx,decreasing=TRUE)]
xmaxindx <- xmaxindx[order(xmaxindx,decreasing=TRUE)]
xmaxindx[vapply(y,function(z){
ok <- xvals < z
if(length(ok)) which(ok)[1] else NA_integer_
},integer(1))]
}
beauvel <- function()
Vectorize(function(u) ifelse(length(which(x<u))==0,NA,max(which(x<u))))(y)
davida <- function() vapply(y, function(i) c(max(which(x < i)),NA)[1], double(1))
hallo <- function(){
test <- vector("integer",length(y))
for(i in y){
test[i]<-max(which(x<i))
}
test
}
josho <- function(){
xo <- sort(unique(x))
xi <- cummax(1L + length(x) - match(xo, rev(x)))
xi[cut(y, c(xo, Inf))]
}
require(microbenchmark)
(@MrHallo 和@DavidArenburg 以我现在编写的方式发出了一堆警告,但这可以修复。)以下是一些结果:
> x <- sample(1:4,1e6,replace=TRUE)
> y <- 1:6
> microbenchmark(nicola(),frank(),beauvel(),davida(),hallo(),josho(),times=10)
Unit: milliseconds
expr min lq mean median uq max neval
nicola() 76.17992 78.01171 99.75596 98.43919 120.81776 127.63058 10
frank() 25.27245 25.44666 36.41508 28.44055 45.32306 73.66652 10
beauvel() 47.70081 59.47828 67.44918 68.93808 74.12869 95.20936 10
davida() 26.52582 26.55827 33.93855 30.00990 35.55436 57.24119 10
hallo() 26.58186 26.63984 32.68850 28.68163 33.54364 50.49190 10
josho() 25.69634 26.28724 37.95341 30.50828 47.90526 68.30376 10
There were 20 warnings (use warnings() to see them)
>
>
> x <- sample(1:80,1e6,replace=TRUE)
> y <- 1:60
> microbenchmark(nicola(),frank(),beauvel(),davida(),hallo(),josho(),times=10)
Unit: milliseconds
expr min lq mean median uq max neval
nicola() 2341.96795 2395.68816 2446.60612 2481.14602 2496.77128 2504.8117 10
frank() 25.67026 25.81119 42.80353 30.41979 53.19950 123.7467 10
beauvel() 665.26904 686.63822 728.48755 734.04857 753.69499 784.7280 10
davida() 326.79072 359.22803 390.66077 397.50163 420.66266 456.8318 10
hallo() 330.10586 349.40995 380.33538 389.71356 397.76407 443.0808 10
josho() 26.06863 30.76836 35.04775 31.05701 38.84259 57.3946 10
There were 20 warnings (use warnings() to see them)
>
>
> x <- sample(sample(1e5,1e1),1e6,replace=TRUE)
> y <- sample(1e5,1e4)
> microbenchmark(frank(),josho(),times=10)
Unit: milliseconds
expr min lq mean median uq max neval
frank() 69.41371 74.53816 94.41251 89.53743 107.6402 134.01839 10
josho() 35.70584 37.37200 56.42519 54.13120 63.3452 90.42475 10
当然,对于 OP 的真实案例,比较结果可能会有所不同。
您可以使用 Vectorize
:
func = Vectorize(function(u) ifelse(length(which(x<u))==0,NA,max(which(x<u))))
> func(1:6)
#[1] NA 3 5 5 5 5
试试这个:
vapply(1:6, function(i) max(which(x < i)), double(1))
完全矢量化的方法:
x <- c(1,3,1,4,2)
y <- c(1,2,3,4,5,6)
f <- function(x, y) {
xo <- sort(unique(x))
xi <- cummax(1 + length(x) - match(xo, rev(x)))
xi[cut(y, c(xo, Inf))]
}
f(x,y)
# [1] NA 3 5 5 5 5
当 x
和 y
都相对较长并且每个都包含许多不同的值时,完全矢量化的优势才真正开始发挥作用:
x <- sample(1:1e4)
y <- 1:1e4
microbenchmark(nicola(), frank(), beauvel(), davida(), hallo(), josho(),times=5)
Unit: milliseconds
expr min lq mean median uq max neval cld
nicola() 4927.45918 4980.67901 5031.84199 4991.38240 5052.6861 5207.00330 5 d
frank() 513.05769 513.33547 552.29335 517.65783 540.9536 676.46221 5 b
beauvel() 1091.93823 1114.84647 1167.10033 1121.58251 1161.3828 1345.75158 5 c
davida() 562.71123 575.75352 585.83873 590.90048 597.0284 602.80002 5 b
hallo() 559.11618 574.60667 614.62914 624.19570 641.9639 673.26328 5 b
josho() 36.22829 36.57181 37.37892 37.52677 37.6373 38.93044 5 a
先做个小例子,用R计算:
x<- c(1,3,1,4,2)
max(which(x<2))
[1] 3
现在,我不仅要对一个值 2 执行此操作,而且要同时对多个值执行此操作。它应该给我这样的东西:
max(which(x<c(1,2,3,4,5,6)))
[1] NA 3 5 5 5 5
当然我可以 运行 一个 for
循环,但是那很慢:
for(i in c(1,2,3,4,5,6)){
test[i]<-max(which(x<i))
}
有快速的方法吗?
你在找这个吗?
y<-1:6
max.col(outer(y,x,">"),ties.method="last")*NA^(y<=min(x))
#[1] NA 3 5 5 5 5
找到在x
中看到的每个值的最大索引:
xvals <- unique(x)
xmaxindx <- length(x) - match(xvals,rev(x)) + 1L
重新排列
xvals <- xvals[order(xmaxindx,decreasing=TRUE)]
xmaxindx <- xmaxindx[order(xmaxindx,decreasing=TRUE)]
# 2 4 1 3
# 5 4 3 2
Select 来自那些:
xmaxindx[vapply(1:6,function(z){
ok <- xvals < z
if(length(ok)) which(ok)[1] else NA_integer_
},integer(1))]
# <NA> 1 2 2 2 2
# NA 3 5 5 5 5
它可以方便地报告值(第一行)和索引(第二行)。
sapply
方法更简单,而且可能不会更慢:
xmaxindx[sapply(1:6,function(z) which(xvals < z)[1])]
基准。 OP 的案例没有得到完整描述,但这里有一些基准:
# setup
nicola <- function() max.col(outer(y,x,">"),ties.method="last")*NA^(y<=min(x))
frank <- function(){
xvals <- unique(x)
xmaxindx <- length(x) - match(xvals,rev(x)) + 1L
xvals <- xvals[order(xmaxindx,decreasing=TRUE)]
xmaxindx <- xmaxindx[order(xmaxindx,decreasing=TRUE)]
xmaxindx[vapply(y,function(z){
ok <- xvals < z
if(length(ok)) which(ok)[1] else NA_integer_
},integer(1))]
}
beauvel <- function()
Vectorize(function(u) ifelse(length(which(x<u))==0,NA,max(which(x<u))))(y)
davida <- function() vapply(y, function(i) c(max(which(x < i)),NA)[1], double(1))
hallo <- function(){
test <- vector("integer",length(y))
for(i in y){
test[i]<-max(which(x<i))
}
test
}
josho <- function(){
xo <- sort(unique(x))
xi <- cummax(1L + length(x) - match(xo, rev(x)))
xi[cut(y, c(xo, Inf))]
}
require(microbenchmark)
(@MrHallo 和@DavidArenburg 以我现在编写的方式发出了一堆警告,但这可以修复。)以下是一些结果:
> x <- sample(1:4,1e6,replace=TRUE)
> y <- 1:6
> microbenchmark(nicola(),frank(),beauvel(),davida(),hallo(),josho(),times=10)
Unit: milliseconds
expr min lq mean median uq max neval
nicola() 76.17992 78.01171 99.75596 98.43919 120.81776 127.63058 10
frank() 25.27245 25.44666 36.41508 28.44055 45.32306 73.66652 10
beauvel() 47.70081 59.47828 67.44918 68.93808 74.12869 95.20936 10
davida() 26.52582 26.55827 33.93855 30.00990 35.55436 57.24119 10
hallo() 26.58186 26.63984 32.68850 28.68163 33.54364 50.49190 10
josho() 25.69634 26.28724 37.95341 30.50828 47.90526 68.30376 10
There were 20 warnings (use warnings() to see them)
>
>
> x <- sample(1:80,1e6,replace=TRUE)
> y <- 1:60
> microbenchmark(nicola(),frank(),beauvel(),davida(),hallo(),josho(),times=10)
Unit: milliseconds
expr min lq mean median uq max neval
nicola() 2341.96795 2395.68816 2446.60612 2481.14602 2496.77128 2504.8117 10
frank() 25.67026 25.81119 42.80353 30.41979 53.19950 123.7467 10
beauvel() 665.26904 686.63822 728.48755 734.04857 753.69499 784.7280 10
davida() 326.79072 359.22803 390.66077 397.50163 420.66266 456.8318 10
hallo() 330.10586 349.40995 380.33538 389.71356 397.76407 443.0808 10
josho() 26.06863 30.76836 35.04775 31.05701 38.84259 57.3946 10
There were 20 warnings (use warnings() to see them)
>
>
> x <- sample(sample(1e5,1e1),1e6,replace=TRUE)
> y <- sample(1e5,1e4)
> microbenchmark(frank(),josho(),times=10)
Unit: milliseconds
expr min lq mean median uq max neval
frank() 69.41371 74.53816 94.41251 89.53743 107.6402 134.01839 10
josho() 35.70584 37.37200 56.42519 54.13120 63.3452 90.42475 10
当然,对于 OP 的真实案例,比较结果可能会有所不同。
您可以使用 Vectorize
:
func = Vectorize(function(u) ifelse(length(which(x<u))==0,NA,max(which(x<u))))
> func(1:6)
#[1] NA 3 5 5 5 5
试试这个:
vapply(1:6, function(i) max(which(x < i)), double(1))
完全矢量化的方法:
x <- c(1,3,1,4,2)
y <- c(1,2,3,4,5,6)
f <- function(x, y) {
xo <- sort(unique(x))
xi <- cummax(1 + length(x) - match(xo, rev(x)))
xi[cut(y, c(xo, Inf))]
}
f(x,y)
# [1] NA 3 5 5 5 5
当 x
和 y
都相对较长并且每个都包含许多不同的值时,完全矢量化的优势才真正开始发挥作用:
x <- sample(1:1e4)
y <- 1:1e4
microbenchmark(nicola(), frank(), beauvel(), davida(), hallo(), josho(),times=5)
Unit: milliseconds
expr min lq mean median uq max neval cld
nicola() 4927.45918 4980.67901 5031.84199 4991.38240 5052.6861 5207.00330 5 d
frank() 513.05769 513.33547 552.29335 517.65783 540.9536 676.46221 5 b
beauvel() 1091.93823 1114.84647 1167.10033 1121.58251 1161.3828 1345.75158 5 c
davida() 562.71123 575.75352 585.83873 590.90048 597.0284 602.80002 5 b
hallo() 559.11618 574.60667 614.62914 624.19570 641.9639 673.26328 5 b
josho() 36.22829 36.57181 37.37892 37.52677 37.6373 38.93044 5 a