避免在 R 中使用多个 for 循环来计算矩阵
Avoiding multiple for-loops in R to calculate a matrix
所以在生成一些假数据来回答地图问题的过程中,我发现自己写了以下内容:
# Generate some fake data
lat <- seq(-90, 90, by = 5)
lon <- seq(-180, 180, by = 10)
phi <- matrix(0, nrow = length(lat), ncol = length(lon))
i <- 1
for (l1 in lat) {
j <- 1
for (l2 in lon) {
phi[i, j] <- (sin(pi * l1 / 180) * cos(pi * l2 / 180))^2
j <- j+1
}
i <- i+1
}
phi <- 1500*phi + 4500 # scale it properly
现在显然这两个中央 for 循环并不像我想要的那样 R'ish。看起来我应该能够得到一个 mapply
或其他东西来完成这项工作,但遗憾的是 returns 一个列表,并没有真正做我想要的。另一个申请似乎也没有做正确的事情。
我在这里错过了什么?
你可以使用outer
x = outer(lat, lon, FUN = function(x,y) {(sin(pi * x/180) * cos(pi * y /180))^2})
identical(x * 1500 + 4500, phi)
# [1] TRUE
NBATrends 的答案似乎比其他解决方案更快。这里有一些基准
library(microbenchmark)
microbenchmark(within(df, {
phi <- (sin(pi * lat / 180) * cos(pi * lon / 180))^2
phi <- 1500*phi + 4500
}), 1500 * tcrossprod(sin(pi * lat / 180), cos(pi * lon / 180))^2 + 4500, outer(lat, lon, FUN = function(x,y) {(sin(pi * x/180) * cos(pi * y /180))^2}),
((as.matrix(l1)%*%t(as.matrix(l2)))^2) * 1500 + 4500)
Unit: microseconds
expr min lq mean median uq max neval
within(df, { phi <- (sin(pi * lat/180) * cos(pi * lon/180))^2 phi <- 1500 * phi + 4500 }) 255.670 262.0095 270.50948 266.6880 277.7060 385.467 100
1500 * tcrossprod(sin(pi * lat/180), cos(pi * lon/180))^2 + 4500 11.471 12.3770 22.30177 12.9805 13.5850 868.130 100
outer(lat, lon, FUN = function(x, y) { (sin(pi * x/180) * cos(pi * y/180))^2 }) 137.645 139.7590 144.39520 141.5700 145.1925 179.905 100
((as.matrix(l1) %*% t(as.matrix(l2)))^2) * 1500 + 4500 16.301 17.6595 20.20390 19.6215 20.5270 80.294 100
为什么要附加到矩阵结构并在可以向量化时使用应用?
df <- expand.grid(lat = seq(-90, 90, by = 5),
lon = seq(-180, 180, by = 10))
df <- within(df, {
phi <- (sin(pi * lat / 180) * cos(pi * lon / 180))^2
phi <- 1500*phi + 4500
})
您始终可以使用指令 here 转换回来。
你应该尝试使用矩阵代数。无需使用 apply 系列的任何功能:
lat <- seq(-90, 90, by = 5)
lon <- seq(-180, 180, by = 10)
1500 * tcrossprod(sin(pi * lat / 180), cos(pi * lon / 180))^2 + 4500
线性代数对于您的应用程序来说可能更简单,因为您只是将两个向量逐元素相乘,这可以通过 v * u^T 来完成。在R中,矩阵乘法是%*%
。
lat <- seq(-90, 90, by = 5)
lon <- seq(-180, 180, by = 10)
l1 <- sin(pi * lat / 180)
l2 <- s(pi * lon/ 180)
# compute the matrix
phi <- as.matrix(l1)%*%t(as.matrix(l2))
# square each element of the matrix
phi <- phi^2
# scale properly
# square each element of the matrix
phi <- 1500*phi + 4500
使用sapply()
,但我更喜欢outer()
解决方案:
#using sapply
phi_1 <-
t(
sapply(lat, function(l1)
sapply(lon, function(l2)(sin(pi * l1 / 180) * cos(pi * l2 / 180))^2))
) * 1500 + 4500
#compare result
identical(phi_1, phi)
# [1] TRUE
所以在生成一些假数据来回答地图问题的过程中,我发现自己写了以下内容:
# Generate some fake data
lat <- seq(-90, 90, by = 5)
lon <- seq(-180, 180, by = 10)
phi <- matrix(0, nrow = length(lat), ncol = length(lon))
i <- 1
for (l1 in lat) {
j <- 1
for (l2 in lon) {
phi[i, j] <- (sin(pi * l1 / 180) * cos(pi * l2 / 180))^2
j <- j+1
}
i <- i+1
}
phi <- 1500*phi + 4500 # scale it properly
现在显然这两个中央 for 循环并不像我想要的那样 R'ish。看起来我应该能够得到一个 mapply
或其他东西来完成这项工作,但遗憾的是 returns 一个列表,并没有真正做我想要的。另一个申请似乎也没有做正确的事情。
我在这里错过了什么?
你可以使用outer
x = outer(lat, lon, FUN = function(x,y) {(sin(pi * x/180) * cos(pi * y /180))^2})
identical(x * 1500 + 4500, phi)
# [1] TRUE
NBATrends 的答案似乎比其他解决方案更快。这里有一些基准
library(microbenchmark)
microbenchmark(within(df, {
phi <- (sin(pi * lat / 180) * cos(pi * lon / 180))^2
phi <- 1500*phi + 4500
}), 1500 * tcrossprod(sin(pi * lat / 180), cos(pi * lon / 180))^2 + 4500, outer(lat, lon, FUN = function(x,y) {(sin(pi * x/180) * cos(pi * y /180))^2}),
((as.matrix(l1)%*%t(as.matrix(l2)))^2) * 1500 + 4500)
Unit: microseconds
expr min lq mean median uq max neval
within(df, { phi <- (sin(pi * lat/180) * cos(pi * lon/180))^2 phi <- 1500 * phi + 4500 }) 255.670 262.0095 270.50948 266.6880 277.7060 385.467 100
1500 * tcrossprod(sin(pi * lat/180), cos(pi * lon/180))^2 + 4500 11.471 12.3770 22.30177 12.9805 13.5850 868.130 100
outer(lat, lon, FUN = function(x, y) { (sin(pi * x/180) * cos(pi * y/180))^2 }) 137.645 139.7590 144.39520 141.5700 145.1925 179.905 100
((as.matrix(l1) %*% t(as.matrix(l2)))^2) * 1500 + 4500 16.301 17.6595 20.20390 19.6215 20.5270 80.294 100
为什么要附加到矩阵结构并在可以向量化时使用应用?
df <- expand.grid(lat = seq(-90, 90, by = 5),
lon = seq(-180, 180, by = 10))
df <- within(df, {
phi <- (sin(pi * lat / 180) * cos(pi * lon / 180))^2
phi <- 1500*phi + 4500
})
您始终可以使用指令 here 转换回来。
你应该尝试使用矩阵代数。无需使用 apply 系列的任何功能:
lat <- seq(-90, 90, by = 5)
lon <- seq(-180, 180, by = 10)
1500 * tcrossprod(sin(pi * lat / 180), cos(pi * lon / 180))^2 + 4500
线性代数对于您的应用程序来说可能更简单,因为您只是将两个向量逐元素相乘,这可以通过 v * u^T 来完成。在R中,矩阵乘法是%*%
。
lat <- seq(-90, 90, by = 5)
lon <- seq(-180, 180, by = 10)
l1 <- sin(pi * lat / 180)
l2 <- s(pi * lon/ 180)
# compute the matrix
phi <- as.matrix(l1)%*%t(as.matrix(l2))
# square each element of the matrix
phi <- phi^2
# scale properly
# square each element of the matrix
phi <- 1500*phi + 4500
使用sapply()
,但我更喜欢outer()
解决方案:
#using sapply
phi_1 <-
t(
sapply(lat, function(l1)
sapply(lon, function(l2)(sin(pi * l1 / 180) * cos(pi * l2 / 180))^2))
) * 1500 + 4500
#compare result
identical(phi_1, phi)
# [1] TRUE