在 R 中为 surface3d 准备数据。请提供更快速的代码
Preparing data for surface3d in R. More fast code please
我正在尝试从 120,000 行 csv 数据中获取 "surface3d plot",但是我的代码太慢了,处理时间将超过 12 小时。
应该改哪个地方?
( interp() 处理时间也会超过12小时。如果可以的话,我想合并 mk_surface_data() 和 interp() 。可以吗?)
library(rgl)
library(data.table)
library(akima)
fv <- cmpfun(function(vec) {
return(vec[is.finite(vec)])
})
mk_surface_data <- cmpfun(function(mat, mean_range = 2, x_div = 100, y_div = 100,defalut_z = 0){
x <- mat[,"x"]
y <- mat[,"y"]
min_x <- min(fv(x))
max_x <- max(fv(x))
min_y <- min(fv(y))
max_y <- max(fv(y))
sa_x <- max_x - min_x
sa_y <- max_y - min_y
step_x <- sa_x / x_div
step_y <- sa_y / y_div
surface_m <- matrix(nrow=0,ncol=3)
for(x in 0:x_div){
base_x_range <- min_x + (step_x * x)
min_x_range <- base_x_range - (mean_range * step_x)
max_x_range <- base_x_range + (mean_range * step_x)
for(y in 0:y_div){
base_y_range <- min_y + (step_y * y)
min_y_range <- base_y_range - (mean_range * step_y)
max_y_range <- base_y_range + (mean_range * step_y)
all_z <- mat[((min_x_range < mat[,"x"]) & (max_x_range > mat[,"x"]) & (min_y_range < mat[,"y"]) & (max_y_range > mat[,"y"])),c("z")]
if(length(fv(all_z)) > 0){
insert <- c(base_x_range,base_y_range,weighted.mean(all_z,na.rm=T))
}else{
insert <- c(base_x_range,base_y_range,defalut_z)
}
surface_m <- rbind(surface_m,insert)
}
}
colnames(surface_m) <- c("x","y","z")
return(as.matrix(surface_m))
})
# main process
mean_range = 2
x_div = 1000
y_div = 1000
defalut_z = 0
mat <- as.matrix(fread("target_file.csv"))
sdf <- mk_surface_data(mat, mean_range, x_div, y_div,defalut_z)
interpolated <- interp(sdf[,"x"], sdf[,"y"], sdf[,"z"])
plot3d(sdf[,"x"], sdf[,"y"], sdf[,"z"])
surface3d(interpolated$x, interpolated$y, interpolated$z,col="green")
总是在效率方面发出危险信号的一行代码是:
surface_m <- rbind(surface_m,insert)
基本上,您在最内层循环中一次增长矩阵 surface_m
一行,这可能非常低效(有关详细信息,请参见 the R Inferno 的第二个圆圈)。您可以使用以下内容更有效地构造 surface_m
:
surface_m <- t(apply(expand.grid(y=0:y_div, x=0:x_div), 1, function(yx) {
y <- yx[1]
x <- yx[2]
base_x_range <- min_x + (step_x * x)
min_x_range <- base_x_range - (mean_range * step_x)
max_x_range <- base_x_range + (mean_range * step_x)
base_y_range <- min_y + (step_y * y)
min_y_range <- base_y_range - (mean_range * step_y)
max_y_range <- base_y_range + (mean_range * step_y)
all_z <- mat[((min_x_range < mat[,"x"]) & (max_x_range > mat[,"x"]) & (min_y_range < mat[,"y"]) & (max_y_range > mat[,"y"])),c("z")]
if (length(fv(all_z)) > 0){
insert <- c(base_x_range,base_y_range,weighted.mean(all_z,na.rm=T))
} else {
insert <- c(base_x_range,base_y_range,defalut_z)
}
return(insert)
}))
我正在尝试从 120,000 行 csv 数据中获取 "surface3d plot",但是我的代码太慢了,处理时间将超过 12 小时。 应该改哪个地方? ( interp() 处理时间也会超过12小时。如果可以的话,我想合并 mk_surface_data() 和 interp() 。可以吗?)
library(rgl)
library(data.table)
library(akima)
fv <- cmpfun(function(vec) {
return(vec[is.finite(vec)])
})
mk_surface_data <- cmpfun(function(mat, mean_range = 2, x_div = 100, y_div = 100,defalut_z = 0){
x <- mat[,"x"]
y <- mat[,"y"]
min_x <- min(fv(x))
max_x <- max(fv(x))
min_y <- min(fv(y))
max_y <- max(fv(y))
sa_x <- max_x - min_x
sa_y <- max_y - min_y
step_x <- sa_x / x_div
step_y <- sa_y / y_div
surface_m <- matrix(nrow=0,ncol=3)
for(x in 0:x_div){
base_x_range <- min_x + (step_x * x)
min_x_range <- base_x_range - (mean_range * step_x)
max_x_range <- base_x_range + (mean_range * step_x)
for(y in 0:y_div){
base_y_range <- min_y + (step_y * y)
min_y_range <- base_y_range - (mean_range * step_y)
max_y_range <- base_y_range + (mean_range * step_y)
all_z <- mat[((min_x_range < mat[,"x"]) & (max_x_range > mat[,"x"]) & (min_y_range < mat[,"y"]) & (max_y_range > mat[,"y"])),c("z")]
if(length(fv(all_z)) > 0){
insert <- c(base_x_range,base_y_range,weighted.mean(all_z,na.rm=T))
}else{
insert <- c(base_x_range,base_y_range,defalut_z)
}
surface_m <- rbind(surface_m,insert)
}
}
colnames(surface_m) <- c("x","y","z")
return(as.matrix(surface_m))
})
# main process
mean_range = 2
x_div = 1000
y_div = 1000
defalut_z = 0
mat <- as.matrix(fread("target_file.csv"))
sdf <- mk_surface_data(mat, mean_range, x_div, y_div,defalut_z)
interpolated <- interp(sdf[,"x"], sdf[,"y"], sdf[,"z"])
plot3d(sdf[,"x"], sdf[,"y"], sdf[,"z"])
surface3d(interpolated$x, interpolated$y, interpolated$z,col="green")
总是在效率方面发出危险信号的一行代码是:
surface_m <- rbind(surface_m,insert)
基本上,您在最内层循环中一次增长矩阵 surface_m
一行,这可能非常低效(有关详细信息,请参见 the R Inferno 的第二个圆圈)。您可以使用以下内容更有效地构造 surface_m
:
surface_m <- t(apply(expand.grid(y=0:y_div, x=0:x_div), 1, function(yx) {
y <- yx[1]
x <- yx[2]
base_x_range <- min_x + (step_x * x)
min_x_range <- base_x_range - (mean_range * step_x)
max_x_range <- base_x_range + (mean_range * step_x)
base_y_range <- min_y + (step_y * y)
min_y_range <- base_y_range - (mean_range * step_y)
max_y_range <- base_y_range + (mean_range * step_y)
all_z <- mat[((min_x_range < mat[,"x"]) & (max_x_range > mat[,"x"]) & (min_y_range < mat[,"y"]) & (max_y_range > mat[,"y"])),c("z")]
if (length(fv(all_z)) > 0){
insert <- c(base_x_range,base_y_range,weighted.mean(all_z,na.rm=T))
} else {
insert <- c(base_x_range,base_y_range,defalut_z)
}
return(insert)
}))