我正在尝试识别数据框中介于其他两个值之间的相关值
I am trying to identify relevant values in a dataframe that fall between two other values
我正在尝试预测高尔夫球手在练习场击中目标所需的击球次数。我已经建立了一个 monte carlo 模型(请参阅下面的代码),我遇到的问题是当我尝试确定球是否落在桶中时(代码的第 117 行(我创建的地方'score_data' dataframe)), 代码说它总是这样,即使我绘制了圆,但很明显它没有。目前,当我 运行 它的分数数据输出如下:
> score_data
Y_score X_score score
1 0.5 0.5 1
2 0.5 0.5 1
3 0.5 0.5 1
4 0.5 0.5 1
5 0.5 0.5 1
6 0.5 0.5 1
7 0.5 0.5 1
8 0.5 0.5 1
9 0.5 0.5 1
10 0.5 0.5 1
当我希望它更像这个
> score_data
Y_score X_score score
1 0.5 0 0.5
2 0 0 0
3 0.5 0.5 1
4 0 0 0
5 0 0 0
6 0.5 0.5 1
7 0.5 0 0.5
8 0.5 0.5 1
9 0 0 0
10 0.5 0 0.5
提前致谢。
################################################################
#### ####
#### Simulation Set Up ####
#### ####
################################################################
library(dplyr)
model_seed <- 1
shots_taken_Error_y_bucket <- 1
No_of_Runs <- 1000
no_buckets <- 10 # max 10 buckets if more increase the ifelse statements
area_length <- 100
area_width <- area_length
bucket_size <- 5
success_distance <- sqrt(bucket_size /pi)
Error_y <- 15
Error_x <- 12
################################################################
#### ####
#### Creating a Blank Matrix ####
#### ####
################################################################
matrix_row_names <- data.frame(var1 = rep("Bucket ", no_buckets), var2 = seq(1:no_buckets)) # creates the text for each of the row names
matrix_row_names <- paste(matrix_row_names$var1, matrix_row_names$var2, sep = "") # concatenated the dataframe
matrix_rows <- sum(table(matrix_row_names)) # calculates the number of rows required
Output_Matrix <- matrix(NA,
matrix_rows,
ncol = No_of_Runs) # creates the matrix
colnames(Output_Matrix) <- seq(1, No_of_Runs) # adds column names
rownames(Output_Matrix) <- matrix_row_names # adds row names
################################################################
#### ####
#### Creating the density functions ####
#### ####
################################################################
set.seed(1) # set the random number seed
Y_dist_data <- rnorm(1000, mean = 0, sd = Error_y/0.6745) # generate a nornal distribution
Y_dist <- ecdf(Y_dist_data) # created the empritical cdf (Y)
X_dist_data <- rnorm(1000, mean = 0, sd = Error_x/0.6745) # generate a nornal distribution
X_dist <- ecdf(X_dist_data) # created the empritical cdf (Y)
set.seed(model_seed)
################################################################
#### ####
#### Identifying Aim Points ####
#### ####
################################################################
no_buckets_Y <- ceiling(area_length/(4*Error_y))
no_buckets_X <- ceiling(area_width/(8*Error_x))
distance_between_buckets_Y <- round(area_length/(no_buckets_Y+1), 0) # calculates the distance for uniform distribution of buckets along the Y
buckets_Y_loc <- seq(1, no_buckets_Y)*distance_between_buckets_Y # calculates the location of each bucket along the Y
distance_between_buckets_X <- round(area_width/(no_buckets_X+1), 0) # calculates the distance for uniform distribution of buckets along the X
buckets_dips_loc <- seq(1, no_buckets_X)*distance_between_buckets_X # calculates the location of each bucket along the X
buckets_loc <- data.frame(Y = rep(buckets_Y_loc, no_buckets_X)) # repeats the number of Y buckets to account for the number of buckets along the X
buckets_loc$X <- buckets_dips_loc
shots_taken <- shots_taken_Error_y_bucket * no_buckets_Y * no_buckets_X
################################################################
#### ####
#### Running the Simulation ####
#### ####
################################################################
for (i in seq(1, No_of_Runs)) {
################################################################
#### ####
#### Placing the Buckets ####
#### ####
################################################################
target_loc <- data.frame(Y = runif(no_buckets, 0, area_length),
X = runif(no_buckets, 0, area_width))
buckets_rnds <- data.frame(Y = rep(buckets_loc$Y, shots_taken/(no_buckets_Y*no_buckets_X)))
buckets_rnds$X <- rep(buckets_loc$X, shots_taken/(no_buckets_Y*no_buckets_X))
data <- data.frame(Y_rand_no = runif(shots_taken)) # create required number of random numbers between 0 and 1
data$Y_distance <- quantile(Y_dist, probs = data$Y_rand_no) # reads the random number off the cdf
data$X_rand_no <- runif(shots_taken) # create required number of random numbers between 0 and 1
data$X_distance <- quantile(X_dist,
probs = data$X_rand_no) # reads the random number off the cdf
data$Y_distance <- data$Y_distance + buckets_loc$Y
data$X_distance <- data$X_distance + buckets_loc$X
data <- data.frame(min_Y = data$Y_distance - (0.5*success_distance),
impact_Y = data$Y_distance,
max_Y = data$Y_distance + (0.5*success_distance),
min_X = data$X_distance - (0.5*success_distance),
impact_X = data$X_distance,
max_X = data$X_distance + (0.5*success_distance))
score_data <- data.frame(Y_score = rep(0, sum(table(target_loc$Y))),
X_score = rep(0, sum(table(target_loc$X))))
score_data$Y_score <- ifelse(data$min_Y > target_loc[1,1] && data$max_Y < target_loc[1,1], score_data$Y_score, 0.5)
score_data$Y_score <- ifelse(data$min_Y > target_loc[2,1] && data$max_Y < target_loc[2,1], score_data$Y_score, 0.5)
score_data$Y_score <- ifelse(data$min_Y > target_loc[3,1] && data$max_Y < target_loc[3,1], score_data$Y_score, 0.5)
score_data$Y_score <- ifelse(data$min_Y > target_loc[4,1] && data$max_Y < target_loc[4,1], score_data$Y_score, 0.5)
score_data$Y_score <- ifelse(data$min_Y > target_loc[5,1] && data$max_Y < target_loc[5,1], score_data$Y_score, 0.5)
score_data$Y_score <- ifelse(data$min_Y > target_loc[6,1] && data$max_Y < target_loc[6,1], score_data$Y_score, 0.5)
score_data$Y_score <- ifelse(data$min_Y > target_loc[7,1] && data$max_Y < target_loc[7,1], score_data$Y_score, 0.5)
score_data$Y_score <- ifelse(data$min_Y > target_loc[8,1] && data$max_Y < target_loc[8,1], score_data$Y_score, 0.5)
score_data$Y_score <- ifelse(data$min_Y > target_loc[9,1] && data$max_Y < target_loc[9,1], score_data$Y_score, 0.5)
score_data$Y_score <- ifelse(data$min_Y > target_loc[10,1] && data$max_Y < target_loc[10,1], score_data$Y_score, 0.5)
score_data$X_score <- ifelse(data$min_X > target_loc[1,2] && data$max_X < target_loc[1,2], score_data$X_score, 0.5)
score_data$X_score <- ifelse(data$min_X > target_loc[2,2] && data$max_X < target_loc[2,2], score_data$X_score, 0.5)
score_data$X_score <- ifelse(data$min_X > target_loc[3,2] && data$max_X < target_loc[3,2], score_data$X_score, 0.5)
score_data$X_score <- ifelse(data$min_X > target_loc[4,2] && data$max_X < target_loc[4,2], score_data$X_score, 0.5)
score_data$X_score <- ifelse(data$min_X > target_loc[5,2] && data$max_X < target_loc[5,2], score_data$X_score, 0.5)
score_data$X_score <- ifelse(data$min_X > target_loc[6,2] && data$max_X < target_loc[6,2], score_data$X_score, 0.5)
score_data$X_score <- ifelse(data$min_X > target_loc[7,2] && data$max_X < target_loc[7,2], score_data$X_score, 0.5)
score_data$X_score <- ifelse(data$min_X > target_loc[8,2] && data$max_X < target_loc[8,2], score_data$X_score, 0.5)
score_data$X_score <- ifelse(data$min_X > target_loc[9,2] && data$max_X < target_loc[9,2], score_data$X_score, 0.5)
score_data$X_score <- ifelse(data$min_X > target_loc[10,2] && data$max_X < target_loc[10,2], score_data$X_score, 0.5)
score_data$score <- score_data$Y_score + score_data$X_score
Output_Matrix[,i] <- score_data$score # pastes the outcome of the run to the matrix
print(paste(floor(round(i/No_of_Runs, 2)*100), "%", sep = "")) # outputs the process through the simulation 0% - 100%
}
################################################################
#### ####
#### Manipulating the Final Output ####
#### ####
################################################################
output <- as.data.frame(t(Output_Matrix)) # transposes the matrix and converts it to a data frame
output$no_scores <- rowSums(output) # counts the number of scores
print(paste("Average number of scores: ", mean(output$no_scores), # output of the simulation.
", Error_ycentage of scores: ", round((mean(output$no_scores)/shots_taken)*100, 2), "%", sep = ""))
hist(output$no_scores)
ifelse
没有像您预期的那样工作。它是一个向量函数,这意味着如果您将向量值与标量进行比较,它将 return 一个向量:
ifelse(c(0, 1) > 0, 1, 0)
# returns a vector:
# [1] 0 1
所以你的情况发生了奇怪的事情:
data$min_Y
- 长度为 4
target_loc[1,1]
的长度为 1
score_data$Y_score
的长度为 10
确保条件中的所有向量具有相同的长度 N 或 1。
如果 target_loc
和 score_data
应该始终具有相同的大小,请考虑这个而不是复制它 10 次:
score_data$Y_score <- ifelse(data$min_Y[1] > target_loc[,1] && data$max_Y[1] < target_loc[,1], score_data$Y_score, 0.5)
请注意,我只从 data
中获取价值。
另外,这个条件总是returns FALSE
:
data$min_Y[1] > target_loc[,1] && data$max_Y[1] < target_loc[,1]
我正在尝试预测高尔夫球手在练习场击中目标所需的击球次数。我已经建立了一个 monte carlo 模型(请参阅下面的代码),我遇到的问题是当我尝试确定球是否落在桶中时(代码的第 117 行(我创建的地方'score_data' dataframe)), 代码说它总是这样,即使我绘制了圆,但很明显它没有。目前,当我 运行 它的分数数据输出如下:
> score_data
Y_score X_score score
1 0.5 0.5 1
2 0.5 0.5 1
3 0.5 0.5 1
4 0.5 0.5 1
5 0.5 0.5 1
6 0.5 0.5 1
7 0.5 0.5 1
8 0.5 0.5 1
9 0.5 0.5 1
10 0.5 0.5 1
当我希望它更像这个
> score_data
Y_score X_score score
1 0.5 0 0.5
2 0 0 0
3 0.5 0.5 1
4 0 0 0
5 0 0 0
6 0.5 0.5 1
7 0.5 0 0.5
8 0.5 0.5 1
9 0 0 0
10 0.5 0 0.5
提前致谢。
################################################################
#### ####
#### Simulation Set Up ####
#### ####
################################################################
library(dplyr)
model_seed <- 1
shots_taken_Error_y_bucket <- 1
No_of_Runs <- 1000
no_buckets <- 10 # max 10 buckets if more increase the ifelse statements
area_length <- 100
area_width <- area_length
bucket_size <- 5
success_distance <- sqrt(bucket_size /pi)
Error_y <- 15
Error_x <- 12
################################################################
#### ####
#### Creating a Blank Matrix ####
#### ####
################################################################
matrix_row_names <- data.frame(var1 = rep("Bucket ", no_buckets), var2 = seq(1:no_buckets)) # creates the text for each of the row names
matrix_row_names <- paste(matrix_row_names$var1, matrix_row_names$var2, sep = "") # concatenated the dataframe
matrix_rows <- sum(table(matrix_row_names)) # calculates the number of rows required
Output_Matrix <- matrix(NA,
matrix_rows,
ncol = No_of_Runs) # creates the matrix
colnames(Output_Matrix) <- seq(1, No_of_Runs) # adds column names
rownames(Output_Matrix) <- matrix_row_names # adds row names
################################################################
#### ####
#### Creating the density functions ####
#### ####
################################################################
set.seed(1) # set the random number seed
Y_dist_data <- rnorm(1000, mean = 0, sd = Error_y/0.6745) # generate a nornal distribution
Y_dist <- ecdf(Y_dist_data) # created the empritical cdf (Y)
X_dist_data <- rnorm(1000, mean = 0, sd = Error_x/0.6745) # generate a nornal distribution
X_dist <- ecdf(X_dist_data) # created the empritical cdf (Y)
set.seed(model_seed)
################################################################
#### ####
#### Identifying Aim Points ####
#### ####
################################################################
no_buckets_Y <- ceiling(area_length/(4*Error_y))
no_buckets_X <- ceiling(area_width/(8*Error_x))
distance_between_buckets_Y <- round(area_length/(no_buckets_Y+1), 0) # calculates the distance for uniform distribution of buckets along the Y
buckets_Y_loc <- seq(1, no_buckets_Y)*distance_between_buckets_Y # calculates the location of each bucket along the Y
distance_between_buckets_X <- round(area_width/(no_buckets_X+1), 0) # calculates the distance for uniform distribution of buckets along the X
buckets_dips_loc <- seq(1, no_buckets_X)*distance_between_buckets_X # calculates the location of each bucket along the X
buckets_loc <- data.frame(Y = rep(buckets_Y_loc, no_buckets_X)) # repeats the number of Y buckets to account for the number of buckets along the X
buckets_loc$X <- buckets_dips_loc
shots_taken <- shots_taken_Error_y_bucket * no_buckets_Y * no_buckets_X
################################################################
#### ####
#### Running the Simulation ####
#### ####
################################################################
for (i in seq(1, No_of_Runs)) {
################################################################
#### ####
#### Placing the Buckets ####
#### ####
################################################################
target_loc <- data.frame(Y = runif(no_buckets, 0, area_length),
X = runif(no_buckets, 0, area_width))
buckets_rnds <- data.frame(Y = rep(buckets_loc$Y, shots_taken/(no_buckets_Y*no_buckets_X)))
buckets_rnds$X <- rep(buckets_loc$X, shots_taken/(no_buckets_Y*no_buckets_X))
data <- data.frame(Y_rand_no = runif(shots_taken)) # create required number of random numbers between 0 and 1
data$Y_distance <- quantile(Y_dist, probs = data$Y_rand_no) # reads the random number off the cdf
data$X_rand_no <- runif(shots_taken) # create required number of random numbers between 0 and 1
data$X_distance <- quantile(X_dist,
probs = data$X_rand_no) # reads the random number off the cdf
data$Y_distance <- data$Y_distance + buckets_loc$Y
data$X_distance <- data$X_distance + buckets_loc$X
data <- data.frame(min_Y = data$Y_distance - (0.5*success_distance),
impact_Y = data$Y_distance,
max_Y = data$Y_distance + (0.5*success_distance),
min_X = data$X_distance - (0.5*success_distance),
impact_X = data$X_distance,
max_X = data$X_distance + (0.5*success_distance))
score_data <- data.frame(Y_score = rep(0, sum(table(target_loc$Y))),
X_score = rep(0, sum(table(target_loc$X))))
score_data$Y_score <- ifelse(data$min_Y > target_loc[1,1] && data$max_Y < target_loc[1,1], score_data$Y_score, 0.5)
score_data$Y_score <- ifelse(data$min_Y > target_loc[2,1] && data$max_Y < target_loc[2,1], score_data$Y_score, 0.5)
score_data$Y_score <- ifelse(data$min_Y > target_loc[3,1] && data$max_Y < target_loc[3,1], score_data$Y_score, 0.5)
score_data$Y_score <- ifelse(data$min_Y > target_loc[4,1] && data$max_Y < target_loc[4,1], score_data$Y_score, 0.5)
score_data$Y_score <- ifelse(data$min_Y > target_loc[5,1] && data$max_Y < target_loc[5,1], score_data$Y_score, 0.5)
score_data$Y_score <- ifelse(data$min_Y > target_loc[6,1] && data$max_Y < target_loc[6,1], score_data$Y_score, 0.5)
score_data$Y_score <- ifelse(data$min_Y > target_loc[7,1] && data$max_Y < target_loc[7,1], score_data$Y_score, 0.5)
score_data$Y_score <- ifelse(data$min_Y > target_loc[8,1] && data$max_Y < target_loc[8,1], score_data$Y_score, 0.5)
score_data$Y_score <- ifelse(data$min_Y > target_loc[9,1] && data$max_Y < target_loc[9,1], score_data$Y_score, 0.5)
score_data$Y_score <- ifelse(data$min_Y > target_loc[10,1] && data$max_Y < target_loc[10,1], score_data$Y_score, 0.5)
score_data$X_score <- ifelse(data$min_X > target_loc[1,2] && data$max_X < target_loc[1,2], score_data$X_score, 0.5)
score_data$X_score <- ifelse(data$min_X > target_loc[2,2] && data$max_X < target_loc[2,2], score_data$X_score, 0.5)
score_data$X_score <- ifelse(data$min_X > target_loc[3,2] && data$max_X < target_loc[3,2], score_data$X_score, 0.5)
score_data$X_score <- ifelse(data$min_X > target_loc[4,2] && data$max_X < target_loc[4,2], score_data$X_score, 0.5)
score_data$X_score <- ifelse(data$min_X > target_loc[5,2] && data$max_X < target_loc[5,2], score_data$X_score, 0.5)
score_data$X_score <- ifelse(data$min_X > target_loc[6,2] && data$max_X < target_loc[6,2], score_data$X_score, 0.5)
score_data$X_score <- ifelse(data$min_X > target_loc[7,2] && data$max_X < target_loc[7,2], score_data$X_score, 0.5)
score_data$X_score <- ifelse(data$min_X > target_loc[8,2] && data$max_X < target_loc[8,2], score_data$X_score, 0.5)
score_data$X_score <- ifelse(data$min_X > target_loc[9,2] && data$max_X < target_loc[9,2], score_data$X_score, 0.5)
score_data$X_score <- ifelse(data$min_X > target_loc[10,2] && data$max_X < target_loc[10,2], score_data$X_score, 0.5)
score_data$score <- score_data$Y_score + score_data$X_score
Output_Matrix[,i] <- score_data$score # pastes the outcome of the run to the matrix
print(paste(floor(round(i/No_of_Runs, 2)*100), "%", sep = "")) # outputs the process through the simulation 0% - 100%
}
################################################################
#### ####
#### Manipulating the Final Output ####
#### ####
################################################################
output <- as.data.frame(t(Output_Matrix)) # transposes the matrix and converts it to a data frame
output$no_scores <- rowSums(output) # counts the number of scores
print(paste("Average number of scores: ", mean(output$no_scores), # output of the simulation.
", Error_ycentage of scores: ", round((mean(output$no_scores)/shots_taken)*100, 2), "%", sep = ""))
hist(output$no_scores)
ifelse
没有像您预期的那样工作。它是一个向量函数,这意味着如果您将向量值与标量进行比较,它将 return 一个向量:
ifelse(c(0, 1) > 0, 1, 0)
# returns a vector:
# [1] 0 1
所以你的情况发生了奇怪的事情:
data$min_Y
- 长度为 4target_loc[1,1]
的长度为 1score_data$Y_score
的长度为 10
确保条件中的所有向量具有相同的长度 N 或 1。
如果 target_loc
和 score_data
应该始终具有相同的大小,请考虑这个而不是复制它 10 次:
score_data$Y_score <- ifelse(data$min_Y[1] > target_loc[,1] && data$max_Y[1] < target_loc[,1], score_data$Y_score, 0.5)
请注意,我只从 data
中获取价值。
另外,这个条件总是returns FALSE
:
data$min_Y[1] > target_loc[,1] && data$max_Y[1] < target_loc[,1]