ELO评级算法中的for循环错误
Error in for loop in ELO rating algorithm
我正在尝试 运行 来自此处的 ELO 评级代码
https://github.com/xsankar/hairy-octo-hipster/blob/master/ELO-538.R
我使用的数据集在这里
https://www.mediafire.com/file/0a5v393ki775vw9/sport2.xlsx/file
我 运行ning 的代码略有修改,我只用了 17 周而不是原始代码中的 22 周,但我无法用 22 周使其工作
##########################################################
library(dplyr)
data<- read_xlsx("C:/User/sport2.xlsx")
data$PtsW <- as.numeric(data$PtsW)
data$PtsL <- as.numeric(data$PtsL)
data$YdsW <- as.numeric(data$YdsW)
data$YdsL <- as.numeric(data$YdsL)
data$TOW <- as.numeric(data$TOW)
data$TOL <- as.numeric(data$TOL)
rankings <- data_frame()
teams <- data %>% distinct(Winner.tie) %>% select(Winner.tie)
teams
rankings <- bind_rows(rankings,teams)
#print(data)
#class(rankings)
#print(rankings)
rankings
for (i in 2:19) {
rankings[,i] <- 0
}
colnames(rankings) <- c("Team","Week.0","Week.1","Week.2","Week.3","Week.4","Week.5","Week.6",
"Week.7","Week.8","Week.9","Week.10","Week.11","Week.12","Week.13",
"Week.14","Week.15","Week.16","Week.17")
rankings$Week.0 <- 1500
# Iterate for each week of play
week.no <- 1
k_factor <- 20.0
week.data <- data[data$Week == week.no,]
for (j in 1:17) {
week.no <- j
k_factor <- 20.0
week.data <- data[data$Week == week.no,]
for (i in 1:nrow(week.data)) {
winner <- week.data[i,"Winner.tie"]
loser <- week.data[i,"Loser.tie"]
old.rank.w <- rankings[rankings$Team == winner,week.no+1]
old.rank.w <- old.rank.w[[1]]
old.rank.l <- rankings[rankings$Team == loser,week.no+1]
old.rank.l <- old.rank.l[[1]]
# Calculate Margin of Victory Multiplier
# mv_mult = LN(ABS(PD)+1) * (2.2/((ELOW-ELOL)*.001+2.2))
pd <- week.data$PtsW[i] - week.data$PtsL[i]
mv_mult <- 1 #Margin For Victory Multiplier
mv_mult <- log(pd +1) * (2.2/((old.rank.w - old.rank.l)*.001+2.2))
#
# Use old ELO Algorithm
#
w_w <- 1.0
w_l <- 0.0
if (pd == 0) {
w_w <- 0.5
w_l <- 0.5
}
#
#
d_ij_w <- old.rank.w - old.rank.l
d_ij_l <- old.rank.l - old.rank.w
#
mu_ij_w <- 1 / (1 + 10 ^ ((-1 * d_ij_w)/400))
new.rank.w <- round( old.rank.w + (k_factor * mv_mult * (w_w - mu_ij_w)))
#
mu_ij_l <- 1 / (1 + 10 ^ ((-1 * d_ij_l)/400))
new.rank.l <- round( old.rank.l + (k_factor * mv_mult * (w_l - mu_ij_l)))
#
print (sprintf("Rank : W = %d L = %d",new.rank.w,new.rank.l))
rankings[rankings$Team == winner,week.no+2] <- new.rank.w
rankings[rankings$Team == loser,week.no+2] <- new.rank.l
}
###################################################################3
# if team didn't play, carry forward early ratings
# not needed for wildcard, division et al
# for (i in 1:nrow(rankings)) {
# if (is.na(rankings[i,week.no+2])) {
# rankings[i,week.no+2] <- rankings[i,week.no+1]
# }
# if (rankings[i,week.no+2] < 1) {
# rankings[i,week.no+2] <- rankings[i,week.no+1]
# }
# }
##################################################################
}
# week #1 ranking
rankings %>% select(Team,Week.1) %>% arrange(-Week.1)
# week #17 ranking
rankings %>% select(Team,Week.17) %>% arrange(-Week.17)
#
我遇到了错误
Error in x[[jj]][iseq] <- vjj : replacement has length zero
我试过修改索引和调试但找不到问题所在
我会回应 Ronak 关于提供最少可重现示例的好处的评论,这样我们就可以更好地帮助您(这意味着我们不必梳理很多行代码来找到错误的来源)!
在进入解决方案之前,我只想指出几个文体编码“最佳”实践:
- 如果您选择使用 dplyr(以及随后的 tidyverse),坚持使用它通常在风格上更一致。你的代码在 dplyr 和 base 之间切换,这会使代码更难阅读,但这不是世界末日
- 当您使用
read_xlsx()
时,请注意它是从 readxl
包中调用的,该包在您的示例中没有作为库加载,也没有通过命名空间引用,即 readxl::read_xlsx()
。确保您的示例可以使用您使用的包完全重现是一种很好的做法
代码重写
我已经重写了您在 dplyr
中的部分代码,直到错误的来源。
data_clean <- data %>%
mutate(across(c("PtsW":"TOL"), as.numeric))
teams <- data_clean %>%
distinct(Winner.tie) %>%
select(Winner.tie)
rankings <- teams
但是,您可以尝试自己实施的一些修复是:
以 dplyr
风格重写代码的其余部分
使用 purrr
家族的 map()
函数来替换 for 循环。我还没有找到明确的证据表明 purrr::map()
普遍 比 for 循环快,我个人认为使用 for 循环从根本上讲是有指导意义的。
错误来源
我认为错误的来源来自您的这部分代码:
winner <- week.data[i,"Winner.tie"]
loser <- week.data[i,"Loser.tie"]
当我打印 loser
时,我没有在您的内部循环 (i = 1) 的第一次迭代中得到一个值“Green Bay Packers”,而是得到了一个提示,这就是您的代码正在做的事情通过索引。其余代码失败,因为您随后尝试将小标题传递到此语句中:old.rank.l <- rankings[rankings$Team == loser,week.no+1]
并且 returns 是一个空值 (numeric(0)).
解决方案
这里是重写的代码,直到错误的来源。
winner <- week.data[i,"Winner.tie"] %>% pull()
loser <- week.data[i,"Loser.tie"] %>% pull()
old.rank.w <- rankings %>%
filter(Team == winner) %>%
.[[week.no + 1]]
old.rank.l <- rankings %>%
filter(Team == loser) %>%
.[[week.no + 1]]
进行此更改后,运行 其余代码应该可以正常工作。如果您有任何问题,请告诉我
结论性思考
使用 print()
语句查看您的代码在哪里失败以及您的 objects 是否包含您期望的值。当您开始功能化代码时,browser()
是一个有用的调试工具。
我正在尝试 运行 来自此处的 ELO 评级代码
https://github.com/xsankar/hairy-octo-hipster/blob/master/ELO-538.R
我使用的数据集在这里
https://www.mediafire.com/file/0a5v393ki775vw9/sport2.xlsx/file
我 运行ning 的代码略有修改,我只用了 17 周而不是原始代码中的 22 周,但我无法用 22 周使其工作
##########################################################
library(dplyr)
data<- read_xlsx("C:/User/sport2.xlsx")
data$PtsW <- as.numeric(data$PtsW)
data$PtsL <- as.numeric(data$PtsL)
data$YdsW <- as.numeric(data$YdsW)
data$YdsL <- as.numeric(data$YdsL)
data$TOW <- as.numeric(data$TOW)
data$TOL <- as.numeric(data$TOL)
rankings <- data_frame()
teams <- data %>% distinct(Winner.tie) %>% select(Winner.tie)
teams
rankings <- bind_rows(rankings,teams)
#print(data)
#class(rankings)
#print(rankings)
rankings
for (i in 2:19) {
rankings[,i] <- 0
}
colnames(rankings) <- c("Team","Week.0","Week.1","Week.2","Week.3","Week.4","Week.5","Week.6",
"Week.7","Week.8","Week.9","Week.10","Week.11","Week.12","Week.13",
"Week.14","Week.15","Week.16","Week.17")
rankings$Week.0 <- 1500
# Iterate for each week of play
week.no <- 1
k_factor <- 20.0
week.data <- data[data$Week == week.no,]
for (j in 1:17) {
week.no <- j
k_factor <- 20.0
week.data <- data[data$Week == week.no,]
for (i in 1:nrow(week.data)) {
winner <- week.data[i,"Winner.tie"]
loser <- week.data[i,"Loser.tie"]
old.rank.w <- rankings[rankings$Team == winner,week.no+1]
old.rank.w <- old.rank.w[[1]]
old.rank.l <- rankings[rankings$Team == loser,week.no+1]
old.rank.l <- old.rank.l[[1]]
# Calculate Margin of Victory Multiplier
# mv_mult = LN(ABS(PD)+1) * (2.2/((ELOW-ELOL)*.001+2.2))
pd <- week.data$PtsW[i] - week.data$PtsL[i]
mv_mult <- 1 #Margin For Victory Multiplier
mv_mult <- log(pd +1) * (2.2/((old.rank.w - old.rank.l)*.001+2.2))
#
# Use old ELO Algorithm
#
w_w <- 1.0
w_l <- 0.0
if (pd == 0) {
w_w <- 0.5
w_l <- 0.5
}
#
#
d_ij_w <- old.rank.w - old.rank.l
d_ij_l <- old.rank.l - old.rank.w
#
mu_ij_w <- 1 / (1 + 10 ^ ((-1 * d_ij_w)/400))
new.rank.w <- round( old.rank.w + (k_factor * mv_mult * (w_w - mu_ij_w)))
#
mu_ij_l <- 1 / (1 + 10 ^ ((-1 * d_ij_l)/400))
new.rank.l <- round( old.rank.l + (k_factor * mv_mult * (w_l - mu_ij_l)))
#
print (sprintf("Rank : W = %d L = %d",new.rank.w,new.rank.l))
rankings[rankings$Team == winner,week.no+2] <- new.rank.w
rankings[rankings$Team == loser,week.no+2] <- new.rank.l
}
###################################################################3
# if team didn't play, carry forward early ratings
# not needed for wildcard, division et al
# for (i in 1:nrow(rankings)) {
# if (is.na(rankings[i,week.no+2])) {
# rankings[i,week.no+2] <- rankings[i,week.no+1]
# }
# if (rankings[i,week.no+2] < 1) {
# rankings[i,week.no+2] <- rankings[i,week.no+1]
# }
# }
##################################################################
}
# week #1 ranking
rankings %>% select(Team,Week.1) %>% arrange(-Week.1)
# week #17 ranking
rankings %>% select(Team,Week.17) %>% arrange(-Week.17)
#
我遇到了错误
Error in x[[jj]][iseq] <- vjj : replacement has length zero
我试过修改索引和调试但找不到问题所在
我会回应 Ronak 关于提供最少可重现示例的好处的评论,这样我们就可以更好地帮助您(这意味着我们不必梳理很多行代码来找到错误的来源)!
在进入解决方案之前,我只想指出几个文体编码“最佳”实践:
- 如果您选择使用 dplyr(以及随后的 tidyverse),坚持使用它通常在风格上更一致。你的代码在 dplyr 和 base 之间切换,这会使代码更难阅读,但这不是世界末日
- 当您使用
read_xlsx()
时,请注意它是从readxl
包中调用的,该包在您的示例中没有作为库加载,也没有通过命名空间引用,即readxl::read_xlsx()
。确保您的示例可以使用您使用的包完全重现是一种很好的做法
代码重写
我已经重写了您在 dplyr
中的部分代码,直到错误的来源。
data_clean <- data %>%
mutate(across(c("PtsW":"TOL"), as.numeric))
teams <- data_clean %>%
distinct(Winner.tie) %>%
select(Winner.tie)
rankings <- teams
但是,您可以尝试自己实施的一些修复是:
以
dplyr
风格重写代码的其余部分使用
purrr
家族的map()
函数来替换 for 循环。我还没有找到明确的证据表明purrr::map()
普遍 比 for 循环快,我个人认为使用 for 循环从根本上讲是有指导意义的。
错误来源
我认为错误的来源来自您的这部分代码:
winner <- week.data[i,"Winner.tie"]
loser <- week.data[i,"Loser.tie"]
当我打印 loser
时,我没有在您的内部循环 (i = 1) 的第一次迭代中得到一个值“Green Bay Packers”,而是得到了一个提示,这就是您的代码正在做的事情通过索引。其余代码失败,因为您随后尝试将小标题传递到此语句中:old.rank.l <- rankings[rankings$Team == loser,week.no+1]
并且 returns 是一个空值 (numeric(0)).
解决方案
这里是重写的代码,直到错误的来源。
winner <- week.data[i,"Winner.tie"] %>% pull()
loser <- week.data[i,"Loser.tie"] %>% pull()
old.rank.w <- rankings %>%
filter(Team == winner) %>%
.[[week.no + 1]]
old.rank.l <- rankings %>%
filter(Team == loser) %>%
.[[week.no + 1]]
进行此更改后,运行 其余代码应该可以正常工作。如果您有任何问题,请告诉我
结论性思考
使用 print()
语句查看您的代码在哪里失败以及您的 objects 是否包含您期望的值。当您开始功能化代码时,browser()
是一个有用的调试工具。