从零开始的 k-NN 算法精度低
Low accuracy in a k-NN algorithm from scratch
我正在尝试从零实现 k-NN 算法,但得到的准确率总是非常低。代码中一定存在逻辑错误,但我找不到它在哪里。代码如下:
# k-NN evaluated with 10 passes of kfoldsplit() for every K in `grid`, once
# with Euclidean and once with Manhattan distance. `grid`, `dfmerged`,
# `kfoldsplit` and `getmode` are not defined in this snippet; they must already
# exist in the calling environment.
start <- Sys.time()
# Result tables, grown by one row per K (first entry K, then one accuracy per fold).
AccEuc <- NULL
AccMan <- NULL
for(K in grid){
# Per-fold accuracies for the current K.
cvAccEuc <- NULL
cvAccMan <- NULL
for (fold in 1:10){
# Element 1 of the result is used as the training part and element 2 as the
# validation part. Note that the local name `split` masks base::split here.
split = kfoldsplit(dfmerged,10,fold)
# Columns 1 and 2 are dropped from the features; column 2 is used as the label.
train <- split[[1]][,-c(1,2)]
valid <- split[[2]][,-c(1,2)]
trainclass <- split[[1]][,2]
validclass <- split[[2]][,2]
# Validation rows are stacked first, training rows after them, so a single
# dist() call yields every pairwise distance.
combined=rbind(valid,train)
# NOTE(review): the stats::dist documentation names this method "euclidean";
# confirm that the spelling "euclidian" is accepted by dist().
eucdistcombined = as.matrix(dist(combined, method = "euclidian")) # Euclidian Distance
mandistcombined = as.matrix(dist(combined, method = "manhattan")) # Manhattan Distance
lnvalid = dim(valid)[1]; lntrain = dim(train)[1]; lnall = lnvalid + lntrain
# Keep only the validation-rows x training-columns part of each distance matrix.
eucdistcombined = eucdistcombined[1:lnvalid,(lnvalid+1):lnall]
mandistcombined = mandistcombined[1:lnvalid,(lnvalid+1):lnall]
# After order(), each row lists training-column indices sorted by ascending
# distance to that validation row; apply() returns them column-wise, hence t().
neighbors_euc = t(apply(eucdistcombined, 1, order))
neighbors_man = t(apply(mandistcombined, 1, order))
# NOTE(review): `x <= K` produces a logical mask that is TRUE wherever the
# stored index VALUE is at most K. It does not pick the first K entries of the
# ordering; the K nearest training rows would be `x[1:K]`.
idxeuc = t(apply(neighbors_euc, 1, function(x)( x <= K) ))
idxman = t(apply(neighbors_man, 1, function(x)( x <= K) ))
# The prediction for each validation row is getmode() of the training labels
# selected by that row of the mask, coerced to numeric.
predseuc <- apply(idxeuc, 1, function(x) as.numeric(getmode( trainclass[as.vector(x)] )) )
predsman <- apply(idxman, 1, function(x) as.numeric(getmode( trainclass[as.vector(x)] )) )
# Fold accuracy = number of matching labels / number of validation labels.
# NOTE(review): dim() is NULL for a plain vector, so this denominator assumes
# `validclass` still carries dimensions (e.g. a one-column table) - confirm.
cvAccEuc <- c( cvAccEuc, sum(validclass == predseuc)/dim(validclass)[1] )
cvAccMan <- c( cvAccMan, sum(validclass == predsman)/dim(validclass)[1] )
}
# Append one row for this K: K followed by the per-fold accuracies.
AccEuc <- rbind(AccEuc, t(c(K,cvAccEuc)))
AccMan <- rbind(AccMan, t(c(K,cvAccMan)))
}
# Elapsed wall-clock time of the whole grid search.
Sys.time() - start
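应将 `( x <= K)` 替换为 `x[1:K]`。这里的 `x` 是 `neighbors_euc`/`neighbors_man` 的一行,其中保存的是对 `eucdistcombined`/`mandistcombined` 对应行调用 `order` 得到的排序索引(即按距离从小到大排列的训练样本下标)。`( x <= K)` 只会标记出下标值不大于 K 的那些位置,而这里需要的是距离最小的 K 个样本的下标,因此应使用 `x[1:K]` 来获取 K 个最近邻。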
我正在尝试从零实现 k-NN 算法,但得到的准确率总是非常低。代码中一定存在逻辑错误,但我找不到它在哪里。代码如下:
# k-NN evaluated with 10 passes of kfoldsplit() for every K in `grid`, once
# with Euclidean and once with Manhattan distance. `grid`, `dfmerged`,
# `kfoldsplit` and `getmode` are not defined in this snippet; they must already
# exist in the calling environment.
start <- Sys.time()
# Result tables, grown by one row per K (first entry K, then one accuracy per fold).
AccEuc <- NULL
AccMan <- NULL
for(K in grid){
# Per-fold accuracies for the current K.
cvAccEuc <- NULL
cvAccMan <- NULL
for (fold in 1:10){
# Element 1 of the result is used as the training part and element 2 as the
# validation part. Note that the local name `split` masks base::split here.
split = kfoldsplit(dfmerged,10,fold)
# Columns 1 and 2 are dropped from the features; column 2 is used as the label.
train <- split[[1]][,-c(1,2)]
valid <- split[[2]][,-c(1,2)]
trainclass <- split[[1]][,2]
validclass <- split[[2]][,2]
# Validation rows are stacked first, training rows after them, so a single
# dist() call yields every pairwise distance.
combined=rbind(valid,train)
# NOTE(review): the stats::dist documentation names this method "euclidean";
# confirm that the spelling "euclidian" is accepted by dist().
eucdistcombined = as.matrix(dist(combined, method = "euclidian")) # Euclidian Distance
mandistcombined = as.matrix(dist(combined, method = "manhattan")) # Manhattan Distance
lnvalid = dim(valid)[1]; lntrain = dim(train)[1]; lnall = lnvalid + lntrain
# Keep only the validation-rows x training-columns part of each distance matrix.
eucdistcombined = eucdistcombined[1:lnvalid,(lnvalid+1):lnall]
mandistcombined = mandistcombined[1:lnvalid,(lnvalid+1):lnall]
# After order(), each row lists training-column indices sorted by ascending
# distance to that validation row; apply() returns them column-wise, hence t().
neighbors_euc = t(apply(eucdistcombined, 1, order))
neighbors_man = t(apply(mandistcombined, 1, order))
# NOTE(review): `x <= K` produces a logical mask that is TRUE wherever the
# stored index VALUE is at most K. It does not pick the first K entries of the
# ordering; the K nearest training rows would be `x[1:K]`.
idxeuc = t(apply(neighbors_euc, 1, function(x)( x <= K) ))
idxman = t(apply(neighbors_man, 1, function(x)( x <= K) ))
# The prediction for each validation row is getmode() of the training labels
# selected by that row of the mask, coerced to numeric.
predseuc <- apply(idxeuc, 1, function(x) as.numeric(getmode( trainclass[as.vector(x)] )) )
predsman <- apply(idxman, 1, function(x) as.numeric(getmode( trainclass[as.vector(x)] )) )
# Fold accuracy = number of matching labels / number of validation labels.
# NOTE(review): dim() is NULL for a plain vector, so this denominator assumes
# `validclass` still carries dimensions (e.g. a one-column table) - confirm.
cvAccEuc <- c( cvAccEuc, sum(validclass == predseuc)/dim(validclass)[1] )
cvAccMan <- c( cvAccMan, sum(validclass == predsman)/dim(validclass)[1] )
}
# Append one row for this K: K followed by the per-fold accuracies.
AccEuc <- rbind(AccEuc, t(c(K,cvAccEuc)))
AccMan <- rbind(AccMan, t(c(K,cvAccMan)))
}
# Elapsed wall-clock time of the whole grid search.
Sys.time() - start
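应将 `( x <= K)` 替换为 `x[1:K]`。这里的 `x` 是 `neighbors_euc`/`neighbors_man` 的一行,其中保存的是对 `eucdistcombined`/`mandistcombined` 对应行调用 `order` 得到的排序索引(即按距离从小到大排列的训练样本下标)。`( x <= K)` 只会标记出下标值不大于 K 的那些位置,而这里需要的是距离最小的 K 个样本的下标,因此应使用 `x[1:K]` 来获取 K 个最近邻。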