Cross-validating multiple Neural Networks with varying size of the single hidden layer in R
I have to use cross-validation to find out how many neurons the single hidden layer of my model should contain (using the nnet package). I have to write a function in R that takes the data, the model and a parameter n as input, and computes model performance as the accuracy on a randomly split training and test set, using a neural network with n neurons in the hidden layer. Using this function in a loop, I then compute the performance for neural networks with hidden-layer sizes n = 1, 2, 3, …, 20. My main interest is the size of the hidden layer, because at the end I have to produce a plot of accuracy against model complexity; for that reason I would ideally like all accuracy measurements for both the test and the training sets.
I get the error "object 'accNN' not found", which is the empty vector meant to store the results. Since I want to compare 20 models, in the loop I would also have to create 20 empty vectors to store the 20 different results (accNN1, accNN2, accNN3, and so on). Help with coding the loop correctly would be great.
Thanks a lot!
library(nnet)

set.seed(1)
df <- data.frame(
  X = sample(1:100),
  Y = sample(1:100),
  Z = sample(1:100),
  target = sample(c("yes", "no"), 10, replace = TRUE))  # on R >= 4.0 you may need factor() here for nnet

# Create K folds with equal size for cross validation.
nFolds <- 5
myFolds <- cut(seq(1, nrow(df)),
               breaks = nFolds,
               labels = FALSE)
table(myFolds)

# Create object for number of neurons
sizehiddenlayer <- 3

# Define the model
mdl <- target ~ X + Y + Z

for (j in 1:sizehiddenlayer) {
  # Initialize empty vectors to collect results
  accNN[j] <- rep(NA, nFolds)

  for (i in 1:nFolds) {
    cat("Analysis of fold", i, "\n")

    # 1: Define training and test sets
    testObs <- which(myFolds == i, arr.ind = TRUE)
    dfTest  <- df[ testObs, ]
    dfTrain <- df[-testObs, ]

    # 2: Train the models on the training sets
    rsltNN[j] <- nnet(mdlB, data = df, size = j)

    # 3: Predict values for the test sets
    predNN[j] <- predict(rsltNN[j], type = "class")

    # 4: Measure accuracy and store the results
    accNN[j] <- mean(df$target == predNN[j])
  }
}
You need to make an object to store the results in; assigning with the arrow does not append the object to an existing vector or list, so something like this will work (note that you train on dfTrain and predict on dfTest):
results <- vector("list", sizehiddenlayer)

for (j in 1:sizehiddenlayer) {
  results[[j]]$accNN  <- rep(NA, nFolds)
  results[[j]]$rsltNN <- vector("list", nFolds)
  results[[j]]$predNN <- vector("list", nFolds)

  for (i in 1:nFolds) {
    testObs <- which(myFolds == i, arr.ind = TRUE)
    dfTest  <- df[ testObs, ]
    dfTrain <- df[-testObs, ]

    results[[j]]$rsltNN[[i]] <- nnet(mdl, data = dfTrain, size = j)
    results[[j]]$predNN[[i]] <- predict(results[[j]]$rsltNN[[i]], dfTest, type = "class")
    results[[j]]$accNN[i]    <- mean(dfTest$target == results[[j]]$predNN[[i]])
  }
}
The results are organized in a list:
head(results[[1]],2)
$accNN
[1] 0.6 0.6 0.6 0.6 0.6
$rsltNN
$rsltNN[[1]]
a 3-1-1 network with 6 weights
inputs: X Y Z
output(s): target
options were - entropy fitting
$rsltNN[[2]]
a 3-1-1 network with 6 weights
inputs: X Y Z
output(s): target
options were - entropy fitting
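Since the stated goal is a plot of accuracy against model complexity, one way to finish that step, sketched here under the assumption that the results list above has been built, is to average the fold accuracies per hidden-layer size and plot the means:

# Average the fold accuracies for each hidden-layer size (sketch, not from the original answer)
meanAcc <- sapply(results, function(res) mean(res$accNN))

plot(1:sizehiddenlayer, meanAcc, type = "b",
     xlab = "Number of neurons in the hidden layer",
     ylab = "Mean test-set accuracy",
     main = "Accuracy vs. model complexity")

If you also want training-set accuracy, you could store an extra (hypothetical) field such as results[[j]]$accTrain[i] <- mean(dfTrain$target == predict(results[[j]]$rsltNN[[i]], dfTrain, type = "class")) inside the inner loop and average it the same way.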
Another option is to use caret, which handles the CV and so on for you (a rough caret sketch is included after the purrr example below), or you could try something purrr-like:
library(purrr)
library(dplyr)

fit  <- function(dat, Folds, i, j) { nnet(mdl, data = dat[Folds != i, ], size = j) }
pred <- function(dat, Folds, mdl, i) { predict(mdl, dat[Folds == i, ], type = "class") }
accr <- function(dat, Folds, prediction, i) { mean(dat$target[Folds == i] == prediction) }

results <- expand.grid(hiddenlayer = 1:sizehiddenlayer, fold = 1:nFolds) %>%
  as_tibble() %>%
  mutate(
    mdl      = map2(.x = fold, .y = hiddenlayer, ~ fit(dat = df, Folds = myFolds, i = .x, j = .y)),
    pred     = map2(.x = fold, .y = mdl,         ~ pred(dat = df, Folds = myFolds, mdl = .y, i = .x)),
    accuracy = map2(.x = fold, .y = pred,        ~ accr(dat = df, Folds = myFolds, prediction = .y, i = .x))
  )

results
# A tibble: 15 x 5
hiddenlayer fold mdl pred accuracy
<int> <int> <list> <list> <list>
1 1 1 <nnt.frml> <chr [20]> <dbl [1]>
2 2 1 <nnt.frml> <chr [20]> <dbl [1]>
3 3 1 <nnt.frml> <chr [20]> <dbl [1]>
4 1 2 <nnt.frml> <chr [20]> <dbl [1]>
5 2 2 <nnt.frml> <chr [20]> <dbl [1]>
6 3 2 <nnt.frml> <chr [20]> <dbl [1]>
7 1 3 <nnt.frml> <chr [20]> <dbl [1]>
You can access them like this:
results$mdl[[1]]
a 3-1-1 network with 6 weights
inputs: X Y Z
output(s): target
options were - entropy fitting
results$pred[[1]]
[1] "no" "no" "no" "no" "no" "no" "no" "no" "no" "no" "no" "no" "no" "no" "no"
[16] "no" "no" "no" "no" "no"
results$accuracy[[1]]
[1] 0.6
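The caret route mentioned earlier could look roughly like the following. Treat it as a sketch only: the 5-fold trainControl, the fixed decay of 0 and trace = FALSE are illustrative choices on my part rather than something from the original post, and caret expects a factor outcome for classification.

library(caret)

df$target <- factor(df$target)                        # caret expects a factor outcome

ctrl <- trainControl(method = "cv", number = nFolds)  # 5-fold cross-validation
grid <- expand.grid(size = 1:sizehiddenlayer,         # hidden-layer sizes to compare
                    decay = 0)

fitCaret <- train(mdl, data = df, method = "nnet",
                  trControl = ctrl, tuneGrid = grid, trace = FALSE)

fitCaret$results   # accuracy per hidden-layer size
plot(fitCaret)     # accuracy vs. number of hidden units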