R 中非线性 PCA 的 homals 包:dimnames(x) <- dn 中的错误:'dimnames' [1] 的长度不等于数组范围
homals package for Nonlinear PCA in R: Error in dimnames(x) <- dn : length of 'dimnames' [1] not equal to array extent
我正在尝试使用 R
中的 homals
包对数据集实施 NLPCA(非线性 PCA),但我不断收到以下错误消息:
Error in dimnames(x) <- dn : length of 'dimnames' [1] not equal to array extent
我使用的数据集可以在 UCI ML Repository 中找到,在 R
中导入时称为 dat
:https://archive.ics.uci.edu/ml/datasets/South+German+Credit+%28UPDATE%29
这是我的代码(下载数据集后会提供一些代码):
nlpcasouthgerman <- homals(dat, rank=1, level=c('nominal','numerical',rep('nominal',2),
'numerical','nominal',
rep('ordinal',2), rep('nominal',2),
'ordinal','nominal','numerical',
rep('nominal',2), 'ordinal',
'nominal','ordinal',rep('nominal',3)),
active=c(FALSE, rep(TRUE, 20)), ndim=3, verbose=1)
我正在尝试预测第一个属性,因此我将其设置为 active=FALSE
。
输出如下所示(跳过所有迭代消息):
Iteration: 1 Loss Value: 0.000047
Iteration: 2 Loss Value: 0.000044
...
Iteration: 37 Loss Value: 0.000043
Iteration: 38 Loss Value: 0.000043
Error in dimnames(x) <- dn :
length of 'dimnames' [1] not equal to array extent
我不明白为什么会出现这个错误。我在其他一些数据集上使用了相同的代码并且工作正常所以我不明白为什么这个错误仍然存在。关于可能出现的问题以及如何解决此问题的任何建议?
谢谢!
似乎错误来自 homals
函数中生成 NA 的代码,专门针对 number_credits
级别的数据,这会导致 sort(as.numeric((rownames(clist[[i]]))))
出现问题并尝试捕获错误,因为其中一个级别没有给出 NA 值。
因此,您要么必须修改 homals
函数来处理这种边缘情况,要么更改有问题的因素水平。这可能会作为错误报告提交给包维护者。
作为您的解决方法,您可以这样做:
levels(dat$number_credits)[1] <- "_1"
并且函数应该 运行 没有问题。
编辑:
我认为一种解决方案是更改 homals
函数中的一行代码,但不能保证这会按预期工作。最好向包 author/maintainer 提交错误报告 - 请参阅 https://cran.r-project.org/web/packages/homals/ 获取地址。
使用 rnames <- as.numeric(rownames(clist[[i]]))[order(as.numeric(rownames(clist[[i]])))]
而不是 rnames <- sort(as.numeric((rownames(clist[[i]]))))
将允许以下代码识别 NA,但我不确定为什么作者没有尝试完全保留因子水平。
无论如何,您可以 运行 在本地环境中修改函数,这需要显式调用内部(未导出)homals
函数,如下所示。不一定是最好的方法,但会在紧要关头帮助你。
homals <- function (data, ndim = 2, rank = ndim, level = "nominal", sets = 0,
active = TRUE, eps = 0.000001, itermax = 1000, verbose = 0) {
dframe <- data
name <- deparse(substitute(dframe))
nobj <- nrow(dframe)
nvar <- ncol(dframe)
vname <- names(dframe)
rname <- rownames(dframe)
for (j in 1:nvar) {
dframe[, j] <- as.factor(dframe[, j])
levfreq <- table(dframe[, j])
if (any(levfreq == 0)) {
newlev <- levels(dframe[, j])[-which(levfreq == 0)]
}
else {
newlev <- levels(dframe[, j])
}
dframe[, j] <- factor(dframe[, j], levels = sort(newlev))
}
varcheck <- apply(dframe, 2, function(tl) length(table(tl)))
if (any(varcheck == 1))
stop("Variable with only 1 value detected! Can't proceed with estimation!")
active <- homals:::checkPars(active, nvar)
rank <- homals:::checkPars(rank, nvar)
level <- homals:::checkPars(level, nvar)
if (length(sets) == 1)
sets <- lapply(1:nvar, "c")
if (!all(sort(unlist(sets)) == (1:nvar))) {
print(cat("sets union", sort(unlist(sets)), "\n"))
stop("inappropriate set structure !")
}
nset <- length(sets)
mis <- rep(0, nobj)
for (l in 1:nset) {
lset <- sets[[l]]
if (all(!active[lset]))
(next)()
jset <- lset[which(active[lset])]
for (i in 1:nobj) {
if (any(is.na(dframe[i, jset])))
dframe[i, jset] <- NA
else mis[i] <- mis[i] + 1
}
}
for (j in 1:nvar) {
k <- length(levels(dframe[, j]))
if (rank[j] > min(ndim, k - 1))
rank[j] <- min(ndim, k - 1)
}
x <- cbind(homals:::orthogonalPolynomials(mis, 1:nobj, ndim))
x <- homals:::normX(homals:::centerX(x, mis), mis)$q
y <- lapply(1:nvar, function(j) homals:::computeY(dframe[, j], x))
sold <- homals:::totalLoss(dframe, x, y, active, rank, level, sets)
iter <- pops <- 0
repeat {
iter <- iter + 1
y <- homals:::updateY(dframe, x, y, active, rank, level, sets,
verbose = verbose)
smid <- homals:::totalLoss(dframe, x, y, active, rank, level,
sets)/(nobj * nvar * ndim)
ssum <- homals:::totalSum(dframe, x, y, active, rank, level, sets)
qv <- homals:::normX(homals:::centerX((1/mis) * ssum, mis), mis)
z <- qv$q
snew <- homals:::totalLoss(dframe, z, y, active, rank, level,
sets)/(nobj * nvar * ndim)
if (verbose > 0)
cat("Iteration:", formatC(iter, digits = 3, width = 3),
"Loss Value: ", formatC(c(smid), digits = 6,
width = 6, format = "f"), "\n")
r <- abs(qv$r)/2
ops <- sum(r)
aps <- sum(La.svd(crossprod(x, mis * z), 0, 0)$d)/ndim
if (iter == itermax) {
stop("maximum number of iterations reached")
}
if (smid > sold) {
warning(cat("Loss function increases in iteration ",
iter, "\n"))
}
if ((ops - pops) < eps)
break
else {
x <- z
pops <- ops
sold <- smid
}
}
ylist <- alist <- clist <- ulist <- NULL
for (j in 1:nvar) {
gg <- dframe[, j]
c <- homals:::computeY(gg, z)
d <- as.vector(table(gg))
lst <- homals:::restrictY(d, c, rank[j], level[j])
y <- lst$y
a <- lst$a
u <- lst$z
ylist <- c(ylist, list(y))
alist <- c(alist, list(a))
clist <- c(clist, list(c))
ulist <- c(ulist, list(u))
}
dimlab <- paste("D", 1:ndim, sep = "")
for (i in 1:nvar) {
if (ndim == 1) {
ylist[[i]] <- cbind(ylist[[i]])
ulist[[i]] <- cbind(ulist[[i]])
clist[[i]] <- cbind(clist[[i]])
}
options(warn = -1)
# Here is the line that I changed in the code:
# rnames <- sort(as.numeric((rownames(clist[[i]]))))
rnames <- as.numeric(rownames(clist[[i]]))[order(as.numeric(rownames(clist[[i]])))]
options(warn = 0)
if ((any(is.na(rnames))) || (length(rnames) == 0))
rnames <- rownames(clist[[i]])
if (!is.matrix(ulist[[i]]))
ulist[[i]] <- as.matrix(ulist[[i]])
rownames(ylist[[i]]) <- rownames(ulist[[i]]) <- rownames(clist[[i]]) <- rnames
rownames(alist[[i]]) <- paste(1:dim(alist[[i]])[1])
colnames(clist[[i]]) <- colnames(ylist[[i]]) <- colnames(alist[[i]]) <- dimlab
colnames(ulist[[i]]) <- paste(1:dim(as.matrix(ulist[[i]]))[2])
}
names(ylist) <- names(ulist) <- names(clist) <- names(alist) <- colnames(dframe)
rownames(z) <- rownames(dframe)
colnames(z) <- dimlab
dummymat <- as.matrix(homals:::expandFrame(dframe, zero = FALSE, clean = FALSE))
dummymat01 <- dummymat
dummymat[dummymat == 2] <- NA
dummymat[dummymat == 0] <- Inf
scoremat <- array(NA, dim = c(dim(dframe), ndim), dimnames = list(rownames(dframe),
colnames(dframe), paste("dim", 1:ndim, sep = "")))
for (i in 1:ndim) {
catscores.d1 <- do.call(rbind, ylist)[, i]
dummy.scores <- t(t(dummymat) * catscores.d1)
freqlist <- apply(dframe, 2, function(dtab) as.list(table(dtab)))
cat.ind <- sequence(sapply(freqlist, length))
scoremat[, , i] <- t(apply(dummy.scores, 1, function(ds) {
ind.infel <- which(ds == Inf)
ind.minfel <- which(ds == -Inf)
ind.nan <- which(is.nan(ds))
ind.nael <- which((is.na(ds) + (cat.ind != 1)) ==
2)
ds[-c(ind.infel, ind.minfel, ind.nael, ind.nan)]
}))
}
disc.mat <- apply(scoremat, 3, function(xx) {
apply(xx, 2, function(cols) {
(sum(cols^2, na.rm = TRUE))/nobj
})
})
result <- list(datname = name, catscores = ylist, scoremat = scoremat,
objscores = z, cat.centroids = clist, ind.mat = dummymat01,
loadings = alist, low.rank = ulist, discrim = disc.mat,
ndim = ndim, niter = iter, level = level, eigenvalues = r,
loss = smid, rank.vec = rank, active = active, dframe = dframe,
call = match.call())
class(result) <- "homals"
result
}
我正在尝试使用 R
中的 homals
包对数据集实施 NLPCA(非线性 PCA),但我不断收到以下错误消息:
Error in dimnames(x) <- dn : length of 'dimnames' [1] not equal to array extent
我使用的数据集可以在 UCI ML Repository 中找到,在 R
中导入时称为 dat
:https://archive.ics.uci.edu/ml/datasets/South+German+Credit+%28UPDATE%29
这是我的代码(下载数据集后会提供一些代码):
nlpcasouthgerman <- homals(dat, rank=1, level=c('nominal','numerical',rep('nominal',2),
'numerical','nominal',
rep('ordinal',2), rep('nominal',2),
'ordinal','nominal','numerical',
rep('nominal',2), 'ordinal',
'nominal','ordinal',rep('nominal',3)),
active=c(FALSE, rep(TRUE, 20)), ndim=3, verbose=1)
我正在尝试预测第一个属性,因此我将其设置为 active=FALSE
。
输出如下所示(跳过所有迭代消息):
Iteration: 1 Loss Value: 0.000047
Iteration: 2 Loss Value: 0.000044
...
Iteration: 37 Loss Value: 0.000043
Iteration: 38 Loss Value: 0.000043
Error in dimnames(x) <- dn :
length of 'dimnames' [1] not equal to array extent
我不明白为什么会出现这个错误。我在其他一些数据集上使用了相同的代码并且工作正常所以我不明白为什么这个错误仍然存在。关于可能出现的问题以及如何解决此问题的任何建议?
谢谢!
似乎错误来自 homals
函数中生成 NA 的代码,专门针对 number_credits
级别的数据,这会导致 sort(as.numeric((rownames(clist[[i]]))))
出现问题并尝试捕获错误,因为其中一个级别没有给出 NA 值。
因此,您要么必须修改 homals
函数来处理这种边缘情况,要么更改有问题的因素水平。这可能会作为错误报告提交给包维护者。
作为您的解决方法,您可以这样做:
levels(dat$number_credits)[1] <- "_1"
并且函数应该 运行 没有问题。
编辑:
我认为一种解决方案是更改 homals
函数中的一行代码,但不能保证这会按预期工作。最好向包 author/maintainer 提交错误报告 - 请参阅 https://cran.r-project.org/web/packages/homals/ 获取地址。
使用 rnames <- as.numeric(rownames(clist[[i]]))[order(as.numeric(rownames(clist[[i]])))]
而不是 rnames <- sort(as.numeric((rownames(clist[[i]]))))
将允许以下代码识别 NA,但我不确定为什么作者没有尝试完全保留因子水平。
无论如何,您可以 运行 在本地环境中修改函数,这需要显式调用内部(未导出)homals
函数,如下所示。不一定是最好的方法,但会在紧要关头帮助你。
homals <- function (data, ndim = 2, rank = ndim, level = "nominal", sets = 0,
active = TRUE, eps = 0.000001, itermax = 1000, verbose = 0) {
dframe <- data
name <- deparse(substitute(dframe))
nobj <- nrow(dframe)
nvar <- ncol(dframe)
vname <- names(dframe)
rname <- rownames(dframe)
for (j in 1:nvar) {
dframe[, j] <- as.factor(dframe[, j])
levfreq <- table(dframe[, j])
if (any(levfreq == 0)) {
newlev <- levels(dframe[, j])[-which(levfreq == 0)]
}
else {
newlev <- levels(dframe[, j])
}
dframe[, j] <- factor(dframe[, j], levels = sort(newlev))
}
varcheck <- apply(dframe, 2, function(tl) length(table(tl)))
if (any(varcheck == 1))
stop("Variable with only 1 value detected! Can't proceed with estimation!")
active <- homals:::checkPars(active, nvar)
rank <- homals:::checkPars(rank, nvar)
level <- homals:::checkPars(level, nvar)
if (length(sets) == 1)
sets <- lapply(1:nvar, "c")
if (!all(sort(unlist(sets)) == (1:nvar))) {
print(cat("sets union", sort(unlist(sets)), "\n"))
stop("inappropriate set structure !")
}
nset <- length(sets)
mis <- rep(0, nobj)
for (l in 1:nset) {
lset <- sets[[l]]
if (all(!active[lset]))
(next)()
jset <- lset[which(active[lset])]
for (i in 1:nobj) {
if (any(is.na(dframe[i, jset])))
dframe[i, jset] <- NA
else mis[i] <- mis[i] + 1
}
}
for (j in 1:nvar) {
k <- length(levels(dframe[, j]))
if (rank[j] > min(ndim, k - 1))
rank[j] <- min(ndim, k - 1)
}
x <- cbind(homals:::orthogonalPolynomials(mis, 1:nobj, ndim))
x <- homals:::normX(homals:::centerX(x, mis), mis)$q
y <- lapply(1:nvar, function(j) homals:::computeY(dframe[, j], x))
sold <- homals:::totalLoss(dframe, x, y, active, rank, level, sets)
iter <- pops <- 0
repeat {
iter <- iter + 1
y <- homals:::updateY(dframe, x, y, active, rank, level, sets,
verbose = verbose)
smid <- homals:::totalLoss(dframe, x, y, active, rank, level,
sets)/(nobj * nvar * ndim)
ssum <- homals:::totalSum(dframe, x, y, active, rank, level, sets)
qv <- homals:::normX(homals:::centerX((1/mis) * ssum, mis), mis)
z <- qv$q
snew <- homals:::totalLoss(dframe, z, y, active, rank, level,
sets)/(nobj * nvar * ndim)
if (verbose > 0)
cat("Iteration:", formatC(iter, digits = 3, width = 3),
"Loss Value: ", formatC(c(smid), digits = 6,
width = 6, format = "f"), "\n")
r <- abs(qv$r)/2
ops <- sum(r)
aps <- sum(La.svd(crossprod(x, mis * z), 0, 0)$d)/ndim
if (iter == itermax) {
stop("maximum number of iterations reached")
}
if (smid > sold) {
warning(cat("Loss function increases in iteration ",
iter, "\n"))
}
if ((ops - pops) < eps)
break
else {
x <- z
pops <- ops
sold <- smid
}
}
ylist <- alist <- clist <- ulist <- NULL
for (j in 1:nvar) {
gg <- dframe[, j]
c <- homals:::computeY(gg, z)
d <- as.vector(table(gg))
lst <- homals:::restrictY(d, c, rank[j], level[j])
y <- lst$y
a <- lst$a
u <- lst$z
ylist <- c(ylist, list(y))
alist <- c(alist, list(a))
clist <- c(clist, list(c))
ulist <- c(ulist, list(u))
}
dimlab <- paste("D", 1:ndim, sep = "")
for (i in 1:nvar) {
if (ndim == 1) {
ylist[[i]] <- cbind(ylist[[i]])
ulist[[i]] <- cbind(ulist[[i]])
clist[[i]] <- cbind(clist[[i]])
}
options(warn = -1)
# Here is the line that I changed in the code:
# rnames <- sort(as.numeric((rownames(clist[[i]]))))
rnames <- as.numeric(rownames(clist[[i]]))[order(as.numeric(rownames(clist[[i]])))]
options(warn = 0)
if ((any(is.na(rnames))) || (length(rnames) == 0))
rnames <- rownames(clist[[i]])
if (!is.matrix(ulist[[i]]))
ulist[[i]] <- as.matrix(ulist[[i]])
rownames(ylist[[i]]) <- rownames(ulist[[i]]) <- rownames(clist[[i]]) <- rnames
rownames(alist[[i]]) <- paste(1:dim(alist[[i]])[1])
colnames(clist[[i]]) <- colnames(ylist[[i]]) <- colnames(alist[[i]]) <- dimlab
colnames(ulist[[i]]) <- paste(1:dim(as.matrix(ulist[[i]]))[2])
}
names(ylist) <- names(ulist) <- names(clist) <- names(alist) <- colnames(dframe)
rownames(z) <- rownames(dframe)
colnames(z) <- dimlab
dummymat <- as.matrix(homals:::expandFrame(dframe, zero = FALSE, clean = FALSE))
dummymat01 <- dummymat
dummymat[dummymat == 2] <- NA
dummymat[dummymat == 0] <- Inf
scoremat <- array(NA, dim = c(dim(dframe), ndim), dimnames = list(rownames(dframe),
colnames(dframe), paste("dim", 1:ndim, sep = "")))
for (i in 1:ndim) {
catscores.d1 <- do.call(rbind, ylist)[, i]
dummy.scores <- t(t(dummymat) * catscores.d1)
freqlist <- apply(dframe, 2, function(dtab) as.list(table(dtab)))
cat.ind <- sequence(sapply(freqlist, length))
scoremat[, , i] <- t(apply(dummy.scores, 1, function(ds) {
ind.infel <- which(ds == Inf)
ind.minfel <- which(ds == -Inf)
ind.nan <- which(is.nan(ds))
ind.nael <- which((is.na(ds) + (cat.ind != 1)) ==
2)
ds[-c(ind.infel, ind.minfel, ind.nael, ind.nan)]
}))
}
disc.mat <- apply(scoremat, 3, function(xx) {
apply(xx, 2, function(cols) {
(sum(cols^2, na.rm = TRUE))/nobj
})
})
result <- list(datname = name, catscores = ylist, scoremat = scoremat,
objscores = z, cat.centroids = clist, ind.mat = dummymat01,
loadings = alist, low.rank = ulist, discrim = disc.mat,
ndim = ndim, niter = iter, level = level, eigenvalues = r,
loss = smid, rank.vec = rank, active = active, dframe = dframe,
call = match.call())
class(result) <- "homals"
result
}