当 运行 dropterm 时,并行 foreach 将数据更改为 NA
Parallel foreach changes data to NA when running dropterm
我有一个 GAMLSS 模型,我正在尝试适应我数据的多个子集。每个月都需要单独分析,所以我使用 foreach 循环遍历月份。然而,当我并行化我的循环时,dropterm
的结果全部被 NA'd。这是一个使用内置数据的类似示例:
library(dplyr)
library(gamlss)
library(MASS)
nCores <- detectCores()
gamlssCl <- makeCluster(nCores)
registerDoParallel(gamlssCl)
test.par <- foreach(s = unique(iris$Species),
.packages = c('dplyr', 'gamlss', 'MASS')) %dopar% {
species.data <- filter(iris, Species == s)
model <- gamlss(Petal.Length ~ Sepal.Length + Sepal.Width + Petal.Length,
data = species.data,
family = GA)
var.rank <- dropterm(model, test = 'Chisq') %>%
mutate(Variable = row.names(.)) %>%
arrange(AIC) %>%
filter(Variable != '<none>')
var.rank
}
stopCluster(gamlssCl)
test.par
# [[1]]
# Df AIC LRT Pr(Chi) Variable
# 1 NA NA NA NA Sepal.Length
# 2 NA NA NA NA Sepal.Width
# 3 NA NA NA NA Petal.Length
#
# [[2]]
# Df AIC LRT Pr(Chi) Variable
# 1 NA NA NA NA Sepal.Length
# 2 NA NA NA NA Sepal.Width
# 3 NA NA NA NA Petal.Length
#
# [[3]]
# Df AIC LRT Pr(Chi) Variable
# 1 NA NA NA NA Sepal.Length
# 2 NA NA NA NA Sepal.Width
# 3 NA NA NA NA Petal.Length
test.serial <- foreach(s = unique(iris$Species)) %do% {
species.data <- filter(iris, Species == s)
model <- gamlss(Petal.Length ~ Sepal.Length + Sepal.Width + Petal.Length,
data = species.data,
family = GA)
var.rank <- dropterm(model, test = 'Chisq') %>%
mutate(Variable = row.names(.)) %>%
arrange(AIC) %>%
filter(Variable != '<none>')
var.rank
}
test.serial
# [[1]]
# Df AIC LRT Pr(Chi) Variable
# 1 1 -31.66335 0.06406465 0.8001832 Sepal.Width
# 2 0 -29.72741 0.00000000 NA Petal.Length
# 3 1 -29.43731 2.29010516 0.1302011 Sepal.Length
#
# [[2]]
# Df AIC LRT Pr(Chi) Variable
# 1 0 31.03608 0.000000 NA Petal.Length
# 2 1 33.81852 4.782442 2.875132e-02 Sepal.Width
# 3 1 56.00459 26.968510 2.067972e-07 Sepal.Length
#
# [[3]]
# Df AIC LRT Pr(Chi) Variable
# 1 1 16.29265 0.08628226 7.689578e-01 Sepal.Width
# 2 0 18.20637 0.00000000 NA Petal.Length
# 3 1 77.14978 60.94341742 5.873901e-15 Sepal.Length
注意:使用 glm
而不是 gamlss
时不会出现错误
抱歉,还没有解决方案,但这里有一个最小的例子来说明这个问题,它不依赖于 foreach。
首先,做:
library("gamlss")
data <- subset(iris, Species == "setosa")
model <- gamlss(Petal.Length ~ Sepal.Length + Sepal.Width + Petal.Length,
data = data, family = GA)
## GAMLSS-RS iteration 1: Global Deviance = -37.7274
## GAMLSS-RS iteration 2: Global Deviance = -37.7274
model2 <- dropterm(model, test = "Chisq")
print(model2)
## Single term deletions for
## mu
##
## Model:
## Petal.Length ~ Sepal.Length + Sepal.Width + Petal.Length
## Df AIC LRT Pr(Chi)
## <none> -29.727
## Sepal.Length 1 -29.437 2.29011 0.1302
## Sepal.Width 1 -31.663 0.06406 0.8002
## Petal.Length 0 -29.727 0.00000
然后将结果保存到文件:
saveRDS(list(model = model, model2 = model2), file = "gamlss.rds")
然后 在新的 R 会话中 (R --vanilla
),执行:
> library("gamlss")
Loading required package: splines
Loading required package: gamlss.data
Loading required package: gamlss.dist
Loading required package: MASS
Loading required package: nlme
Loading required package: parallel
********** GAMLSS Version 5.0-1 **********
For more on GAMLSS look at http://www.gamlss.org/
Type gamlssNews() to see new features/changes/bug fixes.
> gamlss <- readRDS("gamlss.rds")
> model <- gamlss$model
> class(model)
[1] "gamlss" "gam" "glm" "lm"
> model2 <- dropterm(model, test = "Chisq")
Model with term Sepal.Length has failed
Model with term Sepal.Width has failed
Model with term Petal.Length has failed
> print(model2)
Single term deletions for
mu
Model:
Petal.Length ~ Sepal.Length + Sepal.Width + Petal.Length
Df AIC LRT Pr(Chi)
<none> -29.727
Sepal.Length
Sepal.Width
Petal.Length
比较 model2
在新 R 会话中与上面第一个会话的输出;
> all.equal(model2, gamlss$model2)
[1] "Component “Df”: 'is.NA' value mismatch: 1 in current 4 in target"
[2] "Component “AIC”: 'is.NA' value mismatch: 0 in current 3 in target"
[3] "Component “LRT”: 'is.NA' value mismatch: 1 in current 4 in target"
[4] "Component “Pr(Chi)”: 'is.NA' value mismatch: 2 in current 4 in target"
这里显然有些地方不正确。
我怀疑 model
对象包含一个或多个所谓的 promises 在转移到另一个 R 进程时没有正确保存(就像当你使用 SNOW 集群)。
我认为这是 gamlss 包本身的问题。问题似乎是 gamlss 对象无法 序列化 。我建议您将此报告给软件包维护者。请随意在您的报告中使用我的最小示例。
我有一个 GAMLSS 模型,我正在尝试适应我数据的多个子集。每个月都需要单独分析,所以我使用 foreach 循环遍历月份。然而,当我并行化我的循环时,dropterm
的结果全部被 NA'd。这是一个使用内置数据的类似示例:
library(dplyr)
library(gamlss)
library(MASS)
nCores <- detectCores()
gamlssCl <- makeCluster(nCores)
registerDoParallel(gamlssCl)
test.par <- foreach(s = unique(iris$Species),
.packages = c('dplyr', 'gamlss', 'MASS')) %dopar% {
species.data <- filter(iris, Species == s)
model <- gamlss(Petal.Length ~ Sepal.Length + Sepal.Width + Petal.Length,
data = species.data,
family = GA)
var.rank <- dropterm(model, test = 'Chisq') %>%
mutate(Variable = row.names(.)) %>%
arrange(AIC) %>%
filter(Variable != '<none>')
var.rank
}
stopCluster(gamlssCl)
test.par
# [[1]]
# Df AIC LRT Pr(Chi) Variable
# 1 NA NA NA NA Sepal.Length
# 2 NA NA NA NA Sepal.Width
# 3 NA NA NA NA Petal.Length
#
# [[2]]
# Df AIC LRT Pr(Chi) Variable
# 1 NA NA NA NA Sepal.Length
# 2 NA NA NA NA Sepal.Width
# 3 NA NA NA NA Petal.Length
#
# [[3]]
# Df AIC LRT Pr(Chi) Variable
# 1 NA NA NA NA Sepal.Length
# 2 NA NA NA NA Sepal.Width
# 3 NA NA NA NA Petal.Length
test.serial <- foreach(s = unique(iris$Species)) %do% {
species.data <- filter(iris, Species == s)
model <- gamlss(Petal.Length ~ Sepal.Length + Sepal.Width + Petal.Length,
data = species.data,
family = GA)
var.rank <- dropterm(model, test = 'Chisq') %>%
mutate(Variable = row.names(.)) %>%
arrange(AIC) %>%
filter(Variable != '<none>')
var.rank
}
test.serial
# [[1]]
# Df AIC LRT Pr(Chi) Variable
# 1 1 -31.66335 0.06406465 0.8001832 Sepal.Width
# 2 0 -29.72741 0.00000000 NA Petal.Length
# 3 1 -29.43731 2.29010516 0.1302011 Sepal.Length
#
# [[2]]
# Df AIC LRT Pr(Chi) Variable
# 1 0 31.03608 0.000000 NA Petal.Length
# 2 1 33.81852 4.782442 2.875132e-02 Sepal.Width
# 3 1 56.00459 26.968510 2.067972e-07 Sepal.Length
#
# [[3]]
# Df AIC LRT Pr(Chi) Variable
# 1 1 16.29265 0.08628226 7.689578e-01 Sepal.Width
# 2 0 18.20637 0.00000000 NA Petal.Length
# 3 1 77.14978 60.94341742 5.873901e-15 Sepal.Length
注意:使用 glm
而不是 gamlss
抱歉,还没有解决方案,但这里有一个最小的例子来说明这个问题,它不依赖于 foreach。
首先,做:
library("gamlss")
data <- subset(iris, Species == "setosa")
model <- gamlss(Petal.Length ~ Sepal.Length + Sepal.Width + Petal.Length,
data = data, family = GA)
## GAMLSS-RS iteration 1: Global Deviance = -37.7274
## GAMLSS-RS iteration 2: Global Deviance = -37.7274
model2 <- dropterm(model, test = "Chisq")
print(model2)
## Single term deletions for
## mu
##
## Model:
## Petal.Length ~ Sepal.Length + Sepal.Width + Petal.Length
## Df AIC LRT Pr(Chi)
## <none> -29.727
## Sepal.Length 1 -29.437 2.29011 0.1302
## Sepal.Width 1 -31.663 0.06406 0.8002
## Petal.Length 0 -29.727 0.00000
然后将结果保存到文件:
saveRDS(list(model = model, model2 = model2), file = "gamlss.rds")
然后 在新的 R 会话中 (R --vanilla
),执行:
> library("gamlss")
Loading required package: splines
Loading required package: gamlss.data
Loading required package: gamlss.dist
Loading required package: MASS
Loading required package: nlme
Loading required package: parallel
********** GAMLSS Version 5.0-1 **********
For more on GAMLSS look at http://www.gamlss.org/
Type gamlssNews() to see new features/changes/bug fixes.
> gamlss <- readRDS("gamlss.rds")
> model <- gamlss$model
> class(model)
[1] "gamlss" "gam" "glm" "lm"
> model2 <- dropterm(model, test = "Chisq")
Model with term Sepal.Length has failed
Model with term Sepal.Width has failed
Model with term Petal.Length has failed
> print(model2)
Single term deletions for
mu
Model:
Petal.Length ~ Sepal.Length + Sepal.Width + Petal.Length
Df AIC LRT Pr(Chi)
<none> -29.727
Sepal.Length
Sepal.Width
Petal.Length
比较 model2
在新 R 会话中与上面第一个会话的输出;
> all.equal(model2, gamlss$model2)
[1] "Component “Df”: 'is.NA' value mismatch: 1 in current 4 in target"
[2] "Component “AIC”: 'is.NA' value mismatch: 0 in current 3 in target"
[3] "Component “LRT”: 'is.NA' value mismatch: 1 in current 4 in target"
[4] "Component “Pr(Chi)”: 'is.NA' value mismatch: 2 in current 4 in target"
这里显然有些地方不正确。
我怀疑 model
对象包含一个或多个所谓的 promises 在转移到另一个 R 进程时没有正确保存(就像当你使用 SNOW 集群)。
我认为这是 gamlss 包本身的问题。问题似乎是 gamlss 对象无法 序列化 。我建议您将此报告给软件包维护者。请随意在您的报告中使用我的最小示例。