拟合 beta 回归的问题
issue with fitting beta regression
我正在尝试使用 R 模拟我的响应 'crop coverage [%]' ~ 杂草覆盖率 [%] + 土壤水分 [%] 之间的关系。由于我处理的是比例,所以我选择进行 beta 回归。
有人告诉我,为了更好地拟合和可视化模型,使用 weed_coverage 的平均值是个好主意。但是,当我这样做时,出现以下错误:
betareg (crop_coverage ~ soil_moisture + weed_coverage_mean, data = df) -> model_a
Error in optim(par = start, fn = loglikfun, gr = gradfun, method = method, : non-finite value supplied by optim
为什么会出现此错误?因为 weed_coverage 和 soil_moisture 都是连续变量,所以这真的是拟合和可视化该模型的最佳方式吗?非常感谢。
我的数据:
df <- structure(list(date = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L), .Label = c("2021-03-17",
"2021-04-07", "2021-04-13", "2021-04-27", "2021-05-11", "2021-05-27"
), class = "factor"), weed_coverage = c(0, 0, 0, 1.7, 1, 5, 0,
0, 0.1, 0.2, 1, 2.8, 2.5, 1, 1, 5, 0, 0, 0.9, 0.7, 0, 1.1, 0.5,
0.5, 0, 0, 0.5, 4, 0, 0.3, 0.8, 4, 1, 2, 2, 6, 0.2, 5, 0, 0,
3, 1, 0, 2, 0, 0, 0, 3, 3, 0), soil_moisture = c(36.28, 37.6,
38.55, 34.38, 34.02, 34.88, 34.92, 38.12, 35.38, 36.92, 27.15,
24.95, 21.38, 22.95, 27.65, 25.7, 27.02, 32.1, 27.18, 26, 14.97,
15.25, 17.02, 16.12, 15.32, 14.3, 14.5, 12.45, 13.07, 15.4, 14.9,
12, 16.85, 17.15, 18.52, 10.68, 13.82, 9.5, 15.32, 10.97, 14.8,
17.05, 26.75, 14.8, 25.75, 19.18, 18.12, 14.22, 18.95, 24.38),
crop_coverage = c(0.38, 0.6, 0.75, 0.5, 0.4, 0.48, 0.74,
0.75, 0.27, 0.45, 0.65, 0.3, 0.4, 0.38, 0.45, 0.58, 0.48,
0.75, 0.58, 0.4, 0.9999, 0.7, 0.75, 0.7, 0.85, 0.78, 0.7,
0.91, 0.2, 0.6, 0.95, 0.85, 0.6, 0.7, 0.75, 0.9, 0.8, 0.85,
0.75, 0.96, 0.85, 0.85, 0.75, 0.73, 0.68, 0.7, 0.97, 0.7,
0.75, 0.74), weed_coverage_mean = c(1.256, 1.256, 1.256,
1.256, 1.256, 1.256, 1.256, 1.256, 1.256, 1.256, 1.256, 1.256,
1.256, 1.256, 1.256, 1.256, 1.256, 1.256, 1.256, 1.256, 1.256,
1.256, 1.256, 1.256, 1.256, 1.256, 1.256, 1.256, 1.256, 1.256,
1.256, 1.256, 1.256, 1.256, 1.256, 1.256, 1.256, 1.256, 1.256,
1.256, 1.256, 1.256, 1.256, 1.256, 1.256, 1.256, 1.256, 1.256,
1.256, 1.256)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-50L))
您应该用原始数据拟合模型,然后在预测值[=30=时使用non-focal预测变量的平均值] 在情节中使用。例如:
适合
library(betareg)
m <- betareg (crop_coverage ~ soil_moisture + weed_coverage, data = df)
构建预测框并预测
pframe <- with(df, data.frame(soil_moisture = seq(min(soil_moisture),
max(soil_moisture),
length.out = 50),
weed_coverage = mean(weed_coverage)))
pframe$crop_coverage <- predict(m, newdata = pframe, type = "response")
情节
plot(crop_coverage ~ soil_moisture, data = df)
with(pframe, lines(soil_moisture, crop_coverage))
如果你想使用 expand.grid()
生成一个预测框架来计算几个不同水平的杂草覆盖率的预测值,你可以做一些更有趣的事情(如果你打算这样做,你可能想要移动到 ggplot2
进行绘图。
我正在尝试使用 R 模拟我的响应 'crop coverage [%]' ~ 杂草覆盖率 [%] + 土壤水分 [%] 之间的关系。由于我处理的是比例,所以我选择进行 beta 回归。 有人告诉我,为了更好地拟合和可视化模型,使用 weed_coverage 的平均值是个好主意。但是,当我这样做时,出现以下错误:
betareg (crop_coverage ~ soil_moisture + weed_coverage_mean, data = df) -> model_a
Error in optim(par = start, fn = loglikfun, gr = gradfun, method = method, : non-finite value supplied by optim
为什么会出现此错误?因为 weed_coverage 和 soil_moisture 都是连续变量,所以这真的是拟合和可视化该模型的最佳方式吗?非常感谢。
我的数据:
df <- structure(list(date = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L), .Label = c("2021-03-17",
"2021-04-07", "2021-04-13", "2021-04-27", "2021-05-11", "2021-05-27"
), class = "factor"), weed_coverage = c(0, 0, 0, 1.7, 1, 5, 0,
0, 0.1, 0.2, 1, 2.8, 2.5, 1, 1, 5, 0, 0, 0.9, 0.7, 0, 1.1, 0.5,
0.5, 0, 0, 0.5, 4, 0, 0.3, 0.8, 4, 1, 2, 2, 6, 0.2, 5, 0, 0,
3, 1, 0, 2, 0, 0, 0, 3, 3, 0), soil_moisture = c(36.28, 37.6,
38.55, 34.38, 34.02, 34.88, 34.92, 38.12, 35.38, 36.92, 27.15,
24.95, 21.38, 22.95, 27.65, 25.7, 27.02, 32.1, 27.18, 26, 14.97,
15.25, 17.02, 16.12, 15.32, 14.3, 14.5, 12.45, 13.07, 15.4, 14.9,
12, 16.85, 17.15, 18.52, 10.68, 13.82, 9.5, 15.32, 10.97, 14.8,
17.05, 26.75, 14.8, 25.75, 19.18, 18.12, 14.22, 18.95, 24.38),
crop_coverage = c(0.38, 0.6, 0.75, 0.5, 0.4, 0.48, 0.74,
0.75, 0.27, 0.45, 0.65, 0.3, 0.4, 0.38, 0.45, 0.58, 0.48,
0.75, 0.58, 0.4, 0.9999, 0.7, 0.75, 0.7, 0.85, 0.78, 0.7,
0.91, 0.2, 0.6, 0.95, 0.85, 0.6, 0.7, 0.75, 0.9, 0.8, 0.85,
0.75, 0.96, 0.85, 0.85, 0.75, 0.73, 0.68, 0.7, 0.97, 0.7,
0.75, 0.74), weed_coverage_mean = c(1.256, 1.256, 1.256,
1.256, 1.256, 1.256, 1.256, 1.256, 1.256, 1.256, 1.256, 1.256,
1.256, 1.256, 1.256, 1.256, 1.256, 1.256, 1.256, 1.256, 1.256,
1.256, 1.256, 1.256, 1.256, 1.256, 1.256, 1.256, 1.256, 1.256,
1.256, 1.256, 1.256, 1.256, 1.256, 1.256, 1.256, 1.256, 1.256,
1.256, 1.256, 1.256, 1.256, 1.256, 1.256, 1.256, 1.256, 1.256,
1.256, 1.256)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-50L))
您应该用原始数据拟合模型,然后在预测值[=30=时使用non-focal预测变量的平均值] 在情节中使用。例如:
适合
library(betareg)
m <- betareg (crop_coverage ~ soil_moisture + weed_coverage, data = df)
构建预测框并预测
pframe <- with(df, data.frame(soil_moisture = seq(min(soil_moisture),
max(soil_moisture),
length.out = 50),
weed_coverage = mean(weed_coverage)))
pframe$crop_coverage <- predict(m, newdata = pframe, type = "response")
情节
plot(crop_coverage ~ soil_moisture, data = df)
with(pframe, lines(soil_moisture, crop_coverage))
如果你想使用 expand.grid()
生成一个预测框架来计算几个不同水平的杂草覆盖率的预测值,你可以做一些更有趣的事情(如果你打算这样做,你可能想要移动到 ggplot2
进行绘图。