为什么分类变量的一个水平的自由度与lme中的其他水平不同？

Question

我正在做具有嵌套随机效应的混合模型。当我运行模型的摘要时，我看到变量 "treatment 1" 的级别 "control fishless" 的自由度数是 11，而其他四个级别的 df 是 51。我会期望他们都是一样的。这可能是什么原因造成的，我该如何解决这个问题？

谢谢！

我的模型：

library(nlme)
library(emmeans)

data.score.merge2$treatment1 <- relevel(data.score.merge2$treatment1,"control fish")
mod.full.PC1 <- lme(PC1 ~ season + elevation + treatment1, method = "REML",
                random = ~1|lake/replicate,
                data = data.score.merge2)
summary(mod.full.PC1)
anova(mod.full.PC1)
pairs(emmeans(mod.full.PC1, ~treatment1))

我的数据（我把它们都放了，因为可能是因为它们的分布或类似的原因）：

structure(list(sample = structure(1:102, .Label = c("Annette 1", 
"Annette 2", "Annette 3", "Annette 4", "Annette 5", "Annette 6", 
"Cobb1", "Cobb2", "Cobb3", "Cobb4", "Cobb5", "Cobb6", "Dog1", 
"Dog2", "Dog3", "Dog4", "Dog5", "Dog6", "Helen1", "Helen2", "Helen3", 
"Helen4", "Helen5", "Helen6", "Herbert 1", "Herbert 2", "Herbert 3", 
"Herbert 4", "Herbert 5", "Herbert 6", "Hidden 10 Months 1", 
"Hidden 10 Months 2", "Hidden 10 Months 3", "Hidden 10 Months 4", 
"Hidden 10 Months 5", "Hidden 10 Months 6", "Hidden 10 Months 7", 
"Hidden 10 Months 8", "Hidden After 1", "Hidden After 2", "Hidden After 3", 
"Hidden After 4", "Hidden After 5", "Hidden After 6", "Hidden After 7", 
"Hidden After 8", "Hidden Before 1", "Hidden Before 2", "Hidden Before 3", 
"Hidden Before 4", "Hidden Before 5", "Hidden Before 6", "Hidden Before 7", 
"Hidden Before 8", "Kootenay Pond 1", "Kootenay Pond 2", "Kootenay Pond 3", 
"Kootenay Pond 4", "Kootenay Pond 5", "Kootenay Pond 6", "Margaret1", 
"Margaret2", "Margaret3", "Margaret4", "Margaret5", "Margaret6", 
"McNair1", "McNair2", "McNair3", "McNair4", "McNair5", "McNair6", 
"Mud1", "Mud2", "Mud3", "Mud4", "Mud5", "Mud6", "Olive1", "Olive2", 
"Olive3", "Olive4", "Olive5", "Olive6", "Ross1", "Ross2", "Ross3", 
"Ross4", "Ross5", "Ross6", "Sentinel 1", "Sentinel 2", "Sentinel 3", 
"Sentinel 4", "Sentinel 5", "Sentinel 6", "Temple1", "Temple2", 
"Temple3", "Temple4", "Temple5", "Temple6"), class = "factor"), 
    replicate = c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 
    3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 
    3L, 1L, 2L, 3L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 1L, 2L, 3L, 
    4L, 5L, 6L, 7L, 8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 1L, 2L, 
    3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 
    3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 
    3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 
    3L), season = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 
    1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 
    2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 
    1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 
    2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 
    1L, 2L, 2L, 2L), .Label = c("july", "Sept"), class = "factor"), 
    elevation = c(1898L, 1898L, 1898L, 1898L, 1898L, 1898L, 1260L, 
    1260L, 1260L, 1260L, 1260L, 1260L, 1185L, 1185L, 1185L, 1185L, 
    1185L, 1185L, 2400L, 2400L, 2400L, 2400L, 2400L, 2400L, 1506L, 
    1506L, 1506L, 1506L, 1506L, 1506L, 2400L, 2400L, 2400L, 2400L, 
    2400L, 2400L, 2400L, 2400L, 2400L, 2400L, 2400L, 2400L, 2400L, 
    2400L, 2400L, 2400L, 2400L, 2400L, 2400L, 2400L, 2400L, 2400L, 
    2400L, 2400L, 1128L, 1128L, 1128L, 1128L, 1128L, 1128L, 1808L, 
    1808L, 1808L, 1808L, 1808L, 1808L, 1532L, 1532L, 1532L, 1532L, 
    1532L, 1532L, 1600L, 1600L, 1600L, 1600L, 1600L, 1600L, 1470L, 
    1470L, 1470L, 1470L, 1470L, 1470L, 1735L, 1735L, 1735L, 1735L, 
    1735L, 1735L, 2274L, 2274L, 2274L, 2274L, 2274L, 2274L, 2207L, 
    2207L, 2207L, 2207L, 2207L, 2207L), lake = structure(c(1L, 
    1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 
    3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 
    6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 
    6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 
    8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 
    10L, 10L, 10L, 11L, 11L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 
    12L, 12L, 12L, 13L, 13L, 13L, 13L, 13L, 13L, 14L, 14L, 14L, 
    14L, 14L, 14L), .Label = c("Annette", "Cobb", "Dog", "Helen", 
    "Herbert", "Hidden", "Kootenay Pond", "Margaret", "McNair", 
    "Mud", "Olive", "Ross", "Sentinel", "Temple"), class = "factor"), 
    treatment1 = structure(c(5L, 5L, 5L, 5L, 5L, 5L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 5L, 5L, 5L, 5L, 5L, 5L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
    4L, 5L, 5L, 5L, 5L, 5L, 5L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 5L, 5L, 5L, 5L, 5L, 5L, 1L, 1L, 
    1L, 1L, 1L, 1L), .Label = c("control fish", "10 month after", 
    "2 weeks", "Before", "control fish less"), class = "factor"), 
    PC1 = c(0.44326796769495, 0.391805088607907, 0.366947929408647, 
    0.441091779859967, 0.403451309938093, 0.290713511918726, 
    -0.116054225460659, 0.0837383015520615, -0.138878110788659, 
    -0.0964488183669892, -0.00381046526130373, -0.158529727412508, 
    0.228810116000753, 0.116624496704548, -0.0690976721539107, 
    0.241173083526596, 0.149149975299026, 0.144601131921389, 
    -0.132779778461221, -0.128217883448178, -0.131033607355805, 
    -0.214176904428602, -0.181864972831349, -0.178177705570812, 
    0.172493720206089, 0.208305957239218, 0.20607376930861, 0.238450520739572, 
    0.102790731251281, 0.0160873782197574, 0.302705601248893, 
    0.425953281002366, 0.274172226495253, 0.280315453212515, 
    0.0798037386796905, 0.506141404170783, 0.262641034377766, 
    0.400158058034833, 0.0532097743994839, -0.132559135111449, 
    -0.208628889600582, -0.283005491924375, 0.00488503259743488, 
    0.0532097743994839, -0.200151311512991, -0.19723781894123, 
    -0.2861581962982, -0.192654352006073, -0.260656011895704, 
    -0.222547061478245, -0.198133890113568, -0.201428667334639, 
    -0.303429745739617, -0.25748877830387, 0.438220569263306, 
    0.50013316765322, 0.516180868603105, 0.457174900484235, 0.191265288575061, 
    0.403629612339766, -0.032327404790195, -0.266767528170862, 
    -0.165290371763968, -0.253115189605474, -0.2661674781074, 
    -0.246530476585813, 0.161873213488864, 0.230102615861032, 
    0.247427735270813, 0.49154515223575, 0.332658929465825, 0.375955962214077, 
    -0.164941207044441, -0.14560006272253, -0.120381478479053, 
    -0.229436843745494, -0.135108208826027, -0.240164909215447, 
    0.132065444955584, -0.0264627444494227, -0.00763873712457839, 
    0.109028111315133, 0.0757425066027548, 0.0983200921588997, 
    -0.301499561057097, -0.226818786095393, -0.169404319537732, 
    -0.233550579992025, -0.19970763507962, -0.239831967809433, 
    -0.309404682344026, -0.311166025270994, -0.295989491984986, 
    -0.31253624380418, -0.300847881797013, -0.18667072522063, 
    -0.301875992307235, -0.308143568566698, -0.299702156473456, 
    -0.314275297789266, -0.306728420509431, -0.238861120432659
    )), class = "data.frame", row.names = c(NA, -102L))

Answer 1

注意treatment1和lake的关系：

> with(data.score.merge2, table(lake, treatment1))
               treatment1
lake            control fish 10 month after 2 weeks Before control fish less
  Annette                  0              0       0      0                 6
  Cobb                     6              0       0      0                 0
  Dog                      6              0       0      0                 0
  Helen                    6              0       0      0                 0
  Herbert                  0              0       0      0                 6
  Hidden                   0              8       8      8                 0
  Kootenay Pond            0              0       0      0                 6
  Margaret                 6              0       0      0                 0
  McNair                   6              0       0      0                 0
  Mud                      6              0       0      0                 0
  Olive                    6              0       0      0                 0
  Ross                     6              0       0      0                 0
  Sentinel                 0              0       0      0                 6
  Temple                   6              0       0      0                 0

其中三种处理仅在 Hidden 个湖泊中出现，并且在一个以上的湖泊中没有观察到其他处理。因此 10 month after、2 weeks 和 Before 级别之间的比较是 within-lake 比较，所有其他比较都是 between-湖。我得到的结果是：

> pairs(emmeans(mod.full.PC1, ~treatment1))
 contrast                           estimate     SE df t.ratio p.value
 control fish - 10 month after       -0.6384 0.2210 84 -2.888  0.0385 
 control fish - 2 weeks              -0.2206 0.2209 84 -0.999  0.8552 
 control fish - Before               -0.0816 0.2210 84 -0.369  0.9960 
 control fish - control fish less    -0.2442 0.1142 11 -2.138  0.2710 
 10 month after - 2 weeks             0.4178 0.0470 84  8.885  <.0001 
 10 month after - Before              0.5568 0.0438 84 12.701  <.0001 
 10 month after - control fish less   0.3942 0.2314 11  1.704  0.4698 
 2 weeks - Before                     0.1390 0.0470 84  2.957  0.0321 
 2 weeks - control fish less         -0.0235 0.2313 11 -0.102  1.0000 
 Before - control fish less          -0.1626 0.2314 11 -0.703  0.9516 

Results are averaged over the levels of: season 
Degrees-of-freedom method: containment 
P value adjustment: tukey method for comparing a family of 5 estimates

[我得到 84 d.f。你说 51 的地方 -- 那是个谜。] 我也不明白为什么没有 11 d.f。与 control fish 进行比较。这些结果使用了包含方法。

我也用近似的 Satterthwaite 方法试过了：

> pairs(emmeans(mod.full.PC1, ~treatment1, mode = "appx"))
 contrast                           estimate     SE    df t.ratio p.value
 control fish - 10 month after       -0.6384 0.2210  9.88 -2.888  0.0938 
 control fish - 2 weeks              -0.2206 0.2209  9.87 -0.999  0.8502 
 control fish - Before               -0.0816 0.2210  9.88 -0.369  0.9954 
 control fish - control fish less    -0.2442 0.1142  9.98 -2.138  0.2769 
 10 month after - 2 weeks             0.4178 0.0470 84.79  8.885  <.0001 
 10 month after - Before              0.5568 0.0438 84.79 12.701  <.0001 
 10 month after - control fish less   0.3942 0.2314  9.89  1.704  0.4735 
 2 weeks - Before                     0.1390 0.0470 84.79  2.957  0.0320 
 2 weeks - control fish less         -0.0235 0.2313  9.88 -0.102  1.0000 
 Before - control fish less          -0.1626 0.2314  9.89 -0.703  0.9512 

Results are averaged over the levels of: season 
Degrees-of-freedom method: appx-satterthwaite 
P value adjustment: tukey method for comparing a family of 5 estimates

这些d.f。更有意义，在 Hidden Lake 的三种治疗方法中明显更高仅。我只能猜测 d.f 的收容方法中存在错误。与 control fish 是参考水平这一事实有关。我会调查的。

附录

有关遏制的更多信息...如果我更改对比（即虚拟变量的构造方式），可能会影响 d.f。来自收容方法：

> mfp = update(mod.full.PC1, contrasts = list(treatment1 = "contr.sum"))
> pairs(emmeans(mfp, ~treatment1, mode = "cont"))
 contrast                           estimate     SE df t.ratio p.value
 control fish - 10 month after       -0.6384 0.2210 11 -2.888  0.0874 
 control fish - 2 weeks              -0.2206 0.2209 11 -0.999  0.8507 
 control fish - Before               -0.0816 0.2210 11 -0.369  0.9954 
 control fish - control fish less    -0.2442 0.1142 11 -2.138  0.2710 
 10 month after - 2 weeks             0.4178 0.0470 84  8.885  <.0001 
 10 month after - Before              0.5568 0.0438 84 12.701  <.0001 
 10 month after - control fish less   0.3942 0.2314 11  1.704  0.4698 
 2 weeks - Before                     0.1390 0.0470 84  2.957  0.0321 
 2 weeks - control fish less         -0.0235 0.2313 11 -0.102  1.0000 
 Before - control fish less          -0.1626 0.2314 11 -0.703  0.9516 

Results are averaged over the levels of: season 
Degrees-of-freedom method: containment 
P value adjustment: tukey method for comparing a family of 5 estimates

请注意，只有 3 个比较具有更大的 d.f，更符合我的直觉和 Satterthwaite 的结果。我现在的猜测是，这与一个因素的参考水平有关。使用默认的 "contr.treatment" 对比，参考水平不涉及与相关因素相关的任何预测变量，这会弄乱 d.f。与参考水平比较的计算。我推测此错误无法修复，解决方法是在包含 d.f 时使用 "contr.treatment" 以外的对比。用来。但我会进一步调查。

为什么分类变量的一个水平的自由度与lme中的其他水平不同？

Why does the degre of freedom of one level of a cathegoric variable is different from the other levels in lme?

r

mixed-models

nlme

附录