GLM 模型总结问题

GLM Model Summary Problems

我在 R 中运行了以下 GLM 模型,但汇总结果没有意义:各个 P 值都相同,而且系数非常大。一定是哪里出了问题,但我不知道具体是什么。

# Fit a logistic regression of the binary outcome CloseWon on the lead
# attributes; state and original_source are treated as categorical.
mod.glm <- glm(
  factor(CloseWon) ~ days_to_close + factor(state) +
    number_of_times_contacted + number_of_sales_activities +
    factor(original_source) + average_pageviews +
    marketing_emails_delivered + sends_since_last_engagement,
  data = HSBI_train,
  family = binomial("logit")
)

# Summarise the model fitted above (the object is `mod.glm`, not `mod`).
summary(mod.glm)

下面是我的输出结果截图的链接,谢谢: [1]: https://i.stack.imgur.com/A5jsQ.jpg

按要求输入:

dput(HSBI)
structure(list(days_to_close = c(143L, 0L, 264L, 0L, 138L, 0L, 
117L, 48L, 258L, 0L, 59L, 125L, 204L, 260L, 0L, 210L, 0L, 119L, 
0L, 286L, 29L, 0L, 56L, 0L, 0L, 92L, 92L, 94L, 38L, 223L, 284L, 
0L, 289L, 278L, 128L, 52L, 137L, 0L, 256L, 0L, 119L, 175L, 225L, 
118L, 161L, 129L, 94L, 0L, 33L, 0L, 0L, 26L, 0L, 0L, 0L, 0L, 
163L, 112L, 0L, 0L, 0L, 6L, 0L, 0L, 8L, 0L, 0L, 251L, 0L, 0L, 
100L, 0L, 118L, 126L, 65L, 0L, 116L, 120L, 0L, 115L, 20L, 40L, 
0L, 77L, 0L, 0L, 0L, 0L, 0L, 0L, 184L, 0L, 268L, 0L, 49L, 128L, 
0L, 129L, 240L, 0L, 164L, 0L, 73L, 0L, 0L, 200L, 0L, 22L, 0L, 
0L, 0L, 0L, 0L, 268L, 20L, 0L, 0L, 31L, 99L, 0L, 0L, 0L, 0L, 
263L, 0L, 0L, 265L, 0L, 280L, 174L, 267L, 0L, 0L, 260L, 0L, 0L, 
0L, 219L, 0L, 0L, 292L, 0L, 259L, 0L, 0L, 114L, 127L, 127L, 0L, 
41L, 0L, 251L, 281L, 0L, 226L, 277L, 268L, 0L, 219L, 0L, 97L, 
0L, 0L, 0L, 218L, 0L, 98L, 64L, 0L, 0L, 101L, 0L, 0L, 0L, 0L, 
0L, 165L, 0L, 0L, 0L, 0L, 76L, 48L, 233L, 0L, 0L, 0L, 0L, 0L, 
0L, 107L, 189L, 0L, 94L, 19L, 223L, 128L, 0L, 0L, 0L, 106L, 246L, 
0L, 0L, 118L, 168L, 160L, 0L, 225L, 231L, 222L, 0L, 0L, 122L, 
0L, 37L, 236L, 246L, 0L, 16L, 0L, 70L, 0L, 123L, 264L, 0L, 0L, 
0L, 0L, 264L, 0L, 41L, 296L, 124L, 198L, 0L, 0L, 0L, 58L, 156L, 
166L, 274L, 0L, 88L, 2L, 0L, 124L, 0L, 80L, 41L, 278L, 0L, 0L, 
252L, 0L, 80L, 0L, 0L), state = c("", "fl", "fl", "sj", "ga", 
"nc", "ga", "in", "ga", "ca", "va", "ca", "va", "tn", "co", "tx", 
"fl", "tn", "ca", "tn", "in", "ga", "il", "nj", "ca", "ga", "ga", 
"", "", "ga", "ga", "fl", "ga", "nc", "ga", "tx", "", "ga", "ga", 
"dc", "ny", "tn", "fl", "va", "ga", "al", "ca", "nv", "ca", "ga", 
"sc", "va", "ga", "oh", "ga", "fl", "la", "tn", "ny", "fl", "ca", 
"ca", "wa", "ny", "il", "ga", "ca", "fl", "fl", "al", "al", "fl", 
"al", "tn", "sc", "", "fl", "ga", "az", "fl", "ga", "", "ca", 
"ga", "ga", "oh", "ga", "al", "ga", "", "tx", "sc", "ga", "ny", 
"nc", "tn", "co", "oh", "al", "tx", "", "co", "ne", "ny", "fl", 
"oh", "ga", "ia", "va", "fl", "sc", "ca", "tn", "ga", "co", "ok", 
"ga", "al", "tx", "", "fl", "md", "ga", "al", "tn", "wa", "al", 
"oh", "ga", "ga", "", "pa", "oh", "al", "ny", "az", "tn", "oh", 
"ga", "tx", "ga", "tx", "tn", "va", "fl", "ga", "ga", "fl", "ny", 
"fl", "az", "ga", "ga", "tn", "ga", "", "ga", "pa", "fl", "tn", 
"al", "ga", "al", "ga", "ms", "vt", "ca", "fl", "fl", "ky", "oh", 
"wa", "fl", "wa", "ga", "az", "il", "al", "nc", "al", "nj", "tx", 
"tx", "fl", "la", "ga", "nc", "", "ga", "al", "tx", "oh", "fl", 
"tn", "tn", "fl", "ga", "ca", "", "ca", "fl", "ga", "fl", "dc", 
"md", "fl", "tx", "oh", "tx", "", "al", "tx", "fl", "tn", "tx", 
"ny", "ny", "tn", "az", "ga", "al", "tx", "sc", "tn", "tn", "ca", 
"al", "tn", "fl", "al", "tn", "ga", "ga", "ga", "fl", "pa", "tx", 
"co", "sc", "tx", "tx", "ga", "ny", "ma", "ny", "fl", "fl", "ca", 
"tn", "nv", "ga", "ga", "tx", "", "or", "", "fl", "il"), number_of_times_contacted = c(7L, 
5L, 7L, 2L, 40L, 4L, 6L, 6L, 5L, 1L, 3L, 5L, 8L, 8L, 1L, 10L, 
4L, 9L, 3L, 10L, 6L, 4L, 7L, 7L, 2L, 9L, 1L, 5L, 3L, 9L, 11L, 
4L, 8L, 10L, 10L, 6L, 10L, 3L, 10L, 12L, 5L, 7L, 8L, 5L, 31L, 
10L, 6L, 1L, 5L, 20L, 15L, 7L, 3L, 3L, 6L, 6L, 6L, 7L, 2L, 3L, 
1L, 2L, 1L, 19L, 2L, 3L, 1L, 10L, 5L, 3L, 7L, 2L, 8L, 9L, 3L, 
3L, 8L, 12L, 1L, 7L, 2L, 2L, 4L, 50L, 6L, 4L, 2L, 3L, 1L, 9L, 
7L, 3L, 14L, 1L, 3L, 5L, 7L, 4L, 8L, 4L, 6L, 2L, 4L, 7L, 5L, 
7L, 7L, 6L, 5L, 6L, 4L, 1L, 5L, 8L, 2L, 1L, 6L, 3L, 4L, 4L, 4L, 
1L, 6L, 8L, 4L, 3L, 8L, 11L, 10L, 6L, 8L, 5L, 8L, 6L, 4L, 2L, 
10L, 8L, 6L, 8L, 8L, 8L, 8L, 5L, 6L, 4L, 5L, 9L, 12L, 4L, 1L, 
11L, 12L, 7L, 9L, 14L, 8L, 3L, 7L, 17L, 8L, 4L, 2L, 33L, 9L, 
1L, 4L, 8L, 6L, NA, 7L, 3L, 9L, 2L, 5L, 8L, 6L, 6L, 4L, 10L, 
3L, 4L, 3L, 12L, 24L, 9L, 3L, 11L, 3L, 19L, 3L, 7L, 4L, 9L, 6L, 
7L, 10L, 1L, 1L, 1L, 4L, 10L, 1L, 3L, 5L, 7L, 6L, 2L, 7L, 9L, 
8L, 10L, 6L, 8L, 2L, 3L, 8L, 9L, 1L, 3L, 13L, 10L, 5L, 9L, 8L, 
1L, 6L, 12L, 4L, 11L, 10L, 4L, 10L, 10L, 7L, 6L, 3L, 4L, 3L, 
6L, 6L, 10L, 4L, 9L, 2L, 21L, 9L, 1L, 4L, 3L, 21L, 8L, 5L, 10L, 
3L, 8L, 8L, 7L), number_of_sales_activities = c(8L, 5L, 7L, 2L, 
61L, 4L, 7L, 6L, 5L, 1L, 3L, 5L, 8L, 8L, 1L, 10L, 4L, 9L, 3L, 
10L, 6L, 4L, 7L, 9L, 2L, 9L, 2L, 5L, 3L, 9L, 11L, 4L, 8L, 10L, 
10L, 6L, 14L, 3L, 10L, 14L, 5L, 8L, 8L, 5L, 43L, 10L, 7L, 1L, 
5L, 22L, 21L, 7L, 3L, 3L, 7L, 6L, 6L, 7L, 2L, 3L, 1L, 2L, 1L, 
28L, 2L, 3L, 1L, 10L, 5L, 4L, 7L, 2L, 8L, 9L, 3L, 3L, 8L, 12L, 
1L, 7L, 2L, 2L, 4L, 98L, 6L, 6L, 2L, 3L, 1L, 9L, 7L, 4L, 17L, 
2L, 3L, 5L, 7L, 4L, 8L, 4L, 7L, 2L, 5L, 8L, 5L, 7L, 8L, 6L, 5L, 
6L, 4L, 1L, 5L, 8L, 2L, 1L, 6L, 3L, 4L, 4L, 4L, 1L, 6L, 9L, 4L, 
3L, 8L, 16L, 10L, 6L, 8L, 5L, 9L, 6L, 4L, 2L, 10L, 8L, 7L, 8L, 
8L, 8L, 8L, 5L, 6L, 4L, 5L, 9L, 15L, 4L, 1L, 11L, 12L, 9L, 10L, 
21L, 8L, 4L, 7L, 21L, 8L, 4L, 2L, 61L, 9L, 1L, 4L, 8L, 7L, NA, 
7L, 3L, 9L, 2L, 5L, 9L, 6L, 6L, 4L, 12L, 3L, 5L, 3L, 12L, 35L, 
17L, 3L, 12L, 3L, 28L, 3L, 8L, 4L, 9L, 6L, 7L, 10L, 1L, 1L, 1L, 
4L, 10L, 1L, 3L, 5L, 7L, 6L, 2L, 7L, 10L, 8L, 10L, 6L, 8L, 3L, 
3L, 8L, 9L, 1L, 3L, 20L, 10L, 7L, 9L, 8L, 1L, 6L, 12L, 4L, 12L, 
10L, 5L, 10L, 11L, 7L, 7L, 3L, 4L, 3L, 6L, 6L, 11L, 4L, 9L, 2L, 
26L, 9L, 1L, 4L, 3L, 39L, 8L, 6L, 10L, 3L, 8L, 8L, 7L), original_source = c("Direct Traffic", 
"Paid Social", "Direct Traffic", "Direct Traffic", "Direct Traffic", 
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Organic Search", 
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic", 
"Direct Traffic", "Direct Traffic", "Paid Social", "Direct Traffic", 
"Direct Traffic", "Direct Traffic", "Paid Social", "Direct Traffic", 
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic", 
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic", 
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic", 
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic", 
"Direct Traffic", "Paid Social", "Direct Traffic", "Direct Traffic", 
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Paid Social", 
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic", 
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic", 
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic", 
"Paid Social", "Direct Traffic", "Direct Traffic", "Direct Traffic", 
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic", 
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic", 
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic", 
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic", 
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic", 
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Paid Social", 
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic", 
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic", 
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic", 
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Paid Social", 
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic", 
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic", 
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic", 
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic", 
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic", 
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic", 
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic", 
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic", 
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic", 
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Paid Social", 
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic", 
"Direct Traffic", "Direct Traffic", "Paid Social", "Paid Social", 
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic", 
"Direct Traffic", "Paid Social", "Direct Traffic", "Direct Traffic", 
"Direct Traffic", "Direct Traffic", "Paid Social", "Direct Traffic", 
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Paid Social", 
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic", 
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic", 
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Organic Search", 
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic", 
"Direct Traffic", "Organic Social", "Direct Traffic", "Direct Traffic", 
"Direct Traffic", "Direct Traffic", "Paid Social", "Paid Social", 
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic", 
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic", 
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic", 
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic", 
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic", 
"Direct Traffic", "Paid Social", "Direct Traffic", "Direct Traffic", 
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic", 
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic", 
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic", 
"Direct Traffic", "Direct Traffic", "Paid Social", "Direct Traffic", 
"Paid Social", "Direct Traffic", "Direct Traffic", "Direct Traffic", 
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Paid Social", 
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic", 
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic", 
"Paid Social", "Paid Social", "Direct Traffic", "Direct Traffic", 
"Paid Social", "Paid Social", "Direct Traffic", "Direct Traffic", 
"Organic Search", "Direct Traffic", "Direct Traffic", "Direct Traffic", 
"Direct Traffic"), average_pageviews = c(1L, 1L, 2L, 2L, 1L, 
2L, 1L, 2L, 7L, 2L, 2L, 1L, 2L, 2L, 2L, 4L, 2L, 2L, 2L, 2L, 2L, 
4L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 5L, 1L, 1L, 2L, 2L, 2L, 2L, 
2L, 3L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 3L, 2L, 
3L, 3L, 2L, 1L, 1L, 4L, 3L, 2L, 2L, 4L, 1L, 2L, 3L, 2L, 1L, 2L, 
5L, 2L, 2L, 2L, 3L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 5L, 6L, 1L, 1L, 4L, 
2L, 2L, 2L, 3L, 2L, 1L, 2L, 2L, 2L, 2L, 3L, 1L, 2L, 2L, 2L, 1L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 5L, 2L, 2L, 2L, 6L, 
3L, 2L, 2L, 2L, 2L, 1L, 2L, 5L, 1L, 2L, 3L, 2L, 2L, 3L, 1L, 3L, 
2L, 2L, 3L, 3L, 2L, 1L, 1L, 2L, 1L, 2L, 3L, 1L, 1L, 3L, 2L, 1L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 4L, 2L, 3L, 1L, 1L, 
1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 3L, 1L, 1L, 2L, 2L, 1L, 2L, 
3L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 3L, 1L, 1L, 2L, 
2L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 4L, 
4L, 1L, 2L, 2L, 0L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 3L, 2L, 2L, 2L, 
4L, 1L, 3L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L), marketing_emails_delivered = c(15L, 
6L, 13L, 7L, 65L, 10L, 11L, 11L, 22L, 3L, 8L, 12L, 17L, 15L, 
2L, 18L, 9L, 11L, 9L, 20L, 10L, 12L, 11L, 14L, 8L, 11L, 11L, 
6L, 8L, 19L, 21L, 3L, 20L, 22L, 15L, 11L, 14L, 8L, 21L, 21L, 
11L, 14L, 15L, 11L, 47L, 16L, 10L, 9L, 8L, 22L, 14L, 10L, 8L, 
2L, 13L, 15L, 16L, 16L, 2L, 7L, 2L, 9L, 9L, 21L, 9L, 8L, 3L, 
21L, 15L, 2L, 11L, 2L, 11L, 16L, 8L, 3L, 16L, 17L, 2L, 16L, 8L, 
8L, 9L, 44L, 19L, 9L, 9L, 9L, 3L, 12L, 7L, 9L, 22L, 7L, 8L, 12L, 
12L, 12L, 21L, 5L, 16L, 2L, 3L, 18L, 15L, 15L, 21L, 10L, 9L, 
10L, 12L, 3L, 13L, 22L, 9L, 6L, 17L, 7L, 10L, 3L, 3L, 9L, 13L, 
22L, 14L, 9L, 22L, 8L, 23L, 13L, 20L, 9L, 10L, 22L, 9L, 9L, 18L, 
19L, 13L, 19L, 20L, 18L, 13L, 7L, 10L, 11L, 12L, 16L, 19L, 8L, 
3L, 13L, 24L, 15L, 28L, 24L, 22L, 4L, 19L, 31L, 15L, 9L, 2L, 
44L, 11L, 2L, 10L, 13L, 18L, 3L, 15L, 9L, 19L, 8L, 6L, 9L, 16L, 
10L, 5L, 19L, 2L, 9L, 8L, 20L, 14L, 8L, 9L, 21L, 7L, 21L, 11L, 
14L, 19L, 15L, 10L, 19L, 16L, 1L, 9L, 3L, 10L, 21L, 3L, 3L, 11L, 
15L, 16L, 2L, 11L, 21L, 11L, 22L, 16L, 16L, 9L, 8L, 21L, 21L, 
3L, 1L, 20L, 13L, 9L, 16L, 13L, 6L, 10L, 15L, 3L, 20L, 11L, 8L, 
21L, 16L, 14L, 8L, 9L, 15L, 8L, 16L, 9L, 23L, 9L, 15L, 2L, 22L, 
16L, 3L, 9L, 8L, 31L, 19L, 18L, 21L, 9L, 14L, 19L, 10L), CloseWon = c(1, 
0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 
1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 
1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 
1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 
0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 
0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 
0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 
0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 
0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 
1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 
0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 
1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 
1, 0, 1, 0, 0)), class = "data.frame", row.names = c(NA, -258L
))

您的回归变量 days_to_close 有问题。请注意,即使在下面这个只包含该变量的简单回归中,也会出现"算法未收敛"以及"拟合概率在数值上为 0 或 1"的警告。

> mod.glm <- glm(factor(CloseWon) ~ days_to_close, data=data,
+                family = binomial('logit'))
Warning messages:
1: glm.fit: algorithm did not converge 
2: glm.fit: fitted probabilities numerically 0 or 1 occurred 

要了解发生这种情况的原因,请注意

> table(data$days_to_close > 0, data$CloseWon)
       
          0   1
  FALSE 124   2
  TRUE    0 132

因此您会看到,无论其他回归变量取什么值,只要 days_to_close 大于零,CloseWon 就始终为 1(而 days_to_close 等于 0 时,CloseWon 几乎总是 0)。这就是所谓的(准)完美分离:粗略地说,似然函数只有在系数趋向 +∞ 时才达到最大值。优化算法因此无法收敛,最终还会把其他回归系数推到巨大的值。

关于如何处理完美分离(perfect separation)问题的论文有很多,例如可以从这里(here)开始阅读。