R Multiple/Logistic 三变量回归，统计检验？

Question

我需要一些关于统计测试代码的帮助。基本上，我试图研究年龄、政治地位和对大麻合法化的看法之间的关系。数据集是 2010 年英国社会态度调查。

# Download the BSA 2010 extract (CSV hosted on Dropbox) into a data frame.
bsa_2010 <- read.csv(
  file = "https://dl.dropboxusercontent.com/s/ubl9huokroj9jw8/bsa%202010.csv"
)
> dput(head(bsa_2010))
structure(list(Country = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("England", 
"Scotland", "Wales"), class = "factor"), RSex = structure(c(1L, 
1L, 2L, 2L, 2L, 1L), .Label = c("Female", "Male"), class = "factor"), 
    RAge = c(75L, 34L, 81L, 25L, 33L, 76L), MarStat = structure(c(4L, 
    4L, 2L, 3L, 3L, 5L), .Label = c("Living as married", "Married", 
    "Not married", "Separated or divorced after marrying", "Widowed"
    ), class = "factor"), ChildHh = structure(c(1L, 1L, 1L, 1L, 
    1L, 1L), .Label = c("No", "Yes"), class = "factor"), WhPaper = structure(c(8L, 
    8L, 8L, 11L, 12L, 8L), .Label = c("(Scottish) Daily Express", 
    "(Scottish) Daily Mail", "Daily Mirror/ Scottish Mirror", 
    "Daily Record", "Daily Star", "Daily Telegraph", "Financial Times", 
    "Skip,not read paper normally", "The Guardian", "The Independent", 
    "The Sun/ Scottish Sun", "The Times"), class = "factor"), 
    PartyIDN = structure(c(2L, 7L, 2L, 6L, 2L, 6L), .Label = c("British National Party (BNP)/ National Front", 
    "Conservative", "Don't know", "Green Party", "Labour", "Liberal Democrat", 
    "None", "Other answer (WRITE IN)", "Other party (WRITE IN)", 
    "Plaid Cymru", "Refused to say", "Scottish National Party", 
    "UK Independence Party (UKIP)/Veritas"), class = "factor"), 
    Partyid1 = structure(c(2L, 7L, 2L, 6L, 2L, 6L), .Label = c("British National Party (BNP)/ National Front", 
    "Conservative", "Don't know", "Green Party", "Labour", "Liberal Democrat", 
    "None", "Other answer", "Other party", "Plaid Cymru", "Refusal", 
    "Scottish National Party", "UK Independence Party (UKIP)/Veritas"
    ), class = "factor"), PartyId2 = structure(c(1L, 5L, 1L, 
    4L, 1L, 4L), .Label = c("Conservative", "Green Party", "Labour", 
    "Liberal Democrat", "None", "Other party", "Other/DK/Ref"
    ), class = "factor"), Spend1 = structure(c(3L, 4L, 4L, 3L, 
    3L, 4L), .Label = c("(None of these)", "Defence", "Education", 
    "Health", "Help for industry", "Housing", "Overseas aid", 
    "Police and prisons", "Public transport", "Roads", "Social security benefits"
    ), class = "factor"), Spend2 = structure(c(6L, 3L, 2L, 4L, 
    9L, 10L), .Label = c("(None of these)", "Defence", "Education", 
    "Health", "Help for industry", "Housing", "Overseas aid", 
    "Police and prisons", "Public transport", "Roads", "Skip,no 1st priority", 
    "Social security benefits"), class = "factor"), RClassGp = structure(c(4L, 
    6L, 1L, 2L, 4L, 6L), .Label = c("Employers in small org; own account workers", 
    "Intermediate occupations", "Lower supervisory & technical occupations", 
    "Managerial & professional occups", "Not classifiable", "Semi-routine & routine occupations", 
    "Skip, never had a job+DK+NA last job"), class = "factor"), 
    RNSSECG = structure(c(4L, 8L, 9L, 3L, 4L, 8L), .Label = c("1.1", 
    "1.2", "Intermediate occupations", "Lower managerial and professional occupations", 
    "Lower supervisory & technical occupations", "Not classified", 
    "Routine occupations", "Semi-routine Occupations", "Small employers and own account workers"
    ), class = "factor"), CanLegal = structure(c(1L, 1L, 1L, 
    2L, 2L, 1L), .Label = c("Taking cannabis should remain illegal", 
    "should be legal, only licenced shops"), class = "factor"), 
    RaceOri3 = structure(c(10L, 10L, 10L, 10L, 10L, 10L), .Label = c("ASIAN: of Bangladeshi origin", 
    "ASIAN: of Chinese origin", "ASIAN: of Indian origin", "ASIAN: of Pakistani origin", 
    "ASIAN: of other origin (WRITE IN)", "BLACK: of African origin", 
    "BLACK: of Caribbean origin", "MIXED ORIGIN (WRITE IN)", 
    "OTHER (WRITE IN)", "WHITE: of any origin"), class = "factor"), 
    Agecat1 = structure(c(6L, 2L, 7L, 1L, 2L, 6L), .Label = c("(18,28]", 
    "(28,38]", "(38,48]", "(48,58]", "(58,68]", "(68,78]", "(78,88]", 
    "(88,98]"), class = "factor"), Agecat2 = structure(c(3L, 
    1L, 4L, 1L, 1L, 3L), .Label = c("(18,38]", "(38,58]", "(58,78]", 
    "(78,98]"), class = "factor")), .Names = c("Country", "RSex", 
"RAge", "MarStat", "ChildHh", "WhPaper", "PartyIDN", "Partyid1", 
"PartyId2", "Spend1", "Spend2", "RClassGp", "RNSSECG", "CanLegal", 
"RaceOri3", "Agecat1", "Agecat2"), row.names = c(NA, 6L), class = "data.frame")

使用的变量是：RAge（年龄）、PartyIDN（认同的政党）、CanLegal（大麻合法化）

为简化起见，我对年龄进行了分类，只保留了两个最大的政党。

# Bin respondent age into 10-year bands. cut() builds right-closed intervals
# by default, so without include.lowest = TRUE anyone aged exactly 18 would
# fall outside (18,28] and silently become NA.
bsa_2010$Agecat1 <- cut(
  bsa_2010$RAge,
  breaks = c(18, 28, 38, 48, 58, 68, 78, 88, 98),
  include.lowest = TRUE
)

# Keep only respondents who identify with one of the two largest parties.
Parties <- subset(bsa_2010, PartyIDN %in% c("Conservative", "Labour"))

# Re-create the factor so that only the parties still present in the subset
# remain as levels.
Parties$PartyIDN <- factor(Parties$PartyIDN)

我应该如何进行统计检验，才能得到年龄、政治倾向与对大麻合法化的看法三者之间关系的 P 值？

欢迎任何帮助，谢谢！

Answer 1

看来您在基础知识方面还需要一些帮助，所以统计部分我建议您参考这里：https://stats.idre.ucla.edu/r/dae/logit-regression/ ；不过 R 部分很简单（我会把您新建的年龄类别当作因子（factor），而不是连续变量）……

# Bin age into 10-year bands. include.lowest = TRUE keeps respondents aged
# exactly 18, who would otherwise fall outside (18,28], become NA, and be
# dropped from the model fit below.
bsa_2010$Agecat1 <- cut(
  bsa_2010$RAge,
  breaks = c(18, 28, 38, 48, 58, 68, 78, 88, 98),
  include.lowest = TRUE
)

# Restrict to the two largest parties, then re-create the factor so only the
# parties still present remain as levels.
Parties <- subset(bsa_2010, PartyIDN %in% c("Conservative", "Labour"))
Parties$PartyIDN <- factor(Parties$PartyIDN)
str(Parties)

# Re-create the age factor so bands with no rows left in the subset are not
# kept as levels.
Parties$Agecat1 <- factor(Parties$Agecat1)

# Logistic regression of the cannabis opinion on age band and party.
firstattempt <- glm(
  CanLegal ~ Agecat1 + PartyIDN,
  data = Parties,
  family = "binomial"
)
summary(firstattempt)

您用 cut 对年龄进行分组是有意义的，但这并不会让 R 命令或统计分析变得更简单。您同样可以直接使用连续的年龄变量：

# Same logistic model, but with age entered as the continuous RAge column
# instead of the binned Agecat1 factor.
secondattempt <- glm(
  formula = CanLegal ~ RAge + PartyIDN,
  family = "binomial",
  data = Parties
)
summary(secondattempt)

这样实际上更便于解释数据，结果支持这样的假设：年龄对结果有显著影响，而政党没有。您可以通过下面这些非常简单的图表看出这一点：

# Mosaic plots of the cannabis opinion cross-tabulated against party and
# against age band. The xtabs() calls are kept inline on purpose:
# mosaicplot() derives its default title from the argument expression.
# Opinion (CanLegal) by party identification.
mosaicplot(xtabs(~CanLegal + PartyIDN, data = Parties))
# Opinion (CanLegal) by 10-year age band.
mosaicplot(xtabs(~CanLegal + Agecat1, data = Parties))

R Multiple/Logistic 三变量回归，统计检验？

R Multiple/Logistic Regression with 3 Variables,Statistical Testing?

regression

r

statistical-test

logistic-regression