## More examples for GLM ## reference: http://statmath.wu.ac.at/courses/heather_turner/index.html ## Example 2: Budworm Data ## Reference: Collett (1991, Modelling binary data) # It's on the toxicity of the pyrethoid trans - cypermethrin to the tobacco budworm. # Batches of 20 moths of each sex were exposed to varying doses of the # pyrethoid for three days and the number knocked out in each batch was recorded. ## Data (out of 20): #Dose 1 2 4 8 16 32 #Male 1 4 9 13 18 20 #Female 0 2 6 10 12 16 ## a picture of Tobacco_budworm # http://en.wikipedia.org/wiki/File:Tobacco_budworm_2.jpg dose = c(1,2,4,8,16,32,1,2,4,8,16,32) dead <- c(1, 4, 9, 13, 18, 20, 0, 2, 6, 10, 12, 16) sex <- factor(rep(c("M", "F"), c(6, 6))) # pre-analysis pairs(cbind(dead,dose,sex)) plot(c(1, 32), c(0, 1), type = "n", xlab = "dose", ylab = "prob") text(dose, dead/20, labels = sex) ldose <- rep(0:5, 2) # log scale pairs(cbind(dead,ldose,sex)) plot(c(0, 5), c(0, 1), type = "n", xlab = "log(dose)", ylab = "prob") text(ldose, dead/20, labels = sex) ## models under consideration # (1) eta ~ ldose # (2) eta ~ ldose + sex # (3) eta ~ ldose + sex + ldose:sex # model 1 y <- cbind(dead, 20 - dead) fit1 <- glm(y ~ ldose, family = binomial) summary(fit1) par(mfrow=c(2,2)) plot(fit1) # model 2 fit2 <- glm(y ~ ldose + sex, family = binomial) summary(fit2) par(mfrow=c(2,2)) plot(fit2) #Coefficients: # Estimate Std. Error z value Pr(>|z|) #(Intercept) -3.4732 0.4685 -7.413 1.23e-13 *** #ldose 1.0642 0.1311 8.119 4.70e-16 *** #sexM 1.1007 0.3558 3.093 0.00198 ** # Null deviance: 124.8756 on 11 degrees of freedom #Residual deviance: 6.7571 on 9 degrees of freedom #AIC: 42.867 # model 3 fit3 <- glm(y ~ sex*ldose, family = binomial) summary(fit3) par(mfrow=c(2,2)) plot(fit3) anova(fit3, test = "Chisq") # model 2 (fit2) looks better ## Goodness-of-fit # D ~ Chisq_(n-p) # Residual deviance: 6.7571 on 9 degrees of freedom 1 - pchisq(q=6.7571, df=9) # p-value #[1] 0.662392 ## confidence interval stats:::confint.lm(fit2) # Wald confidence interval (symmetric) # 2.5 % 97.5 % #(Intercept) -4.5330217 -2.413289 #ldose 0.7676962 1.360732 #sexM 0.2958066 1.905680 confint(fit2) # Profile confidence interval (asymmetric) # 2.5 % 97.5 % #(Intercept) -4.4581438 -2.613610 #ldose 0.8228708 1.339058 #sexM 0.4192265 1.820471 ## prediction # empirical odds of death emp.logits <- log((dead + 0.5)/(20.5 - dead)) plot(c(1, 32), range(emp.logits), type = "n", xlab = "dose", ylab = "emp.logit", log = "x") text(dose, emp.logits, labels = sex) # plot fitted model on the logit scale par(mfrow=c(1,1)) plot(c(1, 32), range(emp.logits), type = "n", xlab = "dose", ylab = "emp.logit", log = "x") text(dose, emp.logits, labels = sex) lines(dose[sex == "M"], predict(fit2, type = "link")[sex == "M"], col = 3) lines(dose[sex == "M"], predict(fit2, type = "link")[sex == "F"], col = 2) # plot fitted model on the probability scale par(mfrow=c(1,1)) plot(c(1, 32), c(0, 1), type = "n", xlab = "dose", ylab = "prob", log = "x") text(dose, dead/20, labels = sex) ld <- seq(0, 5, 0.1) newdat <- data.frame(ldose = c(ld, ld), sex = gl(2, length(ld),labels = c("F", "M"))) lines(2^ld, predict(fit2, type = "response", newdat = subset(newdat, sex == "M")),col = 3) lines(2^ld, predict(fit2, type = "response", newdat = subset(newdat, sex == "F")),col = 2)