## More examples for GLM ## reference: http://statmath.wu.ac.at/courses/heather_turner/index.html ## Example 3: Ship Damage Data # concern a type of damage caused by waves to the forward section of cargo-carrying vessels # reference: P. McCullagh and J. A. Nelder, Generalized Linear Models. Chapman & Hall. library(MASS) data(ships) str(ships) #'data.frame': 40 obs. of 5 variables: # $ type : Factor w/ 5 levels "A","B","C","D",..: 1 1 1 1 1 1 1 1 2 2 ... # $ year : int 60 60 65 65 70 70 75 75 60 60 ... # $ period : int 60 75 60 75 60 75 60 75 60 75 ... # $ service : int 127 63 1095 1095 1512 3353 0 2244 44882 17176 ... # $ incidents: int 0 0 3 4 6 18 0 11 39 29 ... ?ships # incidents: number of damage incidents # service: aggregate months of service # period: period of operation : 1960-74, 75-79 # year: year of construction: 1960-64, 65-69, 70-74, 75-79 # type: type: '"A"' to '"E"' # exclude ships with 0 months of service ships2 <- subset(ships, service > 0) # convert the period and year variables to factors ships2$year <- as.factor(ships2$year) ships2$period <- as.factor(ships2$period) # consider a log-linear model including all the variables # "offset" means a term with a fixed coefficient of 1 glm1 <- glm(incidents ~ type + year + period, family = poisson(link = "log"), data = ships2, offset = log(service)) summary(glm1) par(mfrow=c(2,2)) plot(glm1) # use quasi-likelihood estimation to allow over-dispersion glm2 <- update(glm1, family = quasipoisson(link = "log")) summary(glm2) par(mfrow=c(2,2)) plot(glm2) # alternative R code for glm2 glm(formula = incidents ~ type + year + period, family = quasipoisson(link = "log"), data = ships2, offset = log(service)) #(Intercept) typeB typeC typeD typeE year65 # -6.40590 -0.54334 -0.68740 -0.07596 0.32558 0.69714 # year70 year75 period75 # 0.81843 0.45343 0.38447 # check the significance of predictors anova(glm2, test = "F") # Df Deviance Resid. Df Resid. Dev F Pr(>F) #NULL 33 146.328 #type 4 55.439 29 90.889 8.1961 0.0002289 *** #year 3 41.534 26 49.355 8.1871 0.0005777 *** #period 1 10.660 25 38.695 6.3039 0.0188808 * # check if damage is roughly proportional to service glm3 <- glm(formula = incidents ~ type + year + period + log(service), family = quasipoisson(link = "log"), data = ships2) summary(glm3) #log(service) 0.9027 0.1305 6.916 3.75e-07 ***