## Source: chap4.txt in ## http://www.stat.colostate.edu/computationalstatistics/datasets/computationalstatistics.zip # BASIC EM ALGORITHM # Example 4.2 on page 99 in Givens and Hoeting, 2nd edition # # Key idea: use the EM algorithm to estimate the parameters in the # peppered moth example described on page 99. #The observed data (phenotype counts for each variety) nc=85 #Number of observed carbonaria phenotype ni=196 #number of observed insularia phenotype nt=341 #Number of observed typica phenotype n=nc+ni+nt #total sample size #Initialize the parameter estimate vectors #One vector for each phenotype pct=rep(0,20) pit=rep(0,20) ptt=rep(0,20) pct[1]=1/3 #Initial estimates of the parameters pit[1]=1/3 ptt[1]=1/3 #This for loop carries out EM for the peppered moth example. for (t in 2:20) { #Equations (4.5)-(4.9) denom1=(pct[t-1]^2+2*pct[t-1]*pit[t-1]+2*pct[t-1]*ptt[t-1]) ncct=nc*pct[t-1]^2/denom1 ncit=2*nc*pct[t-1]*pit[t-1]/denom1 nctt=nc-ncct-ncit niit=ni*pit[t-1]^2/(pit[t-1]^2+2*pit[t-1]*ptt[t-1]) nitt=ni-niit nttt=nt #Equations (4.13-4.15) pct[t]=(2*ncct+ncit+nctt)/(2*n) pit[t]=(2*niit+ncit+nitt)/(2*n) ptt[t]=(2*nttt+nctt+nitt)/(2*n) } #Examine the output cbind(pct,pit,ptt) #equation (4.16) on page 101 rcc=sqrt( (diff(pct)^2+diff(pit)^2)/(pct[-20]^2+pit[-20]^2) ) rcc=c(0,rcc) #adjusts the length to make the table below #convergence diagnostics defined below (4.16) on page 101 d1=(pct[-1]-pct[20])/(pct[-20]-pct[20]) d1=c(d1,0) d2=(pit[-1]-pit[20])/(pit[-20]-pit[20]) d2=c(d2,0) #Table like Table 4.1 on page 102 print(cbind(pct,pit,rcc,d1,d2)[1:9,],digits=5) round(c(pct[9], pit[9], 1-pct[9]-pit[9]),3) # [1] 0.071 0.189 0.740