## Source: chap4.txt (with corrections) in ## http://www.stat.colostate.edu/computationalstatistics/datasets/computationalstatistics.zip #__________________________________________________________________ #__________________________________________________________________ # SEM ALGORITHM # Example 4.6 on page 109 in Givens and Hoeting, 2nd edition # # Key idea: use the SEM algorithm to estimate the standard errors for # the parameters estimated above in the "BASIC EM" section. #First, run the code in the basic EM algorithm in "4_2.R". #Then we run SEM as follows. #Starting values for SEM (see description just above equation 4.44) #Choosing the parameter values at the 20th iteration of EM. pcthat=pct[20] pithat=pit[20] ptthat=ptt[20] #Initialize the parameter estimate vectors, one vector for each phenotype sempct=rep(0,20) sempit=rep(0,20) semptt=rep(0,20) sempct[1]=0.07 #Initial SEM estimates of the parameters sempit[1]=0.19 semptt[1]=0.74 rij=array(0,c(2,2,20)) #This for loop implements the SEM algorithm for the peppered moth #example. #Below t is the number of SEM iterations for (t in 2:20) { cat(t,sempct[t-1],sempit[t-1],semptt[t-1]) cat("\n") print(rij[,,t-1]) cat("\n") #Take standard EM step (see code above for detailed description) denom1=(sempct[t-1]^2+2*sempct[t-1]*sempit[t-1]+2*sempct[t-1]*semptt[t-1]) ncct=nc*sempct[t-1]^2/denom1 ncit=2*nc*sempct[t-1]*sempit[t-1]/denom1 nctt=nc-ncct-ncit niit=ni*sempit[t-1]^2/(sempit[t-1]^2+2*sempit[t-1]*semptt[t-1]) nitt=ni-niit nttt=nt sempct[t]=(2*ncct+ncit+nctt)/(2*n) sempit[t]=(2*niit+ncit+nitt)/(2*n) semptt[t]=(2*nttt+nctt+nitt)/(2*n) #SEM #loop over each parameter for (j in 1:2) { #start at estimates from the the 20th iteration of EM sempj=c(pcthat,pithat,ptthat) #replace the jth element of sempj with the most recent EM estimate sempj[j]=c(sempct[t],sempit[t],semptt[t])[j] sempj[3]=1-sempj[1]-sempj[2] #Take one EM step for sempj #Equations (4.5)-(4.9) denom1=(sempj[1]^2+2*sempj[1]*sempj[2]+2*sempj[1]*sempj[3]) ncct=nc*sempj[1]^2/denom1 ncit=2*nc*sempj[1]*sempj[2]/denom1 nctt=nc-ncct-ncit niit=ni*sempj[2]^2/(sempj[2]^2+2*sempj[2]*sempj[3]) nitt=ni-niit nttt=nt nextstep=c((2*ncct+ncit+nctt)/(2*n),(2*niit+ncit+nitt)/(2*n)) # Calculate rij. This is (4.44) on page 109 rij[,j,t]=(nextstep-c(pcthat,pithat))/ (sempj[j]-c(pcthat,pithat,ptthat)[j]) } } #Note that the algorithm becomes unstable on 8th iteration #Below is the output for iteration 7 #EM after 20 iterations #> cbind(pct,pit,ptt)[20,] # pct pit ptt #0.07083691 0.18873652 0.74042657 #SEM after 7 iterations (the way the indices work, this is index 6 #> cbind(sempct,sempit,semptt)[6,] # sempct sempit semptt #0.07083691 0.18873670 0.74042639 rij[,,6] # [,1] [,2] #[1,] 0.03671882 0.0000000 #[2,] 0.02828559 0.1758729 #Now need iyhat (inverse of the information matrix for Y) used in (4.43) #Equations (4.5)-(4.9) denom1=(pcthat^2+2*pcthat*pithat+2*pcthat*ptthat) ncct=nc*pcthat^2/denom1 ncit=2*nc*pcthat*pithat/denom1 nctt=nc-ncct-ncit niit=ni*pithat^2/(pithat^2+2*pithat*ptthat) nitt=ni-niit nttt=nt #Required derivatives for iyhat (used in eqn (4.31)) d20q=-(2*ncct+ncit+nctt)/pcthat^2 -(2*nttt+nitt+nctt)/(ptthat^2) d02q=-(2*niit+ncit+nitt)/pithat^2 -(2*nttt+nitt+nctt)/(ptthat^2) d12q=-(2*nttt+nitt+nctt)/(ptthat^2) iyhat=-cbind(c(d20q,d12q),c(d12q,d02q)) solve(iyhat) #[1,] 5.290920e-05 -1.074720e-05 #[2,] -1.074720e-05 1.230828e-04 #Equation (4.31) #Since the percentages of the 3 moth phenotypes add up to 1, we only #presented results for carbonaria and insularia phenotypes. The next #lines define (4.31) from the SEM algorithm and delete the last row #and column of the 3 x 3 matrix psiprime22=rij[,,6] #(7th iteration) #Equation (4.43) varhat=solve(iyhat)%*%(diag(2)+t(psiprime22)%*%solve(diag(2)-t(psiprime22))) varhat=(varhat+t(varhat))/2 varhat # [,1] [,2] #[1,] 5.492602e-05 -0.0000111562 #[2,] -1.115620e-05 0.0001489664 #sd(pc), sd(pi) sqrt(diag(varhat)) #[1] 0.007411209 0.012205180 #cor(pc,pi) varhat[1,2]/prod(sqrt(diag(varhat))) #[1] -0.1233341 #var(pt) varhat[1,1]+varhat[2,2]+2*varhat[1,2] #[1] 0.0001815800 #sd(pt) sqrt(sum(varhat)) #[1] 0.01347516 #cor(pc,pt) (-varhat[1,1]-varhat[1,2])/(sqrt(varhat[1,1])*sqrt(sum(varhat))) #[1] -0.43828 #cor(pi,pt) (-varhat[2,2]-varhat[1,2])/(sqrt(varhat[2,2])*sqrt(sum(varhat))) #[1] -0.8379212 ## You may build a 3*3 matrix using the numbers above to show the covariance matrix of ## (pc, pi, pt).