## Source:   chap4.txt in 
## http://www.stat.colostate.edu/computationalstatistics/datasets/computationalstatistics.zip

# BASIC EM ALGORITHM
# Example 4.2 on page 99 in Givens and Hoeting, 2nd edition
#
# Key idea:  use the EM algorithm to estimate the parameters in the
# peppered moth example described on page 99.

#The observed data (phenotype counts for each variety)
nc=85  #Number of observed carbonaria phenotype
ni=196 #number of observed insularia phenotype
nt=341 #Number of observed typica phenotype
n=nc+ni+nt #total sample size

#Initialize the parameter estimate vectors
#One vector for each phenotype
pct=rep(0,20)
pit=rep(0,20)
ptt=rep(0,20)
pct[1]=1/3  #Initial estimates of the parameters
pit[1]=1/3
ptt[1]=1/3

#This for loop carries out EM for the peppered moth example.
for (t in 2:20) {
  #Equations (4.5)-(4.9)
  denom1=(pct[t-1]^2+2*pct[t-1]*pit[t-1]+2*pct[t-1]*ptt[t-1])
  ncct=nc*pct[t-1]^2/denom1
  ncit=2*nc*pct[t-1]*pit[t-1]/denom1
  nctt=nc-ncct-ncit
  niit=ni*pit[t-1]^2/(pit[t-1]^2+2*pit[t-1]*ptt[t-1])
  nitt=ni-niit
  nttt=nt

  #Equations (4.13-4.15)
  pct[t]=(2*ncct+ncit+nctt)/(2*n)
  pit[t]=(2*niit+ncit+nitt)/(2*n)
  ptt[t]=(2*nttt+nctt+nitt)/(2*n)
}

#Examine the output
cbind(pct,pit,ptt)

#equation (4.16) on page 101
rcc=sqrt( (diff(pct)^2+diff(pit)^2)/(pct[-20]^2+pit[-20]^2) )
rcc=c(0,rcc)  #adjusts the length to make the table below

#convergence diagnostics defined below (4.16) on page 101
d1=(pct[-1]-pct[20])/(pct[-20]-pct[20])
d1=c(d1,0)
d2=(pit[-1]-pit[20])/(pit[-20]-pit[20])
d2=c(d2,0)

#Table like Table 4.1 on page 102
print(cbind(pct,pit,rcc,d1,d2)[1:9,],digits=5)

round(c(pct[9], pit[9], 1-pct[9]-pit[9]),3)
# [1] 0.071 0.189 0.740