## Source: chap7.txt (with modifications)
## http://www.stat.colostate.edu/computationalstatistics/datasets/computationalstatistics.zip
# MCMC: INDEPENDENCE CHAIN
# Example 7.2 on page 205 in Givens and Hoeting, 2nd edition
#
# Key idea: apply an MCMC independence chain to estimate the
# proportion in a simple mixture distribution problem. Demonstrate
# the performance when using 2 very different proposal distributions.
#__________________________________________________________________
# Generate the data from a mixture of 2 normal distributions (see
# equation (7.6) on page 205). To simulate from a mixture
# distribution, first simulate a Bernoulli(.7) indicator, then
# simulate from the corresponding normal distribution.
# Note: The data used in Example 7.2 are available on the book
# website. The code is given here in case you want to choose your own
# parameter values for the problem.

# Generate the data yourself
set.seed(3)   #Set random seed
a=rbinom(100,1,.7)
sum(a)        #69 from the first mixture component
y=ifelse(a==1,rnorm(100,7,.5),rnorm(100,10,.5))

# R tip: read data into R from a file
# Read from the data file "mixture.dat". Download the file and save
# it in the same directory as R, or use the menu
# "File" --> "Change dir..." to locate the directory containing the
# data file.
temp <- read.table("mixture.dat", header=T)
y <- temp[,1]   #Extract the data vector
#__________________________________________________________________
# Examine the data
# The histogram is similar to figure 7.1 on page 205. Your figure
# will look different if you have generated new y values instead of
# using the data read from the book webpage.
hist(y,probability=T,nclass=20,xlab="y",ylab="Density")
a=seq(min(y),max(y),length=1000)
lines(a,.7*dnorm(a,7,.5)+(1-.7)*dnorm(a,10,.5),lty=1)
#__________________________________________________________________
#Run MH sampler - independence chain
#Notes:
# 1. Since this is an independence chain with a uniform proposal and
# a uniform prior, the MH ratio reduces to the likelihood ratio (see
# equation 7.4).
#
# 2. We use the log likelihood instead of the likelihood when
# computing the MH ratio. Using the likelihood directly in a MH chain
# can lead to numerical instabilities in many cases because the
# likelihood may be quite small. Doing calculations on the log scale
# and transforming back at the end avoids many problems associated
# with underflow.
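# Optional illustration (not from the book): why compute on the log
# scale. For a large enough sample the raw likelihood underflows to
# zero in double precision, while the log likelihood remains finite.
# The vector z below is a hypothetical standard normal sample used
# only for this demonstration.
z <- rnorm(1000)
prod(dnorm(z))       #underflows to 0
sum(log(dnorm(z)))   #finite, roughly -1400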
log.like=function(p,x) {
  # Computes the log likelihood based on equation (7.6) on page 205
  # Inputs: p=probability estimate (delta in equation 7.6)
  #         x=data vector
  # Outputs: the log likelihood
  sum(log(p*dnorm(x,7,.5)+(1-p)*dnorm(x,10,.5)))
}

# CASE 1: proposal distribution is Beta(1,1) [i.e., the Uniform(0,1)
# distribution]
bshape1=1
bshape2=1

# plot the Beta(1,1) proposal density
temp <- function(x) { dbeta(x, bshape1, bshape2) }
plot(temp, 0, 1, xlab="x", ylab="density", main="Beta(1,1)")

num.its=10000      #Number of iterations
p=rep(0,num.its)   #MCMC output: vector of p realizations
p[1]=.2            #Starting value
set.seed(1)        #Set random seed for repeatability
j <- num.its-1     #Counts acceptances: start at the number of
                   #proposals and decrement on each rejection
for (i in 1:(num.its-1)) {
  #Generate proposal
  p[i+1]=rbeta(1,bshape1,bshape2)
  #Compute Metropolis-Hastings ratio
  R=exp(log.like(p[i+1],y)-log.like(p[i],y))
  #Reject or accept proposal
  if (R<1) {
    if(rbinom(1,1,R)==0) {
      p[i+1]=p[i]
      j <- j-1
    }
  }
}
P.1.1=p   #Save the output to examine later
cat("\nAcceptance Ratio:", round(j/num.its*100,2),"%\n")
#Acceptance Ratio: 14.41 %

# CASE 2: proposal distribution is Beta(2,10), which mostly misses
# the region where the posterior has most of its mass
bshape1=2
bshape2=10

# plot the Beta(2,10) proposal density
temp <- function(x) { dbeta(x, bshape1, bshape2) }
plot(seq(0,1,len=100),dbeta(seq(0,1,len=100),bshape1,bshape2),
     xlab="x",ylab="density",type="l")

num.its=10000      #Number of iterations
p=rep(0,num.its)   #MCMC output: vector of p realizations
p[1]=.2            #Starting value
set.seed(4)        #Set random seed
j <- num.its-1     #Counts acceptances (see above)
for (i in 1:(num.its-1)) {
  #Generate proposal
  p[i+1]=rbeta(1,bshape1,bshape2)
  #Compute Metropolis-Hastings ratio; the proposal density no longer
  #cancels, so the ratio includes the factor g(p[i])/g(p[i+1])
  R=exp(log.like(p[i+1],y)-log.like(p[i],y))*temp(p[i])/temp(p[i+1])
  #Reject or accept proposal
  if (R<1) {
    if(rbinom(1,1,R)==0) {
      p[i+1]=p[i]
      j <- j-1
    }
  }
}
P.2.10=p   #Save the output to examine later
cat("\nAcceptance Ratio:", round(j/num.its*100,2),"%\n")
#Acceptance Ratio: 0.08 %
#__________________________________________________________________
#Plots of the output of the MCMC run:
#Sample paths (similar to figure 7.2 on page 206)
#Note: you may wish to remove iterations for a burn-in period
par(mfrow=c(2,1))
plot(P.1.1,type="l")
plot(P.2.10,type="l")

#Histograms of the estimated parameter (similar to figure 7.3 on page 207)
#Note: you may wish to remove iterations for a burn-in period
par(mfrow=c(2,1))
hist(P.1.1,xlim=range(c(P.1.1,P.2.10)),nclass=40)
hist(P.2.10,xlim=range(c(P.1.1,P.2.10)),nclass=40)
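# Optional diagnostics (not from the book): compare the two chains
# numerically. The burn-in length below is a hypothetical choice.
# The Beta(1,1) chain mixes reasonably well; the Beta(2,10) chain
# rarely moves, so its sample autocorrelation stays near 1 for many
# lags.
burn <- 200
mean(P.1.1[-(1:burn)])    #posterior mean estimate, Beta(1,1) proposal
mean(P.2.10[-(1:burn)])   #posterior mean estimate, Beta(2,10) proposal
par(mfrow=c(2,1))
acf(P.1.1[-(1:burn)], main="ACF, Beta(1,1) proposal")
acf(P.2.10[-(1:burn)], main="ACF, Beta(2,10) proposal")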
#__________________________________________________________________________
#__________________________________________________________________________
# MCMC: RANDOM WALK
# Example 7.3 on page 207 in Givens and Hoeting
#
# Key idea: Run a random walk MH sampler to estimate the proportion,
# p, in a simple mixture distribution problem. The likelihood is
# given in (7.6) and we assume that the prior for p is Uniform(0,1).
# This example also demonstrates reparameterization of a MCMC
# problem, which can sometimes improve convergence performance. Here,
# the sampler is run in the transformed space u=log(p/(1-p)); see
# page 207 for more information. We demonstrate the performance of
# the random walk chain when using two very different error
# distributions in the proposal: Unif(-1,1) and Unif(-.01,.01).
#__________________________________________________________________
# Read the same mixture data as above (see equation (7.6) on page
# 205 and the data notes in the independence chain example).
temp <- read.table("mixture.dat", header=T)
y <- temp[,1]   #Extract the data vector
#__________________________________________________________________
# Examine the data
# The histogram is similar to figure 7.1 on page 205. Your figure
# will look different if you have generated new y values instead of
# using the data read from the book webpage.
par(mfrow=c(1,1))
hist(y,probability=T,nclass=20,xlab="y",ylab="Density")
a=seq(min(y),max(y),length=1000)
lines(a,.7*dnorm(a,7,.5)+(1-.7)*dnorm(a,10,.5),lty=1)
#__________________________________________________________________
#Run MH sampler - random walk
target.log=function(p,x) {
  # Computes the log of the likelihood (based on equation (7.6) on
  # page 205) times the prior. Here the prior for p is
  # Uniform(0,1)=Beta(1,1), so it is constant, plays no part in the
  # computation of the MH ratio, and is omitted.
  # Inputs: p=probability estimate (delta in equation 7.6)
  #         x=data vector
  # Outputs: the log likelihood
  sum(log(p*dnorm(x,7,.5)+(1-p)*dnorm(x,10,.5)))
}

# CASE 1: proposal is a random walk with errors generated from
# Uniform(-1,1).
set.seed(2)        #Set random seed
num.its=10000      #Number of iterations
u=rep(0,num.its)   #MCMC output: vector of u realizations
u[1]=runif(1,-1,1) #Starting value
p=rep(0,num.its)   #MCMC output: vector of p realizations
p[1]=exp(u[1])/(1+exp(u[1]))   #transform u to p, see page 207

# Metropolis-Hastings algorithm, random walk chain
j <- num.its-1     #Counts acceptances (decremented on rejection)
for (i in 1:(num.its-1)) {
  #Generate proposal (random walk on u)
  u[i+1]=u[i]+runif(1,-1,1)
  #Transform u to p, see page 207
  p[i+1]=exp(u[i+1])/(1+exp(u[i+1]))
  #Compute Metropolis-Hastings ratio; the factor p(1-p) is the
  #Jacobian dp/du of the transformation from u to p
  R=exp(target.log(p[i+1],y)-target.log(p[i],y))*
    (p[i+1]*(1-p[i+1]))/(p[i]*(1-p[i]))
  #Reject or accept proposal
  if (R<1) {
    if(rbinom(1,1,R)==0) {
      p[i+1]=p[i]
      u[i+1]=u[i]
      j <- j-1
    }
  }
}
P.rw.1=p   #Save the output to examine later
u.rw.1=u
cat("\nAcceptance Ratio:", round(j/num.its*100,2),"%\n")
#Acceptance Ratio: 33.98 %
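# Optional check (not from the book): the transformation above is the
# inverse logit, available in base R as plogis(); its inverse,
# log(p/(1-p)), is qlogis(). A quick numerical check on an arbitrary
# test value:
u.test <- 0.3
all.equal(exp(u.test)/(1+exp(u.test)), plogis(u.test))   #TRUE
all.equal(qlogis(plogis(u.test)), u.test)                #TRUE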
# CASE 2: proposal is a random walk with errors generated from
# Uniform(-.01,.01).
set.seed(2)         #Set random seed
num.its=10000       #Number of iterations
u2=rep(0,num.its)   #MCMC output: vector of u realizations
u2[1]=runif(1,-1,1) #Starting value
p2=rep(0,num.its)   #MCMC output: vector of p realizations
p2[1]=exp(u2[1])/(1+exp(u2[1]))   #transform u to p, see page 207

j <- num.its-1      #Counts acceptances (decremented on rejection)
for (i in 1:(num.its-1)) {
  #Generate proposal (random walk with much smaller steps)
  u2[i+1]=u2[i]+runif(1,-.01,.01)
  #Transform u to p, see page 207
  p2[i+1]=exp(u2[i+1])/(1+exp(u2[i+1]))
  #Compute Metropolis-Hastings ratio, including the Jacobian term
  #(note: all quantities here use the current chain, p2 and u2)
  R=exp(target.log(p2[i+1],y)-target.log(p2[i],y))*
    (p2[i+1]*(1-p2[i+1]))/(p2[i]*(1-p2[i]))
  #Reject or accept proposal
  if (R<1) {
    if(rbinom(1,1,R)==0) {
      p2[i+1]=p2[i]
      u2[i+1]=u2[i]
      j <- j-1
    }
  }
}
P.rw.01=p2   #Save the output to examine later
u.rw.01=u2
cat("\nAcceptance Ratio:", round(j/num.its*100,2),"%\n")
#The acceptance ratio here is very high (roughly 90% or more): tiny
#steps are nearly always accepted, but the chain explores the
#parameter space very slowly.
#__________________________________________________________________
#Plots of the output of the MCMC run:
#Sample paths (similar to figure 7.5 on page 209)
#Note: you may wish to remove iterations as a burn-in period
par(mfrow=c(2,1))
plot(P.rw.1,type="l",ylab="p",xlab="Iteration",ylim=c(.3,.85))
plot(P.rw.01,type="l",ylab="p",xlab="Iteration",ylim=c(.3,.85))

#Histograms of the estimated parameter
#Note: you may wish to remove iterations as a burn-in period
par(mfrow=c(2,1))
hist(P.rw.1,xlim=range(c(P.rw.1,P.rw.01)),nclass=40)
hist(P.rw.01,xlim=range(c(P.rw.1,P.rw.01)),nclass=40)
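# Optional diagnostic (not from the book): running means of the two
# random walk chains. A well-mixing chain's running mean settles
# quickly; the tiny-step chain is accepted almost every iteration but
# moves so slowly that its running mean drifts for a long time.
par(mfrow=c(1,1))
plot(cumsum(P.rw.1)/(1:num.its), type="l", xlab="Iteration",
     ylab="Running mean of p")
lines(cumsum(P.rw.01)/(1:num.its), lty=2)
legend("topright", legend=c("Unif(-1,1) steps","Unif(-.01,.01) steps"),
       lty=c(1,2))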