## Source: chap7.txt (with modifications)
## http://www.stat.colostate.edu/computationalstatistics/datasets/computationalstatistics.zip
# MCMC: INDEPENDENCE CHAIN
# Example 7.2 on page 205 in Givens and Hoeting, 2nd edition
#
# Key idea: apply an MCMC independence chain to estimate the
# proportion in a simple mixture distribution problem. Demonstrate
# the performance when using 2 very different proposal distributions.
#__________________________________________________________________
# Generate the data from a mixture of 2 normal distributions (see
# equation (7.6) on page 205). To simulate from a mixture
# distribution, first simulate a Bernoulli(.7) indicator, then
# simulate from the corresponding normal distribution.
# Note: The data used in Example 7.2 are available on the book
# website. The code is given here in case you want to choose your own
# parameter values for the problem.

# Generate the data yourself
set.seed(3)   #Set random seed
a=rbinom(100,1,.7)
sum(a)        #69 from the first mixture component
y=ifelse(a==1,rnorm(100,7,.5),rnorm(100,10,.5))

# R tip: read data into R from a file
# Read from the data file "mixture.dat". Download the file and save
# it in the same directory as R, or use the menu
# "File" --> "Change dir..." to locate the directory containing the
# data file.
temp <- read.table("mixture.dat", header=T)
y <- temp[,1]   #Extract the data vector
#__________________________________________________________________
# Examine the data
# The histogram is similar to figure 7.1 on page 205. Your figure
# will look different if you have generated new y values instead of
# using the data read from the book webpage.
hist(y,probability=T,nclass=20,xlab="y",ylab="Density")
a=seq(min(y),max(y),length=1000)
lines(a,.7*dnorm(a,7,.5)+(1-.7)*dnorm(a,10,.5),lty=1)
#__________________________________________________________________
#Run MH sampler - independence chain
#Notes:
# 1. Since this is an independence chain with a uniform proposal and
# a uniform prior, the MH ratio reduces to the likelihood ratio (see
# equation 7.4).
#
# 2. We use the log likelihood instead of the likelihood when
# computing the MH ratio. Using the likelihood directly in a MH chain
# can lead to numerical instabilities in many cases because the
# likelihood may be quite small. Doing calculations on the log scale
# and transforming back at the end avoids many problems associated
# with underflow.
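# Optional illustration (not from the book): why compute on the log
# scale. For a large enough sample the raw likelihood underflows to
# zero in double precision, while the log likelihood remains finite.
# The vector z below is a hypothetical standard normal sample used
# only for this demonstration.
z <- rnorm(1000)
prod(dnorm(z))       #underflows to 0
sum(log(dnorm(z)))   #finite, roughly -1400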
log.like=function(p,x) {
  # Computes the log likelihood based on equation (7.6) on page 205
  # Inputs: p=probability estimate (delta in equation 7.6)
  #         x=data vector
  # Outputs: the log likelihood
  sum(log(p*dnorm(x,7,.5)+(1-p)*dnorm(x,10,.5)))
}

# CASE 1: proposal distribution is Beta(1,1) [i.e., the Uniform(0,1)
# distribution]
bshape1=1
bshape2=1

# plot the Beta(1,1) proposal density
temp <- function(x) { dbeta(x, bshape1, bshape2) }
plot(temp, 0, 1, xlab="x", ylab="density", main="Beta(1,1)")

num.its=10000      #Number of iterations
p=rep(0,num.its)   #MCMC output: vector of p realizations
p[1]=.2            #Starting value
set.seed(1)        #Set random seed for repeatability
j <- num.its-1     #Counts acceptances: start at the number of
                   #proposals and decrement on each rejection
for (i in 1:(num.its-1)) {
  #Generate proposal
  p[i+1]=rbeta(1,bshape1,bshape2)
  #Compute Metropolis-Hastings ratio
  R=exp(log.like(p[i+1],y)-log.like(p[i],y))
  #Reject or accept proposal
  if (R<1) {
    if(rbinom(1,1,R)==0) {
      p[i+1]=p[i]
      j <- j-1
    }
  }
}
P.1.1=p   #Save the output to examine later
cat("\nAcceptance Ratio:", round(j/num.its*100,2),"%\n")
#Acceptance Ratio: 14.41 %

# CASE 2: proposal distribution is Beta(2,10), which mostly misses
# the region where the posterior has most of its mass
bshape1=2
bshape2=10

# plot the Beta(2,10) proposal density
temp <- function(x) { dbeta(x, bshape1, bshape2) }
plot(seq(0,1,len=100),dbeta(seq(0,1,len=100),bshape1,bshape2),
     xlab="x",ylab="density",type="l")

num.its=10000      #Number of iterations
p=rep(0,num.its)   #MCMC output: vector of p realizations
p[1]=.2            #Starting value
set.seed(4)        #Set random seed
j <- num.its-1     #Counts acceptances (see above)
for (i in 1:(num.its-1)) {
  #Generate proposal
  p[i+1]=rbeta(1,bshape1,bshape2)
  #Compute Metropolis-Hastings ratio; the proposal density no longer
  #cancels, so the ratio includes the factor g(p[i])/g(p[i+1])
  R=exp(log.like(p[i+1],y)-log.like(p[i],y))*temp(p[i])/temp(p[i+1])
  #Reject or accept proposal
  if (R<1) {
    if(rbinom(1,1,R)==0) {
      p[i+1]=p[i]
      j <- j-1
    }
  }
}
P.2.10=p   #Save the output to examine later
cat("\nAcceptance Ratio:", round(j/num.its*100,2),"%\n")
#Acceptance Ratio: 0.08 %
#__________________________________________________________________
#Plots of the output of the MCMC run:
#Sample paths (similar to figure 7.2 on page 206)
#Note: you may wish to remove iterations for a burn-in period
par(mfrow=c(2,1))
plot(P.1.1,type="l")
plot(P.2.10,type="l")

#Histograms of the estimated parameter (similar to figure 7.3 on page 207)
#Note: you may wish to remove iterations for a burn-in period
par(mfrow=c(2,1))
hist(P.1.1,xlim=range(c(P.1.1,P.2.10)),nclass=40)
hist(P.2.10,xlim=range(c(P.1.1,P.2.10)),nclass=40)
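# Optional diagnostics (not from the book): compare the two chains
# numerically. The burn-in length below is a hypothetical choice.
# The Beta(1,1) chain mixes reasonably well; the Beta(2,10) chain
# rarely moves, so its sample autocorrelation stays near 1 for many
# lags.
burn <- 200
mean(P.1.1[-(1:burn)])    #posterior mean estimate, Beta(1,1) proposal
mean(P.2.10[-(1:burn)])   #posterior mean estimate, Beta(2,10) proposal
par(mfrow=c(2,1))
acf(P.1.1[-(1:burn)], main="ACF, Beta(1,1) proposal")
acf(P.2.10[-(1:burn)], main="ACF, Beta(2,10) proposal")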
#__________________________________________________________________________
#__________________________________________________________________________
# MCMC: RANDOM WALK
# Example 7.3 on page 207 in Givens and Hoeting
#
# Key idea: Run a random walk MH sampler to estimate the proportion,
# p, in a simple mixture distribution problem. The likelihood is
# given in (7.6) and we assume that the prior for p is Uniform(0,1).
# This example also demonstrates reparameterization of a MCMC
# problem, which can sometimes improve convergence performance. Here,
# the sampler is run in the transformed space u=log(p/(1-p)); see
# page 207 for more information. We demonstrate the performance of
# the random walk chain when using two very different error
# distributions in the proposal: Unif(-1,1) and Unif(-.01,.01).
#__________________________________________________________________
# Read the same mixture data as above (see equation (7.6) on page
# 205 and the data notes in the independence chain example).
temp <- read.table("mixture.dat", header=T)
y <- temp[,1]   #Extract the data vector
#__________________________________________________________________
# Examine the data
# The histogram is similar to figure 7.1 on page 205. Your figure
# will look different if you have generated new y values instead of
# using the data read from the book webpage.
par(mfrow=c(1,1))
hist(y,probability=T,nclass=20,xlab="y",ylab="Density")
a=seq(min(y),max(y),length=1000)
lines(a,.7*dnorm(a,7,.5)+(1-.7)*dnorm(a,10,.5),lty=1)
#__________________________________________________________________
#Run MH sampler - random walk
target.log=function(p,x) {
  # Computes the log of the likelihood (based on equation (7.6) on
  # page 205) times the prior. Here the prior for p is
  # Uniform(0,1)=Beta(1,1), so it is constant, plays no part in the
  # computation of the MH ratio, and is omitted.
  # Inputs: p=probability estimate (delta in equation 7.6)
  #         x=data vector
  # Outputs: the log likelihood
  sum(log(p*dnorm(x,7,.5)+(1-p)*dnorm(x,10,.5)))
}

# CASE 1: proposal is a random walk with errors generated from
# Uniform(-1,1).
set.seed(2)        #Set random seed
num.its=10000      #Number of iterations
u=rep(0,num.its)   #MCMC output: vector of u realizations
u[1]=runif(1,-1,1) #Starting value
p=rep(0,num.its)   #MCMC output: vector of p realizations
p[1]=exp(u[1])/(1+exp(u[1]))   #transform u to p, see page 207

# Metropolis-Hastings algorithm, random walk chain
j <- num.its-1     #Counts acceptances (decremented on rejection)
for (i in 1:(num.its-1)) {
  #Generate proposal (random walk on u)
  u[i+1]=u[i]+runif(1,-1,1)
  #Transform u to p, see page 207
  p[i+1]=exp(u[i+1])/(1+exp(u[i+1]))
  #Compute Metropolis-Hastings ratio; the factor p(1-p) is the
  #Jacobian dp/du of the transformation from u to p
  R=exp(target.log(p[i+1],y)-target.log(p[i],y))*
    (p[i+1]*(1-p[i+1]))/(p[i]*(1-p[i]))
  #Reject or accept proposal
  if (R<1) {
    if(rbinom(1,1,R)==0) {
      p[i+1]=p[i]
      u[i+1]=u[i]
      j <- j-1
    }
  }
}
P.rw.1=p   #Save the output to examine later
u.rw.1=u
cat("\nAcceptance Ratio:", round(j/num.its*100,2),"%\n")
#Acceptance Ratio: 33.98 %
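# Optional check (not from the book): the transformation above is the
# inverse logit, available in base R as plogis(); its inverse,
# log(p/(1-p)), is qlogis(). A quick numerical check on an arbitrary
# test value:
u.test <- 0.3
all.equal(exp(u.test)/(1+exp(u.test)), plogis(u.test))   #TRUE
all.equal(qlogis(plogis(u.test)), u.test)                #TRUE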
# CASE 2: proposal is a random walk with errors generated from
# Uniform(-.01,.01).
set.seed(2)         #Set random seed
num.its=10000       #Number of iterations
u2=rep(0,num.its)   #MCMC output: vector of u realizations
u2[1]=runif(1,-1,1) #Starting value
p2=rep(0,num.its)   #MCMC output: vector of p realizations
p2[1]=exp(u2[1])/(1+exp(u2[1]))   #transform u to p, see page 207

j <- num.its-1      #Counts acceptances (decremented on rejection)
for (i in 1:(num.its-1)) {
  #Generate proposal (random walk with much smaller steps)
  u2[i+1]=u2[i]+runif(1,-.01,.01)
  #Transform u to p, see page 207
  p2[i+1]=exp(u2[i+1])/(1+exp(u2[i+1]))
  #Compute Metropolis-Hastings ratio, including the Jacobian term
  #(note: all quantities here use the current chain, p2 and u2)
  R=exp(target.log(p2[i+1],y)-target.log(p2[i],y))*
    (p2[i+1]*(1-p2[i+1]))/(p2[i]*(1-p2[i]))
  #Reject or accept proposal
  if (R<1) {
    if(rbinom(1,1,R)==0) {
      p2[i+1]=p2[i]
      u2[i+1]=u2[i]
      j <- j-1
    }
  }
}
P.rw.01=p2   #Save the output to examine later
u.rw.01=u2
cat("\nAcceptance Ratio:", round(j/num.its*100,2),"%\n")
#The acceptance ratio here is very high (roughly 90% or more): tiny
#steps are nearly always accepted, but the chain explores the
#parameter space very slowly.
#__________________________________________________________________
#Plots of the output of the MCMC run:
#Sample paths (similar to figure 7.5 on page 209)
#Note: you may wish to remove iterations as a burn-in period
par(mfrow=c(2,1))
plot(P.rw.1,type="l",ylab="p",xlab="Iteration",ylim=c(.3,.85))
plot(P.rw.01,type="l",ylab="p",xlab="Iteration",ylim=c(.3,.85))

#Histograms of the estimated parameter
#Note: you may wish to remove iterations as a burn-in period
par(mfrow=c(2,1))
hist(P.rw.1,xlim=range(c(P.rw.1,P.rw.01)),nclass=40)
hist(P.rw.01,xlim=range(c(P.rw.1,P.rw.01)),nclass=40)
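# Optional diagnostic (not from the book): running means of the two
# random walk chains. A well-mixing chain's running mean settles
# quickly; the tiny-step chain is accepted almost every iteration but
# moves so slowly that its running mean drifts for a long time.
par(mfrow=c(1,1))
plot(cumsum(P.rw.1)/(1:num.its), type="l", xlab="Iteration",
     ylab="Running mean of p")
lines(cumsum(P.rw.01)/(1:num.its), lty=2)
legend("topright", legend=c("Unif(-1,1) steps","Unif(-.01,.01) steps"),
       lty=c(1,2))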