### R commands for section 10.2 -- Kernel density estimation ### G. Givens & J. Hoeting, Computational Statistics, Wiley, 2005. ## Example 10.1 (Bimodal density) ## need data "bimodal.dat" x = read.table("bimodal.dat")$V1 # plot Fig. 10.2 on page 282 with true density curve hist(x, xlim=c(-2,18), nclass=40, freq=F) curve((dnorm(x,4,1)+dnorm(x,9,2))/2, c(-2,18), add=T, lwd=2, col="black") temp = density(x, bw = 0.3, kernel="gaussian", n=200, from=-2, to=18) points(temp$x, temp$y, type="l", lty=1, col="red") temp = density(x, bw = 0.625, kernel="gaussian", n=200, from=-2, to=18) points(temp$x, temp$y, type="l", lty=1, lwd=2, col="blue") temp = density(x, bw = 1.875, kernel="gaussian", n=200, from=-2, to=18) points(temp$x, temp$y, type="l", lty=2, col="brown") legend(-2,0.25,legend=c("truth","h=0.3","h=0.625","h=1.875"),lty=c(1,1,1,2),lwd=c(2,1,2,1), col=c("black","red","blue","brown"),cex=1) ## Example 10.2 (Whale migration) ## need data "bimodal.dat" x = read.table("whalemigration.dat")$V1 # UCV method bw.ucv(x) #[1] 5.079602 # BCV method bw.bcv(x) #[1] 26.52264 # PL method bw.pl.self <- function (x, lower = 0.1 * hmax, upper = hmax) { if ((n <- length(x)) < 2) stop("need at least 2 data points") if (!is.numeric(x)) stop("invalid 'x'") flogpl <- function(h) { ans=0 for(i in 1:n) ans <- ans+log(sum(dnorm((x[i]-x[-i])/h))/(h*(n-1))); ans } hmax <- 1.144 * sqrt(var(x)) * n^(-1/5) h <- optimize(flogpl, c(lower, upper), tol = 0.1 * lower, maximum=T)$maximum if (h < 1.1 * lower | h > upper - 0.1 * lower) warning("minimum occurred at one end of the range") h } bw.pl.self(x) #[1] 9.752232 # Figure 10.4 on page 287 hist(x, xlim=c(950,1420), ylim=c(0,0.015), nclass=20, freq=F, col="gray") temp = density(x, bw = 5.08, kernel="gaussian", n=200, from=950, to=1420) points(temp$x, temp$y, type="l", lty=1, col="red",lwd=2) temp = density(x, bw = 26.52, kernel="gaussian", n=200, from=950, to=1420) points(temp$x, temp$y, type="l", lty=2, col="blue",lwd=2) temp = density(x, bw = 9.75, kernel="gaussian", n=200, from=950, to=1420) points(temp$x, temp$y, type="l", lty=4, col="brown",lwd=2) legend(950,0.015,legend=c("UCV","BCV","PL"),lty=c(1,2,4),lwd=c(2,2,2),col=c("red","blue","brown"),cex=1) ## Example 10.3 (Whale migration, continued) ## need data "bimodal.dat" x = read.table("whalemigration.dat")$V1 # Silverman's rule of thumb # (10.25) on page 288 bw.silver.self <- function(x) { sd(x) * (4/(3*length(x)))^0.2; } bw.silver.self(x) #[1] 32.96233 # Silverman's rule of thumb II # replace sigma^ in (10.25) by sigma~ = min(sigma^, IQR/(Phi^{-1}(0.75)-Phi^{-1}(0.25)) bw.silver2.self <- function(x) { min(sd(x), IQR(x)/(qnorm(0.75)-qnorm(0.25))) * (4/(3*length(x)))^0.2; } bw.silver2.self(x) #[1] 18.75403 # Silverman(1986)'s version of Sliverman's rule of thumb, R function bw.nrd0(x) #[1] 16.04168 # Scott(1992)'s version of Sliverman's rule of thumb, R function bw.nrd(x) #[1] 18.89354 # Sheather & Jones (1991)'s version of Sheather-Jones method bw.SJ(x) #[1] 10.30286 ## Example 10.4 (Whale migration, continued) ## need data "bimodal.dat" x = read.table("whalemigration.dat")$V1 # Terrell's maximal smoothing principle, gaussian kernel # (10.30) on page 291 bw.msp.self <- function(x) { 3*sd(x)*(35*length(x)*2*sqrt(pi))^(-0.2); } bw.msp.self(x) #[1] 35.59728 ## plot Figure 10.5 on page 291 # Figure 10.4 on page 287 hist(x, xlim=c(950,1420), ylim=c(0,0.015), nclass=20, freq=F, col="gray") temp = density(x, bw = 10.30, kernel="gaussian", n=200, from=950, to=1420) points(temp$x, temp$y, type="l", lty=1, col="black",lwd=2) temp = density(x, bw = 32.96, kernel="gaussian", n=200, from=950, to=1420) points(temp$x, temp$y, type="l", lty=2, col="blue",lwd=2) temp = density(x, bw = 35.60, kernel="gaussian", n=200, from=950, to=1420) points(temp$x, temp$y, type="l", lty=3, col="brown",lwd=2) legend(950,0.015,legend=c("Sheather-Jones","Silverman","Terrell"),lty=c(1,2,3),lwd=c(2,2,2),col=c("black","blue","brown"),cex=1) ## Bimodal data, continued ## need data "bimodal.dat" x = read.table("bimodal.dat")$V1 # compare different bandwidth selection methods with the true density curve par(mfrow=c(2,3)) # "PL" hist(x, xlim=c(-2,18), nclass=40, freq=F, main="PL",xlab=round(bw.pl.self(x),3)) curve((dnorm(x,4,1)+dnorm(x,9,2))/2, c(-2,18), add=T, lwd=2, col="red") temp = density(x, bw = bw.pl.self(x), kernel="gaussian", n=200, from=-2, to=18) points(temp$x, temp$y, type="l", lty=2, col="blue",lwd=2) # "UCV" hist(x, xlim=c(-2,18), nclass=40, freq=F, main="UCV",xlab=round(bw.ucv(x),3)) curve((dnorm(x,4,1)+dnorm(x,9,2))/2, c(-2,18), add=T, lwd=2, col="red") temp = density(x, bw = bw.ucv(x), kernel="gaussian", n=200, from=-2, to=18) points(temp$x, temp$y, type="l", lty=2, col="blue",lwd=2) # "BCV" hist(x, xlim=c(-2,18), nclass=40, freq=F, main="BCV",xlab=round(bw.bcv(x),3)) curve((dnorm(x,4,1)+dnorm(x,9,2))/2, c(-2,18), add=T, lwd=2, col="red") temp = density(x, bw = bw.bcv(x), kernel="gaussian", n=200, from=-2, to=18) points(temp$x, temp$y, type="l", lty=2, col="blue",lwd=2) # "Silverman" hist(x, xlim=c(-2,18), nclass=40, freq=F, main="Silverman",xlab=round(bw.silver.self(x),3)) curve((dnorm(x,4,1)+dnorm(x,9,2))/2, c(-2,18), add=T, lwd=2, col="red") temp = density(x, bw = bw.silver.self(x), kernel="gaussian", n=200, from=-2, to=18) points(temp$x, temp$y, type="l", lty=2, col="blue",lwd=2) # "Sheather-Jones" hist(x, xlim=c(-2,18), nclass=40, freq=F, main="Sheather-Jones",xlab=round(bw.SJ(x),3)) curve((dnorm(x,4,1)+dnorm(x,9,2))/2, c(-2,18), add=T, lwd=2, col="red") temp = density(x, bw = bw.SJ(x), kernel="gaussian", n=200, from=-2, to=18) points(temp$x, temp$y, type="l", lty=2, col="blue",lwd=2) # "Terrell" hist(x, xlim=c(-2,18), nclass=40, freq=F, main="Terrell",xlab=round(bw.msp.self(x),3)) curve((dnorm(x,4,1)+dnorm(x,9,2))/2, c(-2,18), add=T, lwd=2, col="red") temp = density(x, bw = bw.msp.self(x), kernel="gaussian", n=200, from=-2, to=18) points(temp$x, temp$y, type="l", lty=2, col="blue",lwd=2)