## R code for Section 4.9: Bootstrap Procedures ## Example 4.9.1: Percentile Bootstrap ## function to obtain bootstrap percentile ## Source: http://www.stat.wmich.edu/mckean/HMC/Rcode/AppendixB/percentciboot.s # x is a vector containing the original sample # b is the desired number of bootstraps # alpha: (1 - alpha) is the confidence level percentciboot <- function(x, b, alpha) { theta <- mean(x) # sample mean thetastar <- rep(0, b) # initialized vector for bootstrapped theta^*s n <- length(x) # sample size for(i in 1:b) { xstar <- sample(x, n, replace=T) # resampling the sample x thetastar[i] <- mean(xstar) # bootstrapped sample mean } thetastar <- sort(thetastar) # reorder the bootstrapped theta^*s pick <- round((alpha/2)*(b+1)) # (alpha/2)th quantile lower <- thetastar[pick] # lower end of bootstrap confidence interval upper <- thetastar[b-pick+1] # upper end of bootstrap confidence interval list(SampleMean=theta, BootstrapCI=c(lower,upper), thetastar=thetastar) } # input data and parameters X <- c(131.7, 182.7, 73.3, 10.7, 150.4, 42.3, 22.2, 17.9, 264.0, 154.4, 4.3, 265.6, 61.9, 10.8, 48.8, 22.5, 8.8, 150.6, 103.0, 85.9) b <- 3000 alpha <- 0.10 # to obtain 90% bootstrap confidence interval ans <- percentciboot(X, b, alpha) # output the results ans$SampleMean # sample mean ans$BootstrapCI # 90% bootstrap confidence interval # plot the boostrap histogram hist(ans$thetastar, freq=F) points(ans$SampleMean, 0, pch=24) text(ans$BootstrapCI[1], 0, "(") text(ans$BootstrapCI[2], 0, ")") ## Example 4.9.2: Bootstrap Testing ## function to test location shift between two samples # x: vector containing first sample # y: vector containing first sample # b: number of bootstrap replications # origtest: value of test statistic on original samples # pvalue: bootstrap p-value # teststatall: vector of bootstrap test statistics boottesttwo <- function(x, y, b) { n1 <- length(x) # size of sample 1 n2 <- length(y) # size of sample 2 v <- mean(y) - mean(x) # difference in sample means z <- c(x, y) # combine x and y into z counter <- 0 # record how many bootstrap samples that have greater difference than v teststatall <- rep(0, b) for(i in 1:b) { xstar <- sample(z, n1, replace=T) ystar <- sample(z, n2, replace=T) vstar <- mean(ystar) - mean(xstar) if(vstar >= v) { counter <- counter+1 } teststatall[i] <- vstar } pvalue <- counter/b list(SampleDiff=v, pvalue=pvalue, BootstrapDiff=teststatall) } # input data and parameters X <- c( 94.2, 111.3, 90.0, 99.7, 116.8, 92.2, 166.0, 95.7, 109.3, 106.0, 111.7, 111.9, 111.6, 146.4, 103.9) Y <- c(125.5, 107.1, 67.9, 98.2, 128.6, 123.5, 116.5, 143.2, 120.3, 118.6, 105.0, 111.8, 129.3, 130.8, 139.8) boxplot(X, Y) # use boxplots to obtain summary information b <- 3000 ans <- boottesttwo(X, Y, b) # output the results ans$SampleDiff # sample difference ans$pvalue # bootstrap p-value # plot the boostrap histogram hist(ans$BootstrapDiff, freq=F) points(ans$SampleDiff, 0, pch=24) ## two-sample pooled t-test t.test(X, Y, alternative = "less", var.equal = TRUE)