## Source: Chapter11.r (with modifications) in ## http://www.stat.colostate.edu/computationalstatistics/CompStatR.zip ############################################################################ ### EXAMPLE 11.4 EASY DATA (RUNNING-LINE SMOOTH) ############################################################################ # easy = observed data # x = observed predictor data # y = observed response data # k = smoothing parameter # RLSMOOTH1 = running-line smoother (for k odd > 1) # RLSMOOTH2 = running-line smoother (for k odd > 1) ############################################################################ ## NOTES # The following has two implementations for the running-line smoother. # The first builds the hat matrix for each neighborhood while the second # uses the sufficient statistics for regression to reduce computations. ############################################################################ ## INITIAL VALUES easy = read.table(file.choose(),header=T) x = easy$X y = easy$Y plot(x,y,xlab="Predictor",ylab="Response") s = function(x){(x^3) * sin((x+3.4)/2)} plot(s, min(x), max(x), add=T) k = 23 # chosen by cross-validation ## FUNCTIONS # USES HAT MATRIX RLSMOOTH1 = function(k,y,x){ n = length(y) s.hat = rep(0,n) b = (k-1)/2 if(k>1){ for(i in 1:(b+1)){ xi = x[1:(b+i)] xi = cbind(rep(1,length(xi)),xi) hi = xi%*%solve(t(xi)%*%xi)%*%t(xi) s.hat[i] = y[1:(b+i)]%*%hi[i,] xi = x[(n-b-i+1):n] xi = cbind(rep(1,length(xi)),xi) hi = xi%*%solve(t(xi)%*%xi)%*%t(xi) s.hat[n-i+1] = y[(n-b-i+1):n]%*%hi[nrow(hi)-i+1,] } for(i in (b+2):(n-b-1)){ xi = x[(i-b):(i+b)] xi = cbind(rep(1,length(xi)),xi) hi = xi%*%solve(t(xi)%*%xi)%*%t(xi) s.hat[i] = y[(i-b):(i+b)]%*%hi[b+1,] }} if(k==1){s.hat = y} return(s.hat) } # USES SUFFICIENT STATISTICS RLSMOOTH2 = function(k,y,x){ if(k>1){ n = length(y) s.hat = rep(0,n) b = (k-1)/2 x.bar = mean(x[1:(b+1)]) y.bar = mean(y[1:(b+1)]) xy.bar = mean(x[1:(b+1)]*y[1:(b+1)]) x2.bar = mean(x[1:(b+1)]^2) beta1 = (xy.bar - x.bar*y.bar)/(x2.bar - x.bar^2) s.hat[1] = y.bar + beta1*(x[1]-x.bar) for(i in 2:(b+1)){ x.bar = (x.bar*(i+b-1) + x[i+b])/(i+b) y.bar = (y.bar*(i+b-1) + y[i+b])/(i+b) xy.bar = (xy.bar*(i+b-1) + x[i+b]*y[i+b])/(i+b) x2.bar = (x2.bar*(i+b-1) + (x[i+b]^2))/(i+b) beta1 = (xy.bar - x.bar*y.bar)/(x2.bar - x.bar^2) s.hat[i] = y.bar + beta1*(x[i]-x.bar) } for(i in (b+2):(n-b)){ x.bar = x.bar + (x[i+b] - x[i-b-1])/k y.bar = y.bar + (y[i+b] - y[i-b-1])/k xy.bar = xy.bar + (x[i+b]*y[i+b] - x[i-b-1]*y[i-b-1])/k x2.bar = x2.bar + ((x[i+b]^2)-(x[i-b-1]^2))/k beta1 = (xy.bar - x.bar*y.bar)/(x2.bar - x.bar^2) s.hat[i] = y.bar + beta1*(x[i]-x.bar) } for(i in (n-b+1):n){ x.bar = (x.bar*(k-i+n-b+1) - x[i-b-1])/(k-i+n-b) y.bar = (y.bar*(k-i+n-b+1) - y[i-b-1])/(k-i+n-b) xy.bar = (xy.bar*(k-i+n-b+1) - x[i-b-1]*y[i-b-1])/(k-i+n-b) x2.bar = (x2.bar*(k-i+n-b+1) - (x[i-b-1]^2))/(k-i+n-b) beta1 = (xy.bar - x.bar*y.bar)/(x2.bar - x.bar^2) s.hat[i] = y.bar + beta1*(x[i]-x.bar) } } if(k==1){s.hat = y} return(s.hat) } ## MAIN rlsmooth1.val = RLSMOOTH1(k,y,x) rlsmooth2.val = RLSMOOTH2(k,y,x) ## OUTPUT PLOTS s = function(x){(x^3) * sin((x+3.4)/2)} x.plot = seq(min(x),max(x),length.out=1000) y.plot = s(x.plot) plot(x,y,xlab="Predictor",ylab="Response") lines(x.plot,y.plot,lty=2) lines(x,rlsmooth2.val,type="l") # plot smoother without data points plot(x,y,xlab="Predictor",ylab="Response", main=paste("Running-line, k=",k), type="n") lines(x.plot,y.plot,lty=1,lwd=2) lines(x,rlsmooth2.val,lty=1, col="red", lwd=2) legend("bottomright",c("True relation","k=23"), lty=c(1,1), col=c("black","red")) # other k k1 = 13 rlsmooth.val = RLSMOOTH2(k1,y,x) plot(x,y,xlab="Predictor",ylab="Response", main=paste("k=",k1), type="n") lines(x.plot,y.plot,lty=2) lines(x,rlsmooth.val,type="l") k1 = 33 rlsmooth.val = RLSMOOTH2(k1,y,x) plot(x,y,xlab="Predictor",ylab="Response", main=paste("k=",k1), type="n") lines(x.plot,y.plot,lty=2) lines(x,rlsmooth.val,type="l") ############################################################################ ### EXAMPLE 11.5 EASY DATA (KERNEL SMOOTH) ############################################################################ # easy = observed data # x = observed predictor data # y = observed response data # h = bandwidth # fx.hat = normal kernel density # KSMOOTH = kernel smoother (for k odd > 1) ############################################################################ ## INITIAL VALUES easy = read.table(file.choose(),header=T) x = easy$X y = easy$Y h = 0.16 ## FUNCTIONS fx.hat = function(z,h){dnorm((z-x)/h)/h} KSMOOTH = function(h,y,x){ n = length(y) s.hat = rep(0,n) for(i in 1:n){ a = fx.hat(x[i],h) s.hat[i] = sum(y * a/sum(a)) } return(s.hat) } ## MAIN ksmooth.val = KSMOOTH(h,y,x) ## OUTPUT PLOTS s = function(x){(x^3) * sin((x+3.4)/2)} x.plot = seq(min(x),max(x),length.out=1000) y.plot = s(x.plot) plot(x,y,xlab="Predictor",ylab="Response") lines(x.plot,y.plot,lty=2) lines(x,ksmooth.val,type="l") ## without data points plot(x,y,xlab="Predictor",ylab="Response", type="n", main="Kernel Smoother") lines(x.plot,y.plot,lty=1,lwd=2) lines(x,ksmooth.val,lty=1, col="red", lwd=2) legend("bottomright",c("True relation","h=0.16"), lty=c(1,1), col=c("black","red")) ############################################################################ ### EXAMPLE 11.6 EASY DATA (SPLINE SMOOTH) ############################################################################ # easy = observed data # x = observed predictor data # y = observed response data ############################################################################ ## NOTES # The following uses the smooth.spline function in R which has a # different implementation of the smoothing parameter than S-PLUS, # the programming language used originally for some of the examples # in the first edition. Thus the lambdas differ. ############################################################################ ## INITIAL VALUES easy = read.table(file.choose(),header=T) x = easy$X y = easy$Y ## MAIN s.hat = smooth.spline(x=x,y=y) ## OUTPUT s.hat # SMOOTH.SPLINE OUTPUT ## OUTPUT PLOTS s = function(x){(x^3) * sin((x+3.4)/2)} x.plot = seq(min(x),max(x),length.out=1000) y.plot = s(x.plot) plot(x,y,xlab="Predictor",ylab="Response") lines(x.plot,y.plot,lty=2) lines(s.hat) ## without data points plot(x,y,xlab="Predictor",ylab="Response", type="n", main="Spline Smoother") lines(x.plot,y.plot,lty=1,lwd=2) lines(s.hat) legend("bottomright",c("True relation","lambda=0.066"), lty=c(1,1), lwd=c(2,1),cex=0.8)