## Source:   Chapter11.r (with modifications) in 
## http://www.stat.colostate.edu/computationalstatistics/CompStatR.zip

############################################################################
### EXAMPLE 11.4 EASY DATA (RUNNING-LINE SMOOTH)
############################################################################
# easy      = observed data
# x         = observed predictor data
# y         = observed response data
# k         = smoothing parameter
# RLSMOOTH1 = running-line smoother (for k odd > 1)
# RLSMOOTH2 = running-line smoother (for k odd > 1)
############################################################################
## NOTES
# The following has two implementations for the running-line smoother. 
# The first builds the hat matrix for each neighborhood while the second
# uses the sufficient statistics for regression to reduce computations.
############################################################################

## INITIAL VALUES
easy = read.table(file.choose(),header=T)
x    = easy$X
y    = easy$Y

plot(x,y,xlab="Predictor",ylab="Response")
s = function(x){(x^3) * sin((x+3.4)/2)}
plot(s, min(x), max(x), add=T)

k = 23   # chosen by cross-validation

## FUNCTIONS
# USES HAT MATRIX
RLSMOOTH1 = function(k,y,x){
      n = length(y)
      s.hat = rep(0,n)
      b = (k-1)/2
      if(k>1){
      for(i in 1:(b+1)){
            xi = x[1:(b+i)]
            xi = cbind(rep(1,length(xi)),xi)
            hi = xi%*%solve(t(xi)%*%xi)%*%t(xi)
            s.hat[i] = y[1:(b+i)]%*%hi[i,]

            xi = x[(n-b-i+1):n]
            xi = cbind(rep(1,length(xi)),xi)
            hi = xi%*%solve(t(xi)%*%xi)%*%t(xi)
            s.hat[n-i+1] = y[(n-b-i+1):n]%*%hi[nrow(hi)-i+1,]
      }
      for(i in (b+2):(n-b-1)){
            xi = x[(i-b):(i+b)]
            xi = cbind(rep(1,length(xi)),xi)
            hi = xi%*%solve(t(xi)%*%xi)%*%t(xi)
            s.hat[i] = y[(i-b):(i+b)]%*%hi[b+1,]
      }}
      if(k==1){s.hat = y}
      return(s.hat)
}
# USES SUFFICIENT STATISTICS
RLSMOOTH2 = function(k,y,x){
      if(k>1){
      n = length(y)
      s.hat = rep(0,n)
      b = (k-1)/2
      x.bar = mean(x[1:(b+1)])
      y.bar = mean(y[1:(b+1)])
      xy.bar = mean(x[1:(b+1)]*y[1:(b+1)])
      x2.bar = mean(x[1:(b+1)]^2)
      beta1 = (xy.bar - x.bar*y.bar)/(x2.bar - x.bar^2)
      s.hat[1] = y.bar + beta1*(x[1]-x.bar)
      for(i in 2:(b+1)){
            x.bar  = (x.bar*(i+b-1) + x[i+b])/(i+b)
            y.bar  = (y.bar*(i+b-1) + y[i+b])/(i+b)
            xy.bar = (xy.bar*(i+b-1) + x[i+b]*y[i+b])/(i+b)
            x2.bar = (x2.bar*(i+b-1) + (x[i+b]^2))/(i+b)
            beta1  = (xy.bar - x.bar*y.bar)/(x2.bar - x.bar^2)
            s.hat[i] = y.bar + beta1*(x[i]-x.bar)
      }
      for(i in (b+2):(n-b)){
            x.bar = x.bar + (x[i+b] - x[i-b-1])/k
            y.bar = y.bar + (y[i+b] - y[i-b-1])/k
            xy.bar = xy.bar + (x[i+b]*y[i+b] - x[i-b-1]*y[i-b-1])/k
            x2.bar = x2.bar + ((x[i+b]^2)-(x[i-b-1]^2))/k
            beta1 = (xy.bar - x.bar*y.bar)/(x2.bar - x.bar^2)
            s.hat[i] = y.bar + beta1*(x[i]-x.bar)
      }
      for(i in (n-b+1):n){
            x.bar  = (x.bar*(k-i+n-b+1) - x[i-b-1])/(k-i+n-b)
            y.bar  = (y.bar*(k-i+n-b+1) - y[i-b-1])/(k-i+n-b)
            xy.bar = (xy.bar*(k-i+n-b+1) - x[i-b-1]*y[i-b-1])/(k-i+n-b)
            x2.bar = (x2.bar*(k-i+n-b+1) - (x[i-b-1]^2))/(k-i+n-b)
            beta1  = (xy.bar - x.bar*y.bar)/(x2.bar - x.bar^2)
            s.hat[i] = y.bar + beta1*(x[i]-x.bar)
      }
      }
      if(k==1){s.hat = y}
      return(s.hat)
}

## MAIN
rlsmooth1.val = RLSMOOTH1(k,y,x)
rlsmooth2.val = RLSMOOTH2(k,y,x)

## OUTPUT PLOTS
s = function(x){(x^3) * sin((x+3.4)/2)}
x.plot = seq(min(x),max(x),length.out=1000)
y.plot = s(x.plot)
plot(x,y,xlab="Predictor",ylab="Response")
lines(x.plot,y.plot,lty=2)
lines(x,rlsmooth2.val,type="l")

# plot smoother without data points
plot(x,y,xlab="Predictor",ylab="Response", main=paste("Running-line, k=",k), type="n")
lines(x.plot,y.plot,lty=1,lwd=2)
lines(x,rlsmooth2.val,lty=1, col="red", lwd=2)
legend("bottomright",c("True relation","k=23"), lty=c(1,1), col=c("black","red"))

# other k
k1 = 13
rlsmooth.val = RLSMOOTH2(k1,y,x)
plot(x,y,xlab="Predictor",ylab="Response", main=paste("k=",k1), type="n")
lines(x.plot,y.plot,lty=2)
lines(x,rlsmooth.val,type="l")

k1 = 33
rlsmooth.val = RLSMOOTH2(k1,y,x)
plot(x,y,xlab="Predictor",ylab="Response", main=paste("k=",k1), type="n")
lines(x.plot,y.plot,lty=2)
lines(x,rlsmooth.val,type="l")


############################################################################
### EXAMPLE 11.5 EASY DATA (KERNEL SMOOTH)
############################################################################
# easy    = observed data
# x       = observed predictor data
# y       = observed response data
# h       = bandwidth
# fx.hat  = normal kernel density
# KSMOOTH = kernel smoother (for k odd > 1)
############################################################################

## INITIAL VALUES
easy = read.table(file.choose(),header=T)
x    = easy$X
y    = easy$Y
h    = 0.16

## FUNCTIONS
fx.hat = function(z,h){dnorm((z-x)/h)/h}
KSMOOTH = function(h,y,x){
      n = length(y)
      s.hat = rep(0,n)
      for(i in 1:n){
            a = fx.hat(x[i],h)
            s.hat[i] = sum(y * a/sum(a))
      }
      return(s.hat)
}

## MAIN
ksmooth.val = KSMOOTH(h,y,x)

## OUTPUT PLOTS
s = function(x){(x^3) * sin((x+3.4)/2)}
x.plot = seq(min(x),max(x),length.out=1000)
y.plot = s(x.plot)
plot(x,y,xlab="Predictor",ylab="Response")
lines(x.plot,y.plot,lty=2)
lines(x,ksmooth.val,type="l")

## without data points
plot(x,y,xlab="Predictor",ylab="Response", type="n", main="Kernel Smoother")
lines(x.plot,y.plot,lty=1,lwd=2)
lines(x,ksmooth.val,lty=1, col="red", lwd=2)
legend("bottomright",c("True relation","h=0.16"), lty=c(1,1), col=c("black","red"))

############################################################################
### EXAMPLE 11.6 EASY DATA (SPLINE SMOOTH)
############################################################################
# easy    = observed data
# x       = observed predictor data
# y       = observed response data
############################################################################
## NOTES
# The following uses the smooth.spline function in R which has a 
# different implementation of the smoothing parameter than S-PLUS,
# the programming language used originally for some of the examples
# in the first edition.  Thus the lambdas differ.
############################################################################

## INITIAL VALUES
easy = read.table(file.choose(),header=T)
x    = easy$X
y    = easy$Y

## MAIN
s.hat = smooth.spline(x=x,y=y)

## OUTPUT
s.hat      # SMOOTH.SPLINE OUTPUT

## OUTPUT PLOTS
s = function(x){(x^3) * sin((x+3.4)/2)}
x.plot = seq(min(x),max(x),length.out=1000)
y.plot = s(x.plot)
plot(x,y,xlab="Predictor",ylab="Response")
lines(x.plot,y.plot,lty=2)
lines(s.hat)

## without data points
plot(x,y,xlab="Predictor",ylab="Response", type="n", main="Spline Smoother")
lines(x.plot,y.plot,lty=1,lwd=2)
lines(s.hat)
legend("bottomright",c("True relation","lambda=0.066"), lty=c(1,1), lwd=c(2,1),cex=0.8)