Check the three examples from the faraway package.

library(faraway)
par(mfrow=c(1,3))
data(exa)
#?exa # info for this simulated dataset
plot (y ~ x, exa, main="Example A")
lines(m ~ x, exa)
data(exb)
#?exb
plot(y ~ x, exb, main="Example B")
lines(m ~ x, exb)
data(faithful)
#?faithful
plot(waiting ~ eruptions, faithful,main="old Faithful")

Kernel smoother with different bandwidths for the Old Faithful data.

?ksmooth

par(mfrow=c(1,3))
plot(waiting ~ eruptions,
     faithful,main="bandwidth=0.1", col="gray", cex=0.5)
lines(ksmooth(faithful$eruptions, faithful$waiting, "normal", 0.1), lwd=2)

plot(waiting ~ eruptions, faithful, main="bandwidth=0.5", col="gray", cex=0.5)
lines(ksmooth(faithful$eruptions, faithful$waiting,"normal", 0.5), lwd=2)

plot(waiting ~ eruptions, faithful, main="bandwidth=2", col="gray", cex=0.5)
lines(ksmooth(faithful$eruptions, faithful$waiting, "normal", 2), lwd=2)

Use CV to select bandwidth

library(sm)
?hcv
?sm.options  # check the options
?sm.regression

par(mfrow=c(1,2))
hm = hcv(faithful$eruptions,faithful$waiting,display="lines")
sm.regression(faithful$eruptions, faithful$waiting, h=hm,
              xlab="duration", ylab="waiting")

hm
## [1] 0.4243375
par(mfrow=c(1,2))
hm=hcv(exa$x, exa$y, display="lines")
sm.regression(exa$x, exa$y, h=hm, xlab="x", ylab="y")

hm
## [1] 0.02240222

Error message for exb data since search boundary search is reached.

# hcv(exb$x,exb$y)

# try a smaller hstart, still error
#par(mfrow=c(1,2))
#hm = hcv(exb$x, exb$y, dislay = "lines", hstart=0.005)
#hm
#sm.regression(exb$x,exb$y,h=0.005)

Try Loess

#?loess
# default: span = 0.75, degree = 2
par(mfrow=c(1,3))
plot(waiting ~ eruptions, faithful,col="gray", cex=0.5)
f=loess(waiting ~ eruptions, faithful)
i = order(faithful$eruptions)
lines(f$x[i],f$fitted[i], lwd=1.5, col="red")

plot(y ~ x, exa, col="gray", cex=0.5)
lines(exa$x,exa$m,lty=1)
f = loess(y ~ x,exa)
lines(f$x,f$fitted,lty=2, lwd=1.5, col="red")
f = loess(y ~ x, exa, span=0.22)
lines(f$x,f$fitted,lty=5, lwd=1.5, col="blue")

plot(y ~ x, exb, col="gray", cex=0.5)
lines(exb$x,exb$m, lty=1)
f =loess(y ~ x, exb)
lines(f$x,f$fitted,lty=2,lwd=1.5, col="red")
f = loess(y ~ x, exb,span=1)
lines(f$x,f$fitted,lty=5, lwd=1.5, col="blue")

In the Quiz, students will be asked to write their own code to use LOO-CV and GCV to select span for loess.

lo.lev <- function(x1, sp){
  
  ## YOUR CODE: compute the diagonal entries of 
  ##            the smoother matrix S
  
}

onestep_CV <- function(x1, y1, sp){
  
  ## YOUR CODE: 
  ## 1) fit a loess model y1 ~ x1 with span = sp, and extract 
  ##    the corresponding residual vector
  ## 2) call lo.lev to obtain the diagonal entries of S
  ## 3) compute LOO-CV and GCV
  
  return(list(cv = cv, gcv = gcv))
}

myCV <- function(x1, y1, span){
  ## x1, y1: two vectors
  ## span: a sequence of values for "span"
  
  m = length(span)
  cv = rep(0, m)
  gcv = rep(0, m)
  for(i in 1:m){
    tmp = onestep_CV(x1, y1, span[i])
    cv[i] = tmp$cv
    gcv[i] = tmp$gcv
  }
  return(list(cv = cv, gcv = gcv))