Check the three examples from the faraway
package.
library(faraway)
par(mfrow=c(1,3))
data(exa)
#?exa # info for this simulated dataset
plot (y ~ x, exa, main="Example A")
lines(m ~ x, exa)
data(exb)
#?exb
plot(y ~ x, exb, main="Example B")
lines(m ~ x, exb)
data(faithful)
#?faithful
plot(waiting ~ eruptions, faithful,main="old Faithful")
Kernel smoother with different bandwidths for the Old Faithful data.
?ksmooth
par(mfrow=c(1,3))
plot(waiting ~ eruptions,
faithful,main="bandwidth=0.1", col="gray", cex=0.5)
lines(ksmooth(faithful$eruptions, faithful$waiting, "normal", 0.1), lwd=2)
plot(waiting ~ eruptions, faithful, main="bandwidth=0.5", col="gray", cex=0.5)
lines(ksmooth(faithful$eruptions, faithful$waiting,"normal", 0.5), lwd=2)
plot(waiting ~ eruptions, faithful, main="bandwidth=2", col="gray", cex=0.5)
lines(ksmooth(faithful$eruptions, faithful$waiting, "normal", 2), lwd=2)
Use CV to select bandwidth
library(sm)
?hcv
?sm.options # check the options
?sm.regression
par(mfrow=c(1,2))
hm = hcv(faithful$eruptions,faithful$waiting,display="lines")
sm.regression(faithful$eruptions, faithful$waiting, h=hm,
xlab="duration", ylab="waiting")
hm
## [1] 0.4243375
par(mfrow=c(1,2))
hm=hcv(exa$x, exa$y, display="lines")
sm.regression(exa$x, exa$y, h=hm, xlab="x", ylab="y")
hm
## [1] 0.02240222
Error message for exb
data since search boundary search is reached.
# hcv(exb$x,exb$y)
# try a smaller hstart, still error
#par(mfrow=c(1,2))
#hm = hcv(exb$x, exb$y, dislay = "lines", hstart=0.005)
#hm
#sm.regression(exb$x,exb$y,h=0.005)
Try Loess
#?loess
# default: span = 0.75, degree = 2
par(mfrow=c(1,3))
plot(waiting ~ eruptions, faithful,col="gray", cex=0.5)
f=loess(waiting ~ eruptions, faithful)
i = order(faithful$eruptions)
lines(f$x[i],f$fitted[i], lwd=1.5, col="red")
plot(y ~ x, exa, col="gray", cex=0.5)
lines(exa$x,exa$m,lty=1)
f = loess(y ~ x,exa)
lines(f$x,f$fitted,lty=2, lwd=1.5, col="red")
f = loess(y ~ x, exa, span=0.22)
lines(f$x,f$fitted,lty=5, lwd=1.5, col="blue")
plot(y ~ x, exb, col="gray", cex=0.5)
lines(exb$x,exb$m, lty=1)
f =loess(y ~ x, exb)
lines(f$x,f$fitted,lty=2,lwd=1.5, col="red")
f = loess(y ~ x, exb,span=1)
lines(f$x,f$fitted,lty=5, lwd=1.5, col="blue")
In the Quiz, students will be asked to write their own code to use LOO-CV and GCV to select span
for loess
.
lo.lev <- function(x1, sp){
## YOUR CODE: compute the diagonal entries of
## the smoother matrix S
}
onestep_CV <- function(x1, y1, sp){
## YOUR CODE:
## 1) fit a loess model y1 ~ x1 with span = sp, and extract
## the corresponding residual vector
## 2) call lo.lev to obtain the diagonal entries of S
## 3) compute LOO-CV and GCV
return(list(cv = cv, gcv = gcv))
}
myCV <- function(x1, y1, span){
## x1, y1: two vectors
## span: a sequence of values for "span"
m = length(span)
cv = rep(0, m)
gcv = rep(0, m)
for(i in 1:m){
tmp = onestep_CV(x1, y1, span[i])
cv[i] = tmp$cv
gcv[i] = tmp$gcv
}
return(list(cv = cv, gcv = gcv))