Logistic Regression with Splines

Prepare the Phoneme Data

#install.packages("ElemStatLearn")
library(ElemStatLearn)
library(splines)
data(phoneme)

mydata = phoneme[phoneme$g %in% c("aa", "ao"), ]
# mydata[1, ]
mydata[1, c(1:5, 250:258)]

##        x.1      x.2      x.3      x.4     x.5   x.250   x.251   x.252
## 5 12.96705 13.69454 14.91182 18.22292 18.4539 7.58624 6.65202 7.69109
##     x.253 x.254   x.255   x.256  g             speaker
## 5 6.93683 7.036 7.01278 8.52197 aa train.dr1.mcpm0.sa1

X = data.matrix(mydata[, -c(257, 258)])
Y = mydata[, 257]
table(Y)

## Y
##   aa   ao  dcl   iy   sh 
##  695 1022    0    0    0

Y = as.numeric(Y)-1
table(Y)  # 1: ao

## Y
##    0    1 
##  695 1022

Fit a Logitsic Regressio Model

logfit1 = glm(Y ~ X, family="binomial")
coef1 = logfit1$coef[-1]

Fit a Logistic Regression Model with Coefficients Modeled by Splines

H = ns(1:256, df = 12)
B = X %*% H
logfit2 = glm(Y ~ B, family="binomial")
coef2 = H %*% logfit2$coef[-1]

plot(1:256, coef1, type="n", 
      xlab="Frequency", ylab="Logistic Regression Coefficients")
lines(1:256, coef1, col="gray")
lines(1:256, coef2, col="red")

Compare Classification Performance

T = 5
n = dim(X)[1]
Err1 = matrix(0, T, 2)  # col 1: training; col 2: test
Err2 = matrix(0, T, 2)  # col 1: training; col 2: test
set.seed(123)
for(t in 1:T){
  testid = sample(1:n, round(n*0.2))
  
  logfit1 = glm(Y[-testid] ~ X[-testid, ], family="binomial")
  yhat1 = as.numeric(logfit1$fitted.values>0.5)
  Err1[t, 1] = mean(yhat1 != Y[-testid])
  yhat1 = X[testid, ] %*% logfit1$coef[-1] + logfit1$coef[1]
  yhat1 = as.numeric(yhat1>0.5)
  Err1[t, 2] = mean(yhat1 != Y[testid])
  
  logfit2 = glm(Y[-testid] ~ B[-testid, ], family="binomial")
  yhat2 = as.numeric(logfit2$fitted.values>0.5)
  Err2[t, 1] = mean(yhat2 != Y[-testid])
  yhat2 = B[testid, ] %*% logfit2$coef[-1] + logfit2$coef[1]
  yhat2 = as.numeric(yhat2>0.5)
  Err2[t, 2] = mean(yhat2 != Y[testid])
}

round(cbind(Err1, Err2), dig=3)

##       [,1]  [,2]  [,3]  [,4]
## [1,] 0.114 0.207 0.179 0.181
## [2,] 0.111 0.239 0.183 0.166
## [3,] 0.101 0.245 0.176 0.204
## [4,] 0.100 0.274 0.170 0.213
## [5,] 0.108 0.227 0.168 0.213