
finished half of chapter 7 #10

Merged
merged 1 commit into from
Nov 15, 2021
158 changes: 158 additions & 0 deletions ch07.R
@@ -0,0 +1,158 @@
#############
# Chapter 7.1 Principles of Regression

## Linear regression: Straight Line

# R code to fit data points using a straight line

library(pracma)

N <- 50
x <- runif(N)

a <- 2.5 # true parameter
b <- 1.3 # true parameter
y <- a*x + b + 0.2*rnorm(N) # Synthesize training data

X <- cbind(x, rep(1, N))   # design matrix [x, 1]
theta <- lsfit(X, y, intercept=FALSE)$coefficients   # X already includes the constant column
t <- linspace(0, 1, 200)
yhat <- theta[1]*t + theta[2]   # theta[1] = slope, theta[2] = intercept
plot(x, y, pch=19)
lines(t, yhat, col='red', lwd=4)
legend("bottomright", c("Best Fit", "Data"), fill=c("red", "black"))

## Linear regression: Polynomial

# R code to fit data using a quadratic equation

N <- 50
x <- runif(N)
a <- -2.5
b <- 1.3
c <- 1.2
y <- a*x**2 + b*x + c + 0.2*rnorm(N)
X <- cbind(rep(1, N), x, x**2)   # design matrix [1, x, x^2]
theta <- lsfit(X, y, intercept=FALSE)$coefficients   # X already includes the constant column
t <- linspace(0, 1, 200)
print(theta)
yhat <- theta[1] + theta[2]*t + theta[3]*t**2
plot(x, y, pch=19)
lines(t, yhat, col='red', lwd=4)

## Legendre Polynomial

# R code to fit data using Legendre polynomials

library(pracma)

N <- 50
x <- linspace(-1,1,N)
a <- c(-0.001, 0.01, 0.55, 1.5, 1.2)
y <- a[1]*legendre(0, x)[1,] + a[2]*legendre(1, x)[1,] +
  a[3]*legendre(2, x)[1,] + a[4]*legendre(3, x)[1,] +
  a[5]*legendre(4, x)[1,] + 0.2*rnorm(N)

X <- cbind(legendre(0, x)[1,], legendre(1, x)[1,],
           legendre(2, x)[1,], legendre(3, x)[1,],
           legendre(4, x)[1,])   # legendre() returns a matrix; row 1 is P_n(x)

beta <- mldivide(X, y)

t <- linspace(-1, 1, 50)
yhat <- beta[1]*legendre(0, t)[1,] + beta[2]*legendre(1, t)[1,] +
  beta[3]*legendre(2, t)[1,] + beta[4]*legendre(3, t)[1,] +
  beta[5]*legendre(4, t)[1,]   # evaluate the fit on t, not on the data x

plot(x, y, pch=19, col="blue")
lines(t, yhat, lwd=2, col="orange")

## Auto-regressive model

# R code for auto-regressive model

library(pracma)

N <- 500
y <- cumsum(0.2*rnorm(N)) + 0.05*rnorm(N)

L <- 100
c <- c(0, y[1:(400-1)])        # first column: lagged samples (R indexing starts at 1)
r <- rep(0, L)
X <- Toeplitz(c, r)            # 400 x L matrix; row i holds the L samples before y[i]
beta <- mldivide(X, y[1:400])  # least-squares AR coefficients
yhat <- X %*% beta
plot(y[1:400])
lines(yhat[1:400], col="red")

## Robust regression by linear programming

# R code to demonstrate robust regression

library(pracma)

N <- 50
x <- linspace(-1,1,N)
a <- c(-0.001, 0.01, 0.55, 1.5, 1.2)
y <- a[1]*legendre(0, x)[1,] + a[2]*legendre(1, x)[1,] +
  a[3]*legendre(2, x)[1,] + a[4]*legendre(3, x)[1,] +
  a[5]*legendre(4, x)[1,] + 0.2*rnorm(N)

idx <- c(10, 16, 23, 37, 45)
y[idx] <- 5

X <- cbind(rep(1,N), x, x**2, x**3, x**4)
A <- rbind(cbind(X, -1*diag(N)), cbind(-X, -1*diag(N)))
b <- c(y, -y)
c <- c(rep(0, 5), rep(1, N))
res <- linprog(c, A, b, maxiter=1000000)   # minimize the sum of absolute residuals
beta <- res$x[1:5]             # first 5 entries are the coefficients; the rest are slacks
t <- linspace(-1, 1, 200)

yhat <- beta[1] + beta[2]*t + beta[3]*t**2 + beta[4]*t**3 + beta[5]*t**4
plot(x, y, pch=19, col="blue")
lines(t, yhat, lwd=2, col="orange")

#############
# Chapter 7.2 Overfitting

## Overfitting example

# R: An overfitting example

N <- 20
x <- sort(runif(N)*2 - 1)      # N uniform samples in [-1, 1]
a <- c(-0.001, 0.01, 0.55, 1.5, 1.2)
y <- a[1]*legendre(0, x)[1,] + a[2]*legendre(1, x)[1,] +
  a[3]*legendre(2, x)[1,] + a[4]*legendre(3, x)[1,] +
  a[5]*legendre(4, x)[1,] + 0.1*rnorm(N)

P <- 20                        # order-P model, far more flexible than the data warrant
X <- matrix(0, N, P+1)
for (p in 0:P) X[, p+1] <- legendre(p, x)[1,]   # Legendre basis up to order P
beta <- mldivide(X, y)

t <- linspace(-1, 1, 200)
Xhat <- matrix(0, length(t), P+1)
for (p in 0:P) Xhat[, p+1] <- legendre(p, t)[1,]
yhat <- Xhat %*% beta
plot(x, y, pch=19, col="blue")
lines(t, yhat, lwd=2, col="orange")

## Learning curve

# R
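
# A minimal sketch of a learning curve (assumed setup, not the book's code):
# average training and testing MSE of a straight-line fit as the training
# set grows. Model, noise level, and the size grid are all assumptions.

Ns <- seq(20, 200, by=20)
trials <- 100
Etrain <- Etest <- numeric(length(Ns))
for (i in seq_along(Ns)) {
  etr <- ete <- numeric(trials)
  for (k in 1:trials) {
    xtr <- runif(Ns[i]);  ytr <- 2.5*xtr + 1.3 + 0.2*rnorm(Ns[i])
    xte <- runif(1000);   yte <- 2.5*xte + 1.3 + 0.2*rnorm(1000)
    th <- lsfit(xtr, ytr)$coefficients   # intercept, slope
    etr[k] <- mean((th[1] + th[2]*xtr - ytr)**2)
    ete[k] <- mean((th[1] + th[2]*xte - yte)**2)
  }
  Etrain[i] <- mean(etr)
  Etest[i] <- mean(ete)
}
plot(Ns, Etest, type="l", col="red", ylim=range(c(Etrain, Etest)),
     xlab="Training set size", ylab="MSE")
lines(Ns, Etrain, col="blue")
legend("topright", c("Testing MSE", "Training MSE"), fill=c("red", "blue"))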

#############
# Chapter 7.3 Bias and Variance

## Mean estimator

# R code to visualize the average predictor (TODO)
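
# A minimal sketch (assumed setup, not the book's code): draw many training
# sets from one straight-line model, fit each, and overlay the individual
# fits (gray) with their average (red) to visualize the mean predictor.

library(pracma)

trials <- 50
t <- linspace(0, 1, 200)
Yhat <- matrix(0, trials, length(t))
plot(NULL, xlim=c(0, 1), ylim=c(0, 5), xlab="x", ylab="y")
for (k in 1:trials) {
  x <- runif(20)
  y <- 2.5*x + 1.3 + 0.5*rnorm(20)
  th <- lsfit(x, y)$coefficients   # intercept, slope
  Yhat[k, ] <- th[1] + th[2]*t
  lines(t, Yhat[k, ], col="gray")
}
lines(t, colMeans(Yhat), col="red", lwd=4)   # average predictor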

#############
# Chapter 7.4 Regularization

## Ridge regression

# R code to demonstrate a ridge regression example (TODO)
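
# A minimal sketch (assumed setup, not the book's code): ridge regression
# for a 4th-order polynomial fit via the closed form
# theta = (X'X + lambda*I)^(-1) X'y; lambda = 0.1 is an arbitrary choice.

library(pracma)

N <- 50
x <- linspace(-1, 1, N)
y <- 1.2*x**4 + 1.5*x**3 + 0.55*x**2 + 0.2*rnorm(N)   # assumed ground truth
X <- cbind(rep(1, N), x, x**2, x**3, x**4)
lambda <- 0.1
beta <- solve(t(X) %*% X + lambda*diag(ncol(X)), t(X) %*% y)
t <- linspace(-1, 1, 200)
yhat <- cbind(rep(1, 200), t, t**2, t**3, t**4) %*% beta
plot(x, y, pch=19, col="blue")
lines(t, yhat, lwd=2, col="orange")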

## LASSO regression

# R (TODO)
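
# A minimal sketch using the glmnet package (the solver choice is an
# assumption, not taken from the book): LASSO on a polynomial design;
# alpha = 1 selects the LASSO penalty, lambda = 0.05 is arbitrary.

library(glmnet)
library(pracma)

N <- 50
x <- linspace(-1, 1, N)
y <- 1.2*x**4 + 1.5*x**3 + 0.55*x**2 + 0.2*rnorm(N)
X <- cbind(x, x**2, x**3, x**4)        # glmnet adds its own intercept
fit <- glmnet(X, y, alpha=1, lambda=0.05)
print(as.vector(coef(fit)))            # intercept, then the four coefficients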

## LASSO vs Ridge

# R code to demonstrate overfitting and LASSO (TODO)
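
# A minimal sketch (assumed setup, not the book's code): fit one sparse
# ground truth with LASSO (alpha=1) and ridge (alpha=0); LASSO drives the
# irrelevant coefficients to exactly zero, while ridge only shrinks them.

library(glmnet)

n <- 100; p <- 10
X <- matrix(rnorm(n*p), n, p)
beta_true <- c(3, 0, 0, -2, 0, 0, 0, 1, 0, 0)   # assumed sparse coefficients
y <- X %*% beta_true + 0.5*rnorm(n)
b_lasso <- as.vector(coef(glmnet(X, y, alpha=1, lambda=0.1)))[-1]
b_ridge <- as.vector(coef(glmnet(X, y, alpha=0, lambda=0.1)))[-1]
print(rbind(true=beta_true, lasso=b_lasso, ridge=b_ridge))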