#############
# Chapter 7.1 Principles of Regression

## Linear regression: Straight Line

# R code to fit data points using a straight line

library(pracma)

N <- 50
x <- runif(N)

a <- 2.5 # true parameter
b <- 1.3 # true parameter
y <- a*x + b + 0.2*rnorm(N) # Synthesize training data

X <- cbind(x, rep(1, N))   # design matrix [x, 1]
theta <- lsfit(X, y, intercept=FALSE)$coefficients   # X already carries the intercept column
t <- linspace(0, 1, 200)
yhat <- theta[1]*t + theta[2]   # theta[1] = slope, theta[2] = intercept
plot(x, y, pch=19)
lines(t, yhat, col='red', lwd=4)
legend("bottomright", c("Best Fit", "Data"), fill=c("red", "black"))

## Linear regression: Polynomial

# R code to fit data using a quadratic equation

N <- 50
x <- runif(N)
a <- -2.5
b <- 1.3
c <- 1.2
y <- a*x**2 + b*x + c + 0.2*rnorm(N)
X <- cbind(rep(1, N), x, x**2)
theta <- lsfit(X, y, intercept=FALSE)$coefficients   # X already carries the intercept column
t <- linspace(0, 1, 200)
print(theta)
yhat <- theta[1] + theta[2]*t + theta[3]*t**2
plot(x, y, pch=19)
lines(t, yhat, col='red', lwd=4)

## Legendre Polynomial

# R code to fit data using Legendre polynomials

library(pracma)

N <- 50
x <- linspace(-1,1,N)
a <- c(-0.001, 0.01, 0.55, 1.5, 1.2)
y <- a[1]*legendre(0, x)[1,] + a[2]*legendre(1, x)[1,] +
     a[3]*legendre(2, x)[1,] + a[4]*legendre(3, x)[1,] +
     a[5]*legendre(4, x)[1,] + 0.2*rnorm(N)   # legendre() returns a matrix; row 1 is P_n(x)

X <- cbind(legendre(0, x)[1,], legendre(1, x)[1,],
           legendre(2, x)[1,], legendre(3, x)[1,],
           legendre(4, x)[1,])   # design matrix: one Legendre basis function per column

beta <- mldivide(X, y)

t <- linspace(-1, 1, 50)
yhat <- beta[1]*legendre(0, t)[1,] + beta[2]*legendre(1, t)[1,] +
        beta[3]*legendre(2, t)[1,] + beta[4]*legendre(3, t)[1,] +
        beta[5]*legendre(4, t)[1,]   # evaluate the fit on the grid t, not on x

plot(x, y, pch=19, col="blue")
lines(t, yhat, lwd=2, col="orange")

## Auto-regressive model

# R code for auto-regressive model

library(pracma)

N <- 500
y <- cumsum(0.2*rnorm(N)) + 0.05*rnorm(N)

L <- 100                  # number of AR lags
c <- c(0, y[1:399])       # first column of X: y delayed by one sample
r <- rep(0, L)            # first row of X: zeros (no future samples)
X <- Toeplitz(c, r)       # 400 x L matrix of lagged observations
beta <- mldivide(X, y[1:400])
yhat <- X %*% beta
plot(y[1:400])
lines(yhat[1:400], col="red")

## Robust regression by linear programming

# R code to demonstrate robust regression by linear programming

library(pracma)

N <- 50
x <- linspace(-1,1,N)
a <- c(-0.001, 0.01, 0.55, 1.5, 1.2)
y <- a[1]*legendre(0, x)[1,] + a[2]*legendre(1, x)[1,] +
     a[3]*legendre(2, x)[1,] + a[4]*legendre(3, x)[1,] +
     a[5]*legendre(4, x)[1,] + 0.2*rnorm(N)

idx <- c(10, 16, 23, 37, 45)
y[idx] <- 5

X <- cbind(rep(1,N), x, x**2, x**3, x**4)
A <- rbind(cbind(X, -1*diag(N)), cbind(-X, -1*diag(N)))
b <- c(y, -y)
cc <- c(rep(0, 5), rep(1, N))   # LP cost: minimize the sum of absolute residuals
res <- linprog(cc, A, b, maxiter=1000000)
beta <- res$x[1:5]              # first 5 entries are the polynomial coefficients
t <- linspace(-1, 1, 200)

yhat <- beta[1] + beta[2]*t + beta[3]*t**2 + beta[4]*t**3 + beta[5]*t**4
plot(x, y, pch=19, col="blue")
lines(t, yhat, lwd=2, col="orange")

#############
# Chapter 7.2 Overfitting

## Overfitting example

# R: An overfitting example

N <- 20
x <- sort(runif(N)*2 - 1)   # N samples drawn uniformly from [-1, 1]
a <- c(-0.001, 0.01, 0.55, 1.5, 1.2)
y <- a[1]*legendre(0, x)[1,] + a[2]*legendre(1, x)[1,] +
     a[3]*legendre(2, x)[1,] + a[4]*legendre(3, x)[1,] +
     a[5]*legendre(4, x)[1,] + 0.1*rnorm(N)

P <- 20                     # polynomial order; P close to N overfits badly
X <- outer(x, 0:P, "^")     # monomial design matrix [1, x, x^2, ..., x^P]
beta <- pinv(X) %*% y       # minimum-norm least-squares fit (X is N x (P+1), underdetermined)

t <- linspace(-1, 1, 200)
yhat <- outer(t, 0:P, "^") %*% beta
plot(x, y, pch=19, col="blue")
lines(t, yhat, lwd=2, col="orange")

## Learning curve

# R code to simulate a learning curve (TODO)
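
# A minimal sketch in the meantime, assuming the straight-line setup of
# Section 7.1: for each training-set size N, fit a line to N noisy samples and
# record training and test MSE, averaged over many random draws. The names
# Ns, mse_train, mse_test are illustrative, not from the book.

Ns <- seq(20, 200, by=20)   # training-set sizes to sweep
trials <- 100               # Monte Carlo repetitions per size
mse_train <- rep(0, length(Ns))
mse_test  <- rep(0, length(Ns))
for (i in seq_along(Ns)) {
  for (k in 1:trials) {
    x  <- runif(Ns[i]);  y  <- 2.5*x + 1.3 + 0.2*rnorm(Ns[i])    # training set
    xt <- runif(1000);   yt <- 2.5*xt + 1.3 + 0.2*rnorm(1000)    # test set
    theta <- lsfit(x, y)$coefficients   # theta[1] intercept, theta[2] slope
    mse_train[i] <- mse_train[i] + mean((theta[1] + theta[2]*x  - y )**2)/trials
    mse_test[i]  <- mse_test[i]  + mean((theta[1] + theta[2]*xt - yt)**2)/trials
  }
}
plot(Ns, mse_test, type="l", col="red", ylim=range(c(mse_train, mse_test)),
     xlab="training set size N", ylab="MSE")
lines(Ns, mse_train, col="blue")
legend("topright", c("Test MSE", "Training MSE"), fill=c("red", "blue"))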

#############
# Chapter 7.3 Bias and Variance

## Mean estimator

# R code to visualize the average predictor (TODO)
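
# A minimal sketch in the meantime, assuming the straight-line model of
# Section 7.1: fit a regressor on each of many independent training sets,
# overlay the individual fits in gray, and draw their average in red. All
# variable names here are illustrative.

trials <- 50
t <- linspace(0, 1, 200)
yhat_all <- matrix(0, trials, length(t))
plot(NULL, xlim=c(0, 1), ylim=c(0, 5), xlab="x", ylab="y")
for (k in 1:trials) {
  x <- runif(50)
  y <- 2.5*x + 1.3 + 0.5*rnorm(50)
  theta <- lsfit(x, y)$coefficients
  yhat_all[k, ] <- theta[1] + theta[2]*t
  lines(t, yhat_all[k, ], col="gray")           # one predictor per training set
}
lines(t, colMeans(yhat_all), col="red", lwd=4)  # the average predictor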

#############
# Chapter 7.4 Regularization

## Ridge regression

# R code to demonstrate a ridge regression example (TODO)
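
# A minimal sketch in the meantime, using the closed form
# beta = (X^T X + lambda*I)^{-1} X^T y and reusing the Legendre design matrix
# idea from Section 7.1; lambda = 0.1 is an arbitrary placeholder value.

lambda <- 0.1
x <- linspace(-1, 1, 50)
X <- cbind(legendre(0, x)[1,], legendre(1, x)[1,], legendre(2, x)[1,],
           legendre(3, x)[1,], legendre(4, x)[1,])
y <- as.vector(X %*% c(-0.001, 0.01, 0.55, 1.5, 1.2)) + 0.2*rnorm(50)
beta_ridge <- solve(t(X) %*% X + lambda*diag(ncol(X)), t(X) %*% y)
plot(x, y, pch=19, col="blue")
lines(x, X %*% beta_ridge, lwd=2, col="orange")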

## LASSO regression

# R code to demonstrate LASSO regression (TODO)
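
# A minimal sketch in the meantime, using the glmnet package (an assumption;
# the final code may use a different solver): alpha = 1 selects the LASSO
# penalty, lambda = 0.1 is a placeholder, and X, y are reused from the ridge
# sketch above.

library(glmnet)
fit <- glmnet(X, y, alpha=1, lambda=0.1)
print(coef(fit))   # intercept followed by the 5 basis coefficients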

## LASSO vs Ridge

# R code to demonstrate overfitting and LASSO (TODO)
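
# A minimal sketch in the meantime, comparing the two penalties on an
# overcomplete basis and assuming glmnet as above: the true signal uses only
# 5 Legendre terms but we fit 20, so ridge spreads energy over all
# coefficients while LASSO keeps the estimate sparse. Names and the lambda
# value are placeholders.

P <- 20
x <- linspace(-1, 1, 50)
Xbig <- sapply(0:(P-1), function(p) legendre(p, x)[1,])   # 50 x 20 basis matrix
y <- as.vector(Xbig[, 1:5] %*% c(-0.001, 0.01, 0.55, 1.5, 1.2)) + 0.2*rnorm(50)
beta_ridge <- as.matrix(coef(glmnet(Xbig, y, alpha=0, lambda=0.1)))[-1, 1]
beta_lasso <- as.matrix(coef(glmnet(Xbig, y, alpha=1, lambda=0.1)))[-1, 1]
plot(0:(P-1), beta_ridge, type="h", col="blue",
     xlab="coefficient index", ylab="value")
points(0:(P-1), beta_lasso, col="red", pch=19)
legend("topright", c("Ridge", "LASSO"), fill=c("blue", "red"))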
