diff --git a/ch07.R b/ch07.R
new file mode 100644
index 0000000..ea0891d
--- /dev/null
+++ b/ch07.R
@@ -0,0 +1,158 @@
#############
# Chapter 7.1 Principles of Regression

## Linear regression: Straight Line

# R code to fit data points using a straight line

library(pracma)

N <- 50
x <- runif(N)

a <- 2.5                              # true slope
b <- 1.3                              # true intercept
y <- a*x + b + 0.2*rnorm(N)           # synthesize training data

X <- cbind(x, rep(1, N))              # design matrix [x, 1]
theta <- lsfit(X, y, intercept = FALSE)$coefficients
t <- linspace(0, 1, 200)
yhat <- theta[1]*t + theta[2]
plot(x, y, pch = 19)
lines(t, yhat, col = "red", lwd = 4)
legend("bottomright", c("Best Fit", "Data"), fill = c("red", "black"))

## Linear regression: Polynomial

# R code to fit data using a quadratic equation

N <- 50
x <- runif(N)
a <- -2.5
b <- 1.3
c <- 1.2
y <- a*x^2 + b*x + c + 0.2*rnorm(N)
X <- cbind(rep(1, N), x, x^2)         # design matrix [1, x, x^2]
theta <- lsfit(X, y, intercept = FALSE)$coefficients
print(theta)
t <- linspace(0, 1, 200)
yhat <- theta[1] + theta[2]*t + theta[3]*t^2
plot(x, y, pch = 19)
lines(t, yhat, col = "red", lwd = 4)

## Legendre Polynomial

# R code to fit data using Legendre polynomials

library(pracma)

# legendre(n, x) returns an (n+1) x length(x) matrix; row 1 holds the Legendre
# polynomial P_n(x).
N <- 50
x <- linspace(-1, 1, N)
a <- c(-0.001, 0.01, 0.55, 1.5, 1.2)
y <- a[1]*legendre(0, x)[1,] + a[2]*legendre(1, x)[1,] +
     a[3]*legendre(2, x)[1,] + a[4]*legendre(3, x)[1,] +
     a[5]*legendre(4, x)[1,] + 0.2*rnorm(N)

X <- cbind(legendre(0, x)[1,], legendre(1, x)[1,],
           legendre(2, x)[1,], legendre(3, x)[1,],
           legendre(4, x)[1,])

beta <- mldivide(X, y)

t <- linspace(-1, 1, 50)
yhat <- beta[1]*legendre(0, t)[1,] + beta[2]*legendre(1, t)[1,] +
        beta[3]*legendre(2, t)[1,] + beta[4]*legendre(3, t)[1,] +
        beta[5]*legendre(4, t)[1,]

plot(x, y, pch = 19, col = "blue")
lines(t, yhat, lwd = 2, col = "orange")

## Auto-regressive model

# R code for auto-regressive model

library(pracma)

N <- 500
y <- cumsum(0.2*rnorm(N)) + 0.05*rnorm(N)   # random-walk signal plus noise

L <- 100                            # order of the AR model
c <- c(0, y[1:399])                 # lagged samples: first column of the data matrix
r <- rep(0, L)                      # first row
X <- Toeplitz(c, r)                 # 400 x L matrix of past samples
beta <- mldivide(X, y[1:400])       # least-squares AR coefficients
yhat <- X %*% beta
plot(y[1:400])
lines(yhat[1:400], col = "red")

## Robust regression by linear programming

# R code to demonstrate robust regression

library(pracma)

N <- 50
x <- linspace(-1, 1, N)
a <- c(-0.001, 0.01, 0.55, 1.5, 1.2)
y <- a[1]*legendre(0, x)[1,] + a[2]*legendre(1, x)[1,] +
     a[3]*legendre(2, x)[1,] + a[4]*legendre(3, x)[1,] +
     a[5]*legendre(4, x)[1,] + 0.2*rnorm(N)

idx <- c(10, 16, 23, 37, 45)
y[idx] <- 5                         # inject outliers

# L1 (least absolute deviation) fit written as a linear program:
#   minimize sum(u)  subject to  X*beta - u <= y  and  -X*beta - u <= -y
X <- cbind(rep(1, N), x, x^2, x^3, x^4)
A <- rbind(cbind(X, -diag(N)), cbind(-X, -diag(N)))
b <- c(y, -y)
c <- c(rep(0, 5), rep(1, N))
res <- linprog(c, A, b, maxiter = 1000000)
beta <- res$x[1:5]
t <- linspace(-1, 1, 200)

yhat <- beta[1] + beta[2]*t + beta[3]*t^2 + beta[4]*t^3 + beta[5]*t^4
plot(x, y, pch = 19, col = "blue")
lines(t, yhat, lwd = 2, col = "orange")

#############
# Chapter 7.2 Overfitting

## Overfitting example

# R: An overfitting example

N <- 20
x <- sort(2*runif(N) - 1)           # N samples on [-1, 1]
a <- c(-0.001, 0.01, 0.55, 1.5, 1.2)
y <- a[1]*legendre(0, x)[1,] + a[2]*legendre(1, x)[1,] +
     a[3]*legendre(2, x)[1,] + a[4]*legendre(3, x)[1,] +
     a[5]*legendre(4, x)[1,] + 0.1*rnorm(N)

P <- 20                                            # model order close to N => overfitting
X <- sapply(0:P, function(p) legendre(p, x)[1,])   # N x (P+1) Legendre design matrix (assumed)
beta <- mldivide(X, y)

t <- linspace(-1, 1, 200)
Xt <- sapply(0:P, function(p) legendre(p, t)[1,])
plot(x, y, pch = 19, col = "blue")
lines(t, Xt %*% beta, lwd = 2, col = "orange")

## Learning curve

# R (TODO; a hedged sketch is appended at the end of this file)

#############
# Chapter 7.3 Bias and Variance

## Mean estimator

# R code to visualize the average predictor (TODO; a hedged sketch is appended
# at the end of this file)

#############
# Chapter 7.4 Regularization

## Ridge regression

# R code to demonstrate a ridge regression example (TODO; a hedged sketch is
# appended at the end of this file)

## LASSO regression

# R (TODO; a hedged sketch is appended at the end of this file)

## LASSO vs Ridge

# R code to demonstrate overfitting and LASSO (TODO; a hedged sketch is
# appended at the end of this file)
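
#############
# Appendix: hedged sketches for the TODO sections above. These are not the
# book's reference implementations; the data models, lambda values, and the
# glmnet package used below are assumptions made only to illustrate each idea.

## Learning curve (sketch for Chapter 7.2)

# A minimal sketch, assuming the learning curve plots the average training and
# testing MSE of a straight-line fit against the number of training samples.

Ntest  <- 1000
trials <- 100
sizes  <- seq(5, 100, by = 5)
Etrain <- rep(0, length(sizes))
Etest  <- rep(0, length(sizes))

for (j in seq_along(sizes)) {
  n <- sizes[j]
  for (k in 1:trials) {
    xtr <- runif(n);     ytr <- 2.5*xtr + 1.3 + 0.2*rnorm(n)       # training set
    xte <- runif(Ntest); yte <- 2.5*xte + 1.3 + 0.2*rnorm(Ntest)   # testing set
    th  <- lsfit(xtr, ytr)$coefficients                            # (intercept, slope)
    Etrain[j] <- Etrain[j] + mean((th[1] + th[2]*xtr - ytr)^2)/trials
    Etest[j]  <- Etest[j]  + mean((th[1] + th[2]*xte - yte)^2)/trials
  }
}

plot(sizes, Etest, type = "l", col = "red", ylim = range(c(Etrain, Etest)),
     xlab = "number of training samples", ylab = "MSE")
lines(sizes, Etrain, col = "blue")
legend("topright", c("Testing MSE", "Training MSE"), fill = c("red", "blue"))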
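
## Mean estimator (sketch for Chapter 7.3)

# A minimal sketch, assuming the figure overlays many straight-line fits from
# independent training sets (gray) together with their average (red), i.e. the
# average predictor. The linear model and noise level reuse Chapter 7.1's example.

library(pracma)

M <- 100                               # number of repeated experiments (assumed)
N <- 20
t <- linspace(0, 1, 200)
Yhat <- matrix(0, M, length(t))

plot(NULL, xlim = c(0, 1), ylim = c(0, 5), xlab = "x", ylab = "y")
for (m in 1:M) {
  x <- runif(N)
  y <- 2.5*x + 1.3 + 0.5*rnorm(N)
  th <- lsfit(x, y)$coefficients       # (intercept, slope)
  Yhat[m, ] <- th[1] + th[2]*t
  lines(t, Yhat[m, ], col = "gray")
}
lines(t, colMeans(Yhat), col = "red", lwd = 3)   # average predictor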
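
## Ridge regression (sketch for Chapter 7.4)

# A minimal sketch of ridge regression in closed form,
#   beta = (X'X + lambda*I)^(-1) X'y,
# reusing the fourth-order Legendre data model from Chapter 7.1. The model
# order P and the value of lambda are assumptions.

library(pracma)

N <- 50
x <- linspace(-1, 1, N)
a <- c(-0.001, 0.01, 0.55, 1.5, 1.2)
y <- a[1]*legendre(0, x)[1,] + a[2]*legendre(1, x)[1,] +
     a[3]*legendre(2, x)[1,] + a[4]*legendre(3, x)[1,] +
     a[5]*legendre(4, x)[1,] + 0.2*rnorm(N)

P <- 10                                              # deliberately rich model (assumed)
X <- sapply(0:P, function(p) legendre(p, x)[1,])
lambda <- 0.5                                        # regularization strength (assumed)
beta_ridge <- solve(t(X) %*% X + lambda*diag(P + 1), t(X) %*% y)

t <- linspace(-1, 1, 200)
Xt <- sapply(0:P, function(p) legendre(p, t)[1,])
plot(x, y, pch = 19, col = "blue")
lines(t, Xt %*% beta_ridge, lwd = 2, col = "orange")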
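
## LASSO regression (sketch for Chapter 7.4)

# A minimal sketch using the glmnet package (an assumption; glmnet is not used
# elsewhere in this file). alpha = 1 selects the LASSO penalty; the lambda
# value is an assumption. glmnet adds its own intercept, so the constant
# column is omitted from the design matrix.

library(pracma)
library(glmnet)

N <- 50
x <- linspace(-1, 1, N)
a <- c(-0.001, 0.01, 0.55, 1.5, 1.2)
y <- a[1]*legendre(0, x)[1,] + a[2]*legendre(1, x)[1,] +
     a[3]*legendre(2, x)[1,] + a[4]*legendre(3, x)[1,] +
     a[5]*legendre(4, x)[1,] + 0.2*rnorm(N)

P <- 10                                              # model order (assumed)
X <- sapply(1:P, function(p) legendre(p, x)[1,])
fit_lasso <- glmnet(X, y, alpha = 1, lambda = 0.05)
print(coef(fit_lasso))                               # many coefficients are exactly zero

t <- linspace(-1, 1, 200)
Xt <- sapply(1:P, function(p) legendre(p, t)[1,])
plot(x, y, pch = 19, col = "blue")
lines(t, predict(fit_lasso, newx = Xt), lwd = 2, col = "orange")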
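
## LASSO vs Ridge (sketch for Chapter 7.4)

# A minimal sketch contrasting the two penalties on the design matrix X and
# data y from the LASSO sketch above: ridge (alpha = 0) shrinks all
# coefficients, while LASSO (alpha = 1) drives many of them exactly to zero.
# The shared lambda value is an assumption.

lam <- 0.1
fit_r <- glmnet(X, y, alpha = 0, lambda = lam)       # ridge penalty
fit_l <- glmnet(X, y, alpha = 1, lambda = lam)       # LASSO penalty
beta_r <- as.matrix(coef(fit_r))
beta_l <- as.matrix(coef(fit_l))
print(cbind(beta_r, beta_l))                         # column 1: ridge, column 2: LASSO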