diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d772925 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +*~ +*.swp +*.swo diff --git a/cameraman.tif b/cameraman.tif new file mode 100644 index 0000000..2fd7bbd Binary files /dev/null and b/cameraman.tif differ diff --git a/ch06.r b/ch06.r index cc44527..d7a13ec 100644 --- a/ch06.r +++ b/ch06.r @@ -1,3 +1,24 @@ +############# +# Chapter 6.2 (Probability Inequality) + +## Compare Chebyshev's and Chernoff's bounds + +# R code to compare the probability bounds + +library(pracma) + +epsilon <- 0.1 +sigma <- 1; +N <- logspace(1,3.9,50) +p_exact <- 1-pnorm(N**(1/2)*epsilon/sigma, 0, 1) +p_cheby <- sigma**2. / (epsilon**2*N) +p_chern <- exp(-epsilon**2*N/(2*sigma**2)) + +plot(log(N), log(p_exact), pch=1, col="orange", lwd=2, xlab="log(N)", ylab="log(Probability)") +points(log(N), log(p_cheby), pch=15, col="green", lwd=2) +lines(log(N), log(p_chern), pch=19, col="blue", lwd=2) +legend("bottomleft", c("Exact","Chebyshev","Chernoff"), fill=c("orange", "green", "blue")) + ############# # Chapter 6.3 (Law of Large Numbers) @@ -19,3 +40,130 @@ Nset_grid <- repmat(Nset, m=1, n=1000) semilogx(Nset_grid, x, col='black', pch=19) points(Nset, p + 3*(((p*(1-p))/Nset)^(1/2)), col='red', pch=19, lwd=1) points(Nset, p - 3*(((p*(1-p))/Nset)^(1/2)), col='red', pch=19, lwd=1) + +############# +# Chapter 6.4 (Central Limit Theorem) + +## PDF of the sum of two Gaussians + +# Plot the PDF of the sum of two Gaussians + +library(pracma) + +n <- 10000 +K <- 2 +Z <- rep(0, n) + +for (i in 1:K) { + X <- runif(n, min=1, max=6) + Z <- Z + X +} +hist(Z,breaks=(K-0.5):(6*K+0.5),freq=FALSE) + +# Visualize convergence in distribution + +library(pracma) + +N <- 10 +N <- 50 +x <- linspace(0, N, 1001) +p <- 0.5 +p_b <- dbinom(x, N, p) +p_n <- dnorm(x, N*p, (N*p*(1-p))**(1/2)) + +c_b <- pbinom(x, N, p) +c_n <- pnorm(x, N*p, (N*p*(1-p))**(1/2)) + +plot(x, p_n, lwd=1, col='red') +lines(x, p_b, lwd=2, col='black') +legend("topright", c('Binomial', 'Gaussian'), fill=c('black', 'red')) + +# Poisson to Gaussian: convergence in distribution + +library(pracma) + +N <- 4 +# N = 10 +# N = 50 + +x <- linspace(0,2*N,1001) +lambda <- 1 +p_b <- dpois(x, N*lambda) +p_n <- dnorm(x, N*lambda, sqrt(N*lambda)) + +c_b <- ppois(x, N*lambda); +c_n = pnorm(x, N*lambda, sqrt(N*lambda)); + +plot(x, p_n, col="red") +lines(x, p_b, col="black") +legend("topright", c('Poisson', 'Gaussian'), fill=c('black', 'red')) + +plot(x, c_n, col="red") +lines(x, c_b, col="black") +legend("topright", c('Poisson', 'Gaussian'), fill=c('black', 'red')) + +# Visualize the Central Limit Theorem + +library(pracma) + +N <- 10 +x <- linspace(0,N,1001) +p <- 0.5 +p_b <- dbinom(x, N, p); +p_n <- dnorm(x, N*p, sqrt(N*p*(1-p))); + +c_b <- pbinom(x, N, p); +c_n <- pnorm(x, N*p, sqrt(N*p*(1-p))); + +x2 <- linspace(5-2.5,5+2.5,1001); +q2 <- dnorm(x2,N*p, sqrt(N*p*(1-p))); + +plot(x, p_n, col="red") +points(x, p_b, col="black", pch=19) + +polygon(c(min(x2), x2, max(x2)), c(0, q2, 0), col='lightblue') + +# How moment generating of Gaussian approximates in CLT + +library(pracma) + +p <- 0.5 +s <- linspace(-10,10,1001) +MX <- 1-p+p*exp(s) +N <- 2 +semilogy(s, (1-p+p*exp(s/N))**N, lwd=4, col="lightblue", xlim=c(-10,10), ylim=c(10**-2, 10**5)) +mu <- p +sigma <- sqrt(p*(1-p)/N); +MZ <- exp(mu*s + sigma^2*s**2/2); +lines(s, MZ, lwd=4); +legend("topleft", c('Binomial MGF', 'Gaussian MGF'), fill=c('lightblue', 'black')) + + +# Failure of Central Limit Theorem at tails + +library(pracma) + +x <- linspace(-1,5,1001) +lambda <- 1 + +N <- 1 +f1 <- (N**(1/2)/lambda)*dgamma((x+sqrt(N))/(lambda/sqrt(N)), N, lambda) +semilogy(x, f1, lwd=1, col='lightgray', xlim=c(-1,5), ylim=c(10**-6, 1)) + +N <- 10 +f1 <- (N**(1/2)/lambda)*dgamma((x+sqrt(N))/(lambda/sqrt(N)), N, lambda) +lines(x, f1, lwd=2, col='gray') + +N <- 100 +f1 <- (N**(1/2)/lambda)*dgamma((x+sqrt(N))/(lambda/sqrt(N)), N, lambda) +lines(x, f1, lwd=2, col='darkgray') + +N <- 1000 +f1 <- (N**(1/2)/lambda)*dgamma((x+sqrt(N))/(lambda/sqrt(N)), N, lambda) +lines(x, f1, lwd=2, col='black') + +g <- dnorm(x,0,1) +lines(x, g, lwd=2, pch=1, col='red') + +legend("bottomleft", c('N=1', 'N=10', 'N=100', 'N=1000', 'Gaussian'), fill=c('lightgray', 'gray', 'darkgray', 'black', 'red')) + diff --git a/ch08.r b/ch08.r new file mode 100644 index 0000000..7866ea7 --- /dev/null +++ b/ch08.r @@ -0,0 +1,172 @@ +############# +# Chapter 8.1 Maximum-likelihood Estimation + +## Visualizing the likelihood function + +# R: Visualize the likelood function +library(pracma) +library(plot3D) + +N = 50 +S = 1:N +theta = seq(0.1, 0.9, (0.9+0.1)/100) +mesh = meshgrid(S, theta) +S_grid = mesh$X +theta_grid = mesh$Y +L = S_grid * log(theta_grid) + (N-S_grid) * log(1-theta_grid) +L = t(L) +persp3D(S, theta, L, theta=65, phi=15, border="black", lwd=0.3, bty="b2", xlab="S", ylab="θ", zlab="", ticktype="detailed") + +N = 50 +S = seq(from=1, to=N, by=0.1) +theta = seq(0.1, 0.9, (0.1+0.9)/1000) +mesh = meshgrid(S, theta) +S_grid = mesh$X +theta_grid = mesh$Y +L = S_grid * log(theta_grid) + (N-S_grid) * log(1-theta_grid) +L = t(L) +image(S, theta, L, col=rainbow(256), ylim=c(0.9, 0.1)) + +## Visualizing the likelihood function + +# R code +library(pracma) + +N = 50 +S = 1:N +theta = seq(0.1, 0.9, (0.1+0.9)/100) +mesh = meshgrid(S, theta) +S_grid = mesh$X +theta_grid = mesh$Y +L = S_grid * log(theta_grid) + (N-S_grid) * log(1-theta_grid) +L_df = data.frame(L) +colnames(L_df) = S + +plot(theta, L_df$"12", type="n") +grid() +lines(theta, L_df$"12", lwd=6) +title(expression(paste("L(", theta, " | S = 12)"))) + +## ML estimation for single-photon imaging + +# R code +library(imager) +lambda = as.data.frame(load.image("cameraman.tif")) +lambda = xtabs(value ~ x+y, data=lambda) +T = 100 +x = c() +for (i in 1:T) { + x = append(x, rpois(length(lambda), lambda)) +} +x = array(x, c(256, 256, 100)) +y = (x>=1) +mu = apply(y, c(1,2), mean) +lambdahat = -log(1-mu) +fig1 = x[,,1] + +# Flip matrix since `image()` reads the matrix bottom up +flip_matrix = function(m) m[,nrow(m):1] + +image(flip_matrix(fig1), col=gray.colors(255)) + +image(flip_matrix(lambdahat), col=gray.colors(255)) + +############# +# Chapter 8.2 Properties of the ML estimation + +## Visualizing the invariance principle + +# R code +N = 50 +S = 20 +theta = seq(0.1, 0.9, (0.1+0.9)/1000) +L = S * log(theta) + (N-S) * log(1-theta) +plot(theta, L, type="n", xlab=expression(theta), ylab=expression(paste("Log L(", theta, "|S = 20)"))) +title("Bernoulli") +grid() +lines(theta, L, lwd=6, col="#8080BF") + +h_theta = -log(1-theta) +plot(theta, h_theta, type="n", xlab=expression(theta), ylab=expression(paste(eta, " = h(", theta, ")"))) +grid() +lines(theta, h_theta, lwd=6) + +theta = seq(0.1, 2.5, (0.1+2.5)/1000) +L = S * log(1-exp(-theta)) - theta * (N-S) +plot(theta, L, type="n") +title("Truncated Poisson") +grid() +lines(theta, L, lwd=6, col="#0000BF") + +############# +# Chapter 8.3 Maximum-a-Posteriori Estimation + +## Influence of the priors + +# R code +N = 5 +mu0 = 0.0 +sigma0 = 1 +theta = 5 +x = rnorm(N, 5, 1) +xbar = mean(x) +t = seq(-3, 7, (10)/1000) + +q = numeric(1000) +for (i in 1:N) { + x = abs(t-x[i]) + a = min(x) + q[match(a, x)] = 0.1 +} + +thetahat = (sigma0^2. * xbar + mu0/N)/(sigma0^2. + 1. /N) +sigmahat = sqrt(1. / (1. / sigma0^2 + N)) +p0 = dnorm(t, xbar, 1.) +p1 = dnorm(t, thetahat, sigmahat) +prior = dnorm(t, mu0, sigma0)/10 +plot(t, p1, type="n") +title("N = 5") +legend(-2, 0.9, legend=c("Likelihood", "Posterior", "Prior", "Data"), col=c("blue", "orange", "green", "purple"), lty=1:1) +grid() +lines(t, p0, lwd = 4, col="blue") +lines(t, p1, lwd = 4, col="orange") +lines(t, prior, lwd = 4, col="green") +lines(t, q, lwd = 4, col="purple") + +## Conjugate priors + +# R code +sigma0 = 0.25 +mu0 = 0.0 + +mu = 1 +sigma = 0.25 + +Nset = c(0, 1, 2, 5, 8, 12, 20) +x0 = sigma * rnorm(100) +posterior = list() + +for (i in 1:7) { + N = Nset[i] + x = x0[1:N] + t = seq(-1, 1.5, 2.5/1000) + + p0 = dnorm(t, 0, 1) + theta = mu*(N*sigma0^2)/(N*sigma0^2+sigma^2) + mu0*(sigma^2)/(N*sigma0^2+sigma^2) + sigmaN = sqrt(1/(1/sigma0^2+N/sigma^2)); + posterior[[i]] = dnorm(t, theta, sigmaN) +} + +plot(t, posterior[[7]], type="n", xlab="", ylab="") +grid() +lines(t, posterior[[1]], lwd=3, col="red") +lines(t, posterior[[2]], lwd=3, col="orange") +lines(t, posterior[[3]], lwd=3, col="yellow") +lines(t, posterior[[4]], lwd=3, col="green") +lines(t, posterior[[5]], lwd=3, col="turquoise") +lines(t, posterior[[6]], lwd=3, col="blue") +lines(t, posterior[[7]], lwd=3, col="purple") + +legend(-0.9, 7, legend=c("N = 0", "N = 1", "N = 2", "N = 5", "N = 8", "N = 12", "N = 20"), col=c("red", "orange", "yellow", "green", "turquoise", "blue", "purple"), lty=1:1, lwd=3) + +############# diff --git a/ch6.R b/ch6.R deleted file mode 100644 index d7a13ec..0000000 --- a/ch6.R +++ /dev/null @@ -1,169 +0,0 @@ -############# -# Chapter 6.2 (Probability Inequality) - -## Compare Chebyshev's and Chernoff's bounds - -# R code to compare the probability bounds - -library(pracma) - -epsilon <- 0.1 -sigma <- 1; -N <- logspace(1,3.9,50) -p_exact <- 1-pnorm(N**(1/2)*epsilon/sigma, 0, 1) -p_cheby <- sigma**2. / (epsilon**2*N) -p_chern <- exp(-epsilon**2*N/(2*sigma**2)) - -plot(log(N), log(p_exact), pch=1, col="orange", lwd=2, xlab="log(N)", ylab="log(Probability)") -points(log(N), log(p_cheby), pch=15, col="green", lwd=2) -lines(log(N), log(p_chern), pch=19, col="blue", lwd=2) -legend("bottomleft", c("Exact","Chebyshev","Chernoff"), fill=c("orange", "green", "blue")) - -############# -# Chapter 6.3 (Law of Large Numbers) - -## Weak law of large numbers - -# R code to illustrate the weak law of large numbers - -library(pracma) - -p <- 0.5 -Nset <- as.integer(round(logspace(2,5,100))) -x <- matrix(rep(0, 1000*length(Nset)), nrow=1000) -for (i in 1:length(Nset)) { - N = Nset[i] - x[,i] <- rbinom(1000, N, p) / N -} -Nset_grid <- repmat(Nset, m=1, n=1000) - -semilogx(Nset_grid, x, col='black', pch=19) -points(Nset, p + 3*(((p*(1-p))/Nset)^(1/2)), col='red', pch=19, lwd=1) -points(Nset, p - 3*(((p*(1-p))/Nset)^(1/2)), col='red', pch=19, lwd=1) - -############# -# Chapter 6.4 (Central Limit Theorem) - -## PDF of the sum of two Gaussians - -# Plot the PDF of the sum of two Gaussians - -library(pracma) - -n <- 10000 -K <- 2 -Z <- rep(0, n) - -for (i in 1:K) { - X <- runif(n, min=1, max=6) - Z <- Z + X -} -hist(Z,breaks=(K-0.5):(6*K+0.5),freq=FALSE) - -# Visualize convergence in distribution - -library(pracma) - -N <- 10 -N <- 50 -x <- linspace(0, N, 1001) -p <- 0.5 -p_b <- dbinom(x, N, p) -p_n <- dnorm(x, N*p, (N*p*(1-p))**(1/2)) - -c_b <- pbinom(x, N, p) -c_n <- pnorm(x, N*p, (N*p*(1-p))**(1/2)) - -plot(x, p_n, lwd=1, col='red') -lines(x, p_b, lwd=2, col='black') -legend("topright", c('Binomial', 'Gaussian'), fill=c('black', 'red')) - -# Poisson to Gaussian: convergence in distribution - -library(pracma) - -N <- 4 -# N = 10 -# N = 50 - -x <- linspace(0,2*N,1001) -lambda <- 1 -p_b <- dpois(x, N*lambda) -p_n <- dnorm(x, N*lambda, sqrt(N*lambda)) - -c_b <- ppois(x, N*lambda); -c_n = pnorm(x, N*lambda, sqrt(N*lambda)); - -plot(x, p_n, col="red") -lines(x, p_b, col="black") -legend("topright", c('Poisson', 'Gaussian'), fill=c('black', 'red')) - -plot(x, c_n, col="red") -lines(x, c_b, col="black") -legend("topright", c('Poisson', 'Gaussian'), fill=c('black', 'red')) - -# Visualize the Central Limit Theorem - -library(pracma) - -N <- 10 -x <- linspace(0,N,1001) -p <- 0.5 -p_b <- dbinom(x, N, p); -p_n <- dnorm(x, N*p, sqrt(N*p*(1-p))); - -c_b <- pbinom(x, N, p); -c_n <- pnorm(x, N*p, sqrt(N*p*(1-p))); - -x2 <- linspace(5-2.5,5+2.5,1001); -q2 <- dnorm(x2,N*p, sqrt(N*p*(1-p))); - -plot(x, p_n, col="red") -points(x, p_b, col="black", pch=19) - -polygon(c(min(x2), x2, max(x2)), c(0, q2, 0), col='lightblue') - -# How moment generating of Gaussian approximates in CLT - -library(pracma) - -p <- 0.5 -s <- linspace(-10,10,1001) -MX <- 1-p+p*exp(s) -N <- 2 -semilogy(s, (1-p+p*exp(s/N))**N, lwd=4, col="lightblue", xlim=c(-10,10), ylim=c(10**-2, 10**5)) -mu <- p -sigma <- sqrt(p*(1-p)/N); -MZ <- exp(mu*s + sigma^2*s**2/2); -lines(s, MZ, lwd=4); -legend("topleft", c('Binomial MGF', 'Gaussian MGF'), fill=c('lightblue', 'black')) - - -# Failure of Central Limit Theorem at tails - -library(pracma) - -x <- linspace(-1,5,1001) -lambda <- 1 - -N <- 1 -f1 <- (N**(1/2)/lambda)*dgamma((x+sqrt(N))/(lambda/sqrt(N)), N, lambda) -semilogy(x, f1, lwd=1, col='lightgray', xlim=c(-1,5), ylim=c(10**-6, 1)) - -N <- 10 -f1 <- (N**(1/2)/lambda)*dgamma((x+sqrt(N))/(lambda/sqrt(N)), N, lambda) -lines(x, f1, lwd=2, col='gray') - -N <- 100 -f1 <- (N**(1/2)/lambda)*dgamma((x+sqrt(N))/(lambda/sqrt(N)), N, lambda) -lines(x, f1, lwd=2, col='darkgray') - -N <- 1000 -f1 <- (N**(1/2)/lambda)*dgamma((x+sqrt(N))/(lambda/sqrt(N)), N, lambda) -lines(x, f1, lwd=2, col='black') - -g <- dnorm(x,0,1) -lines(x, g, lwd=2, pch=1, col='red') - -legend("bottomleft", c('N=1', 'N=10', 'N=100', 'N=1000', 'Gaussian'), fill=c('lightgray', 'gray', 'darkgray', 'black', 'red')) -