diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..d772925
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+*~
+*.swp
+*.swo
diff --git a/cameraman.tif b/cameraman.tif
new file mode 100644
index 0000000..2fd7bbd
Binary files /dev/null and b/cameraman.tif differ
diff --git a/ch06.r b/ch06.r
index cc44527..d7a13ec 100644
--- a/ch06.r
+++ b/ch06.r
@@ -1,3 +1,24 @@
+#############
+# Chapter 6.2 (Probability Inequality)
+
+## Compare Chebyshev's and Chernoff's bounds
+
+# R code to compare the probability bounds
+
+library(pracma)
+
+epsilon <- 0.1
+sigma <- 1;
+N <- logspace(1,3.9,50)
+p_exact <- 1-pnorm(N**(1/2)*epsilon/sigma, 0, 1)
+p_cheby <- sigma**2. / (epsilon**2*N)
+p_chern <- exp(-epsilon**2*N/(2*sigma**2))
+
+plot(log(N), log(p_exact), pch=1, col="orange", lwd=2, xlab="log(N)", ylab="log(Probability)")
+points(log(N), log(p_cheby), pch=15, col="green", lwd=2)
+lines(log(N), log(p_chern), pch=19, col="blue", lwd=2)
+legend("bottomleft", c("Exact","Chebyshev","Chernoff"), fill=c("orange", "green", "blue"))
+
 #############
 # Chapter 6.3 (Law of Large Numbers)
 
@@ -19,3 +40,130 @@ Nset_grid <- repmat(Nset, m=1, n=1000)
 semilogx(Nset_grid, x, col='black', pch=19)
 points(Nset, p + 3*(((p*(1-p))/Nset)^(1/2)), col='red', pch=19, lwd=1)
 points(Nset, p - 3*(((p*(1-p))/Nset)^(1/2)), col='red', pch=19, lwd=1)
+
+#############
+# Chapter 6.4 (Central Limit Theorem)
+
+## PDF of the sum of two Gaussians
+
+# Plot the PDF of the sum of two Gaussians
+
+library(pracma)
+
+n <- 10000
+K <- 2
+Z <- rep(0, n)
+
+for (i in 1:K) {
+  X <- runif(n, min=1, max=6)
+  Z <- Z + X
+}
+hist(Z,breaks=(K-0.5):(6*K+0.5),freq=FALSE)
+
+# Visualize convergence in distribution
+
+library(pracma)
+
+N <- 10
+N <- 50
+x <- linspace(0, N, 1001)
+p <- 0.5
+p_b <- dbinom(x, N, p)
+p_n <- dnorm(x, N*p, (N*p*(1-p))**(1/2))
+
+c_b <- pbinom(x, N, p)
+c_n <- pnorm(x, N*p, (N*p*(1-p))**(1/2))
+
+plot(x, p_n, lwd=1, col='red')
+lines(x, p_b, lwd=2, col='black')
+legend("topright", c('Binomial', 'Gaussian'), fill=c('black', 'red'))
+
+# Poisson to Gaussian: convergence in distribution
+
+library(pracma)
+
+N <- 4
+# N = 10
+# N = 50
+
+x <- linspace(0,2*N,1001)
+lambda <- 1
+p_b <- dpois(x, N*lambda)
+p_n <- dnorm(x, N*lambda, sqrt(N*lambda))
+
+c_b <- ppois(x, N*lambda);
+c_n = pnorm(x, N*lambda, sqrt(N*lambda));
+
+plot(x, p_n, col="red")
+lines(x, p_b, col="black")
+legend("topright", c('Poisson', 'Gaussian'), fill=c('black', 'red'))
+
+plot(x, c_n, col="red")
+lines(x, c_b, col="black")
+legend("topright", c('Poisson', 'Gaussian'), fill=c('black', 'red'))
+
+# Visualize the Central Limit Theorem
+
+library(pracma)
+
+N <- 10
+x <- linspace(0,N,1001)
+p <- 0.5
+p_b <- dbinom(x, N, p);
+p_n <- dnorm(x, N*p, sqrt(N*p*(1-p)));
+
+c_b <- pbinom(x, N, p);
+c_n <- pnorm(x, N*p, sqrt(N*p*(1-p)));
+
+x2 <- linspace(5-2.5,5+2.5,1001);
+q2 <- dnorm(x2,N*p, sqrt(N*p*(1-p)));
+
+plot(x, p_n, col="red")
+points(x, p_b, col="black", pch=19)
+
+polygon(c(min(x2), x2, max(x2)), c(0, q2, 0), col='lightblue')
+
+# How moment generating of Gaussian approximates in CLT
+
+library(pracma)
+
+p <- 0.5
+s <- linspace(-10,10,1001)
+MX <- 1-p+p*exp(s)
+N <- 2
+semilogy(s, (1-p+p*exp(s/N))**N, lwd=4, col="lightblue", xlim=c(-10,10), ylim=c(10**-2, 10**5))
+mu <- p
+sigma <- sqrt(p*(1-p)/N);
+MZ <- exp(mu*s + sigma^2*s**2/2);
+lines(s, MZ, lwd=4);
+legend("topleft", c('Binomial MGF', 'Gaussian MGF'), fill=c('lightblue', 'black'))
+
+
+# Failure of Central Limit Theorem at tails
+
+library(pracma)
+
+x <- linspace(-1,5,1001)
+lambda <- 1
+
+N <- 1
+f1 <- (N**(1/2)/lambda)*dgamma((x+sqrt(N))/(lambda/sqrt(N)), N, lambda)
+semilogy(x, f1, lwd=1, col='lightgray', xlim=c(-1,5), ylim=c(10**-6, 1))
+
+N <- 10
+f1 <- (N**(1/2)/lambda)*dgamma((x+sqrt(N))/(lambda/sqrt(N)), N, lambda)
+lines(x, f1, lwd=2, col='gray')
+
+N <- 100
+f1 <- (N**(1/2)/lambda)*dgamma((x+sqrt(N))/(lambda/sqrt(N)), N, lambda)
+lines(x, f1, lwd=2, col='darkgray')
+
+N <- 1000
+f1 <- (N**(1/2)/lambda)*dgamma((x+sqrt(N))/(lambda/sqrt(N)), N, lambda)
+lines(x, f1, lwd=2, col='black')
+
+g <- dnorm(x,0,1)
+lines(x, g, lwd=2, pch=1, col='red')
+
+legend("bottomleft", c('N=1', 'N=10', 'N=100', 'N=1000', 'Gaussian'), fill=c('lightgray', 'gray', 'darkgray', 'black', 'red'))
+
diff --git a/ch08.r b/ch08.r
new file mode 100644
index 0000000..7866ea7
--- /dev/null
+++ b/ch08.r
@@ -0,0 +1,172 @@
+#############
+# Chapter 8.1 Maximum-likelihood Estimation
+
+## Visualizing the likelihood function
+
+# R: Visualize the likelood function
+library(pracma)
+library(plot3D)
+
+N = 50
+S = 1:N
+theta = seq(0.1, 0.9, (0.9+0.1)/100)
+mesh = meshgrid(S, theta)
+S_grid = mesh$X
+theta_grid = mesh$Y
+L = S_grid * log(theta_grid) + (N-S_grid) * log(1-theta_grid)
+L = t(L)
+persp3D(S, theta, L, theta=65, phi=15, border="black", lwd=0.3, bty="b2", xlab="S", ylab="θ", zlab="", ticktype="detailed")
+
+N = 50
+S = seq(from=1, to=N, by=0.1)
+theta = seq(0.1, 0.9, (0.1+0.9)/1000)
+mesh = meshgrid(S, theta)
+S_grid = mesh$X
+theta_grid = mesh$Y
+L = S_grid * log(theta_grid) + (N-S_grid) * log(1-theta_grid)
+L = t(L)
+image(S, theta, L, col=rainbow(256), ylim=c(0.9, 0.1))
+
+## Visualizing the likelihood function
+
+# R code
+library(pracma)
+
+N = 50
+S = 1:N
+theta = seq(0.1, 0.9, (0.1+0.9)/100)
+mesh = meshgrid(S, theta)
+S_grid = mesh$X
+theta_grid = mesh$Y
+L = S_grid * log(theta_grid) + (N-S_grid) * log(1-theta_grid)
+L_df = data.frame(L)
+colnames(L_df) = S
+
+plot(theta, L_df$"12", type="n")
+grid()
+lines(theta, L_df$"12", lwd=6)
+title(expression(paste("L(", theta, " | S = 12)")))
+
+## ML estimation for single-photon imaging
+
+# R code
+library(imager)
+lambda = as.data.frame(load.image("cameraman.tif"))
+lambda = xtabs(value ~ x+y, data=lambda)
+T = 100
+x = c()
+for (i in 1:T) { 
+    x = append(x, rpois(length(lambda), lambda))
+}
+x = array(x, c(256, 256, 100))
+y = (x>=1)
+mu = apply(y, c(1,2), mean)
+lambdahat = -log(1-mu)
+fig1 = x[,,1]
+
+# Flip matrix since `image()` reads the matrix bottom up
+flip_matrix = function(m) m[,nrow(m):1]
+
+image(flip_matrix(fig1), col=gray.colors(255))
+
+image(flip_matrix(lambdahat), col=gray.colors(255))
+
+#############
+# Chapter 8.2 Properties of the ML estimation
+
+## Visualizing the invariance principle
+
+# R code
+N = 50
+S = 20
+theta = seq(0.1, 0.9, (0.1+0.9)/1000)
+L = S * log(theta) + (N-S) * log(1-theta)
+plot(theta, L, type="n", xlab=expression(theta), ylab=expression(paste("Log L(", theta, "|S = 20)")))
+title("Bernoulli")
+grid()
+lines(theta, L, lwd=6, col="#8080BF")
+
+h_theta = -log(1-theta)
+plot(theta, h_theta, type="n", xlab=expression(theta), ylab=expression(paste(eta, " = h(", theta, ")")))
+grid()
+lines(theta, h_theta, lwd=6)
+
+theta = seq(0.1, 2.5, (0.1+2.5)/1000)
+L = S * log(1-exp(-theta)) - theta * (N-S)
+plot(theta, L, type="n")
+title("Truncated Poisson")
+grid()
+lines(theta, L, lwd=6, col="#0000BF")
+
+#############
+# Chapter 8.3 Maximum-a-Posteriori Estimation
+
+## Influence of the priors
+
+# R code
+N = 5
+mu0 = 0.0
+sigma0 = 1
+theta = 5
+x = rnorm(N, 5, 1)
+xbar = mean(x)
+t = seq(-3, 7, (10)/1000)
+
+q = numeric(1000)
+for (i in 1:N) {
+    x = abs(t-x[i])
+    a = min(x)
+    q[match(a, x)] = 0.1 
+}
+
+thetahat = (sigma0^2. * xbar + mu0/N)/(sigma0^2. + 1. /N)
+sigmahat = sqrt(1. / (1. / sigma0^2 + N))
+p0 = dnorm(t, xbar, 1.)
+p1 = dnorm(t, thetahat, sigmahat)
+prior = dnorm(t, mu0, sigma0)/10
+plot(t, p1, type="n")
+title("N = 5")
+legend(-2, 0.9, legend=c("Likelihood", "Posterior", "Prior", "Data"), col=c("blue", "orange", "green", "purple"), lty=1:1)
+grid()
+lines(t, p0, lwd = 4, col="blue")
+lines(t, p1, lwd = 4, col="orange")
+lines(t, prior, lwd = 4, col="green")
+lines(t, q, lwd = 4, col="purple")
+
+## Conjugate priors
+
+# R code
+sigma0 = 0.25
+mu0 = 0.0
+
+mu = 1
+sigma = 0.25
+
+Nset = c(0, 1, 2, 5, 8, 12, 20)
+x0 = sigma * rnorm(100)
+posterior = list()
+
+for (i in 1:7) {
+    N = Nset[i]
+    x = x0[1:N]
+    t = seq(-1, 1.5, 2.5/1000)
+
+    p0 = dnorm(t, 0, 1)
+    theta = mu*(N*sigma0^2)/(N*sigma0^2+sigma^2) + mu0*(sigma^2)/(N*sigma0^2+sigma^2)
+    sigmaN = sqrt(1/(1/sigma0^2+N/sigma^2));
+    posterior[[i]] = dnorm(t, theta, sigmaN)
+}
+
+plot(t, posterior[[7]], type="n", xlab="", ylab="")
+grid()
+lines(t, posterior[[1]], lwd=3, col="red")
+lines(t, posterior[[2]], lwd=3, col="orange")
+lines(t, posterior[[3]], lwd=3, col="yellow")
+lines(t, posterior[[4]], lwd=3, col="green")
+lines(t, posterior[[5]], lwd=3, col="turquoise")
+lines(t, posterior[[6]], lwd=3, col="blue")
+lines(t, posterior[[7]], lwd=3, col="purple")
+
+legend(-0.9, 7, legend=c("N = 0", "N = 1", "N = 2", "N = 5", "N = 8", "N = 12", "N = 20"), col=c("red", "orange", "yellow", "green", "turquoise", "blue", "purple"), lty=1:1, lwd=3)
+
+#############
diff --git a/ch6.R b/ch6.R
deleted file mode 100644
index d7a13ec..0000000
--- a/ch6.R
+++ /dev/null
@@ -1,169 +0,0 @@
-#############
-# Chapter 6.2 (Probability Inequality)
-
-## Compare Chebyshev's and Chernoff's bounds
-
-# R code to compare the probability bounds
-
-library(pracma)
-
-epsilon <- 0.1
-sigma <- 1;
-N <- logspace(1,3.9,50)
-p_exact <- 1-pnorm(N**(1/2)*epsilon/sigma, 0, 1)
-p_cheby <- sigma**2. / (epsilon**2*N)
-p_chern <- exp(-epsilon**2*N/(2*sigma**2))
-
-plot(log(N), log(p_exact), pch=1, col="orange", lwd=2, xlab="log(N)", ylab="log(Probability)")
-points(log(N), log(p_cheby), pch=15, col="green", lwd=2)
-lines(log(N), log(p_chern), pch=19, col="blue", lwd=2)
-legend("bottomleft", c("Exact","Chebyshev","Chernoff"), fill=c("orange", "green", "blue"))
-
-#############
-# Chapter 6.3 (Law of Large Numbers)
-
-## Weak law of large numbers
-
-# R code to illustrate the weak law of large numbers
-
-library(pracma)
-
-p <- 0.5
-Nset <- as.integer(round(logspace(2,5,100)))
-x <- matrix(rep(0, 1000*length(Nset)), nrow=1000)
-for (i in 1:length(Nset)) {
-  N = Nset[i]
-  x[,i] <- rbinom(1000, N, p) / N
-}
-Nset_grid <- repmat(Nset, m=1, n=1000)
-
-semilogx(Nset_grid, x, col='black', pch=19)
-points(Nset, p + 3*(((p*(1-p))/Nset)^(1/2)), col='red', pch=19, lwd=1)
-points(Nset, p - 3*(((p*(1-p))/Nset)^(1/2)), col='red', pch=19, lwd=1)
-
-#############
-# Chapter 6.4 (Central Limit Theorem)
-
-## PDF of the sum of two Gaussians
-
-# Plot the PDF of the sum of two Gaussians
-
-library(pracma)
-
-n <- 10000
-K <- 2
-Z <- rep(0, n)
-
-for (i in 1:K) {
-  X <- runif(n, min=1, max=6)
-  Z <- Z + X
-}
-hist(Z,breaks=(K-0.5):(6*K+0.5),freq=FALSE)
-
-# Visualize convergence in distribution
-
-library(pracma)
-
-N <- 10
-N <- 50
-x <- linspace(0, N, 1001)
-p <- 0.5
-p_b <- dbinom(x, N, p)
-p_n <- dnorm(x, N*p, (N*p*(1-p))**(1/2))
-
-c_b <- pbinom(x, N, p)
-c_n <- pnorm(x, N*p, (N*p*(1-p))**(1/2))
-
-plot(x, p_n, lwd=1, col='red')
-lines(x, p_b, lwd=2, col='black')
-legend("topright", c('Binomial', 'Gaussian'), fill=c('black', 'red'))
-
-# Poisson to Gaussian: convergence in distribution
-
-library(pracma)
-
-N <- 4
-# N = 10
-# N = 50
-
-x <- linspace(0,2*N,1001)
-lambda <- 1
-p_b <- dpois(x, N*lambda)
-p_n <- dnorm(x, N*lambda, sqrt(N*lambda))
-
-c_b <- ppois(x, N*lambda);
-c_n = pnorm(x, N*lambda, sqrt(N*lambda));
-
-plot(x, p_n, col="red")
-lines(x, p_b, col="black")
-legend("topright", c('Poisson', 'Gaussian'), fill=c('black', 'red'))
-
-plot(x, c_n, col="red")
-lines(x, c_b, col="black")
-legend("topright", c('Poisson', 'Gaussian'), fill=c('black', 'red'))
-
-# Visualize the Central Limit Theorem
-
-library(pracma)
-
-N <- 10
-x <- linspace(0,N,1001)
-p <- 0.5
-p_b <- dbinom(x, N, p);
-p_n <- dnorm(x, N*p, sqrt(N*p*(1-p)));
-
-c_b <- pbinom(x, N, p);
-c_n <- pnorm(x, N*p, sqrt(N*p*(1-p)));
-
-x2 <- linspace(5-2.5,5+2.5,1001);
-q2 <- dnorm(x2,N*p, sqrt(N*p*(1-p)));
-
-plot(x, p_n, col="red")
-points(x, p_b, col="black", pch=19)
-
-polygon(c(min(x2), x2, max(x2)), c(0, q2, 0), col='lightblue')
-
-# How moment generating of Gaussian approximates in CLT
-
-library(pracma)
-
-p <- 0.5
-s <- linspace(-10,10,1001)
-MX <- 1-p+p*exp(s)
-N <- 2
-semilogy(s, (1-p+p*exp(s/N))**N, lwd=4, col="lightblue", xlim=c(-10,10), ylim=c(10**-2, 10**5))
-mu <- p
-sigma <- sqrt(p*(1-p)/N);
-MZ <- exp(mu*s + sigma^2*s**2/2);
-lines(s, MZ, lwd=4);
-legend("topleft", c('Binomial MGF', 'Gaussian MGF'), fill=c('lightblue', 'black'))
-
-
-# Failure of Central Limit Theorem at tails
-
-library(pracma)
-
-x <- linspace(-1,5,1001)
-lambda <- 1
-
-N <- 1
-f1 <- (N**(1/2)/lambda)*dgamma((x+sqrt(N))/(lambda/sqrt(N)), N, lambda)
-semilogy(x, f1, lwd=1, col='lightgray', xlim=c(-1,5), ylim=c(10**-6, 1))
-
-N <- 10
-f1 <- (N**(1/2)/lambda)*dgamma((x+sqrt(N))/(lambda/sqrt(N)), N, lambda)
-lines(x, f1, lwd=2, col='gray')
-
-N <- 100
-f1 <- (N**(1/2)/lambda)*dgamma((x+sqrt(N))/(lambda/sqrt(N)), N, lambda)
-lines(x, f1, lwd=2, col='darkgray')
-
-N <- 1000
-f1 <- (N**(1/2)/lambda)*dgamma((x+sqrt(N))/(lambda/sqrt(N)), N, lambda)
-lines(x, f1, lwd=2, col='black')
-
-g <- dnorm(x,0,1)
-lines(x, g, lwd=2, pch=1, col='red')
-
-legend("bottomleft", c('N=1', 'N=10', 'N=100', 'N=1000', 'Gaussian'), fill=c('lightgray', 'gray', 'darkgray', 'black', 'red'))
-