GNC_LP_forLinearRegression.py

# -*- coding: utf-8 -*-
"""final_algorithm.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/114tP1lTn5sWFHcbp54RPhwgnOJNkZI5p
"""

import numpy as np
from matplotlib import pyplot as plt
from scipy.optimize import minimize
import scipy.stats as stats
import random
import math

epsilon = stats.chi2.ppf(0.99, 2) * (0.4)**2
print(epsilon)
def weight_update(mu,res,barc2):
  weight=np.ones(100)
  for j in range(100):
    weight[j]=((mu*(barc2))/(res[j]+mu*(barc2)))**2
  return weight


def problem(N,outlier_ratio):
  np.random.seed(0)
  X = np.random.rand(N, 1) * 10
  y = 6+2*X +0.4*np.random.randn(N, 1)
  A=np.column_stack((np.ones(N),X))
  x_gt=[6,2]
  y1=np.dot(A,x_gt)
  e=np.zeros(N)
  for k in range(N):
    e=np.linalg.norm(y1[k]-y[k])
  e_max=np.max(e)
  e_max=e_max**2

  outlier_percentage = 10 # Specify the percentage of outliers
  num_outliers = int(outlier_ratio / 100 * len(X)) # Calculate number of outliers
  outlier_indices = np.random.randint(0, len(X), num_outliers) # Generate random indices for outliers
  y[outlier_indices] = y[outlier_indices] + 20 #  20added to the y values of the outliers

  return X,y,A,outlier_indices

#Calculation of residual error
def residuals(weights,y,A):

  y_n=np.zeros(y.shape)
  A_n=np.zeros(A.shape)
  w=weights
  for l in range(100):
    A_n[l,:]=math.sqrt(w[l])*A[l,:]
    y_n[l]=math.sqrt(w[l])*y[l]
  var=np.dot(np.linalg.inv(np.dot(A_n.T,A_n)),np.dot(A_n.T,y_n))
  g_x=np.dot(A,var)
  residues=y-g_x
  val=np.linalg.norm(y_n-np.dot(A_n,var))**2
  residuals=np.zeros(100)

  for i in range(100):
    residuals[i]=np.linalg.norm(residues[i])**2

  return residues,residuals,var,val

#Implementation of GNC-GM algorithm
def gnc(y,A,epsilon):
  max_iteration=1000
  barc2=epsilon

  weights=np.ones(100)
  _,res,var_0,val_0=residuals(weights,y,A)  # Equivalent to the values obtained using ordinary least square method
  r=np.max(res)
  mu=2*r/barc2
  mu1=mu
  i=1

  res_his=[]
  while i<max_iteration and mu>1:
    weights=weight_update(mu,res,barc2)
    _,res,var,val=residuals(weights,y,A)
    mu=mu/1.4          #1.4 taken as continuation factor
    i=i+1
  return weights,var_0,val_0,i,var,val

N=100
Outliers_cent=68                #set outlier percentage to observe the performance of the algorithm
X,y,A,outliers=problem(N,Outliers_cent)
weights,var_0,val_0,itr,var,err=gnc(y,A,epsilon)

eps=0.1
inliers=np.where(weights>(1-eps))[0]
print(var_0,var,inliers,val_0,err,itr)

t=np.linspace(0,1,100)
yhat=np.dot(A,var_0)
plt.plot(X,y,'o')
plt.plot(X[inliers],y[inliers],'o',color='blue')
plt.plot(X[outliers],y[outliers],'o',color='r')

plt.plot(X,yhat,linewidth=2)
plt.title('Regression Fit ordinary Least square method')
plt.xlabel('x(input)')
plt.ylabel('y(true values with outliers)')
plt.show()
ygnc=np.dot(A,var)
plt.plot(X,y,'o')
plt.plot(X[inliers],y[inliers],'o',color='b')
plt.plot(X[outliers],y[outliers],'o',color='r')
plt.plot(X,ygnc,linewidth=2)
plt.title('Regression Fit using GNC-GM solver')
plt.xlabel('x(input)')
plt.ylabel('y(true values with outliers)')
plt.show()

"""**Linear Programming Method:**"""

from scipy.optimize import linprog

M=np.vstack((np.hstack((A,-np.eye(100))),np.hstack((-A,-np.eye(100)))))

b=np.concatenate((y,-y))
print(M.shape,b.shape)
c=np.hstack((np.zeros(2),np.ones(100)))
reslp=linprog(c,M,b,bounds=(None,None),method="highs")
beta=reslp.x

print(beta.shape)
beta_new=np.array([beta[0],beta[1]])
print(beta_new)
x_gt=np.array([6,2])
yhat=np.dot(A,beta_new)
y_gt=np.dot(A,x_gt)
err_lp=np.linalg.norm(y_gt-yhat)**2
print(err_lp)
plt.plot(X,y,'o')
#plt.plot(X[inliers],y[inliers],'o',color='blue')
plt.plot(X[outliers],y[outliers],'o',color='r')
plt.plot(X,yhat,linewidth=2)
plt.xlabel('x(input)')
plt.ylabel('y(true values with outliers)')
plt.title('Regression Fit using Linear Programming method')
	# -- coding: utf-8 --
	"""final_algorithm.ipynb

	Automatically generated by Colaboratory.

	Original file is located at
	https://colab.research.google.com/drive/114tP1lTn5sWFHcbp54RPhwgnOJNkZI5p
	"""

	import numpy as np
	from matplotlib import pyplot as plt
	from scipy.optimize import minimize
	import scipy.stats as stats
	import random
	import math

	epsilon = stats.chi2.ppf(0.99, 2) * (0.4)**2
	print(epsilon)
	def weight_update(mu,res,barc2):
	weight=np.ones(100)
	for j in range(100):
	weight[j]=((mu(barc2))/(res[j]+mu(barc2)))**2
	return weight


	def problem(N,outlier_ratio):
	np.random.seed(0)
	X = np.random.rand(N, 1) * 10
	y = 6+2X +0.4np.random.randn(N, 1)
	A=np.column_stack((np.ones(N),X))
	x_gt=[6,2]
	y1=np.dot(A,x_gt)
	e=np.zeros(N)
	for k in range(N):
	e=np.linalg.norm(y1[k]-y[k])
	e_max=np.max(e)
	e_max=e_max**2

	outlier_percentage = 10 # Specify the percentage of outliers
	num_outliers = int(outlier_ratio / 100 * len(X)) # Calculate number of outliers
	outlier_indices = np.random.randint(0, len(X), num_outliers) # Generate random indices for outliers
	y[outlier_indices] = y[outlier_indices] + 20 # 20added to the y values of the outliers

	return X,y,A,outlier_indices

	#Calculation of residual error
	def residuals(weights,y,A):

	y_n=np.zeros(y.shape)
	A_n=np.zeros(A.shape)
	w=weights
	for l in range(100):
	A_n[l,:]=math.sqrt(w[l])*A[l,:]
	y_n[l]=math.sqrt(w[l])*y[l]
	var=np.dot(np.linalg.inv(np.dot(A_n.T,A_n)),np.dot(A_n.T,y_n))
	g_x=np.dot(A,var)
	residues=y-g_x
	val=np.linalg.norm(y_n-np.dot(A_n,var))**2
	residuals=np.zeros(100)

	for i in range(100):
	residuals[i]=np.linalg.norm(residues[i])**2

	return residues,residuals,var,val

	#Implementation of GNC-GM algorithm
	def gnc(y,A,epsilon):
	max_iteration=1000
	barc2=epsilon

	weights=np.ones(100)
	_,res,var_0,val_0=residuals(weights,y,A) # Equivalent to the values obtained using ordinary least square method
	r=np.max(res)
	mu=2*r/barc2
	mu1=mu
	i=1

	res_his=[]
	while i<max_iteration and mu>1:
	weights=weight_update(mu,res,barc2)
	_,res,var,val=residuals(weights,y,A)
	mu=mu/1.4 #1.4 taken as continuation factor
	i=i+1
	return weights,var_0,val_0,i,var,val

	N=100
	Outliers_cent=68 #set outlier percentage to observe the performance of the algorithm
	X,y,A,outliers=problem(N,Outliers_cent)
	weights,var_0,val_0,itr,var,err=gnc(y,A,epsilon)

	eps=0.1
	inliers=np.where(weights>(1-eps))[0]
	print(var_0,var,inliers,val_0,err,itr)

	t=np.linspace(0,1,100)
	yhat=np.dot(A,var_0)
	plt.plot(X,y,'o')
	plt.plot(X[inliers],y[inliers],'o',color='blue')
	plt.plot(X[outliers],y[outliers],'o',color='r')

	plt.plot(X,yhat,linewidth=2)
	plt.title('Regression Fit ordinary Least square method')
	plt.xlabel('x(input)')
	plt.ylabel('y(true values with outliers)')
	plt.show()
	ygnc=np.dot(A,var)
	plt.plot(X,y,'o')
	plt.plot(X[inliers],y[inliers],'o',color='b')
	plt.plot(X[outliers],y[outliers],'o',color='r')
	plt.plot(X,ygnc,linewidth=2)
	plt.title('Regression Fit using GNC-GM solver')
	plt.xlabel('x(input)')
	plt.ylabel('y(true values with outliers)')
	plt.show()

	"""Linear Programming Method:"""

	from scipy.optimize import linprog

	M=np.vstack((np.hstack((A,-np.eye(100))),np.hstack((-A,-np.eye(100)))))

	b=np.concatenate((y,-y))
	print(M.shape,b.shape)
	c=np.hstack((np.zeros(2),np.ones(100)))
	reslp=linprog(c,M,b,bounds=(None,None),method="highs")
	beta=reslp.x

	print(beta.shape)
	beta_new=np.array([beta[0],beta[1]])
	print(beta_new)
	x_gt=np.array([6,2])
	yhat=np.dot(A,beta_new)
	y_gt=np.dot(A,x_gt)
	err_lp=np.linalg.norm(y_gt-yhat)**2
	print(err_lp)
	plt.plot(X,y,'o')
	#plt.plot(X[inliers],y[inliers],'o',color='blue')
	plt.plot(X[outliers],y[outliers],'o',color='r')
	plt.plot(X,yhat,linewidth=2)
	plt.xlabel('x(input)')
	plt.ylabel('y(true values with outliers)')
	plt.title('Regression Fit using Linear Programming method')