Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
ECE50024_FinalProject_Asha/GNC_LP_forLinearRegression.py
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
142 lines (118 sloc)
3.8 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
"""final_algorithm.ipynb | |
Automatically generated by Colaboratory. | |
Original file is located at | |
https://colab.research.google.com/drive/114tP1lTn5sWFHcbp54RPhwgnOJNkZI5p | |
""" | |
import numpy as np | |
from matplotlib import pyplot as plt | |
from scipy.optimize import minimize | |
import scipy.stats as stats | |
import random | |
import math | |
epsilon = stats.chi2.ppf(0.99, 2) * (0.4)**2 | |
print(epsilon) | |
def weight_update(mu,res,barc2): | |
weight=np.ones(100) | |
for j in range(100): | |
weight[j]=((mu*(barc2))/(res[j]+mu*(barc2)))**2 | |
return weight | |
def problem(N,outlier_ratio): | |
np.random.seed(0) | |
X = np.random.rand(N, 1) * 10 | |
y = 6+2*X +0.4*np.random.randn(N, 1) | |
A=np.column_stack((np.ones(N),X)) | |
x_gt=[6,2] | |
y1=np.dot(A,x_gt) | |
e=np.zeros(N) | |
for k in range(N): | |
e=np.linalg.norm(y1[k]-y[k]) | |
e_max=np.max(e) | |
e_max=e_max**2 | |
outlier_percentage = 10 # Specify the percentage of outliers | |
num_outliers = int(outlier_ratio / 100 * len(X)) # Calculate number of outliers | |
outlier_indices = np.random.randint(0, len(X), num_outliers) # Generate random indices for outliers | |
y[outlier_indices] = y[outlier_indices] + 20 # 20added to the y values of the outliers | |
return X,y,A,outlier_indices | |
#Calculation of residual error | |
def residuals(weights,y,A): | |
y_n=np.zeros(y.shape) | |
A_n=np.zeros(A.shape) | |
w=weights | |
for l in range(100): | |
A_n[l,:]=math.sqrt(w[l])*A[l,:] | |
y_n[l]=math.sqrt(w[l])*y[l] | |
var=np.dot(np.linalg.inv(np.dot(A_n.T,A_n)),np.dot(A_n.T,y_n)) | |
g_x=np.dot(A,var) | |
residues=y-g_x | |
val=np.linalg.norm(y_n-np.dot(A_n,var))**2 | |
residuals=np.zeros(100) | |
for i in range(100): | |
residuals[i]=np.linalg.norm(residues[i])**2 | |
return residues,residuals,var,val | |
#Implementation of GNC-GM algorithm | |
def gnc(y,A,epsilon): | |
max_iteration=1000 | |
barc2=epsilon | |
weights=np.ones(100) | |
_,res,var_0,val_0=residuals(weights,y,A) # Equivalent to the values obtained using ordinary least square method | |
r=np.max(res) | |
mu=2*r/barc2 | |
mu1=mu | |
i=1 | |
res_his=[] | |
while i<max_iteration and mu>1: | |
weights=weight_update(mu,res,barc2) | |
_,res,var,val=residuals(weights,y,A) | |
mu=mu/1.4 #1.4 taken as continuation factor | |
i=i+1 | |
return weights,var_0,val_0,i,var,val | |
N=100 | |
Outliers_cent=68 #set outlier percentage to observe the performance of the algorithm | |
X,y,A,outliers=problem(N,Outliers_cent) | |
weights,var_0,val_0,itr,var,err=gnc(y,A,epsilon) | |
eps=0.1 | |
inliers=np.where(weights>(1-eps))[0] | |
print(var_0,var,inliers,val_0,err,itr) | |
t=np.linspace(0,1,100) | |
yhat=np.dot(A,var_0) | |
plt.plot(X,y,'o') | |
plt.plot(X[inliers],y[inliers],'o',color='blue') | |
plt.plot(X[outliers],y[outliers],'o',color='r') | |
plt.plot(X,yhat,linewidth=2) | |
plt.title('Regression Fit ordinary Least square method') | |
plt.xlabel('x(input)') | |
plt.ylabel('y(true values with outliers)') | |
plt.show() | |
ygnc=np.dot(A,var) | |
plt.plot(X,y,'o') | |
plt.plot(X[inliers],y[inliers],'o',color='b') | |
plt.plot(X[outliers],y[outliers],'o',color='r') | |
plt.plot(X,ygnc,linewidth=2) | |
plt.title('Regression Fit using GNC-GM solver') | |
plt.xlabel('x(input)') | |
plt.ylabel('y(true values with outliers)') | |
plt.show() | |
"""**Linear Programming Method:**""" | |
from scipy.optimize import linprog | |
M=np.vstack((np.hstack((A,-np.eye(100))),np.hstack((-A,-np.eye(100))))) | |
b=np.concatenate((y,-y)) | |
print(M.shape,b.shape) | |
c=np.hstack((np.zeros(2),np.ones(100))) | |
reslp=linprog(c,M,b,bounds=(None,None),method="highs") | |
beta=reslp.x | |
print(beta.shape) | |
beta_new=np.array([beta[0],beta[1]]) | |
print(beta_new) | |
x_gt=np.array([6,2]) | |
yhat=np.dot(A,beta_new) | |
y_gt=np.dot(A,x_gt) | |
err_lp=np.linalg.norm(y_gt-yhat)**2 | |
print(err_lp) | |
plt.plot(X,y,'o') | |
#plt.plot(X[inliers],y[inliers],'o',color='blue') | |
plt.plot(X[outliers],y[outliers],'o',color='r') | |
plt.plot(X,yhat,linewidth=2) | |
plt.xlabel('x(input)') | |
plt.ylabel('y(true values with outliers)') | |
plt.title('Regression Fit using Linear Programming method') |