Skip to content
Permalink
master
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
{
"cells": [
{
"cell_type": "code",
"execution_count": 648,
"id": "0a8ebb46",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import time\n",
"from scipy.special import eval_legendre\n",
"from sklearn.linear_model import Ridge\n",
"def gncWeightsUpdate_TLS(w, mu, r, barc):\n",
" th1 = (mu+1)/mu * (barc**2)\n",
" th2 = (mu)/(mu+1) * (barc**2)\n",
" for k in range(len(r)):\n",
" if (r[k])**2 >= th1:\n",
" w[k] = 0\n",
" elif (r[k])**2 <= th2:\n",
" w[k] = 1\n",
" else:\n",
" w[k] = barc*np.sqrt(mu*(mu+1))/abs(r[k])-mu\n",
" assert 0 <= w[k] <= 1, f'weights {w[k]} calculation wrong!'\n",
" return w\n",
"\n",
"def GNC_TLS(N, data, LS_solver, residual_func,barc, max_iter=1000,mu_factor=1.4,tol=1e-6):\n",
" w = np.ones(N)\n",
" x = LS_solver(w,data)\n",
" r = residual_func(x,data)\n",
" rmax, rsum = r_max_sum(r,w)\n",
" \n",
" #mu initialization\n",
" mu = barc**2/(2*(rmax**2)-barc**2)\n",
" \n",
" for iter in range(max_iter):\n",
" w = gncWeightsUpdate_TLS(w,mu,r,barc)\n",
" x= LS_solver(w,data)\n",
" r= residual_func(x,data)\n",
" rmax,rsum_new = r_max_sum(r,w)\n",
" \n",
" #check for convergence\n",
" if iter>1 and np.abs(rsum_new-rsum)<=tol:\n",
" break\n",
" rsum = rsum_new\n",
" \n",
" mu = mu * mu_factor\n",
" return x\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "55d4a2a7",
"metadata": {},
"outputs": [],
"source": [
"def gncWeightsUpdate_GM(w, mu, r, barc):\n",
" for k in range(len(r)):\n",
" w[k] = (mu*barc/(r[k]**2+mu*barc))**2\n",
" assert 0 <= w[k] <= 1, f'weights {w[k]} calculation wrong!'\n",
" return w\n",
"\n",
"def GNC_GM(N, data, LS_solver, residual_func,barc, max_iter=1000,mu_factor=1.4,tol=1e-6):\n",
" w = np.ones(N)\n",
" x = LS_solver(w,data)\n",
" r = residual_func(x,data)\n",
" rmax, rsum = r_max_sum(r,w)\n",
" \n",
" #mu initialization\n",
" mu = 2*(rmax**2)/(barc**2)\n",
" \n",
" for iter in range(max_iter):\n",
" w = gncWeightsUpdate_GM(w,mu,r,barc)\n",
" x= LS_solver(w,data)\n",
" r= residual_func(x,data)\n",
" rmax,rsum_new = r_max_sum(r,w)\n",
" \n",
" #check for convergence\n",
" if iter>1 and np.abs(rsum_new-rsum)<=tol:\n",
" break\n",
" rsum = rsum_new\n",
" if mu==1:\n",
" break\n",
" else:\n",
" mu = max(1,mu/mu_factor)\n",
" return x"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "10302438",
"metadata": {},
"outputs": [],
"source": [
"def least_sq_solver(w, data):\n",
" X = data[0]\n",
" y = data[1]\n",
" W = np.diag(w)\n",
" # analytic expression for weighted least squares\n",
" return np.linalg.inv(X.T @ W @ X) @ X.T @ W @ y\n",
"\n",
"def residual_fn(β, data):\n",
" X = data[0]\n",
" y = data[1]\n",
" return y - X @ β"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "c533c782",
"metadata": {},
"outputs": [],
"source": [
"def r_max_sum(r,w):\n",
" rmax = max(np.abs(r))\n",
" rsum = sum(w[i]*(r[i]**2) for i in range(len(r)))\n",
" return rmax, rsum"
]
},
{
"cell_type": "code",
"execution_count": 169,
"id": "ce2f0af7",
"metadata": {},
"outputs": [],
"source": [
"def ridge_regression(X, y, alpha):\n",
" \"\"\"\n",
" Perform Ridge regression on X and y using regularization strength alpha.\n",
" \n",
" Parameters:\n",
" X (ndarray): Input feature matrix with shape (n_samples, n_features)\n",
" y (ndarray): Target vector with shape (n_samples,)\n",
" alpha (float): Regularization strength\n",
" \n",
" Returns:\n",
" ndarray: Ridge regression coefficients with shape (n_features,)\n",
" \"\"\"\n",
" # Add a column of ones to X for the intercept term\n",
" #X = np.column_stack([np.ones(len(X)), X])\n",
" #print(X)\n",
" # Fit the Ridge regression model\n",
" model = Ridge(alpha=alpha, fit_intercept=False)\n",
" model.fit(X, y)\n",
" \n",
" # Return the coefficients without the intercept term\n",
" return model.coef_"
]
},
{
"cell_type": "code",
"execution_count": 309,
"id": "498dc983",
"metadata": {},
"outputs": [],
"source": [
"def ransac(X, y, n, k, t, d):\n",
" \"\"\"\n",
" Fit a model to data using the RANSAC algorithm.\n",
" \n",
" Parameters:\n",
" X (ndarray): a numpy array with shape (N, D) containing the input data\n",
" y (ndarray): a numpy array with shape (N,) containing the output data\n",
" model (function): a function that fits the model to data and returns the model parameters\n",
" n (int): the minimum number of data points required to fit the model\n",
" k (int): the maximum number of iterations allowed in the algorithm\n",
" t (float): the threshold value for inliers\n",
" d (int): the minimum number of inliers required to accept a fitted model\n",
" \n",
" Returns:\n",
" model_params (ndarray): a numpy array containing the parameters of the best model\n",
" \"\"\"\n",
" \n",
" best_model_params = None\n",
" best_score = 0\n",
" \n",
" for i in range(k):\n",
" sample_indices = np.random.choice(X.shape[0], size=n, replace=False)\n",
" X_sample = X[sample_indices]\n",
" y_sample = y[sample_indices]\n",
" \n",
" model_params = np.linalg.lstsq(X_sample, y_sample, rcond=None)[0]\n",
" y_hat = model_params.dot(X.T)\n",
" residuals = y - y_hat\n",
" inlier_indices = np.abs(residuals) < t\n",
" score = sum(inlier_indices)\n",
" if score > d and score > best_score:\n",
" best_model_params = model_params\n",
" best_score = score\n",
" if best_model_params is None:\n",
" raise ValueError(\"RANSAC failed to find a good model.\")\n",
" return best_model_params"
]
},
{
"cell_type": "code",
"execution_count": 697,
"id": "af41ec8d",
"metadata": {},
"outputs": [],
"source": [
"#We test our models on 16 different data sets with the following parameters varying\n",
"n=3 #dimen\n",
"N=300 # number of data points\n",
"barc=0.1 #noise tolerance\n",
"outlier_ratio = 0.5"
]
},
{
"cell_type": "code",
"execution_count": 698,
"id": "f820c556",
"metadata": {},
"outputs": [],
"source": [
"beta_gt = np.random.randn(n)\n",
"\n",
"X_first_col = np.random.randn(N)\n",
"X = np.zeros((N,n))\n",
"legendre_X = np.zeros((N,n))\n",
"\n",
"for i in range(n-1):\n",
" X[:,i] = X_first_col**(n-1-i)\n",
" legendre_X[:,i] = eval_legendre(n-1-i,X_first_col)\n",
"X[:, n-1] = 1\n",
"\n",
"legendre_X[:, n-1] = 1\n",
"# create some noisy measurements\n",
"y = np.dot(X, beta_gt) + barc * (2 * np.random.rand(N) - 1)\n",
"\n",
"# add some outliers to 20% of data\n",
"for i in range(N):\n",
" if np.random.rand() < outlier_ratio:\n",
" y[i] += 1.0 + np.random.rand()\n",
" else:\n",
" continue\n",
"\n",
"# collect all the necessary data\n",
"data = (X, y)\n"
]
},
{
"cell_type": "code",
"execution_count": 699,
"id": "abde9539",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([ 1.37181668, -0.88623036, 0.11407872])"
]
},
"execution_count": 699,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#ground truth\n",
"beta_gt"
]
},
{
"cell_type": "code",
"execution_count": 700,
"id": "584fe8a5",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Elapsed time: 0.023368 seconds\n"
]
}
],
"source": [
"#linear regression\n",
"start_time = time.perf_counter()\n",
"beta_ls = np.linalg.lstsq(X, y, rcond=None)[0]\n",
"end_time = time.perf_counter()\n",
"elapsed_time = end_time - start_time\n",
"print(f\"Elapsed time: {elapsed_time:.6f} seconds\")"
]
},
{
"cell_type": "code",
"execution_count": 701,
"id": "dd187e2c",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.7210799988167256"
]
},
"execution_count": 701,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.linalg.norm(beta_ls-beta_gt)"
]
},
{
"cell_type": "code",
"execution_count": 702,
"id": "0e38df05",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Elapsed time: 0.017270 seconds\n"
]
}
],
"source": [
"#ridge regression\n",
"start_time = time.perf_counter()\n",
"beta_ridge=ridge_regression(X,y,0.1)\n",
"end_time = time.perf_counter()\n",
"elapsed_time = end_time - start_time\n",
"print(f\"Elapsed time: {elapsed_time:.6f} seconds\")"
]
},
{
"cell_type": "code",
"execution_count": 703,
"id": "69af8dfb",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.7208729809607424"
]
},
"execution_count": 703,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.linalg.norm(beta_ridge-beta_gt) "
]
},
{
"cell_type": "code",
"execution_count": 707,
"id": "7ba08258",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Elapsed time: 0.821359 seconds\n"
]
}
],
"source": [
"#RANSAC\n",
"start_time = time.perf_counter()\n",
"ransac_t =8*np.mean(abs(barc*(2*np.random.rand(N)-1)))\n",
"beta_ransac = ransac(X, y, int(N/10), 5*N, ransac_t, (0.9-outlier_ratio)*N)\n",
"end_time = time.perf_counter()\n",
"elapsed_time = end_time - start_time\n",
"print(f\"Elapsed time: {elapsed_time:.6f} seconds\")"
]
},
{
"cell_type": "code",
"execution_count": 708,
"id": "12b39397",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.27565025691837164"
]
},
"execution_count": 708,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.linalg.norm(beta_ransac-beta_gt)"
]
},
{
"cell_type": "code",
"execution_count": 709,
"id": "e01bcb30",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Elapsed time: 0.010629 seconds\n"
]
}
],
"source": [
"#GNC_GM\n",
"start_time = time.perf_counter()\n",
"beta_gm = GNC_GM(N,data,least_sq_solver,residual_fn,barc)\n",
"end_time = time.perf_counter()\n",
"elapsed_time = end_time - start_time\n",
"print(f\"Elapsed time: {elapsed_time:.6f} seconds\")"
]
},
{
"cell_type": "code",
"execution_count": 710,
"id": "174772ef",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.7150871505500008"
]
},
"execution_count": 710,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.linalg.norm(beta_gm-beta_gt) "
]
},
{
"cell_type": "code",
"execution_count": 711,
"id": "f89c8fea",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Elapsed time: 0.006195 seconds\n"
]
}
],
"source": [
"#GNC_TLS\n",
"start_time = time.perf_counter()\n",
"beta_tls = GNC_TLS(N,data,least_sq_solver,residual_fn,barc)\n",
"end_time = time.perf_counter()\n",
"elapsed_time = end_time - start_time\n",
"print(f\"Elapsed time: {elapsed_time:.6f} seconds\")"
]
},
{
"cell_type": "code",
"execution_count": 712,
"id": "6df4171e",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.6902399684265232"
]
},
"execution_count": 712,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.linalg.norm(beta_tls-beta_gt) "
]
},
{
"cell_type": "code",
"execution_count": 713,
"id": "0dba15c0",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Elapsed time: 0.001391 seconds\n"
]
}
],
"source": [
"#Legendre\n",
"start_time = time.perf_counter()\n",
"beta_legendre = np.linalg.lstsq(legendre_X, y, rcond=None)[0]\n",
"end_time = time.perf_counter()\n",
"elapsed_time = end_time - start_time\n",
"print(f\"Elapsed time: {elapsed_time:.6f} seconds\")"
]
},
{
"cell_type": "code",
"execution_count": 714,
"id": "326181e3",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1.2640257603834213"
]
},
"execution_count": 714,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.linalg.norm(beta_legendre-beta_gt)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ab73da8e",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "35e46e2f",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.8"
}
},
"nbformat": 4,
"nbformat_minor": 5
}