GNC_MW.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {},
   "outputs": [],
   "source": [
    "# This Python code is inspired by the Julia package https://github.com/dev10110/GraduatedNonConvexity.jl\n",
    "\n",
    "\n",
    "import numpy as np\n",
    "from scipy.special import eval_legendre\n",
    "import time\n",
    "from sklearn.linear_model import Ridge\n",
    "\n",
    "def rmax_rsum(r, w):\n",
    "    \"\"\"Return the largest absolute residual and the weighted sum of squared residuals.\n",
    "\n",
    "    r is the sequence of residuals and w the sequence of weights, indexed like r.\n",
    "    \"\"\"\n",
    "    weighted_sq_total = 0\n",
    "    for idx, residual in enumerate(r):\n",
    "        weighted_sq_total += w[idx] * (residual**2)\n",
    "    largest_abs = max(np.abs(r))\n",
    "    return largest_abs, weighted_sq_total"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {},
   "outputs": [],
   "source": [
    "def least_sq_solver(w, data):\n",
    "    \"\"\"Solve the weighted least-squares problem min_beta sum_i w[i] * (y[i] - X[i] @ beta)**2.\n",
    "\n",
    "    Parameters: w is a length-N weight vector; data is a pair (X, y) with X of shape (N, d).\n",
    "    Returns the coefficient vector that solves the weighted normal equations.\n",
    "    Raises numpy.linalg.LinAlgError when X.T @ diag(w) @ X is singular.\n",
    "    \"\"\"\n",
    "    X = data[0]\n",
    "    y = data[1]\n",
    "    # Scale the rows of X by their weights instead of building a dense N x N diag(w).\n",
    "    Xw = X * np.asarray(w)[:, None]\n",
    "    # Solve the normal equations directly; an explicit inverse is slower and less accurate.\n",
    "    return np.linalg.solve(Xw.T @ X, Xw.T @ y)\n",
    "\n",
    "def residual_fn(beta, data):\n",
    "    \"\"\"Return the residual vector y - X @ beta for data = (X, y).\"\"\"\n",
    "    design, target = data[0], data[1]\n",
    "    prediction = design @ beta\n",
    "    return target - prediction\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "\n",
    "# TLS\n",
    "def TLS_weightupdate(w, r, c_bar, mu):\n",
    "    \"\"\"Update the truncated-least-squares weights in place and return them.\n",
    "\n",
    "    For each residual r[i] the weight becomes 0 when r[i]**2 is at or above the upper\n",
    "    threshold, 1 when it is at or below the lower threshold, and\n",
    "    c_bar*sqrt(mu*(mu+1))/|r[i]| - mu in between. w is mutated and also returned.\n",
    "    \"\"\"\n",
    "    c_sq = c_bar**2\n",
    "    upper = (mu+1)*c_sq/mu\n",
    "    lower = mu*c_sq/(mu+1)\n",
    "    for idx, residual in enumerate(r):\n",
    "        sq = residual**2\n",
    "        if sq >= upper:\n",
    "            w[idx] = 0\n",
    "            continue\n",
    "        if sq <= lower:\n",
    "            w[idx] = 1\n",
    "            continue\n",
    "        w[idx] = c_bar*np.sqrt(mu*(mu+1))/abs(residual) - mu\n",
    "    return w\n",
    "# TLS weight update has been updated\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "metadata": {},
   "outputs": [],
   "source": [
    "def gnc_tls(N, data, LSQ_fn, RES_fn, c_bar, max_iterations=1000, mu_factor=1.4, rtol=1e-6):\n",
    "    \"\"\"Robust estimation by graduated non-convexity with the truncated least squares cost.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    N : int, number of measurements (length of the weight vector).\n",
    "    data : passed through unchanged to LSQ_fn and RES_fn.\n",
    "    LSQ_fn : callable (w, data) -> estimate, a weighted least-squares solver.\n",
    "    RES_fn : callable (estimate, data) -> residuals.\n",
    "    c_bar : residual bound used by the TLS weight thresholds.\n",
    "    max_iterations : maximum number of weight-update / re-solve rounds.\n",
    "    mu_factor : factor applied to mu after every round.\n",
    "    rtol : stop once the weighted squared-residual sum changes by at most rtol.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    The estimate from the last call to LSQ_fn.\n",
    "    \"\"\"\n",
    "    w = np.ones(N)\n",
    "    x = LSQ_fn(w, data)\n",
    "    rs = RES_fn(x, data)\n",
    "    rmax, rsum = rmax_rsum(rs, w)\n",
    "\n",
    "    denom = 2*(rmax**2) - c_bar**2\n",
    "    if denom <= 0:\n",
    "        # Every residual of the unweighted fit is already below c_bar, and mu would be\n",
    "        # undefined or negative, so the initial estimate is returned as is.\n",
    "        return x\n",
    "    mu = c_bar**2/denom\n",
    "\n",
    "    for i in range(max_iterations):\n",
    "        w = TLS_weightupdate(w, rs, c_bar, mu)\n",
    "        x = LSQ_fn(w, data)\n",
    "        rs = RES_fn(x, data)\n",
    "        rmax, rsum_new = rmax_rsum(rs, w)\n",
    "\n",
    "        if i > 1 and np.abs(rsum_new - rsum) <= rtol:\n",
    "            break\n",
    "        rsum = rsum_new\n",
    "\n",
    "        mu = mu*mu_factor\n",
    "\n",
    "    # Return after the loop: returning inside the loop body stopped after a single round.\n",
    "    return x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {},
   "outputs": [],
   "source": [
    "# GM\n",
    "def GM_weightupdate(w, r, c_bar, mu):\n",
    "    \"\"\"Update the Geman-McClure GNC weights in place and return them.\n",
    "\n",
    "    Each weight is set to (mu*c_bar**2 / (r[i]**2 + mu*c_bar**2))**2, so a zero residual\n",
    "    gets weight 1 and the weight decays toward 0 as |r[i]| grows.\n",
    "    \"\"\"\n",
    "    # c_bar enters squared so that it is on the same scale as the squared residual.\n",
    "    scale = mu*c_bar**2\n",
    "    for i in range(len(r)):\n",
    "        w[i] = (scale/(r[i]**2 + scale))**2\n",
    "    return w\n",
    "\n",
    "def gnc_gm(N, data, LSQ_fn, RES_fn, c_bar, max_iterations=1000, mu_factor=1.4, rtol=1e-6):\n",
    "    \"\"\"Robust estimation by graduated non-convexity with the Geman-McClure cost.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    N : int, number of measurements (length of the weight vector).\n",
    "    data : passed through unchanged to LSQ_fn and RES_fn.\n",
    "    LSQ_fn : callable (w, data) -> estimate, a weighted least-squares solver.\n",
    "    RES_fn : callable (estimate, data) -> residuals.\n",
    "    c_bar : residual scale used by the weight update.\n",
    "    max_iterations : maximum number of weight-update / re-solve rounds.\n",
    "    mu_factor : factor by which mu is divided after every round (mu never drops below 1).\n",
    "    rtol : stop once the weighted squared-residual sum changes by at most rtol.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    The estimate from the last call to LSQ_fn.\n",
    "    \"\"\"\n",
    "    w = np.ones(N)\n",
    "    x = LSQ_fn(w, data)\n",
    "    rs = RES_fn(x, data)\n",
    "    rmax, rsum = rmax_rsum(rs, w)\n",
    "\n",
    "    mu = (rmax**2)*2/(c_bar**2)\n",
    "\n",
    "    for i in range(max_iterations):\n",
    "        w = GM_weightupdate(w, rs, c_bar, mu)\n",
    "        x = LSQ_fn(w, data)\n",
    "        rs = RES_fn(x, data)\n",
    "        rmax, rsum_new = rmax_rsum(rs, w)\n",
    "\n",
    "        if i > 1 and np.abs(rsum_new - rsum) <= rtol:\n",
    "            break\n",
    "        rsum = rsum_new\n",
    "\n",
    "        # mu == 1 is the last stage of the schedule; otherwise shrink mu toward 1.\n",
    "        if mu == 1:\n",
    "            break\n",
    "        mu = max(1, mu/mu_factor)\n",
    "\n",
    "    # Return after the loop: returning inside the loop body stopped after a single round\n",
    "    # and produced None whenever a break was taken.\n",
    "    return x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "metadata": {},
   "outputs": [],
   "source": [
    "def ridge_regression(X, y, a):\n",
    "    \"\"\"Fit an intercept-free ridge regression with penalty a and return its coefficients.\"\"\"\n",
    "    return Ridge(alpha=a, fit_intercept=False).fit(X, y).coef_\n",
    "\n",
    "def fit_ransac_iteration(X, y, min_data, threshold):\n",
    "    \"\"\"Run one RANSAC trial: fit on a random subset, then count inliers over all rows.\n",
    "\n",
    "    Draws min_data distinct rows, solves least squares on them, and returns\n",
    "    (score, model_para), where score is the number of rows of X whose absolute\n",
    "    residual is below threshold.\n",
    "    \"\"\"\n",
    "    picked = np.random.choice(X.shape[0], size=min_data, replace=False)\n",
    "    model_para = np.linalg.lstsq(X[picked], y[picked], rcond=None)[0]\n",
    "    abs_residual = np.abs(y - model_para.dot(X.T))\n",
    "    score = sum(abs_residual < threshold)\n",
    "    return score, model_para\n",
    "\n",
    "def ransac_fit(X, y, min_data, max_interations, threshold, min_inlier):\n",
    "    \"\"\"Repeat RANSAC trials and return the parameters of the best-scoring model.\n",
    "\n",
    "    A trial is kept only when its inlier score exceeds both min_inlier and the best\n",
    "    score seen so far. Raises ValueError when no trial qualifies.\n",
    "    \"\"\"\n",
    "    best_score, best_model_para = 0, None\n",
    "\n",
    "    for _trial in range(max_interations):\n",
    "        score, candidate = fit_ransac_iteration(X, y, min_data, threshold)\n",
    "        if score <= min_inlier or score <= best_score:\n",
    "            continue\n",
    "        best_score, best_model_para = score, candidate\n",
    "\n",
    "    if best_model_para is not None:\n",
    "        return best_model_para\n",
    "    raise ValueError(\"ransac does not find a fitting model\")\n",
    "\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 1.37686942, -1.02925953])"
      ]
     },
     "execution_count": 73,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def generate_data(number_data, dim, c_bar, outlier_ratio, beta_gt, return_legendre=False):\n",
    "    \"\"\"Generate a polynomial regression data set with bounded noise and additive outliers.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    number_data : number of samples; the sample points are standard normal.\n",
    "    dim : number of columns of the design matrix (polynomial degree dim - 1).\n",
    "    c_bar : bound of the uniform noise added to every response.\n",
    "    outlier_ratio : probability that a response is additionally shifted by a value in [1, 2).\n",
    "    beta_gt : length-dim coefficient vector applied to the monomial design matrix.\n",
    "    return_legendre : when True, also return the Legendre design matrix of the same samples.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    (X, y), or (X, y, legendre_X) when return_legendre is True.\n",
    "    \"\"\"\n",
    "    X_col = np.random.randn(number_data)\n",
    "    legendre_X = np.zeros((number_data, dim))\n",
    "    X = np.zeros((number_data, dim))\n",
    "\n",
    "    # Column i holds the degree (dim - 1 - i) term, so the last column is the constant 1.\n",
    "    for i in range(dim - 1):\n",
    "        X[:, i] = X_col**(dim - 1 - i)\n",
    "        legendre_X[:, i] = eval_legendre(dim - 1 - i, X_col)\n",
    "    X[:, dim - 1] = 1\n",
    "    legendre_X[:, dim - 1] = 1\n",
    "\n",
    "    y = np.dot(X, beta_gt) + c_bar * (2 * np.random.rand(number_data) - 1)\n",
    "\n",
    "    # Turn each sample into an outlier with probability outlier_ratio.\n",
    "    for i in range(number_data):\n",
    "        if np.random.rand() < outlier_ratio:\n",
    "            y[i] += 1.0 + np.random.rand()\n",
    "\n",
    "    if return_legendre:\n",
    "        return X, y, legendre_X\n",
    "    return X, y\n",
    "\n",
    "# Experiment configuration. Every parameter is defined before its first use so the\n",
    "# cell runs on a fresh kernel (beta_gt needs dim).\n",
    "np.random.seed(0)  # fixed seed so Restart & Run All reproduces the same data\n",
    "number_data = 1000  # number of samples\n",
    "dim = 2  # number of polynomial coefficients (degree dim - 1)\n",
    "c_bar = 0.1  # bound of the inlier noise\n",
    "outlier_ratio = 0.5  # probability that a sample is an outlier\n",
    "ransac_outlier_const = 0.8  # used with outlier_ratio to set the RANSAC minimum inlier count\n",
    "\n",
    "beta_gt = np.random.randn(dim)  # ground-truth coefficients\n",
    "\n",
    "X, y = generate_data(number_data, dim, c_bar, outlier_ratio, beta_gt)\n",
    "data = (X, y)\n",
    "beta_gt\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Elapsed time: 0.000434 seconds\n"
     ]
    }
   ],
   "source": [
    "# Baseline: ordinary least squares on all samples (outliers included), timed with perf_counter.\n",
    "time_start = time.perf_counter()\n",
    "beta_ls = np.linalg.lstsq(X,y,rcond = None)[0]\n",
    "time_end = time.perf_counter()\n",
    "delta_time = time_end - time_start\n",
    "print(f\"Elapsed time:{delta_time: .6f} seconds\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.7394326560193102"
      ]
     },
     "execution_count": 75,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Euclidean distance between the least-squares estimate and the ground-truth coefficients.\n",
    "np.linalg.norm(beta_ls-beta_gt)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Elapsed time: 0.001301 seconds\n"
     ]
    }
   ],
   "source": [
    "# Ridge baseline (alpha = 0.1), timed. The result is stored under its own name so the\n",
    "# least-squares estimate beta_ls is not overwritten.\n",
    "time_start = time.perf_counter()\n",
    "beta_ridge = ridge_regression(X, y, 0.1)\n",
    "time_end = time.perf_counter()\n",
    "delta_time = time_end - time_start\n",
    "print(f\"Elapsed time:{delta_time: .6f} seconds\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Elapsed time: 46.595417 seconds\n"
     ]
    }
   ],
   "source": [
    "time_start = time.perf_counter()\n",
    "# Inlier threshold: about 5 * c_bar, because 2 * rand() has mean 1.\n",
    "ransac_t = 5*np.mean(abs(c_bar*(2*np.random.rand(number_data-1))))\n",
    "# Fit every trial on a minimal sample of dim rows. Drawing number_data/10 rows from\n",
    "# heavily contaminated data almost never yields an outlier-free subset, so no trial\n",
    "# could recover the inlier model and each trial was needlessly expensive.\n",
    "beta_ransac = ransac_fit(X, y, dim, 5*number_data, ransac_t, (ransac_outlier_const-outlier_ratio)*number_data)\n",
    "time_end = time.perf_counter()\n",
    "delta_time = time_end - time_start\n",
    "print(f\"Elapsed time:{delta_time: .6f} seconds\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.45178359954303987"
      ]
     },
     "execution_count": 79,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Euclidean distance between the RANSAC estimate and the ground-truth coefficients.\n",
    "np.linalg.norm(beta_ransac-beta_gt)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Elapsed time: 0.025135 seconds\n"
     ]
    }
   ],
   "source": [
    "# GNC-TLS robust fit, using least_sq_solver as the weighted solver and residual_fn for residuals; timed.\n",
    "time_start = time.perf_counter()\n",
    "beta_tls = gnc_tls(number_data,data,least_sq_solver,residual_fn,c_bar)\n",
    "time_end = time.perf_counter()\n",
    "delta_time = time_end - time_start\n",
    "print(f\"Elapsed time:{delta_time: .6f} seconds\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.7266316579249202"
      ]
     },
     "execution_count": 85,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Euclidean distance between the GNC-TLS estimate and the ground-truth coefficients.\n",
    "np.linalg.norm(beta_tls-beta_gt) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Elapsed time: 0.000348 seconds\n"
     ]
    }
   ],
   "source": [
    "# Build the Legendre design matrix here: generate_data only keeps it as a local, so\n",
    "# legendre_X is otherwise undefined on a fresh kernel. X[:, dim - 2] is the degree-1\n",
    "# column of X, i.e. the raw sample points (for dim == 1 only the constant term is used).\n",
    "x_samples = X[:, dim - 2]\n",
    "legendre_X = np.column_stack([eval_legendre(dim - 1 - i, x_samples) for i in range(dim)])\n",
    "\n",
    "time_start = time.perf_counter()\n",
    "beta_legendre = np.linalg.lstsq(legendre_X, y, rcond=None)[0]\n",
    "time_end = time.perf_counter()\n",
    "delta_time = time_end - time_start\n",
    "print(f\"Elapsed time:{delta_time: .6f} seconds\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1.5959401801476658"
      ]
     },
     "execution_count": 87,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Euclidean distance between the Legendre-basis fit and beta_gt.\n",
    "# NOTE(review): beta_gt multiplies the monomial design matrix in generate_data, so the two\n",
    "# coefficient vectors are presumably only directly comparable when dim <= 2 -- confirm.\n",
    "np.linalg.norm(beta_legendre-beta_gt)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}