#import sys
import time
import numpy as np
#import os
import pickle
from matplotlib.colors import LogNorm
import matplotlib.pyplot as plt
from skopt import Optimizer
from skopt.learning.gaussian_process.kernels import ConstantKernel, Matern, Sum, RBF, WhiteKernel
from skopt.learning import GaussianProcessRegressor
from skopt.plots import plot_evaluations, plot_convergence, plot_objective, plot_gaussian_process, plot_histogram, plot_objective_2D
#from skopt.benchmarks import branin, hart6
#from skopt.space.space import Real, Integer
#from functools import partial
#from skopt import gp_minimize
#from skopt import callbacks
#from skopt import gp_minimize, forest_minimize, dummy_minimize
#from skopt import expected_minimum
#from skopt.callbacks import CheckpointSaver
#from skopt.learning import ExtraTreesRegressor
from scipy.stats import multivariate_normal
class GPR_Optimizer(Optimizer):
"""GPR_Optimizer is based on Optimizer class of skopt library.
scikit-optimize.github.io/stable
This class allows you to insert the noise for each data point.
1D Example:
opt = GPR_Optimizer(dimensions=[(-2.,2.)], # parameter space
length_scale=[1], length_scale_bounds=[(.01,10.)],
noise_level=0.1, noise_level_bounds=(0.01,1),
n_initial_points=10,
kappa=10)
next_x = opt.ask() # The first n_initial_points will be random, then the GPR optimizer starts.
f_val = objective(next_x) #objective is the cost function
res = opt.tell(next_x, f_val, error, fit=True) # Report the value.
"""
def __init__(self, dimensions, length_scale, length_scale_bounds, noise_level=0.1, noise_level_bounds=[0.01,1], n_initial_points=10, x_initial=[],
acq_func="LCB", kappa=2*1.96, xi=2*0.01, alpha=[], normalize_y=False, sigma_value_bounds=(1e-3,1e3), sigma_value=1**2,
n_restarts_optimizer=2, acq_optimizer='lbfgs', verbose=True):
'''
Initialize the GaussianProcessRegressor.
I use RBF + WhiteKernel. The default kernel is "1.0 * RBF(1.0)".
The kernel's hyperparameters are optimized during fitting at each tell(). Another common
option is kernel = 1.0 * Matern(length_scale=1.0, length_scale_bounds=(1e-1, 1.0), nu=2.5),
which is a generalization of RBF (equivalent when nu = inf) but takes longer to compute.
alpha (default 1e-10) is the variance = noise**2 added to the kernel diagonal during fitting.
alpha can be a constant, in which case it is the same for all points, or an array
the same length as the data. Some sources say it is equivalent to the white noise added
by a WhiteKernel, but the WhiteKernel has a hyperparameter for the noise which
is re-tuned every time opt.tell(...) is called.
If you set noise = "gaussian", a WhiteKernel is added automatically. Below I
add a WhiteKernel manually in order to set noise_level_bounds explicitly.
Note also that ConstantKernel(constant_value=1**2, constant_value_bounds=constant_value_bounds)*RBF() is equivalent to 1 * RBF(),
except that you can explicitly set the starting value and bounds, which by default are 1.0 and (1e-5, 1e5).
Parameters
----------
dimensions : list of bounds, one per parameter
Example: [(-2., 2.), (-2., 2.)], with ndims entries.
Use (0., 1.) for real parameters and (0, 1) for integers. Other options, such as categories, are available.
length_scale : initial length_scale.
Can be float or list of length ndims
length_scale_bounds : bounds for the length scales
Can be a single pair like [1e-3, 1] or a list of bounds, one per parameter
noise_level : float, initial value of the noise hyperparameter for the WhiteKernel, which is Gaussian noise added to the RBF kernel.
Example: 1e-3 or None
If noise_level = 0 or None, the WhiteKernel is not included and the noise only comes from alpha.
NEW: When normalize_y = True, this is actually noise/std(y), i.e. fractional noise relative to the std of your data.
noise_level_bounds : bounds for noise_level
Example: [1e-3, 1]
n_initial_points : number of initial random points.
Sampling method determined by 'initial_point_generator' in optimizer.py
acq_func : Acquisition function that determines the next best point.
# LCB = lower confidence bound. Uses kappa, not xi.
# PI = probability of improvement. PI doesn't care how much better predictions are than previous values, since it gives equal reward. Uses kappa, not xi.
# EI = expected improvement. Uses xi, which is the desired improvement. Does not use kappa.
# "gp_hedge" uses all three, then favors the ones that improve.
# Advice: if exploration is the priority, use LCB. If exploitation, use PI. For a balance, use EI.
kappa : Controls exploration. High kappa (even inf) favors exploration.
Default 1.96. kappa controls how much of the variance in the predicted value is taken into account.
kappa is used by LCB. xi is used by EI and is the improvement to shoot for over the previous best value.
The code currently uses this kappa to set both; xi may become its own variable in the future.
xi : float 0 to 1, parameter for the EI acquisition function
Determines the desired improvement; xi=0.01 means a 1% change.
alpha : diagonal of the covariance matrix, which is the variance = err^2
float or array-like, optional (default: 1e-10)
The tell function can update a list of alphas giving the variance of every point.
If alpha is too small, the model fits may have trouble converging.
ONLY SET alpha BY PASSING THE ERROR TO tell(), OR NORMALIZE_Y WILL MESS IT UP
x_initial : [] or [ (0,0) ] for 2D. List of initial points to be taken first.
constant_value : initial constant for the RBF kernel.
The kernel is sigma^2 exp(-|x - x'|^2 / (2 l^2)). The constant is sigma^2. It allows larger outputs, increasing the flexibility of the GPR model. Important to set it similar to the expected scale.
constant_value_bounds : bounds for constant_value
acq_optimizer : 'lbfgs' or 'sampling'. Method used to minimize the acquisition function.
The acquisition function is sampled at 'n_points' (10,000) candidate points; 'sampling' simply takes the best one.
'lbfgs' takes the best 'n_restarts_optimizer' of those points and performs gradient-based lbfgs optimization,
then picks the best. 'lbfgs' is slow for high dimensions and many points.
n_restarts_optimizer : e.g. 4. Number of restarts when fitting the kernel hyperparameters (passed to the GaussianProcessRegressor); see also acq_optimizer.
verbose : Default True, whether to print while running
normalize_y : True. When True, the y data has its mean subtracted and is divided by its std at every call of tell(), right before new
hyperparameters are fit. That normalization does not change the noise level, so noise_level above should be interpreted as noise_level/std(y). The same would
hold for alpha,
except that I now always enter alpha=error^2 with tell(), and I have changed tell() to normalize alpha by the std as well. So just enter the error using
tell(x, y, y_err), where y_err is in the same units as y. Do not use y_err=None or 0: if there really is no noise, still pass a small
value of y_err, which is necessary for the fitting to converge easily.
'''
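# Hedged example (not part of the original code): a possible 2D instantiation with
# per-dimension length scales. The parameter values are illustrative only; the
# per-point noise is supplied later through tell().
#   opt = GPR_Optimizer(dimensions=[(-2., 2.), (0., 5.)],
#                       length_scale=[0.5, 1.0],
#                       length_scale_bounds=[(0.01, 10.), (0.01, 10.)],
#                       noise_level=0.1, noise_level_bounds=(0.01, 1),
#                       n_initial_points=10, acq_func="LCB", kappa=4)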
# Define the constant value bounds for the amplitude parameter
#constant_value_bounds = (1e-3, 1e3) # Square since the kernel uses the square of the value, default = 1e-5 to 1e5
# constant value is variance, so square sigmas
constant_value = sigma_value**2
constant_value_bounds = ( sigma_value_bounds[0]**2, sigma_value_bounds[1]**2)
if noise_level == 0 or noise_level is None:
# If no noise, don't add WhiteKernel
kernel = ConstantKernel(constant_value=constant_value, constant_value_bounds=constant_value_bounds) * RBF(length_scale=length_scale, length_scale_bounds=length_scale_bounds)
else:
# If noise is present, add WhiteKernel
kernel = ConstantKernel(constant_value=constant_value, constant_value_bounds=constant_value_bounds) * RBF(length_scale=length_scale, length_scale_bounds=length_scale_bounds) + WhiteKernel(noise_level=noise_level, noise_level_bounds=noise_level_bounds) #starting hp. fitting at each run.
gpr = GaussianProcessRegressor(kernel=kernel,
alpha=alpha,
normalize_y=normalize_y,
# Subtracts the mean and normalizes the std to 1. It is done at the beginning of fit, before alpha is applied.
# In _gpr.py of sklearn: y = (y - self._y_train_mean) / self._y_train_std
# Can be accessed at opt.models[-1].y_train_std_
# Note it only adjusts the y data; it does not change theta or alpha,
# so if the normalization changes a lot, the hyperparameters and alpha have to change with it.
# From the sklearn docs: whether the target values y are normalized, i.e., the mean of the
# observed target values becomes zero. This parameter should be set to
# True if the target values' mean is expected to differ considerably from
# zero. When enabled, the normalization effectively modifies the GP's
# prior based on the data, which contradicts the likelihood principle;
# normalization is thus disabled per default.
#noise="gaussian", # noise="gaussian" adds WhiteKernel, but I added manually. "off" doesn't work, so need to not include.
n_restarts_optimizer=n_restarts_optimizer
# The number of restarts of the optimizer for finding the kernel's hyperparameters. The first run
# of the optimizer is performed from the kernel's initial parameters,
# the remaining ones (if any) from thetas sampled log-uniform randomly
# from the space of allowed theta-values. If greater than 0, all bounds
# must be finite. Note that n_restarts_optimizer == 0 implies that one
# run is performed.
)
super().__init__(
dimensions,
base_estimator=gpr, # default is "GP"; other base estimators include RF, ET, GBRT
n_initial_points=n_initial_points, #default 10
acq_optimizer=acq_optimizer, #'sampling', 'lbfgs', or auto (which chooses between two). 'lbfgs' is slow for high dim and many points
initial_point_generator="lhs", # random (default), sobol, halton, hammersly, lhs (latin hypercube sampling), grid. See optimizer.py for more details.
acq_func=acq_func, #(randomly choose LCB, EI, or PI) #"gp_hedge", "LCB", "EI", "PI".
acq_func_kwargs = {"xi":xi, "kappa":kappa}, # kappa for LCB, xi for EI.
acq_optimizer_kwargs = {"n_points":10000, "n_jobs":4} # settings to speed up tell(). n_jobs sets the number of jobs in Parallel. The acquisition function is always sampled at 'n_points' candidate points. If 'lbfgs', the best 'n_restarts_optimizer' of those points are used as starts for 20 iterations of lbfgs optimization. #"n_restarts_optimizer":1,
)
self.error = [] # List of errors of the measured points. Must be a float or an array with one entry per data point.
self.x_initial = x_initial # initial points to run
self.verbose = verbose # Whether to print statements
self.mean_for_y_norm = 0 # mean and std for normalizing all the y data. This is so that you don't have to change ConstantValue.
self.std_for_y_norm = 1
def ask(self, *args):
''' Same as the Optimizer class, except that points from x_initial are returned first.
Obtains the next point. For the first n_initial_points it comes from the initial_point_generator.
Otherwise the acq_func (LCB, EI, or PI) is used to find the next point.
The "gp_hedge" option uses all three methods, with gains favoring whichever improves.
If super().ask() is called twice in a row, it gives the same value until tell() is called.
'''
# Run points from x_initial until the list is empty. Each call removes one element from x_initial.
if self.x_initial: # Check if the list is not empty
next_x = self.x_initial.pop(0) # Remove and return the first element
# x_initial is now the list without its first element
print("Using initial point:", next_x)
else:
next_x = super().ask(*args) # very fast
#print("next_x: ", next_x)
return next_x
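# Usage sketch (assumption, not original code): points placed in x_initial are
# popped and returned first, one per ask() call, before the initial point
# generator or the acquisition function is used.
#   opt.x_initial = [[0., 0.], [1., 1.]]
#   x1 = opt.ask()   # -> [0., 0.] (removed from x_initial)
#   x2 = opt.ask()   # -> [1., 1.]; note each ask() pops, even without a tell() in between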
def tell(self, next_x, f_val, error=None, fit=True):
''' Reports a measured value. Inherits from the Optimizer class, but also lets you supply point-dependent noise, e.g. if the noise differs
between points or you know the noise from repeated measurements.
next_x : list, parameters of the measured point
f_val : measured value
error : If included, it is used to define a point-specific alpha. If error=0, all noise comes from the initial alpha parameter and the WhiteKernel.
fit : Whether to refit the hyperparameters at the end
Note: tell() is the most time-consuming call. It can be sped up by reducing 'n_restarts_optimizer' and perhaps the sampling method.
If normalize_y = True, gpr.py normalizes the y data by subtracting the mean and dividing by the std every time tell() is called.
You can access the std and mean from the last iteration with self.models[-1].y_train_std_
It does not normalize the noise or alpha. I have set up alpha such that alpha = error^2 / std^2, so the error can stay
in the same units as the cost we enter.
'''
# normalize
f_val = self.normalize_y(f_val)
if error is not None:
error = error/self.std_for_y_norm # normalize
# update alpha
if error is not None:
self.error = np.append(self.error, error )
# if self.models: # check if not empty
# std = self.models[-1].y_train_std_ # the last used standard deviation of y data
# else:
if len(self.yi)>3:
std = np.std(self.yi) # standard dev of the y data. Use in tell() in gpr.py to normalize data
self.base_estimator_.alpha = self.error**2 / std**2 # update alpha during the scan; the fit always clones the base estimator.
#self.update_next() # use this if x different from latest ask value.
start_time = time.time()
res = super().tell(next_x, f_val, fit=fit)
end_time = time.time()
if np.abs(end_time - start_time) > 1:
print("elapsed time for tell: ", end_time - start_time)
if len(self.models) != 0:
hp = np.exp(self.models[-1].kernel_.theta) # get latest hyperparameters
likelihood = self.models[-1].log_marginal_likelihood_value_
if self.verbose:
print("[hp], LML: ", hp, likelihood)
return res
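# Worked sketch of how per-point noise enters the model (my reading of the code
# above): each reported error is stored, normalized, squared, and written into
# base_estimator_.alpha before the refit, i.e. alpha_i = (error_i / std(y))**2.
#   next_x = opt.ask()
#   y, y_err = -0.82, 0.05                 # hypothetical measurement and 1-sigma error
#   opt.tell(next_x, y, error=y_err, fit=True)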
def normalize_y(self, y, inverse=False):
# Subtract mean and divide by std in order to make y data have std = 1, and mean = 0.
# This is so ConstantValue can always be close to 1.
mean = self.mean_for_y_norm
std = self.std_for_y_norm
if inverse is False:
return (y - mean)/std
if inverse is True:
return y*std + mean
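# Optional manual normalization (hedged example): set these attributes before the
# first tell() so the ConstantKernel amplitude can stay near 1. With the defaults
# (mean 0, std 1), normalize_y() is the identity.
#   opt.mean_for_y_norm = 0.5    # hypothetical expected mean of the cost
#   opt.std_for_y_norm = 0.1     # hypothetical expected spread of the cost
#   opt.normalize_y(0.6)                  # -> 1.0
#   opt.normalize_y(1.0, inverse=True)    # -> 0.6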
############# PLOTTING ###############################
def get_hyperparameters(self):
''' Get list of all hyperparameters and likelihoods for all models.
theta is np.log(hyperparameter), so I take np.exp(theta).
The hyperparameters are (annoyingly) stored in opt.models[i].kernel_.theta
To see the latest kernel with its parameters: opt.models[-1].kernel_
This shows what each hyperparameter is.
theta holds np.log() of the values, so use np.exp(opt.models[-1].kernel_.theta) to recover them.
The parameters appear to be arranged left to right as in print(opt.models[-1].kernel_).
'''
opt = self
hp = [np.exp(opt.models[i].kernel_.theta) for i in range(len(opt.models))] # there are n_iter - n_initial_points + 1 models.
likelihood = [opt.models[i].log_marginal_likelihood_value_ for i in range(len(opt.models))]
print(opt.models[-1].kernel_) # # Print latest kernel
return hp, likelihood
def plot_hyperparameters(self):
''' plot the hyperparameters vs iteration'''
hp, likelihood = self.get_hyperparameters()
# create an array of indices from 0 to the length of the list
lst = hp
x = np.arange(len(lst))
# loop over the number of elements in each array (assuming they are all the same length)
for i in range(len(lst[0])):
# create an empty array to store the ith element of each array
y = np.empty(len(lst))
# loop over the list of arrays and fill the y array with the ith element
for j in range(len(lst)):
y[j] = lst[j][i]
# plot x vs y with a label
if i==0:
# Constant value hyperparameter
label='sigma' # this is sqrt of ConstantValue
y = np.sqrt(np.abs(y)) #
elif i==len(lst[0])-1:
label='noise'
#y = y*self.std_for_y_norm
else:
label = f'length scale {i}'
plt.plot(x, y, label=label)
plt.yscale('log')
plt.xlabel('run (after initial)')
plt.ylabel('hyperparameter')
plt.title(str(self.models[-1].kernel_))
plt.grid()
# add a legend and show the plot
plt.legend()
plt.show()
#def predict
# Predict output for X.
# In addition to the mean of the predictive distribution, also its
# standard deviation (return_std=True) or covariance (return_cov=True),
# the gradient of the mean and the standard-deviation with respect to X
# can be optionally provided.
def plot_LML(self):
''' Plots the log marginal likelihood versus noise level and length scale.
Only works for a 1D parameter space.
This was adapted from a scikit-learn example.'''
opt = self
hp_final = np.exp(opt.models[-1].kernel_.theta)
print("final noise parameters:", hp_final )
gpr = opt.models[-1]
print(gpr.kernel_)
length_scale = np.logspace(-2, 2, num=50)
noise_level = np.logspace(-2, 1, num=50)
length_scale_grid, noise_level_grid = np.meshgrid(length_scale, noise_level)
log_marginal_likelihood = [
gpr.log_marginal_likelihood(theta=np.log([hp_final[0], scale, noise])) # insert first hp here (amp of RBF)
for scale, noise in zip(length_scale_grid.ravel(), noise_level_grid.ravel())
]
log_marginal_likelihood = np.reshape(
log_marginal_likelihood, newshape=noise_level_grid.shape
)
vmin, vmax = (-log_marginal_likelihood).min(), 4*50 # set max height here.
level = np.around(np.logspace(np.log10(vmin), np.log10(vmax), num=50), decimals=1)
plt.contour(
length_scale_grid,
noise_level_grid,
-log_marginal_likelihood,
levels=level,
norm=LogNorm(vmin=vmin, vmax=vmax),
)
plt.colorbar()
plt.xscale("log")
plt.yscale("log")
plt.xlabel("Length-scale")
plt.ylabel("Noise-level")
plt.title("Log-marginal-likelihood")
plt.show()
def plot_convergence(self):
# from skopt.plots
# if normalize_y = True, this is still in unnormalized units
res = self.get_result()
plot_convergence(res) # plot at every iteration the best point. I want to plot each point.
def plot_objective(self):
# from skopt.plots
# if normalize_y = True, this is still in unnormalized units
res = self.get_result()
plot_objective(res) # partial-dependence plots of the surrogate objective
def plot_histogram(self):
# from skopt.plots
# if normalize_y = True, this is still in unnormalized units
res = self.get_result()
plot_histogram(res) # histogram of the sampled points
def plot_gaussian_process(self):
# from skopt.plots
res = self.get_result()
#plot_gaussian_process(res) # plot at every iteration the best point. I want to plot each point.
plot_gaussian_process(res, #objective=objective,
#noise_level=0.001,
show_next_point=False,
show_acq_func=True)
def plot_evaluations(self):
# from skopt.plots
res = self.get_result()
plot_evaluations(res) # visualizes the order and location of the evaluated points
def plot_1D_with_objective(self, objective_wo_noise, noise_level):
''' Plotting for 1D functions with known objective. This is for benchmarking.
'''
opt = self
res = opt.get_result()
print("best point:", res.x, res.fun)
# print(res.x_iters)
# print(res.func_vals)
# % Plotting
plot_args = {"objective": objective_wo_noise,
"noise_level": noise_level, "show_legend": True,
"show_title": True, "show_next_point": False,
"show_acq_func": True}
_ = plot_gaussian_process(opt.get_result(), objective=objective_wo_noise,
noise_level=noise_level,
show_next_point=False,
show_acq_func=True) # **plot_args
plt.show()
if 1:
plt.figure()
_ = plot_convergence( opt.get_result() ) # plot at every iteration the best point. I want to plot each point.
plt.figure()
_ = plot_evaluations(opt.get_result())
plt.figure()
_ = plot_objective(opt.get_result()) # plots the gpr function (add error)
# plot measured values vs iteration
plt.figure()
plt.plot(opt.yi,'.') #opt.Xi,
plt.xlabel("Iteration")
plt.ylabel("Function Value")
plt.show()
def save(self, file = 'hloop.pkl'):
with open(file, 'wb') as f:
pickle.dump(self, f)
def load(self, file):
with open(file, 'rb') as f:
hloop_restored = pickle.load(f)
return hloop_restored
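# End-to-end usage sketch (illustrative, not part of the original module). The
# objective below is a hypothetical stand-in for a real measurement that returns
# a cost and a 1-sigma error estimate, which tell() uses as described above.
#   def my_measurement(x):   # hypothetical cost function
#       return (x[0] - 1.0)**2 + 0.05 * np.random.randn(), 0.05
#   opt = GPR_Optimizer(dimensions=[(-2., 2.)], length_scale=[1.],
#                       length_scale_bounds=[(0.01, 10.)],
#                       noise_level=0.1, noise_level_bounds=(0.01, 1),
#                       n_initial_points=10, kappa=4)
#   for _ in range(40):
#       x = opt.ask()
#       y, y_err = my_measurement(x)
#       opt.tell(x, y, error=y_err, fit=True)
#   opt.plot_convergence()
#   opt.save('hloop.pkl')    # pickle the whole optimizer for later restoration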
######### BENCHMARKING ###############################
def calculate_distances(points, single_point):
"""
Calculate the Euclidean distances from a list of points to a single point.
:param points: List of points (each point is a list of coordinates).
:param single_point: A single point (list of coordinates).
:return: List of distances.
"""
# Convert lists to numpy arrays for efficient computation
points_array = np.array(points)
single_point_array = np.array(single_point)
diff = points_array - single_point_array # Calculate the differences and square them
squared_diff = diff ** 2
distances = np.sqrt(np.sum(squared_diff, axis=1)) # Sum the squared differences and take the square root
return distances
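# Quick check (hedged example): Euclidean distances from two points to [0, 1].
#   calculate_distances([[0., 0.], [1., 1.]], [0., 1.])   # -> array([1., 1.])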
def multivariate_gaussian_signal(x, ndims=1, amplitude=1, sigma=0.1, noisetype=None, background_noise=None, N_gaussian = 1):
'''Simulates noisy data using a multivariate normal distribution with a covariance matrix.
Gives a single peak whose shape is set by the covariance matrix.
Noise options:
shot noise (Poisson distribution);
Gaussian noise proportional to sqrt(signal)/N_gaussian, e.g. from averaging several Poissonian signals;
constant Gaussian noise with amplitude 'background_noise'.
Parameters :
x : list of coordinates of the point to evaluate
ndims : 1-5, number of dimensions
amplitude : amplitude of the Gaussian, i.e. its maximum value
sigma : 0.1, standard deviation; FWHM = 2.35*sigma
noisetype : None / 'shot' / 'gaussian'
background_noise : None, or e.g. 1; constant Gaussian noise amplitude
N_gaussian : e.g. 10; if noisetype = 'gaussian', the number of averages
Output :
Negative float (negated so it works with a minimizer).
'''
# 1D Gaussian objective, shot noise
if 1:
# Peak at center with no covariance
mean = np.zeros(ndims) # Assuming the mean vector is zero
covariance_matrix = np.diag(sigma**2 * np.ones(ndims))
elif 0:
# Randomize means and covariance
mean = np.random.randn(ndims) # Random mean vector
A = np.random.randn(ndims, ndims)
covariance_matrix = sigma**2 * np.dot(A, A.T) # to make a positive semi-definite matrix, which is what a covariance matrix must be.
normalizing_factor = 1 / np.sqrt((2 * np.pi) ** ndims * np.linalg.det(covariance_matrix)) # normalization to make max 1
val = multivariate_normal(mean, covariance_matrix).pdf(x) / normalizing_factor
val = amplitude * val
if noisetype=='shot':
# Generate random number from Poisson distribution with mean of 'val'
n = 1 # Number of points to generate
mu = np.abs(val) # Mean of the Poisson distribution
val = np.random.poisson(mu, n)[0]
elif noisetype=='gaussian':
# Gaussian noise proportional to sqrt(signal)/N
# This for example would model the average of N shot noise signals, each with a max signal of amplitude.
val = val + np.sqrt(np.abs(val))/ N_gaussian * np.random.randn()
if background_noise is not None:
# Constant gaussian noise
val = val + background_noise * np.random.randn()
val = np.abs(val)
return -val # because minimizing
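# Benchmark sketch (assumption about intended use, with 'opt' a GPR_Optimizer as
# constructed above): evaluate the simulated peak at a candidate point and feed
# the noisy value back to the optimizer, using the background noise level as a
# rough per-point error estimate.
#   x = opt.ask()
#   y = multivariate_gaussian_signal(x, ndims=2, amplitude=1, sigma=0.3,
#                                    noisetype='gaussian', N_gaussian=10,
#                                    background_noise=0.01)
#   opt.tell(x, y, error=0.01)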
def ci_wilson(self, k, n, z=1.96, eqmode=0):
''' Calculates the binomial proportion confidence interval using the Wilson score method without continuity correction.
# Equations eqmode == 1 from: https://en.wikipedia.org/w/index.php?title=Binomial_proportion_confidence_interval&oldid=1101942017#Wilson_score_interval
# Equations eqmode == 0 from: https://www.evanmiller.org/how-not-to-sort-by-average-rating.html
# The results should be close to:
# from statsmodels.stats.proportion import proportion_confint
# proportion_confint(k, n, alpha=0.05, method='wilson')
#z=1.44 = 85%, 1.96 = 95% '''
if n == 0:
return 0
p_hat = float(k) / n #mean
z2 = z**2
if eqmode == 0:
ci_l = (p_hat + z2/(2*n) - z*np.sqrt(max(0.0, (p_hat*(1 - p_hat) + z2/(4*n))/n))) / (1 + z2 / n)
else:
ci_l = (1.0 / (1.0 + z2/n)) * (p_hat + z2/(2*n)) - (z / (1 + z2/n)) * np.sqrt(max(0.0, (p_hat*(1 - p_hat)/n + z2/(4*(n**2)))))
if eqmode == 0:
ci_u = (p_hat + z2/(2*n) + z*np.sqrt(max(0.0, (p_hat*(1 - p_hat) + z2/(4*n))/n))) / (1 + z2 / n)
else:
ci_u = (1.0 / (1.0 + z2/n)) * (p_hat + z2/(2*n)) + (z / (1 + z2/n)) * np.sqrt(max(0.0, (p_hat*(1 - p_hat)/n + z2/(4*(n**2)))))
return [ci_l, ci_u]
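# Worked example (approximate values; assumes ci_wilson is reachable as a method
# on the optimizer instance since it takes self): 8 successes out of 10 trials at
# 95% confidence (z = 1.96) gives a Wilson interval of roughly [0.49, 0.94].
#   opt.ci_wilson(8, 10, z=1.96)   # -> approximately [0.49, 0.94]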
def binary_simulation(self, mu, Nruns):
''' Simulates binary data with Nruns of either 0 or 1 and mean mu.
mu: between 0 and 1
Nruns: number of runs
# Returns the simulated mean, the max sigma (half-width), and the Wilson 86% confidence interval [lcb, ucb].'''
data = np.random.binomial(1, mu, Nruns)
mean = np.mean(data)
num_ones = np.count_nonzero(data == 1)
confint = self.ci_wilson(num_ones,data.size, z=1.44) # 1.44 for 86% conf, 1.96 for 95%
sigma = np.max(abs(mean - confint)) # take max of lc and uc bounds
return mean, sigma, confint
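# Sketch of using a binary (pass/fail) measurement as the cost (hedged example):
# report the simulated mean as the cost and the Wilson half-width as the
# per-point error, matching the tell(x, y, error) pattern above.
#   mean, sigma, confint = opt.binary_simulation(mu=0.7, Nruns=50)
#   opt.tell(next_x, -mean, error=sigma)   # negated if maximizing a probability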
# def gaussian_benchmark(self, x, noise_level=0.1, Nruns=None):
# ''' x can have any length, and np array or list.
# If Nruns=None, then gaussian noise with noise_level.
# Otherwise, noise is from binary simulation with mean and Nruns runs.
# '''
# def objective1D(z):
# sigma = 0.5
# return 1/(np.sqrt(2 * np.pi)*sigma) * np.exp(-z**2 / (2 * sigma**2))# need to make better function
# v_objective = np.vectorize(objective1D)
# y = np.prod( v_objective(x) )
# if Nruns == None:
# # Random gaussian noise
# y = y + np.random.randn() * noise_level
# error = 0
# else:
# mean, error, confint = self.binary_simulation(y, Nruns) # mu is between 0 and 1
# return -y, error # make it negative