diff --git a/MCMCsampler.m b/MCMCsampler.m new file mode 100644 index 0000000..11b09fe --- /dev/null +++ b/MCMCsampler.m @@ -0,0 +1,131 @@ +% MCMCsampler.m - performs markov chain monte carlo sampling of parameters +% +% /--------------------------------------------------------------\ +% | Date: 02/16/2022 | +% | Author: Jonathan Cody | +% | Affiliation: Pienaar Computational Systems Pharmacology Lab | +% | Weldon School of Biomedical Engineering | +% | Purdue University | +% \--------------------------------------------------------------/ +% +% TOODOO +% -> validate with FitBOUNDS as inf +% -> switch everything to linear scale? +% +% This function uses the results file from 'calibrator.m' to setup parallel +% tempering MCMC. Chains are initialized from 'calibrator.m' parameter +% results. Log-uniform prior distributions are assumed (i.e. known +% parameter distributions prior to performing MCMC). These are bounded +% according to 'setup.FitBOUNDS'. +% +% NOTE: Cost function must be negative log likelihood (see 'costFun_NLL'). +% +% This uses functions from a 3rd-party toolbox (see address for citation) +% https://github.com/ICB-DCM/PESTO +% +% 'performPT.m' is called to perform parallel tempering MCMC +% https://github.com/ICB-DCM/PESTO/blob/master/private/performPT.m +% +% 'performPT.m' has function 'updateStatistics' added as subfunction +% https://github.com/ICB-DCM/PESTO/blob/master/private/updateStatistics.m +% +% 'PTOptions.m' is referenced for 'performPT.m' default options +% https://github.com/ICB-DCM/PESTO/blob/master/%40PTOptions/PTOptions.m +% +%% ======================================================================== +% INPUTS +% ======================================================================== +% +% resultsFile = file name for 'calibrator.m' results file +% +% InitialRanks = indices in 'Results' from which to pull initial parameter +% vectors (each vector will initialize one chain) +% (EX: InitialRanks = 1:5 will use 5 best parameter sets) +% +% numSample = MCMC sample size (aka number of iterations) +% +% saveEach = save period for MCMC (used to continue algorithm if stopped) +% (EX: saveEach = 5000 will save every 5000 iterations) +% (NOTE: saving too frequently will slow down algorithm) +% +%% ======================================================================== +% OUTPUTS +% ======================================================================== +% +% res.par = matrix of log values of sampled parameters +% +% res.logPost = evaluations of cost function ( *-1 ) +% +% res.opt = settings for MCMC file 'performPT.m' (see file) +% +%% ======================================================================== +% FUNCTION +% ======================================================================== +function res = MCMCsampler(resultsFile,InitialRanks,numSamples,saveEach) + +% load data files generated by 'calibrator.m' ----------------------------- + +warning off +load(resultsFile,'Results') +setup = Results(1).setup ; +warning on + +% define 'logPostHandle' input for 'performPT.m' -------------------------- + +% logPostHandle takes in log-scale of sampled parameters, converts them to +% linear scale, inserts them into the full parameter vector, and evaluates +% costFunction (similar to calibrator.m). Negative cost is returned. +logPostHandle = @(LSP) targetFun(LSP,setup) ; + + function negModelCost = targetFun(LSP,setup) + + try + warning off + ParameterFit = setup.GuessBOUNDS(1,:) ; + ParameterFit(setup.SampleIndex) = 10.^LSP ; + negModelCost = -setup.costFunction(ParameterFit) ; + warning on + catch + negModelCost = inf ; + end + end + +% define 'par' input for 'performPT.m' ------------------------------------ + +SampleBOUNDS = setup.FitBOUNDS(:,setup.SampleIndex) ;% parameter bounds +par.min = log10(SampleBOUNDS(1,:))' ;% parameter log min +par.max = log10(SampleBOUNDS(2,:))' ;% parameter log max +par.number = length(setup.SampleIndex) ;% parameter number + +% define 'opt' input for 'performPT.m' ------------------------------------ + +opt.mode = 'text' ;% only print statements +opt.debug = false ;% do not include debug outputs +opt.saveEach = saveEach ;% save every 'saveEach' iterations +opt.saveFileName = [resultsFile '_MCMC'] ; + +numberChains = length(InitialRanks) ;% number of chains +theta0 = zeros(numberChains,par.number) ;% initial points +for n = 1:numberChains + theta0(n,:) = Results(InitialRanks(n)).ParameterFit(setup.SampleIndex); +end +theta0 = log10(theta0) ; + +opt.sigma0 = cov(theta0) ;% initial covariance matrix +opt.theta0 = theta0' ;% initial points +opt.nIterations = numSamples ;% number of iterations +opt.PT.nTemps = numberChains ;% number of chains + +opt.PT.exponentT = 4 ;% see 'performPT' for description +opt.PT.alpha = 0.51 ; +opt.PT.temperatureNu = 1e3 ; +opt.PT.memoryLength = 1 ; +opt.PT.regFactor = 1e-6 ; +opt.PT.temperatureEta = 100 ; +opt.PT.maxT = 5e4 ; + +% run MCMC ---------------------------------------------------------------- + +res = performPT( logPostHandle, par, opt ) ; + +end \ No newline at end of file diff --git a/calibrator.m b/calibrator.m new file mode 100644 index 0000000..de50f9d --- /dev/null +++ b/calibrator.m @@ -0,0 +1,177 @@ +%% calibrator.m = calibrates model parameters via multi-start local-search +% +% /--------------------------------------------------------------\ +% | Date: 10/28/2021 | +% | Author: Jonathan Cody | +% | Affiliation: Purdue University | +% | Weldon School of Biomedical Engineering | +% | Pienaar Computational Systems Pharmacology Lab | +% \--------------------------------------------------------------/ +% +%% ======================================================================== +% INPUTS +% ======================================================================== +% +% setup = structure variable with fields given below +% +% setup.GuessBOUNDS = array for boundaries of initial parameter guesses --- +% EXAMPLE: GuessBOUNDS(:,p) = [minimum maximum] +% p = index for parameter +% NOTE: Enter equal minimum and maximum to fix a parameter +% +% setup.FitBOUNDS = array for boundaries of parameter fitting ------------- +% EXAMPLE: FitBOUNDS(:,p) = [minimum maximum] +% p = index for parameter +% NOTE: Enter -inf or inf for unbounded fitting of that parameter +% +% setup.costFunction = handle for cost function file ---------------------- +% Must take parameter vector and output [ model cost , extra outputs ] +% Extra outputs are optional (can be set to []) +% EXAMPLE: see 'costFun_NLL.m' +% +% setup.timeLimit = number of minutes allowed for each local search +% +% filePrefix = string to prefix to file names ----------------------------- +% EXAMPLE: filePrefix = 'myprefix' +% +% numberGuesses = number of initial parameter guesses --------------------- +% +% numberWorkers = number of parallel processes ---------------------------- +% NOTE: Enter [] or 0 for serial +% +%% ======================================================================== +% OUTPUTS +% ======================================================================== +% +% Results = structure array with fields given below ----------------------- +% Orded with lowest cost result first. +% EXAMPLE: Results(n).ParameterFit = nth lowest cost parameter set +% NOTE: Results is saved in file 'prefix_Results.mat' +% +% ParameterGuess = parameter guess used to initialize optimization +% +% ParameterFit = fitted parameters obtained from optimization +% +% modelCost = model cost when using fitted parameters +% +% extraOutput = extra model outputs (as defined by costFunction) +% +% modelCalls = number of times optimization algorithm called model +% +% loopMinutes = minutes to run optimization algorithm +% +% errorMessage = error message output by optimization algorithm +% +% setup = copy of 'setup' structure (Inputs) with one addition... +% +% setup.SampleIndex = index in GuessBOUNDS that references parameters +% that were sampled (i.e. GuessBOUND(1,p) ~= GuessBOUND(2,p) ) +% +%% ======================================================================== +% FUNCTION +% ======================================================================== +function Results = calibrator(setup,filePrefix,numberGuesses,numberWorkers) + +% Declare the index in 'GuessBOUNDS' for sampled parameters +setup.SampleIndex = find(setup.GuessBOUNDS(1,:)~=setup.GuessBOUNDS(2,:)) ; +% Declare 'setupFunction' for 'looper.m' (see 'calibrator_setup' below) +setupFunction = @() calibrator_setup(numberGuesses,setup.GuessBOUNDS) ; +% Declare 'loopFunction' for 'looper.m' (see 'calibrator_loop' below) +loopFunction = @(result) calibrator_loop(result,setup) ; + +% Execute calibration using 'looper.m' +Results = looper(filePrefix,setupFunction,loopFunction,... + numberWorkers,'modelCost') ; + +% Save 'setup' to 'Results' file +Results(1).setup = setup ; +save([filePrefix '_Results.mat'],'Results') + +end +%% ======================================================================== +% 'calibrator_setup' function +% ======================================================================== +function Results = calibrator_setup(numberGuesses,GuessBOUNDS) + +% Call 'sampler' to generate cell vector of parameter sets and vector +% of sample parameter indices (within 'GuessBOUNDS') +ParameterGuess = sampler(GuessBOUNDS,numberGuesses) ; + +% Initialize 'Results' structure array with variable field 'ParameterGuess' +Results = struct('ParameterGuess',ParameterGuess) ; + +% Include elements in 'Results' to store calibration results +[Results(:).ParameterFit] = deal([]) ; +[Results(:).modelCost] = deal([]) ; +[Results(:).extraOutput] = deal([]) ; +[Results(:).modelCalls] = deal([]) ; + +end +%% ======================================================================== +% 'calibrator_loop' function +% ======================================================================== +function result = calibrator_loop(result,setup) + +% Load variables from 'result' and 'setup' structures +ParameterGuess = result.ParameterGuess ; +GuessBOUNDS = setup.GuessBOUNDS ; +FitBOUNDS = setup.FitBOUNDS ; +costFunction = setup.costFunction ; +timeLimit = setup.timeLimit ; +SampleIndex = setup.SampleIndex ; + +% Extract sampled parameters from 'ParameterGuess' (leaving fixed) +% and convert to log-scale +LogGuess = log10( ParameterGuess(SampleIndex) ) ; + +% Extract sampled parameter fit boundaries from 'FitBOUNDS' (leaving fixed) +% and convert to log-scale (avoiding -Inf entries) +LogBOUNDS = FitBOUNDS(:,SampleIndex) ; +LogBOUNDS(LogBOUNDS~=(-Inf)) = log10( LogBOUNDS(LogBOUNDS~=(-Inf)) ) ; + +% Initialize number of model calls and start calibration timer +modelCalls = 0 ; +calibrationTime = tic ; + +% Fit parameters using optimization function 'fmincon' (see MATLAB help) +% based on target function 'targetfun' (see below) +LogFit = fmincon( @(P)targetFun(P),LogGuess,[],[],[],[],... + LogBOUNDS(1,:),LogBOUNDS(2,:),[],... + optimoptions(@fmincon,'Display','none') ) ; + +% Retrieve model cost, parameter values, costs for each data type, and +% model output for fitted parameter set +[modelCost,ParameterFit,extraOutput] = targetFun(LogFit) ; + +% Store results in structure 'result' +result.ParameterFit = ParameterFit ; +result.modelCost = modelCost ; +result.extraOutput = extraOutput ; +result.modelCalls = modelCalls ; + +% ------------------------------------------------------------------------ +% 'targetfun' function +% ------------------------------------------------------------------------ +function [modelCost,ParameterFit,extraOutput] = targetFun(P) + + % If time limit is exceeded, throw error + if toc(calibrationTime) >= 60*timeLimit + error('Optimization algorithm timed out.') + end + + % Increment number of model calls + modelCalls = modelCalls + 1 ; + + % Declare parameter vector + ParameterFit = GuessBOUNDS(1,:) ; + ParameterFit(SampleIndex) = 10.^(P) ; + + % Use a try-catch statement to continue if an error is thrown + try + [modelCost,extraOutput] = costFunction(ParameterFit) ; + catch + modelCost = nan ; + extraOutput = [] ; + end +end +end \ No newline at end of file diff --git a/calibrator_plot.m b/calibrator_plot.m new file mode 100644 index 0000000..518b5e3 --- /dev/null +++ b/calibrator_plot.m @@ -0,0 +1,351 @@ +%% calibrator_plot.m = plots results from calibrator.m +% +% /--------------------------------------------------------------\ +% | Date: 08/09/2022 | +% | Author: Jonathan Cody | +% | Affiliation: Purdue University | +% | Weldon School of Biomedical Engineering | +% | Pienaar Computational Systems Pharmacology Lab | +% \--------------------------------------------------------------/ +% +% TOODOO +% -> check that parameter plotting works for unbounded fitting +% -> switch plotting to use 'gridplot' functions +% +%% ======================================================================== +% INPUTS +% ======================================================================== +% +% Results = structure array output by 'calibrator.m' ---------------------- +% +% plotSetup = structure array with fields given below --------------------- +% +% Xdata/Ydata = cell arrays for data x-values and y-values +% EXAMPLE: X{i} = [column vector] and Y{i} = [column vector] +% +% modelFunction = handle for calling model function file +% NOTE: Must return a matrix where each column is a different output +% EXAMPLE: [ data type 1 column vector , data type 2 column vector ] +% +% CensorID = logical array showing censored data (same size as Ydata) +% (use 'prepper.m' to generate) +% +% Fits2Plot = ranks of fits to plot against data (default: top 5 fits) +% EXAMPLE: = [2 4 7] will plot 2nd, 4th, and 7th best fits +% +% nFits2Sum = number of parameter sets to summarize (default: top 50) +% +% PlotGrid = x/y arrangement of sub plots (default: automatic) +% EXAMPLE: = [1 3] will make 3 plots in one column +% +% XLabels = {'x1-axis label','x2-axis label',etc} +% +% YLabels = {'y1-axis label','y2-axis label',etc} +% +% XRANGE = y-axis ranges [ y1min y1max ; y2min y2max ; etc ] +% +% YRANGE = y-axis ranges [ y1min y1max ; y2min y2max ; etc ] +% +% PlotOuts = output types to plot (of model) +% EXAMPLE: = 1:2 will plot output 1 and output 2 +% +% plotPars = 1 will include plot of parameters (otherwise not) +% +% SubjectID = numeric array showing data subject (same size as Ydata) +% (use 'prepper.m' to generate) +% +% filePrefix = string to prefix to file names ----------------------------- +% +%% ======================================================================== +% OUTPUTS +% ======================================================================== +% +% Creates and saves plot of model outputs against data +% +% Also plots parameter values (log-normalized to parameter range) +% +%% ======================================================================== +% FUNCTION +% ======================================================================== +function calibrator_plot(Results,plotSetup,figName) + +% Load required variables from input structures +Xdata = plotSetup.Xdata ; +Ydata = plotSetup.Ydata ; +modelFunction = plotSetup.modelFunction ; +GuessBOUNDS = Results(1).setup.GuessBOUNDS ; +SampleIndex = Results(1).setup.SampleIndex ; + +% Declare default optional variables (overwrite, if specified) +CensorID = [] ;% logical array showing censored data (same size as Ydata) +Fits2Plot = 1:5 ;% ranks of fits to plot (default: top 5 fits) +nFits2Sum = 50 ;% number of parameter sets to summarize (default: top 50) +PlotGrid = [] ;% x/y arrangement of sub plots (default: automatic) +PlotOuts = 1:length(Ydata) ;% outputs to plot +plotPars = 1 ;% plot parameters +SubjectID = [] ;% numeric array showing data subject (same size as Ydata) +XLabels = [] ;% {'x1-axis label','x2-axis label',etc} +YLabels = [] ;% {'y1-axis label','y2-axis label',etc} +XRANGE = [] ;% x-axis ranges [ x1min x1max ; x2min x2max ; etc ] +YRANGE = [] ;% y-axis ranges [ y1min y1max ; y2min y2max ; etc ] +xPrecise = .1 ;% space between x values for plotting + +if isfield(plotSetup,'CensorID') ; CensorID = plotSetup.CensorID ; end +if isfield(plotSetup,'Fits2Plot') ; Fits2Plot = plotSetup.Fits2Plot ; end +if isfield(plotSetup,'nFits2Sum') ; nFits2Sum = plotSetup.nFits2Sum ; end +if isfield(plotSetup,'PlotGrid') ; PlotGrid = plotSetup.PlotGrid ; end +if isfield(plotSetup,'PlotOuts') ; PlotOuts = plotSetup.PlotOuts ; end +if isfield(plotSetup,'plotPars') ; plotPars = plotSetup.plotPars ; end +if isfield(plotSetup,'SubjectID') ; SubjectID = plotSetup.SubjectID ; end +if isfield(plotSetup,'XLabels') ; XLabels = plotSetup.XLabels ; end +if isfield(plotSetup,'YLabels') ; YLabels = plotSetup.YLabels ; end +if isfield(plotSetup,'XRANGE') ; XRANGE = plotSetup.XRANGE ; end +if isfield(plotSetup,'YRANGE') ; YRANGE = plotSetup.YRANGE ; end +if isfield(plotSetup,'xPrecise') ; xPrecise = plotSetup.xPrecise; end + +% Reformat data variables (if needed) +if ~iscell(Xdata) ; Xdata = {{Xdata}} ; end +if ~iscell(Ydata) ; Ydata = {{Ydata}} ; end + +if ~isempty(SubjectID) + for n = 1:length(Ydata) + nSub = SubjectID{n}(end) ; + Xtemp = Xdata{n} ; Xdata{n} = cell(1,nSub) ; + Ytemp = Ydata{n} ; Ydata{n} = cell(1,nSub) ; + if ~isempty(CensorID) + CIDtemp = CensorID{n} ; CensorID{n} = cell(1,nSub) ; + end + for m = 1:nSub + indx = SubjectID{n} == m ; + Xdata{n}{m} = Xtemp(indx) ; + Ydata{n}{m} = Ytemp(indx) ; + if ~isempty(CensorID) + CensorID{n}{m} = CIDtemp(indx) ; + end + end + end +end + +% ------------------------------------------------------------------------ +% Declare variables for creating summary figure +% ------------------------------------------------------------------------ + +% Declare 'FitIndex' for identifying successful fits within 'Results' +ModelCosts = cat(2,Results(:).modelCost) ; +FitIndex = ~isnan(ModelCosts) ; +numberFits = sum(FitIndex) ; +FitIndex = find(FitIndex) ; + +% If the number of parameter sets to be included in the summary figure was +% not specified or if the number specified is greater than the actual +% number of fits found, include all the fits found +if isempty(nFits2Sum) || numberFits < nFits2Sum + nFits2Sum = numberFits ; +end + +% If the number of plots specified is greater than the actual number of +% fits found, plot all the fits found +if numberFits < length(Fits2Plot) + Fits2Plot = Fits2Plot(1:numberFits) ; +end +nPlots = length(Fits2Plot) ; + +% If not specified with 'PlotGrid' input, declare the number or rows & +% columns of subplots based on the number of data types +if isempty(PlotGrid) + PlotGrid = [2 1] ; + while PlotGrid(1)*PlotGrid(2) < length(Ydata) + 1 + PlotGrid = PlotGrid + [0 1] ; + if PlotGrid(1)*PlotGrid(2) >= length(Ydata) + 1 + break + end + PlotGrid = PlotGrid + [1 0] ; + end +end + +% Declare plotted symbols and lines for data, model, and parameter plots +DataLines = {'k:o' 'k:^' 'k:s' 'k:d' 'k:p' 'k:h'}; +CensorLines = { 'o' '^' 's' 'd' 'p' 'h'}; +ModLines = {'r-' 'g-' 'b-' 'm-' 'c-' 'y-' }; +ParLines = {'r:o' 'g:o' 'b:o' 'm:o' 'c:o' 'y:o'}; + +% Define a grayscale based on model fit costs +maxCostDiff = Results(nFits2Sum).modelCost - Results(1).modelCost ; +for n = 1:nFits2Sum + costDiff = Results(n).modelCost - Results(1).modelCost ; + GrayScale{n} = [1 1 1]*0.8*(costDiff / maxCostDiff) ; +end + +% Convert fitted parameter values into log-normalized form +NormGuessRange = log10(GuessBOUNDS(2,SampleIndex)) ... + - log10(GuessBOUNDS(1,SampleIndex)) ; +NormFITS = zeros(nFits2Sum,length(SampleIndex)) ; +for n = 1:nFits2Sum + NormFITS(n,:) = ( log10(Results(n).ParameterFit(SampleIndex)) ... + - log10(GuessBOUNDS(1,SampleIndex)) )... + ./ NormGuessRange ; +end + +% Generate Xeval using minimum and maximum of data x-values +xMin = min(Xdata{1}{1}) ; +xMax = max(Xdata{1}{1}) ; +for n = 1:length(Xdata) + for m = 1:length(Xdata{n}) + xMin = min([ min(Xdata{n}{m}) xMin ]) ; + xMax = max([ max(Xdata{n}{m}) xMax ]) ; + end +end +if ~isempty(XRANGE) + xMin = min(XRANGE(:,1)) ; + xMax = max(XRANGE(:,2)) ; +end +Xeval = xMin:xPrecise:xMax ; + +% Evaluate model at Xeval +YPlots = cell(nPlots,1) ; +ErrorSkips = zeros(nPlots,1) ; +for m = 1:nPlots + try + Pars = Results(FitIndex(Fits2Plot(m))).ParameterFit ; + YPlots{m} = modelFunction(Xeval,Pars) ; + catch + ErrorSkips(m) = 1 ; + end +end + +% ------------------------------------------------------------------------ +% Generate summary figure +% ------------------------------------------------------------------------ + +disp('Plotting figure.') ; disp(' ') + +% Declare figure filename and generate figure +figName = [figName '_Plot'] ; +fig = figure('Name',figName,'Color',[1 1 1],... + 'defaultAxesColorOrder',[0 0 0;0 0 0]) ; + +p = 0 ;% plot index + +% For each output requested... +for n = PlotOuts + + % Generate subplot and layer all subsequent plots in that subplot + p = p + 1 ; + subplot(PlotGrid(1),PlotGrid(2),p) + hold on + + % Plot each data series for that type with markers and lines while + % hollowing out censored data points + for m = 1:length(Ydata{n}) + lineIndex = wrapindex(m,length(DataLines)) ; + plot( Xdata{n}{m},Ydata{n}{m},DataLines{lineIndex},... + 'MarkerFaceColor',[0 0 0],'MarkerSize',5 ) + if ~isempty(CensorID) + indx = CensorID{n}{m} ; + plot( Xdata{n}{m}(indx),Ydata{n}{m}(indx), ... + CensorLines{lineIndex}(end), ... + 'MarkerFaceColor',[1 1 1],'MarkerSize',5 ) + end + end + + % If only a subset of outputs from the summarized runs are plotted, + % then for the each parameter set indexed in 'FitIndex' and + % 'Fits2Plot', plot each model output with colored lines + for m = length(FitIndex(Fits2Plot)):-1:1 + if ErrorSkips(m) == 1 + disp(['Skipped plot number ' num2str(m) ' due to error.']) + continue + end + Yplot = YPlots{m}(:,n) ; + lineIndex = wrapindex(m,length(ModLines)) ; + if length(Fits2Plot) < nFits2Sum + plot(Xeval,Yplot,ModLines{lineIndex},'LineWidth',1.5) + else + % Otherwise, use grayscale lines + plot(Xeval,Yplot,'Color',GrayScale{m},'LineWidth',1.5) + end + end + + % Set axis limits and labels and font type and size + if ~isempty(XRANGE) ; xlim([XRANGE(n,1) XRANGE(n,2)]) + end + if ~isempty(YRANGE) ; ylim([YRANGE(n,1) YRANGE(n,2)]) + end + if isempty(YLabels) ; ylabel(['Response ' num2str(n)]) + else ; ylabel(YLabels{n}) + end + if isempty(XLabels) ; xlabel(['Regressor ' num2str(n)]) + else ; xlabel(XLabels{n}) + end + set(gca,'FontSize',11,'FontName','Arial','LineWidth',1.5) +end + +% If parameter plotting is requested... +if plotPars + + % Generate subplot and layer all subsequent plots in that subplot + subplot(PlotGrid(1),PlotGrid(2),length(PlotOuts)+1) + hold on + + % Generate x-values for plotting parameters corresponding to order + % of sampled parameters; generate x-axis tick labels + ParPlotXlabels = strsplit(num2str(SampleIndex)) ; + ParPlotXlabels = [ {'$'} ParPlotXlabels ] ; + ParPlotXvalues = 1:length(SampleIndex) ; + + % On the right axis, plot gridlines for parameter plot + yyaxis right + plot([0 length(ParPlotXvalues)+1],[-0.5 -0.5],'k:',... + [0 length(ParPlotXvalues)+1],[ 0 0 ],'k:',... + [0 length(ParPlotXvalues)+1],[ 0.5 0.5],'k:',... + [0 length(ParPlotXvalues)+1],[ 1 1 ],'k:',... + [0 length(ParPlotXvalues)+1],[ 1.5 1.5],'k:') + + % For each fit descending from 'numberFitSum' to run 1, plot the model + % cost and normalized parameters + for n = nFits2Sum:-1:1 + yyaxis left + plot(0,Results(n).modelCost,'o','MarkerEdgeColor',GrayScale{n},... + 'MarkerFaceColor',GrayScale{n}) + yyaxis right + plot(ParPlotXvalues,NormFITS(n,:),'*','MarkerEdgeColor',GrayScale{n}) + end + + % If only a subset of outputs from the summarized fits are plotted, + % then for the each fit, highlight model cost and parameters + if length(Fits2Plot) < nFits2Sum + for n = length(FitIndex(Fits2Plot)):-1:1 + lineIndex = wrapindex(n,length(ParLines)) ; + yyaxis left + plot(0,Results(FitIndex(Fits2Plot(n))).modelCost,... + ParLines{lineIndex},'MarkerSize',10) + yyaxis right + plot(ParPlotXvalues,NormFITS(FitIndex(Fits2Plot(n)),:),... + ParLines{lineIndex},'MarkerSize',10) + end + end + + % Format plot axes + yyaxis left ; ylabel('Model Cost (dots)') + yyaxis right ; ylabel('Normalized Parameters') + axis([0 length(ParPlotXvalues)+1 0 1]) + yticks(0:0.5:1) + xticks(0:length(ParPlotXvalues)) + xticklabels(ParPlotXlabels) + xlabel('Parameter Indices') + set(gca,'FontSize',11,'FontName','Arial','LineWidth',1.5) +end + +% Save figure and print message +saveas(fig,figName,'fig') +disp('Done.') ; disp(' ') + +% 'wrapindex' function +function index = wrapindex(index,period) + index = rem(index,period) ; + if index == 0 ; index = period ; end +end + +%#ok<*AGROW> + +end \ No newline at end of file diff --git a/costFun_NLL.m b/costFun_NLL.m new file mode 100644 index 0000000..d5a870a --- /dev/null +++ b/costFun_NLL.m @@ -0,0 +1,154 @@ +%% costFun_NLL = model cost calculator (NLL,SSE,MSE w/additional options) +% +% /--------------------------------------------------------------\ +% | Date: 06/25/2022 | +% | Author: Jonathan Cody | +% | Affiliation: Purdue University | +% | Weldon School of Biomedical Engineering | +% | Pienaar Computational Systems Pharmacology Lab | +% \--------------------------------------------------------------/ +% +%% ======================================================================== +% INPUTS +% ======================================================================== +% +% Parameters = vector of parameters for 'modelFunction' ------------------- +% +% modelFunction = handle for calling model function ----------------------- +% EXAMPLE: modelFunction = @(Time,Parameters) ... +% yourFunctionFilename(Time,Parameters,extraInputs) +% NOTE: Should return a matrix where each column is a different output +% EXAMPLE: [ data type 1 column vector , data type 2 column vector ] +% +% Xdata/Ydata = cell arrays for data x-values and y-values ---------------- +% EXAMPLE: X{i} = [column vector] and Y{i} = [column vector] +% i = index for data type (e.g. different cells or molecules) +% +%% ======================================================================== +% OPTIONS +% ======================================================================== +% NOTE: 'varargin' is used to add optional inputs +% EXAMPLE: costFun_NLL(P,modFun,X,Y,'Input Name',[Input Value]) +% +% censorSide = string inidicating direction of censoring ------------------ +% (details pending, see code) +% +% CensorIndex = logical index indicating data that is censored ------------ +% NOTE: Must be the same shape as Xdata/Ydata +% +% costFunction = string inidicating form of cost function ----------------- +% DEFAULT: negative log-likelihood +% +% TypeWeights = vector of weights to be applied to each data subject cost +% +% Xevaluate = vector of x-values to evaluate model function +% DEFAULT: all unique values in Xdata +% +% Xpost = vector of x-values corresponding to post-data region +% NOTE: Xpost is a cell array (one element for each data type) +% DEFAULT: Xpost is empty +% +% YpostFun = special cost function handles applied to post-data region +% NOTE: YpostFun is a cell array (one element for each data type) +% DEFAULT: YpostFun is empty cell array +% +%% ======================================================================== +% OUTPUTS +% ======================================================================== +% +% cost = scalar model cost ------------------------------------------------ +% +% TypeCosts = vector of costs for each data type (with TypeWeights applied) +% +%% ======================================================================== +% FUNCTION +% ======================================================================== +function [cost,TypeCosts] = ... + costFun_NLL(Parameters,modelFunction,Xdata,Ydata,varargin) + +% Reformat data variables (if needed) +if ~iscell(Xdata) ; Xdata = {Xdata} ; end +if ~iscell(Ydata) ; Ydata = {Ydata} ; end + +nTypes = length(Ydata) ;% number of data types + +% Declare optional input default values (then overwrite, if specified) +censorSide = 'left' ;% direction of censoring +CensorIndex = cell(size(Xdata)) ;% indices in 'Xdata' for censored data +costFunction = 'NLL' ;% type of cost function +TypeWeights = ones(1,nTypes) ;% cost weights for different data types +Xevaluate = unique(cat(1,Xdata{:})) ;% x-values to evaluate model function +Xpost = [] ;% x-values for post-data behavior +YpostFun = cell(size(Xdata)) ;% allowable range for post-data behavior + +for n = 1:length(varargin) +if strcmp('censorSide', varargin(n)) ; censorSide = varargin{n+1} ; end +if strcmp('CensorIndex', varargin(n)) ; CensorIndex = varargin{n+1} ; end +if strcmp('costFunction',varargin(n)) ; costFunction = varargin{n+1} ; end +if strcmp('TypeWeights', varargin(n)) ; TypeWeights = varargin{n+1} ; end +if strcmp('Xevaluate', varargin(n)) ; Xevaluate = varargin{n+1} ; end +if strcmp('Xpost', varargin(n)) ; Xpost = varargin{n+1} ; end +if strcmp('YpostFun', varargin(n)) ; YpostFun = varargin{n+1} ; end +end + +% Declare cost function handle +if strcmp(costFunction,'NLL') + % Negative-log-likelihood (assumes independent and identically normally + % distributed error for each data point) + costHandle = @(Ym,Yd) length(Ym)/2 ... + * ( 1 + log(2*pi/length(Ym)) + log(sum((Ym-Yd).^2)) ) ; +elseif strcmp(costFunction,'SSE') + % Sum of squared error + costHandle = @(Ym,Yd) sum((Ym-Yd).^2) ; +elseif strcmp(costFunction,'MSE') + % Mean of squared error + costHandle = @(Ym,Yd) mean((Ym-Yd).^2) ; +else + error('costFunction must either be "NLL","SSE", or "MSE".') +end + +% Run model +ModelOUTPUTS = modelFunction(Xevaluate,Parameters) ; + +% Calculate cost for each data type +TypeCosts = TypeWeights ; +for n = 1:nTypes + + % Interpolate model outputs for 'Xdata' from outputs for 'Xevaluate' + Ymod = interp1(Xevaluate,ModelOUTPUTS(:,n),Xdata{n}) ; + + % Account for censored data (if requested) by equating output to + % left/right- censored data points when the model is below/above data + % OR omit censored data from calibration entirely + if ~isempty(CensorIndex) + if any(CensorIndex{n}) + if strcmp(censorSide,'left' ) ; indx = Ymod<=Ydata{n} ; + elseif strcmp(censorSide,'right') ; indx = Ymod>=Ydata{n} ; + else ; indx = Ymod==Ymod ; + end + indx = indx & CensorIndex{n} ; + Ymod(indx) = Ydata{n}(indx) ; + end + end + + % Calculate cost for data type (with weights) + TypeCosts(n) = costHandle(Ymod,Ydata{n})*TypeWeights(n) ; +end + +% Applying post-data cost function (if requested) +if ~isempty(Xpost) + PostCosts = TypeWeights ; + for n = 1:nTypes + if isempty(YpostFun{n}) + PostCosts(n) = 0 ; + else + Ypost = interp1(Xevaluate,ModelOUTPUTS(:,n),Xpost{n}) ; + PostCosts(n) = YpostFun{n}(Ypost) ; + end + end + TypeCosts = [TypeCosts PostCosts] ; +end + +cost = sum(TypeCosts) ;% total cost + +end \ No newline at end of file diff --git a/looper.m b/looper.m new file mode 100644 index 0000000..ad92d3e --- /dev/null +++ b/looper.m @@ -0,0 +1,126 @@ +%% looper.m = loops through and operates on a structure array +% +% /--------------------------------------------------------------\ +% | Date: 10/28/2021 | +% | Author: Jonathan Cody | +% | Affiliation: Purdue University | +% | Weldon School of Biomedical Engineering | +% | Pienaar Computational Systems Pharmacology Lab | +% \--------------------------------------------------------------/ +% +%% ======================================================================== +% INPUTS +% ======================================================================== +% +% filePrefix = string to prefix to file names ----------------------------- +% EXAMPLE: filePrefix = 'myprefix' +% +% setupFun = handle of function that sets up 'Results' structure +% (see 'calibrator.m' for example) +% +% loopFun = handle of function that executes an operation on one element of +% 'Results' structure (see 'calibrator.m' for example) +% +%% ======================================================================== +% OPTIONS +% ======================================================================== +% NOTE: Set these inputs to [] to ignore them. +% EXAMPLE: Results = looper(filePrefix,setupFun,loopFun,[],[]) +% +% nWorkers = number of parallel processes (enter [] or 0 for serial) +% +% sortField = string indicating name of field in "Results' to sort by +% (ascending sorting) +% +%% ======================================================================== +% OUTPUTS +% ======================================================================== +% +% Results = structure array containing results of loop executions --------- +% EXAMPLE: Results(1).field = data stored in 'field' for loop 1 +% NOTE: Results is saved in file 'prefix_Results.mat' +% +% Results includes fields specified in your functions, with two additions +% +% loopMinutes = minutes to run that loop +% +% errorMessage = error message output by that loop +% +%% ======================================================================== +% FUNCTION +% ======================================================================== +function Results = looper(filePrefix,setupFun,loopFun,nWorkers,sortField) + +resultsFile = [filePrefix '_Results.mat'] ; + +% If results file is NOT in working directory, create results structure and +% folder for storing individual loop backup files (elements of results) +if ~exist(fullfile(cd,resultsFile),'file') + disp('Initializing new results structure.') ; disp(' ') + Results = setupFun() ; + [Results(:).loopMinutes] = deal([]) ; + [Results(:).errorMessage] = deal([]) ; + save(resultsFile,'Results') + mkdir('TEMP') ; + disp('Starting loops.') ; disp(' ') +% Otherwise, load previously saved results structure and add loop backup +% structures to results structure +else + disp('Loading existing results structure.') ; disp(' ') + load(resultsFile,'Results') + for n = 1:length(Results) + loopFile = fullfile('TEMP',['loop_' num2str(n) '.mat']) ; + if exist(loopFile,'file') == 2 + load(loopFile,'result') + Results(n) = result ; + end + end + disp('Resuming loops.') ; disp(' ') +end + +% Execute loops, skipping any that are already done +if isempty(nWorkers) ; nWorkers = 0 ; end +parfor (n = 1:length(Results) , nWorkers) + if isempty(Results(n).loopMinutes) + Results(n) = loop(Results(n),n,loopFun,sortField) + end +end + +% Reorder 'Results' based on field indicated by 'sortField' +if ~isempty(sortField) + [~,SortIndex] = sort(cat(1,Results(:).(sortField))) ; + Results = Results(SortIndex) ; +end + +save(resultsFile,'Results') ; + +% Delete individually saved run backups +if exist('TEMP','dir') + cd('TEMP') ; delete('loop_*.mat') ; cd('../') ; +end + +disp('Done!') ; disp(' ') + +end +%% ======================================================================== +% 'loop' function +% ======================================================================== +function result = loop(result,n,loopFun,sortField) + +loopTimer = tic ; + +% Use 'try-catch' to skip 'loopFun' if an error is thrown +try + result = loopFun(result) ; + disp(['Loop ' num2str(n) ' done @' datestr(now,'HH:MM:SS') '.']) +catch ME + if ~isempty(sortField) ; result.(sortField) = nan ; end + result.errorMessage = ME ; + disp(['Loop ' num2str(n) ' error @' datestr(now,'HH:MM:SS') '.']) +end + +result.loopMinutes = toc(loopTimer)/60 ; + +save( fullfile('TEMP',['loop_' num2str(n) '.mat']) ,'result') ; + +end \ No newline at end of file diff --git a/performPT.m b/performPT.m new file mode 100644 index 0000000..4e7e3b9 --- /dev/null +++ b/performPT.m @@ -0,0 +1,351 @@ +% Source: https://github.com/ICB-DCM/PESTO/blob/master/private/performPT.m +% +% Two edits from source made by Jonathan Cody: +% +% #1 Added function 'updateStatistics' as subfunction (line 325) +% https://github.com/ICB-DCM/PESTO/blob/master/private/updateStatistics.m +% +% #2 Progress printing frequency decreased (line 195) + +function res = performPT( logPostHandle, par, opt ) + +% performPT.m uses an Parallel Tempering algorithm to sample from an +% objective function 'logPostHandle'. The tempered chains are getting +% swapped. The temperatures are getting adapted as well as the proposal +% density covariance matrix. The proposal adaptation is done for each +% region separately to increase locale mixing. +% +% +% The options 'opt' cover: +% opt.theta0 : The initial parameter points for each of the +% tempered chains +% opt.sigma0 : The initial proposal covariance matrix of +% the parameters +% par.min and par.max : The lower and upper bounds for the +% parameters. Proposed points outside this +% area are getting rejected +% par.number : Number of parameters +% opt.nIterations : Number of desired sampling iterations +% opt.PT.nTemps : Number of tempered chains +% opt.PT.exponentT : The exponent of the power law for initial +% temperatures. Higher Values lead to more +% separated initial temperatures. +% opt.PT.alpha : Control parameter for adaption decay. +% Needs values between 0 and 1. Higher values +% lead to faster decays, meaning that new +% iterations influence the single-chain +% proposal adaption only very weakly very +% quickly. +% opt.PT.temperatureNu : Control parameter for adaption decay of the +% temperature adaption. Sample properties as +% described for opt.PT.alpha. +% opt.PT.memoryLength : Control parameter for adaption. Higher +% values suppress strong early adaption. +% opt.PT.regFactor : This factor is used for regularization in +% cases where the single-chain proposal +% covariance matrices are ill conditioned. +% Larger values equal stronger +% regularization. +% opt.PT.temperatureAdaptionScheme: Defines the temperature adaption scheme. +% Either 'Vousden16' or 'Lacki15'. +% opt.PT.swapsPerIter : Number of swaps between temperatures +% per iteration. +% +% +% It returns a struct 'res' covering: +% res.par : The Markov chain of the parameters for each temperature +% res.logPost : The objective value corresponding to parameter +% vector for each temperature +% res.acc : The cumulative acceptance rate of the chains +% res.accSwap : The acceptance rate of swaps between tempered chains +% res.propSwap : Number of times a swap between tempered chains +% was proposed +% res.sigmaScale : The scaling factor of the single-chain proposal +% covariance matrices, which is adapted to +% accomplish an overall 23% acceptance rate +% res.sigmaHist : Single-chain proposal covariance matrix +% res.temperatures : The temperatures of all tempered chains +% +% +% Written by Benjamin Ballnus 2/2017 + + +% Initialization +doDebug = opt.debug; + +saveEach = opt.saveEach; +saveFileName = opt.saveFileName; + +nTemps = opt.PT.nTemps; +nIter = opt.nIterations; +theta0 = opt.theta0; +sigma0 = opt.sigma0; +thetaMin = par.min; +thetaMax = par.max; +exponentT = opt.PT.exponentT; +alpha = opt.PT.alpha; +temperatureNu = opt.PT.temperatureNu; +memoryLength = opt.PT.memoryLength; +regFactor = opt.PT.regFactor; +nPar = par.number; +temperatureEta = opt.PT.temperatureEta; + + + +S = zeros(1,nTemps-2); + +if doDebug + res.par = nan(nPar, nIter, nTemps); + res.logPost = nan(nIter, nTemps); + res.acc = nan(nIter, nTemps); + res.accSwap = nan(nIter, nTemps, nTemps); + res.sigmaScale = nan(nIter, nTemps); + res.temperatures = nan(nIter, nTemps); +else + res.par = nan(nPar, nIter); + res.logPost = nan(nIter, 1); +end + +maxT = opt.PT.maxT; +T = linspace(1,maxT^(1/exponentT),nTemps).^exponentT; +beta = 1./T; + +% Special case of AM: necessary due to linspace behavior +if nTemps == 1 + T = 1; + beta = 1; +end + +acc = zeros(1,nTemps); +accSwap = zeros(1,nTemps-1); +propSwap = zeros(1,nTemps-1); +sigmaScale = ones(1,nTemps); +switch size(theta0,2) + case 1 + theta = repmat(theta0,[1,nTemps]); + case nTemps + theta = theta0; + otherwise + error('Dimension of options.theta0 is incorrect.'); +end +muHist = theta; + +% Regularization sigma0 +for l = 1:size(sigma0,3) + [~,p] = cholcov(sigma0(:,:,l),0); + if p ~= 0 + sigma0(:,:,l) = sigma0(:,:,l) + regFactor*eye(nPar); + sigma0(:,:,l) = (sigma0(:,:,l)+sigma0(:,:,l)')/2; + [~,p] = cholcov(sigma0(:,:,l),0); + if p ~= 0 + sigma0(:,:,l) = sigma0(:,:,l) + max(max(sigma0(:,:,l)))/1000*eye(nPar); + sigma0(:,:,l) = (sigma0(:,:,l)+sigma0(:,:,l)')/2; + end + end +end + +switch size(sigma0,3) + case 1 + sigmaHist = repmat(sigma0,[1,1,nTemps]); + case nTemps + sigmaHist = sigma0; + otherwise + error('Dimension of options.Sigma0 is incorrect.'); +end +logPost = nan(nTemps,1); +logPostProp = nan(nTemps,1); +for l = 1:nTemps + logPost(l) = logPostHandle(theta(:,l)); +end +sigma = sigmaHist; + +msg = ''; +timer = tic; dspTime = toc(timer); + +j = 0; +i = 1; + +% Reset Progress +if (saveEach ~= 0) && ~isempty(saveFileName) && ... + exist([saveFileName '.mat'],'file')==2 + switch opt.mode + case {'visual','text'} + disp('Restoring aborted run...') + end + try + load(saveFileName); + catch + disp('File corrupt.'); + end +end + +% Perform MCMC +while i <= nIter + + j = j + 1; % Relative Index for each Phase + + % Save progress - nice for grid computing + if (saveEach ~= 0) && ~isempty(saveFileName) && mod(i,saveEach)==0 + save(saveFileName); + end + + % Reporting Progress + switch opt.mode + case {'visual','text'} + if toc(timer)-dspTime > 2 % 0.5 reduced printing frequency - JC + fprintf(1, repmat('\b',1,numel(msg)-2)) ; + msg = ['Progress: ' num2str(i/(nIter)*100,'%2.2f') ' %%\n']; + fprintf(1,msg); + dspTime = toc(timer); + end + case 'silent' + end + + + % Do MCMC step for each temperature + for l = 1:nTemps + + % Propose + thetaProp(:,l) = mvnrnd(theta(:,l),sigma(:,:,l))'; + + % Check for Bounds + if (sum(thetaProp(:,l) < thetaMin) + sum(thetaProp(:,l) > thetaMax)) == 0 + + inbounds = 1; + + % Proposed posterior value + logPostProp(l) = logPostHandle(thetaProp(:,l)); + + else + inbounds = 0; + end + + % Transition and Acceptance Probabilities + if (inbounds == 1) && (logPostProp(l) > -inf) && (logPostProp(l) < inf) + logTransFor(l) = 1; + logTransBack(l) = 1; + pAcc(l) = beta(l)*(logPostProp(l)-logPost(l)) + logTransBack(l) - logTransFor(l); + if isnan(pAcc(l)) % May happen if the objective function has numerical problems + pAcc(l) = -inf; + elseif pAcc(l) > 0 % Do not use min, due to NaN behavior in Matlab + pAcc(l) = 0; + end + else + pAcc(l) = -inf; + end + + % Accept or reject + if log(rand) <= pAcc(l) + acc(l) = acc(l) + 1; + theta(:,l) = thetaProp(:,l); + logPost(l) = logPostProp(l); + end + + end + + % Update Proposal + for l = 1:nTemps + % Updating of mean and covariance + [muHist(:,l),sigmaHist(:,:,l)] = ... + updateStatistics(muHist(:,l), sigmaHist(:,:,l), ... + theta(:,l), ... + max(j+1,memoryLength), alpha); + sigmaScale(l) = sigmaScale(l)*exp((exp(pAcc(l))-0.234)/(j+1)^alpha); + + % Set sigma for the next iteration (recently added like this) + sigma(:,:,l) = sigmaScale(l)*sigmaHist(:, :, l); + sigma(:,:,l) = sigmaScale(l)*sigma(:,:,l); + + % Regularization of Sigma + [~,p] = cholcov(sigma(:,:,l),0); + if p ~= 0 + sigma(:,:,l) = sigma(:,:,l) + regFactor*eye(nPar); + sigma(:,:,l) = (sigma(:,:,l)+sigma(:,:,l)')/2; + [~,p] = cholcov(sigma(:,:,l),0); + if p ~= 0 + sigma(:,:,l) = sigma(:,:,l) + max(max(sigma(:,:,l)))*eye(nPar); + sigma(:,:,l) = (sigma(:,:,l)+sigma(:,:,l)')/2; + end + end + + end + + % Swaps between all adjacent chains as in Vousden16 + if nTemps > 1 + dBeta = beta(1:end-1) - beta(2:end); + for l = nTemps:-1:2 + pAccSwap(l-1) = dBeta(l-1) .* (logPost(l)-logPost(l-1))'; + A(l-1) = log(rand) < pAccSwap(l-1); + propSwap(l-1) = propSwap(l-1) + 1; + accSwap(l-1) = accSwap(l-1) + A(l-1); + % As usually implemented when using PT + if A(l-1) + theta(:,[l,l-1]) = theta(:,[l-1,l]); + logPost([l,l-1]) = logPost([l-1,l]); + end + end + end + + % Adaptation of the temperature values (Vousden 2016) + if nTemps > 1 + + % Vousden python Code & Paper + kappa = temperatureNu / ( j + 1 + temperatureNu ) / temperatureEta; + dS = kappa*(A(1:end-1)-A(2:end)); + dT = diff(1./beta(1:end-1)); + dT = dT .* exp(dS); + beta(1:end-1) = 1./cumsum([1,dT]); + end + + % Store iteration + if doDebug + res.par(:,i,:) = theta; + res.logPost(i,:) = logPost; + res.acc(i,:) = 100*acc/j; + res.propSwap = propSwap; + res.accSwap = accSwap; + res.ratioSwap = accSwap ./ propSwap; + res.sigmaScale(i,:) = sigmaScale; + res.sigmaHist = sigmaHist; + res.temperatures(i,:) = 1./beta; + else + res.par(:,i) = theta(:,1); + res.logPost(i) = logPost(1); + end + + i = i + 1; +end + +switch opt.mode + case {'visual','text'} + fprintf(1, repmat('\b',1,numel(msg)-2)) ; + case 'silent' +end + +function [mu, Sigma] = updateStatistics(mu, Sigma, theta, i, decay) +% updateStatistics updates the estimates of the sample mean and the sample +% covariance for use in adaptive Markov chain Monte-Carlo sampling. +% +% USAGE: +% [mu,Sigma] = updateStatistics(mu, Sigma, theta, i, decay, eps) +% +% Parameters: +% mu: current estimate of mean +% Sigma: current estimate of covariance +% i: generation +% decay: decay rate +% +% Return values: +% m: updated estimate of mean +% Sigma: updated estimate of covariance + +% Update rate +gamma = 1/(i^decay); + +% Updating of mu and Sigma +mu = (1-gamma)*mu + gamma*theta; +Sigma = (1-gamma)*Sigma + gamma*(theta-mu)*(theta-mu)'; + +end + +end \ No newline at end of file diff --git a/prepper.m b/prepper.m new file mode 100644 index 0000000..1a44261 --- /dev/null +++ b/prepper.m @@ -0,0 +1,124 @@ +%% prepper.m = normalizes a data set and provides several useful outputs +% +% /--------------------------------------------------------------\ +% | Date: 08/11/2022 | +% | Author: Jonathan Cody | +% | Affiliation: Purdue University | +% | Weldon School of Biomedical Engineering | +% | Pienaar Computational Systems Pharmacology Lab | +% \--------------------------------------------------------------/ +% +% TOODOO +% -> split this up into three functions? +% +%% ======================================================================== +% INPUTS +% ======================================================================== +% +% NOTE: for all input/output examples: +% i = index for data type (e.g. different cells or molecules) +% j = index for data subject (e.g. different patients or repetitions) +% +% Xdata/Ydata = cell arrays for data x/y-values --------------------------- +% EXAMPLE: Ydata{i}{j} = [column vector] (MUST BE COLUMNS!!!) +% +%% ======================================================================== +% OPTIONS +% ======================================================================== +% NOTE: Set these inputs to [] to ignore them. +% EXAMPLE: [Xdata,Ydata] = prepper(Xdata,Ydata,[],[],[],[]) +% +% DataLimits = cell array of values for data limits ----------------------- +% EXAMPLE: DataLimits{i} = scalar +% NOTE: does not distinguish between left-censored and right-censored +% +% CensorStandins = cell array of stand-in values for censored data -------- +% EXAMPLE: CensorStandins{i} = scalar +% NOTE: CensorStandins{i} = [] will omit censored data points +% +% NormIndices = cell array of indexes of y-values to normalize by --------- +% EXAMPLE: NormIndices{i} = [vector] +% NOTE: same indices are applied to each data subject +% NOTE: if NormIndices{i} = 0 , algorithm will use all Y(X<=0) +% +% IsLog = logical array indicating that data type is log-scale ------------ +% EXAMPLE: IsLog(i) = 1 for log-scale data (0 otherwise) +% +%% ======================================================================== +% OUTPUTS +% ======================================================================== +% +% Xdata/Ydata with censor-substitution and normalization applied ---------- +% NOTE: Data subjects are now combined into one vector +% EXAMPLE: Ydata{1} = {[sub 1],[sub 2],[sub 3]} is now ... +% Ydata{1} = [sub 1 ; sub 2 ; sub 3] +% +% SubjectIDs = numeric array indicating data subjects --------------------- +% EXAMPLE: SubjectIDs{1} = [ 1 1 1 1 1 2 2 2 2 2 ] if Ydata{1} +% contained two subjects with 5 data points each +% +% CensorIndex = logical index indicating data that is censored ------------ +% NOTE: Data subjects are combined into one vector (see above) +% +% NormFactors = cell array for normalization factors ---------------------- +% EXAMPLE: NormFactors{i}{j} = scalar +% +%% ======================================================================== +% FUNCTION +% ======================================================================== +function [Xdata,Ydata,SubjectIDs,CensorIndex,NormFactors] = ... + prepper(Xdata,Ydata,DataLimits,CensorStandins,NormIndices,IsLog) + +% Default optional inputs +if isempty(DataLimits) ; DataLimits = cell(size(Ydata)) ; end +if isempty(CensorStandins) ; CensorStandins = cell(size(Ydata)) ; end +if isempty(NormIndices) ; NormIndices = cell(size(Ydata)) ; end +if isempty(IsLog) ; IsLog = zeros(size(Ydata)) ; end + +% Percolate new variables +CensorIndex = Xdata ;% logical index indicating data that is censored +NormFactors = Xdata ;% normalization factors +SubjectIDs = cell(size(Xdata)) ;% numeric indicating data subjects + +% For each data type 'n' and each data subject 'm' +for n = 1:length(Ydata) + for m = 1:length(Ydata{n}) + + % Create index for data that is on 'DataLimits' (is censored) and + % make substitutions indicated by 'CensorStandins' + if isempty(DataLimits{n}) + CensorIndex{n}{m} = isnan(Ydata{n}{m}) ; + else + CensorIndex{n}{m} = Ydata{n}{m} == DataLimits{n} ; + end + if isempty(CensorStandins{n}) + Xdata{n}{m} = Xdata{n}{m}(~CensorIndex{n}{m}) ; + Ydata{n}{m} = Ydata{n}{m}(~CensorIndex{n}{m}) ; + else + Ydata{n}{m}(CensorIndex{n}{m}) = CensorStandins{n} ; + end + % Normalize data by mean of y-values indicated by 'NormIndices' + if isempty(NormIndices{n}) + NormFactors{n}{m} = [] ; + else + if NormIndices{n}(1) == 0 + NormIndices{n} = Xdata{n}{m} <= 0 ; + end + NormFactors{n}{m} = mean(Ydata{n}{m}(NormIndices{n})) ; + if IsLog(n) + Ydata{n}{m} = Ydata{n}{m} - NormFactors{n}{m} ; + else + Ydata{n}{m} = Ydata{n}{m} / NormFactors{n}{m} ; + end + end + + % Create array connecting data points to subjects + SubjectIDs{n} = [ SubjectIDs{n} ; m*ones(size(Ydata{n}{m})) ] ; + end + % Concatenate data subject vectors data type + Xdata{n} = cat(1,Xdata{n}{:}) ; + Ydata{n} = cat(1,Ydata{n}{:}) ; + CensorIndex{n} = cat(1,CensorIndex{n}{:}) ; +end + +end \ No newline at end of file diff --git a/sampler.m b/sampler.m new file mode 100644 index 0000000..e326107 --- /dev/null +++ b/sampler.m @@ -0,0 +1,93 @@ +%% sampler.m = samples a parameter space via latin hypercube sampling +% +% /--------------------------------------------------------------\ +% | Date: 06/25/2021 | +% | Author: Jonathan Cody | +% | Affiliation: Purdue University | +% | Weldon School of Biomedical Engineering | +% | Pienaar Computational Systems Pharmacology Lab | +% \--------------------------------------------------------------/ +% +% NOTE: assumes a log-uniform distribution of parameter values +% (additional distributions are pending) +% +%% ======================================================================== +% INPUTS +% ======================================================================== +% +% SampleBOUNDS = array for boundaries of parameter samples ---------------- +% EXAMPLE: SampleBOUNDS(:,p) = [minimum maximum] +% p = index for parameter +% NOTE: if minimum = maximum, parameter will be fixed +% +% numberSamples = number of parameter samples ----------------------------- +% EXAMPLE: numberSamples = 1000 +% +% varargin = cell indicating extra inputs (see MATLAB help) --------------- +% Any third input (3,'blue',425.89,etc) will cause the LHS sample to be +% saved to a file called 'sampleFile'. If 'sampler' is run again and +% 'sampleFile' is still in the directory, it will be loaded and used +% instead of generating a new sample. +% +%% ======================================================================== +% OUTPUTS +% ======================================================================== +% +% ParameterSets = cell array of parameter sets ---------------------------- +% EXAMPLE: ParameterSets{n}(p) = parameter value +% n = index for sample +% p = index for parameter +% NOTE: sample is taken from a log-uniform distribution +% +% SampleIndex = index in 'ParameterSets' that indicates sampled parameters +% EXAMPLE: +% ParameterSets{n}(SampleIndex) = vector of only sampled parameters +% +%% ======================================================================== +% FUNCTION +% ======================================================================== +function [ParameterSets,SampleIndex] = ... + sampler(SampleBOUNDS,numberSamples,varargin) + +% Declare the index in 'SampleBOUNDS' for sampled parameters +SampleIndex = find( SampleBOUNDS(1,:)~=SampleBOUNDS(2,:) ) ; + +% If sample file is NOT in working directory, make a sample +if ~exist(fullfile(cd,'sampleFile.mat'),'file') + + % Generate a latin hypercube sample matrix (0 to 1 values) using + % function 'lhsdesign' (see MATLAB help) + LatinSAMPLES = lhsdesign(numberSamples,length(SampleIndex)) ; + +else % Otherwise, load previously saved sample + disp('Loading existing sample.') ; disp(' ') + load('sampleFile.mat','LatinSAMPLES') +end + +% Save sample, if requested +if ~isempty(varargin) + save('sampleFile.mat','LatinSAMPLES') ; +end + +% Convert 'SampleBOUNDS' to a logarithmic scale +LogSampleBOUNDS = log10(SampleBOUNDS) ; + +% Declare 'LogSAMPLES' as logarithmically interpolated parameter values +LogSAMPLES = LogSampleBOUNDS(1,SampleIndex) + ... + ( LogSampleBOUNDS(2,SampleIndex) - ... + LogSampleBOUNDS(1,SampleIndex) ).* LatinSAMPLES ; + +% Convert 'LogSAMPLES' to linear scale +SAMPLES = 10.^LogSAMPLES ; + +% Combine sampled and non-sampled parameters into 'ParameterSETS' +ParameterSETS = repmat(SampleBOUNDS(1,:),numberSamples,1) ; +ParameterSETS(:,SampleIndex) = SAMPLES ; + +% Convert matrix 'ParameterSETS' into cell array 'ParameterSets' +ParameterSets = cell(numberSamples,1) ; +for n = 1:numberSamples + ParameterSets{n} = ParameterSETS(n,:) ; +end + +end \ No newline at end of file