From 032efec6125b9318239e1a543830bb8c2401e7f7 Mon Sep 17 00:00:00 2001
From: Nathanael I Lichti <nlichti@purdue.edu>
Date: Wed, 28 Jan 2026 10:43:41 -0500
Subject: [PATCH] Initial R script upload

---
 Beckett_et_al_2026_fsr_functions.R | 3415 ++++++++++++++++++++++++++++
 Beckett_et_al_2026_fsr_run.R       |   41 +
 2 files changed, 3456 insertions(+)
 create mode 100644 Beckett_et_al_2026_fsr_functions.R
 create mode 100644 Beckett_et_al_2026_fsr_run.R

diff --git a/Beckett_et_al_2026_fsr_functions.R b/Beckett_et_al_2026_fsr_functions.R
new file mode 100644
index 0000000..17955aa
--- /dev/null
+++ b/Beckett_et_al_2026_fsr_functions.R
@@ -0,0 +1,3415 @@
+## Top-level function: maxQuantFsr ---------------------------------------------
+
+#' Proteome-wide estimation of fractional synthesis rates (FSR)
+#' using deuterium labeling and MaxQuant output files
+#'
+#' @description
+#' `MaxQuantFsr` provides a user front-end to perform inference on protein 
+#' fractional synthesis rates, including estimation, confidence interval 
+#' calculation, and hypothesis testing for specified contrasts. The function
+#' is designed for an untargeted proteomics workflow based on initial 
+#' processing by MaxQuant, but it will work with any appropriately formatted
+#' input files.
+#'  
+#' @details
+#' Data processing reads the allPeptides.txt and msms.txt files from a folder
+#' containing maxQuant outputs, and joins them by the raw file name and MSMS 
+#' scan number. Sample data, including body water enrichment (BWE) and optional
+#' experimental treatment variables should be provided in an additional csv,
+#' txt, or xlsx file.  
+#' 
+#' After reading the data in, optional quality control filters are applied,
+#' potentially limiting estimation to peptides that appear in a given proportion
+#' of samples, peptides whose isotope patterns are reasonably correlated 
+#' between scans within samples, and proteins that are uniquely associated 
+#' with a given number of peptides.  Additionally, peptides represented in only 
+#' one scan (thus preventing correlation assessment) and peptides associated 
+#' with maxQuant protein groups (as opposed to unique proteins), may also be 
+#' excluded.
+#' 
+#' Estimation uses a mixture model.  Briefly, the theoretical baseline ($f0$) 
+#' and asymptotic ($fa$) mass isotopomer distributions for each pepetide are 
+#' calculated in each sample using given stable isotope abundances, numbers of 
+#' exchangable hydrogen atoms, and per-sample enriched deuterium levels. 
+#' Protein fractional synthetic rates (FSRs) are assumed to follow a 
+#' single-compartment exponential model (a the standard assumption in MIDA 
+#' analyses), and peptides are assumed to represent noisy observations from 
+#' their parent proteins. For a given protein $j$, the rate parameter $k_j$ is 
+#' estimated through a log-link linear model, and the proportion of the peptide 
+#' pool for sample $i$ that originated after labeling is defined as 
+#' $w_{ij} = 1 - exp(-k_j * t_i)$, where $t_i$ is the observation time for 
+#' sample $i$.  This implies that the observed mass isotopomer distribution is 
+#' given by $f_{ijt} = w_{ij} * fa_{ij} + (1 - w_{ij}) * f0_{ij}$. 
+#' 
+#' The model is fit by nonlinear least squares using the Levenberg–Marquardt 
+#' algorithm with standard errors calculated by the delta method. By default, 
+#' p-values are calculated for all pairwise comparisons among groups and then
+#' adjusted over all proteins using the FDR adjustment (WARNING: this has not
+#' been tested with more than two groups).   
+#' 
+#' @param mq_folder character value identifying the path for a folder where 
+#' the maxquant results files are located. If NULL, the user will be prompted
+#' to select a folder.
+#' @param sample_data a file path or a data frame containing sample times and 
+#' BWE estimates for each sample (i.e., each MaxQuant Raw file). If NULL, the 
+#' sample data are assumed to be in `file.path(mq_folder, "sampleData.csv")`.
+#' @param formula an optional right-hand model formula relating fractional 
+#' synthesis rate to experimental variables in `sample_data`. Note that this 
+#' formula defines the study design, \strong{not} the functional form of the 
+#' FSR model to be fit.
+#' @param model the FSR model to be fit (`"e1c"` = exponential one-compartment,
+#' currently the only option).
+#' @param contrasts a list providing a contrast specification (defaults to all 
+#' pairwise comparisons among unique experimental conditions defined by 
+#' `formula`).
+#' @param time_unit the unit on which time is recorded.
+#' @param protein_aliases an optional file or data frame containing two columns,
+#' `Alias` and `Name`, which identify proteins that should be combined or 
+#' renamed. The value in the`Name` column will be retained.
+#' @param msms_count a numeric value giving the minimum proportion of samples 
+#' with an isotope pattern for a given sequence (for quality control)
+#' @param scan_corr a numeric value giving the minimum within-sample correlation
+#' of isotope patterns for a given sequence when > 1 pattern is present (for 
+#' quality control)
+#' @param corr_method passed to `.isotopePatternCorrelation` and then to 
+#' `stats::cor` for quality control checks.
+#' @param use_single_scans logical; should peptides be used for estimation if
+#' they only appear in one scan, so that correlations cannot be calculated? 
+#' @param minSeq minimum number of uniquely associated peptides required to do
+#' FSR calculation on a protein
+#' @param unique_only logical; should estimation be restricted to uniquely 
+#' peptides that are uniquely associated with a single protein?
+#' @param mMax maximum number of isotopic peaks to consider
+#' @param p_adjustment method of p-value adjustment; see "?p.adjust"
+#' @param plot logical; should distribution plots be drawn?
+#' @param save logical; should results be saved to disk
+#' @param ncore number of of processing threads to use
+#' @param seed an optional random number seed. By default, set to 
+#' `as.integer(Sys.time())`
+#' @param .source the path to source file for the fsr analysis functions 
+#' (i.e., this file). This is used to read the files into child processes 
+#' during parallel processing if `ncore > 1`. Defaults to the name of the 
+#' published file in current working directory. 
+#' @param ... optional additional arguments passed to internal functions
+#' 
+#' @result
+#' A list of data frames containing rate estimates for each protein in each 
+#' group, tests comparing the rates in groups, and quality control information.
+#' These can be automatically saved to an Excel workbook by setting 
+#' `save = "xlsx"`.
+#'   
+#' @export
+maxQuantFsr <<- function(
+    mq_folder = NULL,     
+    sample_data = NULL,   
+    formula = ~ 1,        
+    model = "e1c",        
+    contrasts = list(),  
+    time_unit = "hour",
+    protein_aliases = NULL,  
+    msms_count = 0.5,     
+    scan_corr = 0.8,      
+    corr_method = "spearman",
+    use_single_scans = FALSE,
+    minSeq = 1,       
+    unique_only = TRUE,   
+    mMax = 10,   
+    p_adjustment = "fdr",
+    plot = FALSE,        
+    save = FALSE,
+    ncore = 1,
+    seed = NULL,
+    .source = "./Beckett_et_al_2026_fsr_functions.R",
+    ...
+){
+  # Using the dplyr package
+  if(!require(dplyr, quietly = TRUE)){
+    message("Installing required package: dplyr")
+    install.packages("dplyr")
+  }
+  library(dplyr)
+  
+  if(is.null(seed)) seed <- as.integer(Sys.time())
+  
+  # Set up I/O files 
+  timestamp <- Sys.Date()
+  if(is.null(mq_folder)){
+    mq_folder <- choose.dir(caption = "Please select a MaxQuant results folder")
+  }
+  results_folder <- file.path(mq_folder, "fsr_estimation")
+  if(!file.exists(results_folder)){
+    dir.create(results_folder)
+  }
+  # qcfile <- file.path( # now included with rest of output
+  #   results_folder,
+  #   paste0(paste("fsr_qc_report", timestamp, sep = "_"), ".txt")
+  # )
+  if(!any(isFALSE(save), save == c("csv","txt","xlsx"))){
+    stop("save must be FALSE or one of: \"csv\", \"txt\", or \"xlsx\"")
+  }
+  resfile <- file.path(
+    results_folder,
+    paste0(paste("fsr_results", timestamp, sep = "_"), ".", save)
+  )
+  plotfile <- file.path(
+    results_folder,
+    paste0(paste("plots", timestamp, sep = "_"))
+  )
+  if(is.null(sample_data)){
+    sample_data <- file.path(mq_folder,"sampleData.csv")
+  }
+  
+  # Read data
+  # allPeptides.txt has isotope patterns, and msms.txt has sequences and
+  # protein associations.  They are linked by the raw file (i.e., sample) 
+  # name and MSMS scan number.                                                 
+  message(paste0(round(Sys.time()), ": Starting analysis"))
+  message(paste0(
+    round(Sys.time()), ": Reading data files",
+    if(file.size(file.path(mq_folder, "allPeptides.txt")) > 1E8){
+      " (this may take a moment)"
+    }
+  ))
+  apep <- read.delim(file.path(mq_folder, "allPeptides.txt")) 
+  apep <- select(apep, Raw.file, MSMS.Scan.Numbers, Isotope.pattern) %>%
+    rowwise() %>%
+    reframe(
+      Raw.file = Raw.file,
+      Scan.number = strsplit(MSMS.Scan.Numbers, ";")[[1]],
+      Isotope.pattern = Isotope.pattern
+    )
+  msms <- read.delim(file.path(mq_folder, "msms.txt"))
+  msms <- select(msms, Raw.file, Scan.number, Sequence, Proteins, PEP) %>%
+    mutate(Scan.number = as.character(Scan.number))
+  smpl <- readDataFile(sample_data)
+  for(v in all.vars(formula)){
+    if(is.character(smpl[[v]])) smpl[[v]] <- as.factor(smpl[[v]])
+  }
+  
+  # Join tables and clean up detritus
+  pep <- left_join(msms, apep, by = c("Raw.file", "Scan.number")) %>%
+    filter(!is.na(Isotope.pattern)) # remove IP NAs?
+  rm(apep, msms); gc()
+  
+  # Number of biological replicates and MSMS count cutoff
+  sample_sizes <- groupedSampleSize(smpl, formula)
+  # nsample <- sample_sizes[1] #length(unique(pep$Raw.file))
+  msms_count <- if(length(sample_sizes) == 1){
+    ceiling(msms_count * sample_sizes[1])
+  } else {
+    ceiling(msms_count * max(sample_sizes[-1]))
+  }
+  
+  # Resolve protein annotations
+  protein_aliases <- readDataFile(protein_aliases)
+  pep <- dealiasProteins(pep, protein_aliases)
+  
+  # Run quality control checks and filter data
+  mq_proteins <- unique(pep$Proteins)
+  pep <- .checkMsmsDataQuality(pep, corr_method, scan_corr, 
+                               use_single_scans, msms_count, 
+                               smpl, formula, sample_sizes[1])
+  qcreport <- attr(pep, "qc")
+  attr(pep, "qc") <- NULL
+  
+  # Select best isotope pattern (from scan with lowest PEP score / sequence)
+  # and join the sample data to the MSMS data
+  pep <-  suppressMessages(
+    pep %>% group_by(Raw.file, Sequence) %>% summarize(
+      Proteins = Proteins[which.min(PEP)],
+      Isotope.pattern = Isotope.pattern[which.min(PEP)]
+    ) %>% ungroup() #%>%
+  )
+
+  # Split into groups of related proteins/peptides
+  groups <- with(
+    suppressMessages(
+      summarize(group_by(pep, Sequence), Proteins = unique(Proteins))
+    ),
+    findProteinFamilies(Sequence, Proteins, ret_index = TRUE)
+  )
+  pep <- full_join(pep, groups, by = c(Sequence = "peptide"))
+browser()
+  pep <- split(pep, f = factor(pep$group))
+  
+  message(paste0(round(Sys.time()), ": Estimating fractional synthesis rates"))
+  cl <- setupCluster(ncore, seed)              # set up parallel processing & rn seed
+  pbo <- pbapply::pboptions(use_lb = TRUE)     # use load balancing
+  on.exit(pbapply::pboptions(pbo), add = TRUE) # restore default options on exit
+  
+  if(!is.null(cl)){
+    parallel::clusterCall(cl, source, file = .source)
+    parallel::clusterCall(cl, library, package = "dplyr", character.only = TRUE)
+  }
+
+  results <- pbapply::pblapply(pep, .doMaxQuantFsr, formula, smpl, 
+                               minseq = minSeq, mMax = mMax, model= model, 
+                               contrasts = contrasts, plot = plot, 
+                               unique_only = unique_only, ..., 
+                               cl = cl)
+  plots <- .getFsrPlots(results)
+  
+  results <- .segregateProteinGroups(
+    aggregateDataFrameList(results, exclude = "plot")
+  )
+  if(!is.null(results$tests)){
+    results$tests$p.adj <- p.adjust(results$tests$p.value, p_adjustment)
+  }
+  
+  notrun <- list(`not_run` = setdiff(mq_proteins, results$rates$Protein))
+  qcreport <- c(qcreport, .writeQcAddendum(results, notrun))
+  
+  message(paste0(round(Sys.time()), ": Analysis complete"))
+  if(!isFALSE(save)){
+    if(save == "xlsx"){
+      names(qcreport)[1] <- "quality control"
+      names(notrun)[1] <- "not run"
+    }
+    writeAnalysisResults(
+      c(
+        .writeFsrReadme(time_unit, unique_only, p_adjustment, ...),
+        results,
+        qcreport[1], 
+        notrun,
+        list(session = .buildSessionInfo(ncore, seed))
+      ), 
+      resfile
+    )
+    ext <- paste0(".", save)
+    if(ext == ".xlsx"){
+      writeAnalysisResults(qcreport[-1], resfile, append = TRUE, 
+                           sheet = "quality control")
+    } else {
+      resfile <- sub(ext, paste0("_quality_control", ext), resfile)
+      writeAnalysisResults(qcreport[-1], resfile, append = TRUE, 
+                           col.names = FALSE)
+    }
+    resfile <- dirname(resfile)
+    savePlotList(plots, file.path(resfile, "plots"), scale = 3)
+    message(paste0("Results are in ", resfile))
+  }
+  if(length(plots) > 0) results$plots <- plots
+  return(results)
+}
+
+## Assorted utility functions --------------------------------------------------
+
+#' Read standard data file formats
+#' 
+#' @description
+#' A convenience function to read data files into R from a variety of
+#' standard formats, including space, tab, and comma-delimited text, as
+#' well as Excel.
+#' 
+#' @details
+#' If `x` is a character vector with only one element, it is interpreted as 
+#' a file path and the corresponding file is read into R based on its file
+#' extension (\code{\link{readxl::read_xlsx}} is used for `".xlsx"`, 
+#' \code{\link{read.csv}} for `".csv"`, and \code{\link{read.delim}} for
+#' `".txt"`).  If `x` contains a second element, it will be interpreted as
+#' a tab name or tab number if the first element points to an Excel workbook,
+#' and ignored with a warning otherwise. If `x` is (strictly, inherits from)
+#' a data frame, it will simply be returned as-is. 
+#' 
+#' @param x a character vector or a data frame; see Details.
+#' 
+#' @result
+#' A data frame.
+#' 
+#' @seealso
+#' \code{\link{readxl::read_xlsx}}, \code{\link{read.csv}}, and 
+#' \code{\link{read.delim}}
+#' 
+readDataFile <- function(x){
+  if(inherits(x, "data.frame") || is.null(x)) return(x)
+  if(!is.character(x)) 
+    stop("Data file x must be a character vector or a data frame.")
+  f <- x[1]
+  if(grepl(".xlsx", f)){
+    if(length(x) > 1) tb <- x[2] else(tb <- 1)
+    if(!is.na(suppressWarnings(as.numeric(tb)))) tb <- as.numeric(tb)
+    readxl::read_xlsx(f, tb)
+  } else {
+    if(length(x) > 1) 
+      warning("Only the first file name in data file vector will be used.")
+    if(grepl(".csv", f)){
+      read.csv(f)
+    } else {
+      if(grepl(".txt", f)){
+        read.delim(x)
+      } else {
+        stop("Data file should be one of: data frame, .xlsx, .csv, .txt")
+      }
+    }
+  }
+}
+
+#' Write the results of an analysis to a csv, txt, or xlsx file
+#' @param results a matrix, data frame, or list (possibly with > 1 data frame)
+#' @param file path to the output file, including a .csv, .txt, or .xlsx extension
+#' @param append,row.names,col.names logical >> self explanatory
+#' @param sheet passed to openxlsx::write.xlsx
+writeAnalysisResults <<- function(results, file, append = FALSE, sheet = NULL,
+                                  row.names = FALSE, col.names = TRUE){
+  if(inherits(results, c("matrix","data.frame")) || 
+     (is.vector(results) & !is.list(results))){
+    results <- list(result = results)
+  }
+  if(!is.list(results) || is.null(names(results))){
+    stop("results should be a named list; maybe try list(result = results)?")
+  }
+  if(!is.character(file)){
+    stop("file must be a character value")
+  }
+  file <- file[1]
+  fileext <- tools::file_ext(file)
+  if(!append){
+    switch(
+      fileext,
+      csv = {
+        file <- gsub(".csv", "", file, fixed = TRUE)
+        lapply(names(results), function(i){
+          write.table(
+            results[[i]], 
+            file = paste0(file, "_", i, ".csv"), 
+            sep = ",", row.names = row.names, col.names = col.names
+          )
+        })
+      },
+      txt = {
+        file <- gsub(".txt", "", file, fixed = TRUE)
+        lapply(names(results), function(i){
+          write.table(
+            results[[i]], 
+            file = paste0(file, "_", i, ".txt"), 
+            sep = "\t", row.names = row.names, col.names = col.names
+          )
+        })
+      },
+      xlsx = openxlsx::write.xlsx(results, file = file),
+      stop(paste0(
+        fileext, " is not a recognized file extension.\n",
+        "Please use .txt (tab-delimited), .csv (comma-delimited), or ",
+        ".xlsx (Excel)."
+      ))
+    )
+  } else {
+    switch(
+      fileext,
+      csv = suppressWarnings(
+        lapply(results, write.table, file = file, row.names = row.names, 
+               col.names = col.names, append = TRUE, sep = ",")
+      ),
+      txt = suppressWarnings(
+        lapply(results, write.table, file = file, row.names = row.names, 
+               col.names = col.names, append = TRUE, sep = "\t")
+      ),
+      xlsx = {
+        wb <- openxlsx::loadWorkbook(file)
+        if(!is.null(sheet)){
+          if(!any(openxlsx::sheets(wb) == sheet)){
+            openxlsx::addWorksheet(wb, sheet)
+          }
+          for(i in 1:length(results)){
+            sr <- .get_xlsx_dims(wb, sheet)[1] + 1
+            openxlsx::writeData(wb, sheet, results[[i]], startRow = sr)
+          }
+        } else {
+          for(i in names(results)){
+            openxlsx::addWorksheet(wb, i)
+            openxlsx::writeData(wb, sheet = i, results[[i]])
+          }
+        }
+        openxlsx::saveWorkbook(wb, file, overwrite = TRUE)
+      },
+      stop(paste0(
+        fileext, " is not a recognized file extension.\n",
+        "Please use .txt (tab-delimited), .csv (comma-delimited), or ",
+        ".xlsx (Excel)."
+      ))
+    )
+  }
+  invisible(TRUE)
+}
+
+#' Calculate theoretical mass isotopomer distributions for peptides 
+#' based on their amino acid sequences
+#' 
+#' @description
+#' Generates a list of theoretical isotope distributions with a baseline
+#' and asymptotic, enriched distribution for each input peptide sequence.
+#' 
+#' @details
+#' `peptidesToIsotopePatterns` is a flexible front-end for calculating 
+#' theoretical mass isotopomer distributions (MIDs, aka isotope patterns or  
+#' isotope distributions) for peptides. At minimum, users must supply a 
+#' character vector (`x`) containing the target peptide sequences. Optionally,
+#' users can also set the maximum number of output peaks (`nMax`, defaults to
+#' 10) and can supply enriched molar fractions for one or more heavy isotopes.
+#' These are supplied as a named list to the `label` argument.  For example,
+#' `label = list(H2 = 0.02)` will generate a pattern with the assumption that 
+#' deuterium has been enriched to 2%. If a `label` is supplied, the function 
+#' will return a baseline (unlabeled) MID as well as a labeled MID for each 
+#' sequence.  Otherwise, only the baseline MID is returned.   
+#' 
+#' MIDs are calculated based on the information contained in `aatable` and
+#' `fractions`.  The first of these data frames lists the amino acids. It must 
+#' include a column named `formula` that provides the molecular formula for
+#' each amino acid, and must have `row.names` property that identifies the 
+#' amino acid by its standard letter symbol. It can also include columns named 
+#' for elements (i.e., "C", "H", etc.) that provide the number of atoms from  
+#' each element that are accessible to the label (if the value is `NA`, all 
+#' atoms are assumed to be accessible). The default table, 
+#' \code{\link{amino_acids}}, only provides numeric values for hydrogen.   
+#' 
+#' The `fractions` data frame lists heavy isotopes and (at a minimum) the 
+#' molar fractions to be used in calculating the baseline MID. It must include
+#' a `row.names` attributute that identifies the isotopeby elemental symbol and 
+#' mass number (e.g., "C13"), and must include at least one column containing
+#' molar fraction data for each isotope.  Only heavy isotopes should be listed.
+#' Monoisotopes are assumed to make up the remaining potion for each element.
+#' The name of the variable containing the molar fractions should be supplied 
+#' to the argument `fraction_column`. By default, `fractions = stable_isotopes`
+#' and `fraction_column = "nist"`.  See \code{\link{stable_isotopes}} for 
+#' potential alternatives (currently, only `"iupac"`). If they have good data 
+#' available, users are encouraged to provide their own, study-specific molar
+#' fractions, as this will generally lead to more accurate MIDs and improved
+#' rate estimates.
+#' 
+#' By default, all of the isotopes listed in `fractions` will be considered in
+#' MID calculations. Specifically, the molar fractions listed in the `fractions`
+#' table are used to generate the baseline distribution. Then if a `label` is
+#' provided, it is used to replace the relevant isotopic fraction and the MID
+#' is recalculated, with the number of label-accessible atoms determined by 
+#' `aatable`. If desired, the user can use the `isotopes` argument to limit the
+#' set of isotopes included in the MID calculations (e.g., setting 
+#' `isotopes = c("C13","H2")` would only include carbon-13 and deuterium in the
+#' calculations). This is not encouraged, but may be useful for comparisons 
+#' with other software. 
+#' 
+#' @param x  character vector of amino acid sequences
+#' @param nMax the maximum number of peaks to retain in the isotope pattern 
+#' (i.e., the maximum nominal mass difference of interest + 1 for the 
+#' monoisotopic peak)
+#' @param label a named list giving enrichment values for isotopic labels
+#' (e.g. `list(H2 = 0.02)` for a 2% deuterium label)
+#' @param aatable a data frame of amino acid properties, passed to 
+#' \code{\link{.atomsToPattern}}.  See \code{\link{amino_acids}}.
+#' @param fractions data frame of isotope properties, passed to 
+#' \code{\link{.atomsToPattern}}. See \code{\link{stable_isotopes}}.
+#' @param fraction_column name of the variable in `fractions` that contains the
+#' molar fractions of different isotopes (defaults to `"nist"`). 
+#' @param isotopes character vector of stable, heavy isotopes to be considered 
+#' in the isotopomer pattern calculations. Defaults to all isotopes in 
+#' `fractions`.
+#' 
+#' @result
+#' A named list with one component for each input peptide.  Each component is
+#' also a named list with `$baseline` and (if a `label` is specified) 
+#' `$asymptotic` components, each of which are numeric vectors of length `nMax`,
+#' normalized to sum to 1.
+#' 
+peptidesToIsotopePatterns <- function(x, nMax = 10, label = list(), 
+                                      aatable = amino_acids, 
+                                      fractions = stable_isotopes,
+                                      fraction_column = "nist",
+                                      isotopes = rownames(fractions)){
+  m <- 0:(nMax - 1)
+  x <- setNames(x,x)
+  x <- strsplit(x, "")
+  fractions[["molar_fraction"]] <- fractions[[fraction_column]]
+  lapply(x, .atomsToPattern, m, label, isotopes, aatable, fractions)
+}
+
+#' Extract an isotope distribution from a list returned by 
+#' \code{peptidesToIsotopePatterns}
+#' 
+#' @description
+#' A utility to make it easier to access the isotope patterns for a 
+#' specific peptide sequence
+#' 
+#' @details
+#' This is a convenience function to access specific theoretical isotope
+#' distributions after they have been generated.  It is used internally (with 
+#' \code{\link{mapply}}) to package the theoretical distributions to be passed
+#' to the estimation functions. 
+#' 
+#' @param x a named list containing isotope patterns for one or more peptide
+#' sequences, as returned by \code{\link{peptidesToIsotopePatterns}}
+#' @param sequence a character value specifying the sequence to be extracted 
+#' (should match the name of an element in `x`); scalar numeric indices are  
+#' also accepted.
+#' @param sample the name or index of the sample whose pattern is to be 
+#' extracted
+#' @param pattern character value identifying the type of pattern to be 
+#' extracted
+#' 
+#' @result
+#' a numeric vector containing the isotope distribution
+#' 
+#' @seealso
+#' \code{\link{peptidesToIsotopePatterns}}
+#' 
+getPattern <- function(x, sequence, sample, pattern = c("baseline","asymptotic")){
+  x[[sequence]][[sample]][[pattern]]
+}
+
+#' Count the number of atoms in a compound, by element
+#' 
+#' @description
+#' Converts chemical formulas into a table of atomic counts
+#' 
+#' @details
+#' `x` is a character vector of chemical formulas of the form "C4H6N1O2" (note
+#'  that all elements include a count in the formula). `countAtoms` parses the
+#'  formulas and returns a matrix with one row per compound and one column for 
+#'  each of the elements C, H, O, N, P, S, F, Cl, Br, and I (other elements may
+#'  be added in future versions). Values in the matrix are atomic counts.
+#'  If `x` is named, the compound names will be preserved as row names in 
+#'  the output.
+#'  
+#'  @param x character vector of molecular formulas
+#'  
+#'  @result
+#'  matrix of atomic counts (see Details)
+#'    
+countAtoms <- function(x){
+  elements <- c("C","H","O","N","P","S","F","Cl","Br","I")
+  pos <- sapply(elements, stringr::str_locate_all, string = x)
+  if(is.vector(pos)) pos <- t(pos)
+  cnt <- outer(1:nrow(pos), 1:ncol(pos), .getElementCount, pos = pos, x = x)  
+  dimnames(cnt) <- list(names(x), elements)
+  cnt
+}
+
+#' Calculate the theoretical isotope pattern generated by 
+#' a given element in a compound
+#' 
+#' @description
+#' Calculates an isotope distribution for a given element based on the number 
+#' of atoms present in a compound (n), a given set of stable heavy isotope 
+#' fractions (p), and the nominal mass differences from the monoisotope for
+#' those heavy isotopes (d).
+#' 
+#' @details
+#' The elemental isotope distribution is calculated as Multinom(n, p). Note 
+#' that this function uses the vectorized \code{\link{dmultinomial}} function,
+#' not \code{\link{stats::dmultinom}}.
+#' 
+#' @param n numeric vector giving the number of atoms for each element
+#' @param p numeric vector giving isotope fractions for each element
+#' @param d numeric vector giving nominal mass differences for each isotope
+#' 
+#' @result
+#'  A numeric vector describing the isotope distribution.
+#'  
+elementalIsotopomerPattern <- function(n, p, d){
+  stopifnot(length(p) == length(d))
+  if(length(p) == 1){
+    m <- 0:n
+    setNames(dbinom(m, n, p), d * m)
+  } else {
+    p <- c(1 - sum(p), p)
+    d <- c(0, d)
+    M <- eval(parse(
+      text = paste0(
+        "expand.grid(", paste(rep("0:n", length(p)), collapse = ","), ")"
+      )
+    ))
+    M <- as.matrix(M)[rowSums(M) == n,]
+    D <- colSums(t(M) * d)
+    P <- aggregate(dmultinomial(M, n, p), list(D), sum)
+    setNames(P[,2], P[,1])
+  }
+} 
+
+#' Dealias proteins
+#' 
+#' @description
+#' Rename proteins in a dataset to resolve aliased annotations
+#' 
+#' @details
+#' To be filled in...
+#' 
+#' @param x
+#' @param alias_table
+#' @param col
+#' 
+dealiasProteins <<- function(x, alias_table = NULL, col = "Proteins"){
+  if(is.null(alias_table)) return(x)
+  pp <- x[[col]]
+  pp <- strsplit(pp, ";")
+  truenames <- with(alias_table, setNames(Name, Alias))
+  for(i in 1:nrow(alias_table)){
+    pp <- lapply(pp, function(p){
+      p[p %in% names(truenames)] <- truenames[p %in% names(truenames)]
+      paste(unique(p), collapse = ";")
+    })
+  }
+  x[[col]] <- do.call(c, pp)
+  x
+}
+
+#' Identify peptide-protein families 
+#' 
+#' @description
+#' 
+#' @details
+#' This function decomposes a vector of unique peptide sequences and a 
+#' corresponding vector of associated proteins into a list of subvectors, 
+#' where each list-element describes a family of related proteins and peptides.
+#' This means that each protein in the group is linked to at least one other 
+#' protein by a shared peptide. Peptides that are unique to a protein are 
+#' connected to the group through the protein's other associated peptides. 
+#' Peptides or proteins that appear in different list elements have no links 
+#' and can be treated independently of one another.
+findProteinFamilies <- function(peptides, proteins, sep = ";", 
+                                ret_index = FALSE){
+  stopifnot(length(proteins) == length(peptides))
+  stopifnot(is.character(peptides))
+  stopifnot(is.character(proteins))
+  if(any(table(peptides) > 1)){
+    stop("peptides cannot include duplicate values")
+  }
+  proteins <- setNames(proteins, peptides)
+  if(ret_index) refpep <- peptides
+  out <- list()
+  i = 1
+  while(length(peptides) > 0){
+    pepi <- peptides[1]
+    proti <- strsplit(proteins[1], sep)[[1]]
+    j <- 1
+    while(j <= length(proti)){
+      add <- grep(proti[j], proteins)
+      pepi <- unique(c(pepi, peptides[add]))
+      proti <- unique(c(proti, unlist(sapply(proteins[add], strsplit, sep))))
+      j = j + 1
+    }
+    if(ret_index){
+      out[[i]] <- cbind(
+        peptide = pepi, #match(pepi, refpep)
+        group = rep(i, length(pepi)) 
+      )
+    } else {
+      out[[i]] <- list(peptide = pepi, proteins = proteins[pepi])
+    }
+    proteins <- proteins[!peptides %in% pepi]
+    peptides <- peptides[!peptides %in% pepi]
+    i <- i + 1
+  }
+  if(ret_index){
+    out <- as.data.frame(do.call(rbind, out))
+    # out[order(out[,"row"]), "group"]
+  } #else {
+  out
+  # }
+}
+
+#' Construct a peptide-protein association matrix 
+#' 
+#' @description
+#' 
+#' @details
+#' Given a vector of unique peptide sequences and a corresponding vector that 
+#' identifies the proteins associated with each peptide, this function builds 
+#' a binary matrix that indicates which peptides are associated with which 
+#' proteins. The peptide vector should not contain duplicate values.  
+#' 
+#' If a peptide is associated with multiple proteins, they should all be listed
+#' in the corresponding element of `proteins`, separated by the `sep` character.
+#' The defaults are set up to accept the `Sequence` and `Proteins` columns found
+#' in MaxQuant output tables.
+#' 
+#' \strong{Caution}: running `pepToProt` on a large dataset is very 
+#' memory-inefficient.  Use \code{\link{findProteinFamilies}} first to identify 
+#' subsets of related peptides and proteins, then run `pepToProt` separately 
+#' on each family.
+#' 
+#' TODO: Sparsify this thing!
+#' 
+#' @param peptides a chracter vector of unique peptide sequences
+#' @param proteins a character vector giving the proteins associated with each 
+#' peptide, seperated within each element by `sep` if >1 protein association
+#' exists.
+#' @param sep a character separating protein names within elements of `proteins`
+#' 
+pepToProt <<- function(peptides, proteins, sep = ";"){
+  stopifnot(length(proteins) == length(peptides))
+  stopifnot(is.character(peptides))
+  stopifnot(is.character(proteins))
+  if(any(table(peptides) > 1)){
+    stop("peptides cannot include duplicate values")
+  }
+  if(!isFALSE(sep)){
+    proteins <- setNames(strsplit(proteins, sep), peptides)
+  } else {
+    proteins <- setNames(proteins, peptides)
+  }
+  uprot <- sort(unique(unlist(proteins)))
+  .P2PMAT <- matrix(0L, length(peptides), length(uprot), 
+                    dimnames = list(peptides, uprot))
+  # .P2PMAT <- SparseM::as.matrix.csr(.P2PMAT)  # this does not help much
+  sapply(peptides, function(i) {
+    .P2PMAT[i, proteins[[i]]] <<- 1L
+    invisible()
+  })
+  # SparseM::as.matrix.csr(.P2PMAT)
+  .P2PMAT
+}
+
+#' A vectorized version of the multinomial probability mass function
+#' 
+#' @description
+#' `dmultinomial` provides a vectorized version of `stats::dmulitnom`
+#'
+#' @details
+#' `x` is vector-valued, so that n observations each with k possible outcomes
+#' should be represented as an n x k matrix. `size` sets the number of draws 
+#' per row in `x` (recycled if necessary). `prob` is a vector of length k (or
+#' a matrix of length-k row vectors) giving probabilities of different outcomes
+#' (rows recycled if necessary).
+#' 
+#' Trials with different n and k values can be run by ensuring that 
+#' `ncol(x) = max(k)` and that `sum(x[i,]) == n[i]` for all `i`, and setting 
+#' `prob[i, not_possible] = 0`.
+#' 
+#' @param x an n x k numeric matrix giving the number of observed outcomes
+#' from k classes in n multinomial trials.
+#' @param size a numeric vector giving the number of draws per trial
+#' @param prob a numeric vector of length k or a (.) x k matrix giving the 
+#' probability of each outcome (or the probabilities in different trials, if
+#' `prob` is a matrix)
+#' @param log logical. Should the result be reported on the natural log scale?
+#'   
+#' @result
+#' A vector of (log) probabilities for each trial.
+#' 
+#' @seealso
+#' \code{\link{dmultinom}} for a non-vectorized version.
+
+dmultinomial <- function(x, size, prob, log = FALSE){
+  if(is.vector(x)) x <- t(x)
+  if(is.vector(prob)) prob <- t(prob)
+  lout <- max(c(length(size), nrow(x), nrow(prob)))
+  if(length(size) < lout){
+    size <- rep(size, len = lout)
+  } 
+  stopifnot(all(rowSums(x) == size))
+  stopifnot(ncol(x) == ncol(prob))
+  stopifnot(all(prob >= 0))
+  if(any(rowSums(prob) != 1)){
+    warning("Some sum(prob) != 1; they will be normalized.")
+    prob <- prob/rowSums(prob)
+  } 
+  if(nrow(prob) < lout){
+    prob <- prob[rep(1:nrow(prob), len = lout),]
+  } 
+  if(nrow(x) < lout){
+    x <- x[rep(1:nrow(x), len = lout),]
+  }
+  lprob <- lgamma(size+1) - rowSums(lgamma(x+1)) + rowSums(x * log(prob))
+  if(log) lprob else (exp(lprob))
+} 
+
+# Restructure a list of data frames
+aggregateDataFrameList <<- function(x, label = NULL, add_row_names = FALSE,
+                                    exclude = NULL){
+  if(!is.null(label)){
+    arn <- add_row_names
+    add_row_names <- TRUE
+  }
+  full <- sapply(x, length)
+  nms <- setdiff(names(x[[which.max(full)]]), exclude)
+  out <- lapply(setNames(nms, nms), function(i){
+    xi <- lapply(x, "[[", i)
+    do.call(rbind, append(xi, list(make.row.names = add_row_names)))
+  })
+  if(!is.null(label)){
+    lapply(out, function(i){
+      i <- cbind(setNames(list(rownames(i)), label[1]), i)
+      if(!arn) rownames(i) <- NULL
+      i
+    })
+  } else {out}
+}
+
+# identify experimental groups for samples in data frame x, using variables
+# in a formula, and get the sample sizes of the groups
+getDesignGroups <<- function(x, formula, ...){
+  vars <- all.vars(formula)
+  if(length(vars) == 0){
+    return(c(Total = nrow(x)))
+  }
+  formula <- formula(paste0("~", paste(vars, collapse = "+")))
+  mf <- model.frame(formula = formula, data = x, ...)
+  for(i in names(mf)){ mf[,i] <- paste0(i, ":", mf[,i])}
+  apply(mf, 1, paste, collapse = ";")
+} 
+
+groupedSampleSize <<- function(x, formula, ...){
+  cases <- getDesignGroups(x, formula, ...)
+  c(Total = length(cases), table(cases))
+}
+
+# save a list of plots to a directory
+savePlotList <<- function(plots, directory = tempdir(), format = "png", 
+                          verbose = FALSE, ...){
+  nms <- names(plots)
+  if(length(nms) > 0 && !dir.exists(directory)) dir.create(directory)
+  if(length(nms) == 0 && length(plots) > 0){
+    nms <- paste0("Rplot", seq_along(plots))
+    names(plots) <- nms
+  }
+  expr <- "
+  lapply(nms, function(i){
+    ggplot2::ggsave(
+      plots[[i]], 
+      file = file.path(directory, paste0(i, '.', format)),
+      ...
+      )
+  })
+  "
+  if(isTRUE(verbose)){
+    eval(parse(text = expr))
+  } else {
+    suppressMessages(suppressWarnings(eval(parse(text = expr))))
+  }
+  invisible(TRUE)
+}
+
+# set up a parallel processing cluster
+setupCluster <<- function(ncore = 1, seed = NULL,
+                          exports = NULL, evals = NULL, calls = NULL,
+                          cleanup = TRUE, add = FALSE, after = TRUE,
+                          ...){
+  if(is.null(seed)) seed <- as.integer(Sys.time())
+  if(ncore > 1){
+    cl <- parallel::makeCluster(ncore)
+    parallel::clusterSetRNGStream(cl, iseed = seed)
+    if(!is.null(exports)){
+      parallel::clusterExport(cl, exports, envir = parent.frame())
+    }
+    if(!is.null(evals)){
+      for(i in evals) parallel::clusterEvalQ(cl, i)
+    } 
+    if(!is.null(calls)){
+      for(i in calls) parallel::clusterCall(cl, i, ...)
+    }
+    if(!identical(parent.frame(), globalenv()) && cleanup[1]){
+      do.call(
+        on.exit, 
+        list(expr = "parallel::stopCluster(cl)", add = add, after = after)
+      )
+    }
+  } else {
+    cl <- NULL
+    if(!identical(parent.frame(), globalenv()) && cleanup[1]){
+      assign("..rnseed", .Random.seed, envir = parent.frame())
+      do.call(
+        on.exit, 
+        list(expr = "set.seed(..rnseed)", add = add, after = after)
+      ) 
+    }
+    set.seed(seed)
+  }
+  return(cl)
+}
+
+## Internal functions called by maxQuantFsr ------------------------------------
+
+#' Fit an FSR model and obtain results for each protein (or protein group) in
+#' an untargeted proteomics dataset (internal function).
+#' 
+#' @description
+#' Perform inference on FSR for all of the proteins and study conditions
+#' listed in a dataset.
+#' 
+#' @details
+#' This is an internal function. It is not intended to be called directly 
+#' by users.  
+#' 
+#' The function generates a correspondence table that links peptides (rows)
+#' to proteins (or protein groups; columns), and then performs inference for 
+#' each protein that has at least `minseq` uniquely associated peptides (if
+#' no proteins meet this requirement, a value of `NULL` is returned).
+#' 
+#' Processing proceeds as follows:
+#' \enumerate{
+#'   \item{Theoretical baseline and asymptotic distributions are generated once
+#'   for each combination of peptide sequence and enrichment value and then 
+#'   called as needed during model fitting.}
+#'   \item{For each protein (group), \code{\link{.packageFsrData}} is called to 
+#'   synthesize an input list that contains the relevant data, theoretical 
+#'   isotope distributions, and model specifications.
+#'   }
+#'   \item{The input package is passed to \code{\link{.randStart}}, which 
+#'   fits the specified model.}
+#'   \item{The fitted model object, input package, and specifications for
+#'   `contrasts` are passed to \code{\link{.formatFsrResult}}, which returns 
+#'   a list of inference results.}
+#'   \item{If requested, a plot is generated for the protein and appended to
+#'   the result list.}
+#'   \item{Results for all proteins are aggregated into tables organized by
+#'   inference type (see \strong{Results})}.
+#' }  
+#' 
+#' @param x a data frame containing (at minimum) columns titled `Sequence`, 
+#' `Proteins`, `Isotope.pattern`, and `Raw.file`
+#' @param formula a RHS model formula referencing variables in `sample_data` 
+#' (can be `~ 1` for intercept-only models)
+#' @param sample_data a data frame with annotation information for each 
+#' specimen; minimally, it must include columns for `File`, `BWE`, and `Time`
+#' @param minseq numeric value > 0; the minimum number of uniquely associated
+#' peptides required to obtain results for a protein.
+#' @param mMax numeric value >= 1; maximum number of isotopic peaks to include
+#' in calculations.  Non-integer values will be rounded up.
+#' @param model character value identifying the FSR model to be used for 
+#' rate estimation.
+#' @param contrasts a list specifying contrasts to be obtained (see 
+#' \code{\link{.buildContrastMatrix}} for specification details)
+#' @param plot logical; should a plot be obtained for the result?
+#' @param unique_only logical; if `TRUE`, rates will only be estimated for 
+#' proteins that are uniquely associated with at least `minseq` peptide 
+#' sequences. If `FALSE`, rates will also be estimated for protein groups.
+#' THE RELIABILITY AND INTERPRETATION OF GROUP-LEVEL ESTIMATES ARE NOT CLEAR. 
+#' EXERCISE CAUTION!
+#' @param ... additional arguments passed to other functions
+#' 
+#' @result
+#' A list with either 2 or three components:
+#' \describe{
+#'   \item{$rates} a data frame providing the estimated fractional synthesis 
+#'   rates for each protein (group) in each treatment condition
+#'   \item{$contrasts} a data frame providing the estimated contrasts between
+#'   specified treatment conditions for each protein (group). This component 
+#'   is missing if no contrasts were requested.
+#'   \item{$metadata} a data frame containing metadata about each protein 
+#'   (group), including its name, the algorithm used to fit the model, whether
+#'   it converged, and the number of time points, specimens, peptide sequences, 
+#'   isotope peaks, and OIDs used to fit the model. 
+#' }
+#' 
+#' @seealso
+#' \code{\link{peptidesToIsotopePatterns}} for calculation of theoretical 
+#' isotope patterns, \code{\link{.checkFsrInputs}} for data requirements,
+#' \code{\link{.packageFsrData}}, \code{\link{.randStart}},  
+#' \code{\link{.formatFsrResult}}, and \code{\link{aggregateDataFrameList}}
+#'  for details on specific processing steps.
+#' 
+.doMaxQuantFsr <<- function(x, formula, sample_data, minseq, mMax, model, 
+                            contrasts, plot, unique_only = TRUE, unit = "hour",
+                            ...){
+  sep <- if(unique_only) ";" else(FALSE)
+  p2p <- with(
+    dplyr::summarize(dplyr::group_by(x, Sequence), Proteins = unique(Proteins)),
+    pepToProt(Sequence, Proteins, sep = sep)
+  )
+  uniqueToProtein <- rowSums(p2p) == 1
+  nUniquePep <- colSums(p2p[uniqueToProtein,, drop = FALSE])
+  if(!any(nUniquePep >= minseq)) return(NULL)
+  
+  theory <- peptidesToIsotopePatterns( 
+    unique(x$Sequence), 
+    label = list(H2 = with(sample_data, setNames(BWE, File))),   # fix indexing of BWE
+    nMax = ceiling(mMax)
+  )
+  
+  if(minseq > 0){
+    RESULT <- list()
+    p2p <- p2p[uniqueToProtein, nUniquePep >= minseq, drop = FALSE]
+    for(i in colnames(p2p)){
+      peps <- rownames(p2p)[p2p[,i] == 1]
+      
+      X <- .checkFsrInputs(
+        dplyr::left_join(
+          dplyr::filter(x, x$Sequence %in% peps), sample_data, 
+          by = c(Raw.file = "File")
+        ),
+        formula,
+        contrasts
+      )
+      
+      DATA <- .packageFsrData(
+        x = X, 
+        theory = theory[peps],
+        nMax = mMax,
+        formula = formula,
+        model = model,
+        ...
+      )
+      
+      FIT <- .randStart(data = DATA, ...)
+      
+      RESULT[[i]] <- .formatFsrResult(object = FIT, data = DATA, 
+                                      contrasts = contrasts, 
+                                      ...)
+      
+      if(plot) RESULT[[i]]$plot <- .plotFsr2(DATA)
+    } 
+    
+    RESULT <- aggregateDataFrameList(RESULT)
+    
+  } else {
+    stop("minseq must be >= 1.")
+  }
+  RESULT
+}
+
+#' Retrieve functions for a specified model
+#' 
+#' @description
+#' `.setFsrModel` returns a list containing the `objective` function and
+#' `parameter` function for a specified FSR model. 
+#' 
+#' @details
+#' This is an internal function. It is not intended to be called directly 
+#' by users.  
+#' 
+#' `setFsrModel` can accept (1) a character value, (2) function, or (3) list 
+#' as input. If a character vector is given, only the first element will be 
+#' used, with a warning. If a character value is given, `.setFsrModel` calls
+#' \code{\link{base::switch}} to run the predefined model specification 
+#' function requested. Unrecognized function names will return an error.
+#' 
+#' If a function is provided, it will be called with default arguments, and the
+#' result will be checked to ensure that it meets the basic requirements for a
+#' model specification before it is returned. A similar check is performed on
+#' user-supplied list objects. See \code{\link{buildFsrModel}} for information
+#' on custom model specification.
+#' 
+#' @param model one of: a character value specifying a predefined model for 
+#' FSR estimation, a function that generates such a model, or a list object
+#' that provides functions to define the model.
+#' 
+#' @return
+#' A S3 list with two named elements, `objective` and `parameter`, each 
+#' containing a function. See \code{\link{buildFsrModel}} for details on the 
+#' contents and structure of these functions.
+#' 
+#' @seealso
+#' \code\link{.domaxQuantFsr}}, which calls this function and 
+#' \code{\link{.checkFsrModel}}, which enforces model structure.  In addition,
+#' \code{\link{buildFsrModel}} can be used to specify a custom model. 
+#' 
+.setFsrModel <<- function(model){
+  if(is.character(model)){
+    if(length(model) > 1){
+      warning("length(model) is > 1: only the first value will be used.")
+      model <- model[1]
+    }
+    .func <- switch(
+      model,
+      e1c = .ue1cGivenp(),
+      e1cp = {
+        stop("Sorry. Model e1cp has not been written yet.")
+      },
+      stop(paste0(
+        "model should be one of:\n",
+        "  \"e1c\" for one-compartment exponential with given enrichment\n",
+        "  \"e1cp\" for one-compartment exponential with estimated enrichment"
+      ))
+    )
+  }
+  if(is.function(model)) model <- model()
+  if(is.list(model)){
+    .func <- .checkFsrModel(model)
+  }  
+  return(.func)
+}
+
+#' Check the integrity of an FSR model list
+#' 
+#' @description
+#' `.checkFsrModel` performs basic sanity checks on the structure, arguments,
+#' and results generated by an FSR model list.
+#' 
+#' @details
+#' This is an internal function. It is not intended to be called directly 
+#' by users.  
+#' 
+#' `.checkFsrModel` checks its input to confirm that it is a list with (at
+#' least) four elements, named `description`, `objective`, `rate`, and 
+#' `difference`, that all elements have the correct form (a 1-element character 
+#' for `description` and functions for the other three), and that the functions 
+#' have the correct arguments. An error is returned if any of the checks fail.  
+#' Otherwise, the input is returned unchanged.
+#' 
+#' @param x an R object
+#' 
+#' @return
+#' The input model list, `x`, or an error.
+#' 
+#' @sealso
+#' \code{\link{buildFsrModel}} or model specification guidelines.
+#' 
+.checkFsrModel <- function(x){
+  OK <- TRUE
+  for(i in 1:10){
+    if(!OK){
+      stop(paste0(
+        "\nThe specified model does not meet formatting requirements.\n",
+        "Please see help(\"buildFsrModel\") for assistance."
+      ))
+    }
+    OK <- switch(
+      i,
+      is.list(x),
+      all(c("description","objective","rate","difference") %in% names(x)),
+      is.character(x$description),
+      length(x$description) == 1,
+      is.function(x$objective),
+      is.function(x$rate),
+      is.function(x$difference),
+      all(names(formals(x$objective)) == c("pars","data","estimator","...")),
+      all(names(formals(x$rate)) == c("X","pars")),
+      all(names(formals(x$difference)) == c("X","pars","contrast"))
+    )
+  }
+  return(x)
+}
+
+#' Check the integrity of inputs supplied to \code{\link{.doMaxQuantFsr}}
+#' 
+#' @description
+#' `.checkFsrInputs` runs a series of checks on the data supplied to the
+#' estimation functions and returns specific error messages if it detects
+#' any problems.
+#' 
+#' @details
+#' This is an internal function. It is not intended to be called directly 
+#' by users.  
+#'
+#' The current list of checks ensures that the inputs inherit from data.frame 
+#' and that all required variables are present and have the correct data type.
+#' Variables referenced in the model formula are included in the check (and
+#' the formula itself is checked to make sure it really is a model formula).
+#' 
+#' @param x a data frame
+#' @param formula a model formula
+#' @param contrasts a list specifying model contrasts (not currently checked)
+#' 
+#' @return
+#' The input data, `x`, or an error.
+#'  
+#' @seealso
+#' 
+.checkFsrInputs <<- function(x, formula = ~ 1, contrasts = NULL){
+  if(!inherits(x, "data.frame")) stop("x must be a data frame.")
+  if(!is(formula, "formula")) stop("formula must be a RHS model formula.")
+  vars <- all.vars(formula)
+  missingvars <- vars[!vars %in% names(x)]
+  if(length(missingvars) > 0){
+    stop(paste0(
+      "\nThe following variables are missing from the sample data:\n",
+      paste(shQuote(missingvars), collapse = ", ")
+    ))
+  }
+  if(!is.character(x[["Raw.file"]])){ 
+    stop(paste0(
+      "\nx must include a character vector named \"Raw.file\" that\n",
+      "identifies the mass spectrometry sample run (i.e., MS data file)\n)",
+      "associated with each isotope pattern."
+    ))
+  }
+  if(!is.character(x[["Isotope.pattern"]])){ 
+    stop(paste0(
+      "\nx must include a character vector named \"Isotope.pattern\"\n",
+      "that provides the observed isotope distribution with intensities\n",
+      "separated by semicolons (;)."
+    ))
+  }
+  if(!is.character(x[["Proteins"]])){
+    stop(paste0(
+      "\nx must include a character vector named \"Proteins\"\n",
+      "that provides the protein or protein group associated with\n",
+      "each isotope pattern."
+    ))
+  }
+  if(!is.character(x[["Sequence"]])){
+    stop(paste0(
+      "\nx must include a character vector named \"Sequence\"\n",
+      "that provides the peptide sequence for each isotope pattern."
+    ))
+  }
+  if(!is.numeric(x[["BWE"]])){
+    stop(paste0(
+      "\nx must include a numeric vector named \"BWE\"\n",
+      "that provides the body water enrichment for each isotope pattern."
+    ))
+  }
+  if(!is.numeric(x[["Time"]])){
+    stop(paste0(
+      "\nx must include a numeric vector named \"Time\"\n",
+      "that provides the sample collection time (post label exposure)\n",
+      "for each isotope pattern."
+    ))
+  }
+  return(x)
+}
+
+#' Package FSR inputs for analysis
+#' 
+#' @description
+#' `.packageFsrData` aggregates various inputs into a single, consistently
+#' formatted data object
+#' 
+#' @details
+#' This is an internal function. It is not intended to be called directly 
+#' by users.  
+#'
+#' `.packageFsrData` compiles sample data, observed isotope patterns, and
+#' theoretical isotope patterns into a single list object.  In the process, it
+#' parses the observed isotope patterns (passed as semi-colon separated text 
+#' values) and optionally calculates theoretical baseline and asymptotic
+#' isotope patterns. 
+#'
+#' @param x a data frame
+#' @param theory a list of theoretical isotope patterns
+#' @param nMax maximum number of isotopic peaks to consider in analysis
+#' @param formula a RHS model formula
+#' @param ... additional arguments (not currently used)
+#' 
+#' @result
+#' An S3 list with the following components:
+#' \describe{
+#'   \item{`i`}{Numeric sample (row) index. Technically, each row is a peptide
+#'   sequence within a physical sample.}
+#'   \item{`y`}{Observed isotope patterns as a numeric matrix with 
+#'   sample-peptides as rows and peaks as columns. Rows are normalized to sum
+#'   to 1.}
+#'   \item{`mask`}{A logical matrix with dimensions matching `y`, indicating 
+#'   whether each value (peak in a sample) was observed (y > 0) or not.}
+#'   `.$design %*% pars`.}
+#'   \item{`f0`}{A numeric matrix with dimensions matching `y`, giving the 
+#'   theoretical baseline isotope distribution for each sample-peptide.}
+#'   \item{`fa`}{A numeric matrix with dimensions matching `y`, giving the 
+#'   theoretical asymptotic isotope distribution for each sample-peptide.}
+#'   \item{`time`}{A numeric vector giving the time at which each sample was 
+#'   collected, relative to the beginning of label exposure.}
+#'   \item{`sample`}{A character vector identifying the physical sample for 
+#'   each observation.}
+#'   \item{`sequence`}{A character vector identifying the peptide sequence for 
+#'   each observation.}
+#'   \item{`protein`}{A character vector identifying the protein (or protein 
+#'   group) for each observation.}
+#'   \item{`model`}{A model frame returned by `model.frame(formula, data = x)`
+#'   and used to map sample-peptides to fitted parameters.
+#'   \item{`objective`}{An R function that defines the objective to be 
+#'   optimized during parameter estimation (see \code{\link{buildFsrModel}}).}
+#'   \item{`rate`}{An R function that calculates rates based on a parameter
+#'   vector and the model frame in the `model` component.} 
+#'   \item{`difference`}{An R function that calculates rate differences
+#'   using a parameter vector, the `model` component, and a contrast matrix
+#'   returned by \code{\link{.buildContrastMatrix}}.} 
+#' }
+#' 
+#' @seealso
+#' 
+.packageFsrData <<- function(x, theory = NULL, nMax = 10, formula = ~1, 
+                             model = "e1c", ...){
+  isopattern <- lapply(strsplit(x$Isotope.pattern, ";"), as.numeric)
+  y <- matrix(0, length(isopattern), nMax)
+  for(i in 1:length(isopattern)){
+    cols <- 1:min(nMax, length(isopattern[[i]]))
+    tryCatch(
+      y[i, cols] <- isopattern[[i]][cols],
+      error = function(e) browser()
+    )
+  }
+  if(is.null(theory)){
+    theory = peptidesToIsotopePatterns( 
+      x$Sequence, 
+      label = list(H2 = x$BWE),   # fix indexing of BWE
+      nMax = nMax
+    )
+  }
+  .data <- list(
+    i = 1:nrow(y),
+    y = y/rowSums(y, na.rm = TRUE),
+    mask = y > 0,
+    f0 = t(mapply(getPattern, sequence = x$Sequence, sample = x$Raw.file, #Sample, 
+                  MoreArgs = list(x=theory, pattern = "baseline"))[1:nMax,]),   # sapply(theory, "[[", "baseline"),
+    fa = t(mapply(getPattern, sequence = x$Sequence, sample = x$Raw.file, #Sample, 
+                  MoreArgs = list(x=theory, pattern = "asymptotic"))[1:nMax,]), # sapply(theory, "[[", "asymptotic"),
+    time = x$Time,
+    sample = x$Raw.file, #, #Sample
+    sequence = x$Sequence,
+    protein = x$Proteins,
+    formula = formula,
+    # unit = unit,
+    model = model.frame(formula, data = x) #design = model.matrix(object = formula, data = x),
+  )
+  .data <- append(.data, .setFsrModel(model))
+  return(.data)
+}
+
+#' Initialize parameters for optimization
+#' 
+#' @description
+#' `.initializeParameters` reads the parameters from a design matrix, `x`, and 
+#' returns a list with starting values, lower, and upper limits for 
+#' optimization.
+#' 
+#' @details
+#' This is an internal function. It is not intended to be called directly 
+#' by users.  
+#'
+#' `.initializeParameters` uses the column names from a design matrix to 
+#' construct vectors of starting values and lower and upper optimization 
+#' bounds. By default, these are set to `0`, `-Inf`, and `Inf`, respectively.
+#' However, any of these vectors can be reset to user-supplied values. 
+#'
+#' @param x a model frame (returned by \code{model.frame})
+#' @param start `NULL`, or a numeric vector of initial parameter values
+#' @param lower `NULL`, or a numeric vector of lower optimization bounds
+#' @param upper `NULL`, or a numeric vector of upper optimization bounds
+#' @param ... additional arguments (not currently used)
+#' 
+#' @result
+#' A list containing 3 elements, `start`, `lower`, and `upper`, each of which 
+#' is a numeric vector of length `ncol(x)`.
+#' 
+.initializeParameters <<- function(x, start = NULL, lower = NULL, upper = NULL, 
+                                   ...){
+  x <- #tryCatch(
+    model.matrix(formula(x), x)#,
+  #   error = function(e){
+  #     msg <- "contrasts can be applied only to factors with 2 or more levels"
+  #     if(grepl(msg, e)){
+  #      vals <- sapply(x, function(i) unique(i))
+  #      bad <- sapply(vals, length) == 1
+  #      vals <- vals[bad]
+  #      model.matrix(update(formula(x), ~ -substitute(names(vals))), x)
+  #     }
+  #   }
+  # )
+  pars <- colnames(x) 
+  if(is.null(lower)) lower <- setNames(rep(-Inf, ncol(x)), pars)
+  if(is.null(upper)) upper <- setNames(rep(Inf, ncol(x)), pars)
+  if(is.null(start)) start <- setNames(rep(0, length(pars)), pars)
+  if(any(
+    names(lower) != pars, length(lower) != length(pars), !is.numeric(lower),
+    names(upper) != pars, length(upper) != length(pars), !is.numeric(upper),
+    names(start) != pars, length(start) != length(pars), !is.numeric(start) 
+  )){
+    stop(paste0(
+      "\n\"upper\", \"lower\", and \"start\" must be numeric vectors with\n",
+      "names and lengths that match the parameter vector\n", 
+      "generated by the model formula.\n\n",
+      "Parameter names are:\n", paste(shQuote(pars), sep = ", ")
+    ))
+  }
+  .pars <- list(start = start, lower = lower, upper = upper)
+  return(.pars)
+}
+
+#' Format and output the results from fitting an FSR model
+#' 
+#' @description
+#' `.formatFsrResult` obtains, formats, and outputs statistics and metadata
+#' for a fitted model
+#' 
+#' @details
+#' This is an internal function. It is not intended to be called directly 
+#' by users.  
+#' 
+#' The fitted model object is used to obtain estimates and confidence intervals
+#' for rates and optionally for contrasts. If requested, contrasts also include
+#' raw (i.e., not corrected for multiple comparisons) t-test results for the
+#' null hypothesis that rates are equal. The results are organized into a list,
+#' along with a data frame providing metadata about the analysis.
+#' 
+#' @param object a fitted model object returned by \code{\link{.randStart}}
+#' @param data a data object returned by \code{\link{.packageFsrData}}
+#' @param contrasts a list specifying contrasts
+#' @param ... additional arguments passed to \code{\link{.doInference}}
+#' 
+#' @seealso
+#' \code{\link{.buildContrastMatrix}} for implementation of contrasts, 
+#' \code{\link{.doInference}} for inferential statistics 
+#' 
+.formatFsrResult <<- function(object, data, contrasts, ...){
+  npeaks <- rowSums(data$mask)
+  mats <- .buildContrastMatrix(data, contrasts, ...) # this is modified
+  ci <- .doInference(object, data, mats, type = "estimate", ...) # this is modified
+  contrasts <- .doInference(object, data, mats, type = "contrast", ...)  # this is modified
+  
+  .res <- list(
+    rates = cbind(Protein = unique(data$protein), ci),
+    metadata = data.frame(
+      Protein = unique(data$protein),
+      Model = data$description,
+      Class = class(object),
+      Convergence = .checkConvergence(object),
+      `Time points` = length(unique(data$time)),
+      Specimens = length(unique(data$sample)), 
+      Sequences = length(unique(data$sequence)),
+      `Max peaks` = max(npeaks),
+      `Average peaks` = mean(npeaks),
+      Measurements = length(data$time),
+      check.names = FALSE
+    )
+  )
+  if(!is.null(contrasts)){
+    .res <- append(
+      .res,  
+      list(tests = cbind(Protein = unique(data$protein), contrasts)), 
+      after = 1
+    )
+  }
+  return(.res)
+}
+
+#' Sort results into uniquely identified proteins versus protein groups
+#' 
+.segregateProteinGroups <<- function(x, sep = ";"){
+  for(i in c("rates","metadata","tests")){
+    if(is.null(x[[i]])) next
+    is_group <- grepl(sep, x[[i]]$Protein, fixed = TRUE)
+    ord <- 1:nrow(x[[i]])
+    x[[i]] <- list2DF(append(x[[i]], list(Group = is_group), after = 1))
+    x[[i]] <- x[[i]][order(is_group, ord), , drop = FALSE]
+  }
+  return(x)
+}
+
+#' Write a README file to annotate a collection of FSR results
+#' 
+.writeFsrReadme <<- function(unit, unique_only, p_adjustment, ...){
+  inferargs <- sapply(
+    .getArgs(.doInference, ..., 
+             args = c("imethod","estimator","nsim")),
+    function(i) eval(i)[1]
+  )
+  list(
+    readme = c(
+      "PROTEOME-WIDE ESTIMATION OF FRACTIONAL SYNTHESIS RATES", 
+      paste0("Analysis run on ", Sys.Date()), 
+      paste0("Units: percent turnover / ", unit),
+      paste0("Interval estimation methodology: ", inferargs["imethod"]),
+      if(inferargs["imethod"] == "simulation"){
+        paste0("Number of parametric bootstrap simulations:", inferargs["nsim"])
+      },
+      paste0("FSR estimation methodology: ", inferargs["estimator"]), 
+      paste0("P-value adjustment: ", p_adjustment), "",
+      
+      if(!unique_only){
+        c(
+          paste0("WARNING: Rate estimates for protein groups may represent ",
+                 "averaged results over mixtures "),
+          "of proteins with unknown propotions.",
+          paste0("They are likely to be inaccurate. Use caution when ", 
+                 "interpreting them, especially if "),
+          "a group includes proteins that are not closely related.", 
+          "",
+          paste0("Groups are identified by a \"TRUE\" value for \"Group\" ",
+                 "in the rates, tests, and metadata tables."),
+          ""
+        ) 
+      } else {NULL},
+      
+      "Table of contents:",
+      paste0("  - rates: point estimates, standard errors, and confidence ",
+             "intervals for each protein at"), 
+      paste0("     each experimental condition. If a condition is missing ",
+             "for a particular  protein, no data"),
+      "     were available for the protein under the missing condition.",
+      "     or protein group, by experimental condition if appropriate.", 
+      paste0("  - tests: point estimates, standard errors, confidence ",
+             "intervals, and "),
+      paste0("     p-values for requested contrasts. Missing contrasts could ",
+             "not be estimated due to missing data."), 
+      paste0("  - metadata: supplementary information about each protein ",
+             "(or protein group)"),
+      "     and the model fit, including:",
+      "       > A verbal description of the model that was used to estimate FSR",
+      "       > The class of the optimization algorithm used to fit the model",
+      "       > Algorithm convergence status (see below)",
+      "       > The number of unique time points for which data were available",
+      "       > The number of unique specimens for which data were available",
+      "       > The number of peptide sequences used for estimation",
+      "       > The average number of isotopic peaks per sequence",
+      "       > The maximum number of isotopic peaks per sequence",
+      "       > The total number of observations used for estimation ",
+      "           (max 1 observation per time point per specimen per sequence)",
+      paste0("  - quality control: information about the results of quality ",
+             "control filters"),
+      "     imposed on the raw data:",
+      "       > Filter settings",
+      paste0("       > Counts of observations, peptides, proteins, and ",
+             "protein groups that remained"),
+      "          in the analysis after filters were imposed",
+      paste0("       > Counts of proteins flagged as contaminants or bad ",
+             "reads by MaxQuant"),
+      paste0("       > Information on isotope pattern peak counts and ",
+             "inter-scan correlations, by raw file"),
+      paste0("       > A frequency distribution for the proportion of ",
+             "peptides that appeared in"),
+      "          only 1 sample, in 2 samples, etc.",
+      paste0("       > An alphabetized list of all proteins that are ",
+             "mentioned in the input files"),
+      "          for which rates were not estimated",
+      paste0("  - not run: a list of proteins and protein groups that were ",
+             "present in"),
+      "     the input files but did not meet QC standards",
+      paste0("  - session: technical information about the computing ",
+             "environment,"),
+      "     needed to replicate the results",
+      "",
+      "Interpretation notes:","",
+      "Confidence intervals and p-values are not corrected for multiple testing.",
+      "Corrections are coming soon....", "",
+      "If Convergence is FALSE, the estimation algorithm failed to reach a solution ",
+      "and no confidence intervals or p-values will be provided. Nonconvergence ",
+      "can occur because (a) one or more peptide sequences was misidentified, ",
+      "(b) because the time interval used for data collection was not appropriate ",
+      "for the true FSR of the protein (i.e., tunover is very near 0% or 100%), or ",
+      "(c) because the estimation algorithm was started at bad initial values. By ",
+      "default, 100 random restarts are used to minimize occurrence of case (c). ",
+      "To change this, use max_restarts = <a number>."
+    )
+  )
+}
+
+#' Construct a contrast matrix
+#' 
+#' @description
+#' `.buildContrastMatrix` constructs matrices for the evaluation of contrasts
+#' or trends. It also outputs a matrix for case-wise point estimates.
+#' 
+#' @details
+#' This is an internal function. It is not intended to be called directly 
+#' by users. However, user-level functions do pass contrast specifications
+#' to this function.  
+#' 
+#' Contrasts are specified by the `contrasts` argument, which accepts a named
+#' list with up to two components, `$groups` and `$at`. These specify the factor 
+#' levels and covariate values for cases of interest, respectively. Both 
+#' components can be specified as a character vector or a list, and the 
+#' function's behavior changes depending on these specifications:
+#' 
+#' \describe{
+#'   \item{If `$groups` is a character vector}{Values should be the names of  
+#'   one or more categorical variables in the experimental design. Cases will 
+#'   be defined using all possible combinations of unique values from these 
+#'   variables, and all pairwise combinations of cases will be constructed.}
+#'   \item{If `$groups` is a list}{The list must be named, with list names 
+#'   corresponding to variable names, and each component of the list should 
+#'   be a character vector giving the factor levels to be included in the 
+#'   contrast matrix.}
+#'   \item{If `$at` is a charcater vector and `$groups` is `NULL`}{`$at`
+#'   should specify the names of one or more numeric design variables.  Cases
+#'   will be defined based on the quantiles specified in the `probs` 
+#'   argument.}
+#'   \item{If `$at` is a charcater vector and `$groups` is not `NULL`}{`$at`
+#'   should specify the names of one or more numeric design variables.  Cases
+#'   will be evaluated at the mean value for each `$at` variable.}
+#'   \item{If `$at` is a list}{The list must be named, with list names 
+#'   corresponding to numeric variable names. Each component of the list should 
+#'   be a numeric vector giving the values at which cases should be evaluated.}
+#' }
+#' 
+#' Currently, contrasts are defined over all pairwise comparisons of cases. 
+#' 
+#' NOTE: this function has had only limited testing. Unexpected behaviors are
+#' possible.  It may be replaced in the future by a function utilizing the 
+#' \code{\link{emmeans}} package.  
+#' 
+#' @param data a data object returned by \code{\link{.packageFsrData}}
+#' @param contrasts a list specifying contrasts
+#' @param probs quantile probabilities at which to evaluate numeric covariates 
+#' @param ... additional arguments (not currently used)
+#' 
+#' @result
+#' A list with two entries:
+#' \describe{
+#'   \item{`$pmat`}{a matrix with one row for each unique case class (i.e., 
+#'   design matrix cell), used to generate estimates for rates in different 
+#'   groups}
+#'   \item{`$cmat`}{a matrix with one row for each requested contrast}
+#' }
+#' 
+#' @seealso
+#' \code{\link{.formatFsrResult}} and \code{\link{.doInference}} for usage
+#' context. The \code{\link{emmeans}} package for a similar (but much more
+#' mature) implementation. 
+#' 
+.buildContrastMatrix <<- function(data, contrasts, probs = c(0,0.25,0.5,0.75,1), 
+                                  ...){
+  formula <- formula(data$model)
+  vars <- all.vars(formula)
+  if(is.character(contrasts$groups)){
+    names(contrasts$groups) <- contrasts$groups
+    if(any(!names(contrasts$groups) %in% vars)) 
+      stop("\"groups\" must be variables in \"formula\"")
+    if(any(!names(contrasts$at) %in% vars)) 
+      stop("\"at\" must be variables in \"formula\"")
+  }
+  vals <- lapply(
+    setNames(vars, vars), 
+    function(i){
+      v <- data$model[[i]]
+      if(is.numeric(v)){
+        if(i %in% names(contrasts$groups))
+          stop("use \"at = ...\" to specify numeric variables in \"contrasts\"")
+        if(i %in% names(contrasts$at)){
+          contrasts$at[[i]]
+        } else {
+          if(is.null(contrasts$groups)){
+            quantile(v, probs)
+          } else {
+            mean(v)
+          }
+        }
+      } else{
+        if(is.character(contrasts$groups) | !i %in% names(contrasts$groups)){
+          unique(v)
+        } else {
+          v[v %in% contrasts$groups[[i]]]
+        }
+      }  
+    }
+  )
+  
+  vals <- lapply(vals, sort)
+  
+  cases <- do.call(expand.grid, vals)
+  if(length(vars) > 0){
+    X <- model.matrix(formula, cases)
+  } else {
+    X <- model.matrix(formula, data$model[1,])
+  }
+  labels <- lapply(names(cases), function(i) paste0(i, ".", cases[[i]]))
+  if(length(labels) == 0) labels <- list("rate")
+  rownames(X) <- apply(as.data.frame(labels), 1, paste, collapse = ":")
+  if(all(length(vars) > 0, !isFALSE(contrasts), nrow(X) >= ncol(X))){
+    pairs <- gtools::combinations(nrow(X), 2)
+    rownames(pairs) <- paste(rownames(X)[pairs[,2]],"-",rownames(X)[pairs[,1]])
+  } else {
+    pairs <- NULL
+  }
+  list(
+    pmat = X,
+    cmat = pairs
+  )
+}
+
+#' Obtain inferential statistics for an FSR model
+#' 
+#' @description
+#' `.doInference` obtains confidence intervals and hypothesis testing results
+#' for an FSR model.
+#' 
+#' @details
+#' This is an internal function. It is not intended to be called directly 
+#' by users.  
+#' 
+#' The fitted model object is used to obtain estimates and confidence intervals
+#' for rates if `type = "estimate"`, or estimates, intervals, and t-test 
+#' results if `type = "contrast"`.  Confidence intervals and p-values may be 
+#' calculated using the delta method (`imethod = "delta"`, the default) or by
+#' parametric bootstrap `"simulation"` using `nsim` replicates.  Estimation can 
+#' be performed using either a version of the usual MIDA estimator based on the 
+#' monoisotopic peak (`estimator = "mida"`), or a full mixture model 
+#' (`"mixture"`). Both estimators are corrected for biases that may occur due 
+#' to truncation of the observed isotope distribution (OID). They will give 
+#' identical point estimates, but may give different confidence intervals 
+#' (I NEED TO CHECK THIS).
+#' 
+#' If `estimator = "mixture"`, the covariance matrix and residual degrees of 
+#' freedom are adjusted under the assumption that each OID provides only one 
+#' bit of information (i.e., the peaks within an OID are \strong{NOT} mutually 
+#' independent).  This assumption may be conservative in some cases, but 
+#' simulations suggest that it yields intervals with  empirical coverage that
+#' approximates the nominal `level`.  
+#' 
+#' @param object a fitted model object returned by \code{\link{.randStart}}
+#' @param data a data object returned by \code{\link{.packageFsrData}}
+#' @param matrices a list returned by \code{\link{.buildContrastMatix}} 
+#' containing a case matrix (`$pmat`) and a contrast matrix (`$cmat`)
+#' @param type the type of result desired (see Details)
+#' @param estimator the estimator used in the optimization (see Details)
+#' @param imethod methodology used to calculate confidence intervals and 
+#' p-values (see Details)
+#' @param nsim number of simulations if `imethod = "simulation"` (ignored 
+#' otherwise)
+#' @param level confidence level for interval calculations
+#' @param ... additional arguments (currently not used)
+#' 
+#' @result
+#' A data frame with the following entries:
+#' \describe{
+#'   \item{`parameter`}{the name of the estimated quantity (e.g. a treatment or 
+#'   contrast)}
+#'   \item{`estimate`}{point estimate}
+#'   \item{`se`}{standard error of the estimate}
+#'   \item{`lo%`}{lower confidence bound, corresponding to the labeled quantile
+#'    (e.g., `2.5%`)}
+#'   \item{`hi%`}{upper confidence bound, corresponding to the labeled quantile 
+#'   (e.g. `97.5%`)}
+#'   \item{`df`}{residual degrees of freedom used or the t-test (contrasts 
+#'   only)}
+#'   \item{`t.value`}{t-statistic (contrasts only)}
+#'   \item{`p.value`}{p-value for the 2-sided test of the null hypothesis that 
+#'   t = 0 (contrasts only)}
+#' }
+#' If the fitted model `object` has not converged, all entries except the 
+#' `parameter` name and point `estimate` will be `NA`.
+#' 
+#' @seealso
+#' \code{\link{.buildContrastMatrix}} for implementation of contrasts, 
+#' \code{\link{.formatFsrResult}} for usage context, and 
+#' \code{\link{.checkConvergence}} for convergence checks. 
+#' 
+.doInference <<- function(object, data, matrices, 
+                          type = c("estimate","contrast"), 
+                          estimator = c("mixture","mida"), 
+                          imethod = c("delta","simulation"), 
+                          nsim = 10000, level = 0.95, ...){
+  a <- c((1 - level)/2, (1 + level)/2)
+  cinames <- paste0(format(a*100, justify = "none", trim = TRUE), "%")
+  outnames <- c("parameter","estimate","se", cinames, "df","t.value","p.value")
+  p <- coef(object)
+  np <- length(p)
+  pmat <- matrices$pmat
+  
+  switch(
+    match.arg(type), 
+    estimate = {
+      cmat <- NULL
+      pnames <- rownames(pmat)
+      outnames <- outnames[1:5]
+      ifun <- data$rate
+    },
+    contrast = {
+      cmat <- matrices$cmat
+      if(is.null(cmat)) return(NULL)
+      pnames <- rownames(cmat)
+      ifun <- data$difference
+    }
+  )
+  fit <- as.vector(ifun(pmat, p, cmat))
+  
+  if(.checkConvergence(object)){
+    
+    switch(
+      match.arg(estimator),
+      mixture = {
+        rdf <- df.residual(object) - nrow(data$y)
+        cvmat <- vcov(object) * df.residual(object)/rdf
+      },
+      mida = , mida_dr = {
+        rdf <- df.residual(object)
+        cvmat <- vcov(object)
+      }
+    )
+    
+    switch(
+      match.arg(imethod),
+      delta = {
+        dfit <- numericDeriv(
+          quote(ifun(X, pars, contr)), 
+          theta=c("pars"), 
+          rho = list2env(list(X = pmat, pars = p, contr = cmat))
+        )
+        grd <- attr(dfit, "gradient")
+        se <- sqrt(diag(grd %*% cvmat %*% t(grd)))
+        ci <- fit + se %o% qt(a, rdf)
+        tv <- fit/se
+        pv <- 2 * (1 - pt(abs(tv), rdf))
+      },
+      simulation = {
+        b <- mvtnorm::rmvt(nsim, cvmat, delta = p, df = rdf)
+        theta <- ifun(X = pmat, pars = t(b), contr = cmat)
+        se <- apply(theta, 1, sd)
+        ci <- t(apply(theta, 1, quantile, a))
+        tv <- fit/se
+        pv <- 2 * min((pv <- sum(y >= 0)/nsim), 1 - pv)
+      }
+    )
+    
+    ci <- setNames(as.data.frame(ci), cinames) 
+    
+    data.frame(
+      parameter = pnames,
+      estimate = fit,
+      se = se,
+      ci,
+      df = rdf,
+      t.value = tv,
+      p.value = pv,
+      check.names = FALSE
+    )[,outnames]
+    
+  } else {
+    
+    ci <- setNames(as.data.frame(t(rep(NA, length(a)))), cinames)
+    
+    data.frame(
+      parameter = pnames,
+      estimate = fit,
+      se = NA,
+      ci,
+      df = NA,
+      t.value = NA,
+      p.value = NA,
+      check.names = FALSE
+    )[,outnames]
+    
+  }
+}
+
+#' Check the convergence of a fitted FSR model
+#' 
+#' @describtion
+#' Checks to make sure that the fitted model object has actually converged on
+#' a useable result. Allows for checking of different model classes in the 
+#' future.
+#' 
+#' @details
+#' This is an internal function. It is not intended to be called directly 
+#' by users.  
+#' 
+#' `.checkConvergence` is a simple wrapper that evaluates the class of its
+#' input `object` and then runs a specified check (or series of checks) on the
+#' `object` to make sure it meets minimal convergence criteria. An error is 
+#' returned if the `object` does not inherit from a recognized class.  
+#' 
+#' Currently, the only class recognized by `.checkConvergence` is `"nls.lm"`.
+#' Models of this class are required to have an exit code < 4 (see `??nls.lm`)
+#' and a non-singular, positive definite (i.e., solvable) Hessian.
+#' 
+#' @param object a fitted model object
+#' 
+#' @result
+#' A logical value indicating whether the convergence check was passed, 
+#' or an error. 
+#' 
+.checkConvergence <<- function(object){
+  switch(
+    class(object),
+    "nls.lm" = all(
+      object$info < 4,
+      !isFALSE(
+        tryCatch(
+          solve(object$hessian), 
+          error = function(e) FALSE
+        )
+      )
+    ),
+    "\"object\" is not from a recognized class."
+  )
+}
+
+#' Parameter optimization with random restarts
+#' 
+#' @describtion
+#' `.randStart` repeatedly calls \code{\link{minpack.lm::nls.lm}} with
+#' random starting values to build confidence in the stability of the 
+#' resulting solution.
+#' 
+#' @details
+#' This is an internal function. It is not intended to be called directly 
+#' by users.  
+#' 
+#' \code{\link{minpack.lm::nls.lm}} will occasionally return a local rather
+#' than global minimum.  To minimize this risk, the optimizer is called up to
+#' `max_restarts` times with different, random starting values, and the result 
+#' with the smallest objective function is returned. To reduce needless 
+#' computation, the function may quit searching after `nquit` tries fail to 
+#' improve model deviance. Optionally, it may also quit with an error if more
+#' than `nerr` sequential tries fail to produce a model with a sensible Hessian.
+#' 
+#' @param pars a numeric parameter vector (actual values are ignored)
+#' @param data a data list returned by \code{\link{.packageFsrData}}
+#' @param fn the objective function to be optimized (NOW IN DATA)
+#' @param lower a vector of lower optimization bounds with the same length
+#' and names as `par`
+#' @param upper a vector of upper optimization bounds with the same length
+#' and names as `par`
+#' @param nquit search will end after `nquit` successive tries without 
+#' improvement
+#' @param max_restarts maximum number of restarts
+#' @param tol NO LONGER USED
+#' @param nerr search will quit with an error if `nerr` successive tries
+#' fail to converge on a solution
+#' @param silent logical; messages are suppressed if `TRUE` 
+#' @param ... additional arguments passed to \code{\link{minpack.lm::nls.lm}}
+#' 
+#' @result
+#' A fitted \code{\link{nls.lm}} object, or an error.
+#' 
+#' @seealso
+#' \code{\link{minpack.lm::nls.lm}}
+#' 
+.randStart <<- function(data, nquit = 3, max_restarts = 100, tol = 1e-8, 
+                        nerr = max_restarts, silent = TRUE, ...){  
+  FN <- data$objective
+  PARS <- .initializeParameters(data$model, ...)
+  tmax <- max(data$time)
+  # X <- with(data, model.matrix(formula, model))
+  # u <- apply(X, 1, paste, collapse="")
+  # X <- X[match(unique(u), u),]
+  if("(Intercept)" %in% names(PARS$start)){
+    lwr <- c(-2.5, rep(0, length(PARS$start)-1))
+    upr <- -1 * lwr
+  } else {
+    lwr <- -2.5
+    upr <- 2.5
+  }
+  pars <- PARS$start
+  starts <- replicate(max_restarts, runif(length(pars), lwr, upr)) 
+  if(is.vector(starts)) starts <- t(starts)
+  dv <- Inf
+  errors <- 0
+  for(i in 1:max_restarts){
+    pars[] <- starts[,i]
+    mod <- try(
+      minpack.lm::nls.lm(pars, lower = PARS$lower, upper = PARS$upper, 
+                         fn = FN, data = data, ...),
+      silent = TRUE
+    )
+    check <- try(solve(mod$hessian), silent = TRUE)
+    if(inherits(check, "try-error")){ 
+      errors <- errors + 1
+      if(errors > nerr & !is.finite(dv)){
+        stop(paste0(nerr, " errors without progress:\n", mod))
+      }
+    } else {
+      errors <- 0
+    }
+    
+    if(is.na(dv > deviance(mod))) browser()
+    
+    if(dv > deviance(mod)){
+      dv <- deviance(mod)
+      winner <- mod
+      nbest <- 0
+    } else {
+      nbest <- nbest + 1
+      if(nbest == nquit) break
+    }
+  }
+  if(!silent) message(paste0(
+    "Winner after ", i, " restarts (", nbest, " wih no improvement)."
+  ))
+  return(winner)
+} 
+
+#' Normalization of observed and theoretical isotope distributions
+#' 
+#' @describtion
+#' Provides several normalization methods, including baseline normalization,
+#' internal proportional normalization, and internal maximal normalization.
+#' WARNING: internal proportional methods (i.e., to self) are not valid for 
+#' comparisons of distributions truncated at different quantiles.
+#' 
+#' @details
+#' This is an internal function. It is not intended to be called directly 
+#' by users.  
+#' 
+#' Given a data object with observed (OID) and theoretical distributions, this
+#' function will normalize the OID for each sample (`data$y`) and the 
+#' theoretical asymptotic distribution (`data$fa`) to the theoretical baseline 
+#' distribution (`data$f0`). The default `baseline` method corrects for the 
+#' potential truncation of the OIDs and allows MIDA to yield unbiased estimates
+#' for FSR.
+#' 
+#' Alternative methods are provided for reference only and \strong{will} 
+#' produce biased results if OIDs are significantly truncated. The `proportion`
+#' method normalizes each sample OID, `$fa`, and `$f0` so that the peaks 
+#' within the pattern sum to 1. The `maximum` method normalizes each pattern 
+#' relative to its largest peak, so that the maximum is equal to 1.      
+#' 
+#' @param data a data list returned by \code{\link{.packageFsrData}}
+#' @param prop_new a numeric vector giving the proportion of newly synthesized
+#' peptide in the sample(s) (i.e., the mixing fraction for `fa` and `f0`)
+#' @param normalization the normalization method to be used.
+#' @param ... additional arguments (currently unused) 
+#' 
+#' @result
+#' A new copy of `data` in which `$y`, `$f0`, and `$fa` are normalized .
+#' 
+#' @seealso
+#' \code{\link{.ueicGivenp}} for an example of use context
+#' 
+.normalizeIsotopePatterns <- function(data, prop_new = NULL, 
+                                      normalization = "baseline", 
+                                      ...){
+  switch(
+    normalization,
+    baseline = {
+      stopifnot(all(prop_new >= 0, prop_new <= 1))
+      ftmsk <- with(data, ((1 - prop_new) * f0 + prop_new * fa) * mask)
+      denom <- with(data, rowSums(f0 * mask))
+      data$f0 <- with(data, (f0 * mask)/denom)
+      data$fa <- with(data, (fa * mask)/denom)
+      data$y <- with(data, (y * mask)/rowSums(y * mask) * rowSums(ftmsk)/denom)
+    },
+    proportion = {
+      data$f0 <- with(data, (f0 * mask)/rowSums(f0 * mask))
+      data$fa <- with(data, (fa * mask)/rowSums(fa * mask))
+      data$y <- with(data, (y * mask)/rowSums(y * mask))
+    },
+    maximum = {
+      rowMax <- function(x, na.rm = FALSE) apply(x, 1, max, na.rm = na.rm)
+      data$f0 <- with(data, (f0 * mask)/rowMax(f0 * mask))
+      data$fa <- with(data, (fa * mask)/rowMax(fa * mask))
+      data$y <- with(data, (y * mask)/rowMax(y * mask))
+    },
+    stop(paste0(
+      shQuote(normalization), " is not a recognized normalization.\n",
+      "   Please use \"baseline\", \"proportion\", or \"maximum\"."
+    ))  
+  )
+  data
+}
+
+#' One-compartment exponential FSR model with given enrichment
+#' 
+#' @describtion
+#' Generates objective, rate, and difference functions for fitting and 
+#' interpreting the one-compartment exponential model.
+#' 
+#' @details
+#' This is an internal function. It is not intended to be called directly 
+#' by users.  
+#' 
+#' DESCRIBE THE MODEL HERE.      
+#' 
+#' @result
+#' A named list with components `$objective`, `$rate`, and `$difference`, 
+#' each containing an R function, and `model_name`, containing a single 
+#' string-value description. `$objective` provides for calculation of the 
+#' residuals for a given dataset and hyperparameter vector. `$rate` converts
+#' hyperparameters into parameters (i.e., rates) for cases based on a given 
+#' design matrix. `$difference` performs the same conversion, but returns
+#' the difference (i.e., contrast) between two specified cases.
+#' 
+#' @seealso
+#' \code{\link{buildFsrModel}} or model specification guidelines.
+#' 
+.ue1cGivenp <<- function(){
+  list(
+    description = "exponential, 1 compartment, enrichment given",
+    objective = { # objective function to be optimized
+      function(pars, data, estimator = c("mixture","mida"), ...){ 
+        estimator <- match.arg(estimator)
+        mmat <- with(data, model.matrix(formula, model))
+        khat <- with(data, as.vector(rate(mmat, pars)))   # linpred k here.
+        frac_new <- with(data, 1 - exp(-khat * time))     # note the complement! so w is P(new) instead of P(old)
+        data <- .normalizeIsotopePatterns(data, frac_new, ...)
+        resid <- with(data, {            
+          switch(
+            estimator,
+            mixture = {
+              yhat <- ((1 - frac_new) * f0 + frac_new * fa) * mask 
+              ((y - yhat)/sqrt(yhat))[mask]   # Pearson residual with n = 1
+            },
+            mida = {
+              yhat <- fa[,1] + (f0[,1] - fa[,1]) * exp(-khat * time)
+              (y[,1] - yhat)
+            },
+            mida_dr = {
+              i <- apply(abs(fa - f0), 1, which.max)
+              frac_new <- sapply(seq_along(i), function(ii) 
+                (-log(1-(y[,ii] - f0[,ii])/(fa[,ii] - f0[,ii]))) 
+              )
+              k <- frac_new/time
+              (k - khat)
+            }
+          )
+        })
+        return(resid)
+      }
+    },
+    rate = { # function to obtain target parameters from hyperparameters
+      function(X, pars, contr = NULL)
+        exp(X %*% pars) # rates
+    }, 
+    difference = { # function to obtain contrasts from hyperparameters
+      function(X, pars, contr = NULL)
+        exp(X[contr[,2],] %*% pars) - exp(X[contr[,1],] %*% pars) # contrasts
+    }
+  )
+}
+
+#' Use a residual function to build an objective function
+#' 
+#' @description
+#' Given an R function that returns a vector of residuals for a model, 
+#' return a new function with the same formal arguments that returns a
+#' scalar objective value.
+#' 
+#' @details
+#' `.makeObjectiveFunction` provides a utility to simplify switching between
+#' optimizers that expect a residual vector (e.g., 
+#' \code{\link{minpack.lm::nls.lm}}) and optimizers that expect a scalar
+#' objective function (e.g., \code{\link{optim}}).  Currently, it only
+#' returns the sum-of-squared residuals, but other objective functions
+#' can easily be added using a \code{\link{switch}} with an argument. 
+#' 
+#' @param resid_fun an R function. Technically, any function will work, but  
+#' only functions that return numeric vectors will produce usable output.
+#' 
+#' @eamples
+#' .makeObjectiveFunction(rnorm)
+#' 
+.makeObjectiveFunction <- function(resid_fun){
+  args <- formals(resid_fun)
+  argstring <- paste(
+    paste0(names(args), "=", names(args)),
+    collapse = ", "
+  )
+  resid_fun <- deparse(substitute(resid_fun))
+  cmd <- paste0(
+    "function(){ 
+       err <- ", resid_fun, "(", argstring, ")
+       sum(err^2)
+    }"
+  )
+  func <- eval(parse(text = cmd))
+  formals(func) <- args
+  return(func)
+}
+
+#' Plot the normalized isotope patterns for a protein
+#' 
+#' @description
+#' Uses \code{\link{ggplot2}} to generate plots showing the normalized 
+#' observed isotope distributions overlaid against the envelope created by
+#' the baseline and asymptotic distributions.  The plot is faceted by
+#' peptide sequence. Line colors indicate different experimental conditions.
+#'  
+#' @details
+#' This is an internal function. It is not intended to be called directly 
+#' by users.  
+#' 
+#' The envelope between the baseline and asymptotic functions is shown in
+#' grey, and the boundaries are labeled f_0 for the baseline and f_a for
+#' the asymptotic distribution.  Experimental results show one line for
+#' each sample, color-coded by experimental condition.  Experimental data
+#' should fall within the envelope.  If they do not, one or more of the 
+#' following problems may be responsible:
+#' \enumerate{
+#'   \item{The peptide sequence may have been misidentified.}
+#'   \item{The molar fractions used to calculate the baseline isotope 
+#'   distribution may not be representative for the problem sample(s).}
+#'   \item{The body water enrichment used to calculate the asymptotic
+#'   distribution may not be accurate.}
+#' } 
+#' Note that issue 1 should occur idiosyncratically, while issues 2 and 3 
+#' will affect all of the data for one or more samples.  
+#' 
+#' Aesthetic note: Although isotope distributions are discrete, `.plotFsr2` 
+#' represents them with continuous lines. This is a deliberate choice intended
+#' to make it easier to distinguish the overall patterns in the data. 
+#' 
+#' @param data an input object generated by \code{\link{.packageFsrData}}
+#'
+#' @result
+#' A list containing the gg-object with instructions to draw the plot.
+#' 
+.plotFsr2 <<- function(data){
+  data$y[data$y == 0] <- NA
+  df <- data.frame(
+    Sample = data$sample,
+    Sequence = data$sequence,
+    Condition = if(ncol(data$model) > 0){
+      interaction(data$model)
+    } else {
+      data$sample
+    },
+    f0 = c(data$f0),
+    fa = c(data$fa),
+    y = c(data$y),
+    m = c(col(data$y)) - 1
+  )
+  lbls <- dplyr::reframe(
+    dplyr::group_by(df, Sequence),
+    x = -0.3,
+    y = c(max(f0), max(fa)),
+    label = c("italic(f)[0]", "italic(f)[a]"),
+    col = c("navyblue", "goldenrod")
+  )
+  plt <- ggplot2::ggplot(df, ggplot2::aes(x=m)) + 
+    ggplot2::geom_ribbon(ggplot2::aes(ymin = f0, ymax = fa), alpha = 0.1) + 
+    ggplot2::geom_line(ggplot2::aes(y=f0), color = "navyblue", alpha=0.4) +
+    ggplot2::geom_line(ggplot2::aes(y=fa), color = "goldenrod", alpha=0.4) +
+    ggplot2::geom_line(ggplot2::aes(y=y, color = Condition, group = Sample)) +
+    ggplot2::labs(x = "Peak index", y = "Normalized intensity") +
+    ggplot2::scale_x_continuous(breaks = unique(df$m)) +
+    ggplot2::facet_wrap(.~Sequence) +
+    ggplot2::geom_text(ggplot2::aes(x=x,y=y,label=label), data=lbls, 
+                       parse = TRUE, size = 6, family = "serif",
+                       color = lbls$col) +
+    ggplot2::theme_bw(base_size = 14)
+  list(plt)
+}
+.getFsrPlots <<- function(x){
+  pp <- lapply(lapply(x, "[[", "plot"), "[", 1, 1)
+  nms <- sapply(pp, names)
+  has_plot <- !sapply(nms, is.null)
+  setNames(
+    sapply(pp, function(i) i[[1]])[has_plot],
+    nms[has_plot]
+  )
+}
+
+
+#' Perform quality control checks and report results 
+#' 
+#' @description
+#' Performs quality control checks to make sure that data meet 
+#' specified standards.
+#' 
+#' @details
+#' This is an internal function. It is not intended to be called directly 
+#' by users.  
+#' 
+#' `.checkMsmsDataQuality` checks for (and removes) peptides flagged by
+#' MaxQuant as reverse reads or common contaminants.  Next, it counts the 
+#' number of isotope patterns reported for each peptide, and removes peptides 
+#' with no patterns or peptides whose patterns are insufficiently consistent
+#' within a sample (if > 1 scan-level pattern is available).  The required 
+#' level of consistency is set by `mincorr` and evaluated by a correlation 
+#' `method`. At the sequence level, peptides are required to have pattern data 
+#' in at least `mincount` samples. Results of the quality control checks 
+#' are output to a summary `file`.
+#'  
+#' @param pep data frame of peptide data
+#' @param method correlation method; see \code{\link{stats::cor}}
+#' @param mincorr numeric value giving the minimum required interscan 
+#' correlation
+#' @param mincount numeric value giving the required number of samples with
+#' isotope pattern data.
+#' @param N total number of samples
+#' @param file character value specifying a file path for the quality control 
+#' summary
+#' 
+#' @result
+#' A new version of the `pep` data frame that only contains peptides that 
+#' passed the quality control checks.  As a side effect, a summary of the 
+#' quality control checks is saved to disk.
+#' 
+.checkMsmsDataQuality <<- function(pep, method, mincorr, use_single_scans,
+                                   mincount, sample_data, formula, N){
+  message(paste0(round(Sys.time()), ": Running quality control filters"))
+  msg <- c(
+    "  - Removing MaxQuant-flagged contaminants and reverse reads",
+    paste0(
+      "  - Minimum required correlation among isotope patterns within ",
+      "a sample: ", mincorr),
+    paste0(
+      "  - Minimum required number of samples/design cell with ",
+      "isotope pattern data for peptide: ",  mincount)
+  )
+  message(paste(msg, collapse = "\n"))
+  
+  # Basic counts
+  totals <- .countSampPepProt(pep, "Raw")
+  
+  # Reverse read and contaminant removal
+  bad_qc <- with(pep, c(
+    contaminants = sum(grepl("CON__", Proteins, fixed = TRUE)),
+    reverse_reads = sum(Proteins == "")
+  ))
+  pep <- dplyr::filter(pep, !grepl("CON__", Proteins, fixed = TRUE), Proteins != "")
+  totals <- cbind(totals, .countSampPepProt(pep, "- Bad reads"))
+  
+  # Sample-sequence quality control checks 
+  # (require at least one pattern, and consistency among scans within a sample)
+  suppressMessages({
+    sampseq_qc <- pep %>% dplyr::group_by(Raw.file, Sequence) %>%
+      dplyr::summarize(
+        m = sum(!is.na(Isotope.pattern)),
+        r = .isotopePatternCorrelation(Isotope.pattern, method)
+      )
+    if(use_single_scans){
+      pep <- dplyr::full_join(pep, sampseq_qc, by = c("Raw.file","Sequence")) %>%
+        dplyr::filter(m == 1 | r >= mincorr) 
+    } else {
+      pep <- dplyr::full_join(pep, sampseq_qc, by = c("Raw.file","Sequence")) %>%
+        dplyr::filter(m >= 1, r >= mincorr) 
+    }
+    totals <- cbind(totals, .countSampPepProt(pep, " - Inconsistent scans"))
+  })
+  # Sequence-level quality control checks
+  # (require patterns for a minimum percentage of samples)
+  suppressMessages({
+    seq_qc <- pep %>% 
+      dplyr::mutate(..case = .getDesignCases(sample_data, formula, Raw.file)) %>%
+      dplyr::group_by(Sequence) %>% 
+      dplyr::summarize(n = max(table(..case[match(unique(Raw.file), Raw.file)]))) #length(unique(Raw.file)))
+    pep <- dplyr::full_join(pep, seq_qc, by = "Sequence") %>% 
+      dplyr::filter(n >= mincount)
+    totals <- cbind(totals, .countSampPepProt(pep, "- Low pattern count"))
+  })
+  # Write quality control summary and return data
+  qcrep <- .writeQcSummary(msg, totals, bad_qc, sampseq_qc, seq_qc, N)
+  attr(pep, "qc") <- qcrep
+  pep
+}
+
+#' Internal functions called by `.checkMsmsDataQuality`
+#' 
+#' @description
+#' These are internal functions that handle repetitive tasks related to
+#' quality control reporting. They are not intended to be called by users.
+#' 
+#' @details
+#' `.countSampPepProt` counts the number of samples, peptides, proteins, and 
+#' protein groups in a data frame. `.isotopePatternCorrelation` calculates
+#' the correlations between different scans for a given isotope pattern.
+#' `.writeQcSummary` formats the quality control summary and writes it to disk. 
+#'  
+#' @param x data frame of peptide data (`.countSampPepProt`) or a character 
+#' vector of delimited isotope patterns (`.isotopePatternCorrelation`) 
+#' @param name a variable name for the summary line output by 
+#' `.countSampPepProt`
+#' @param method a correlation method passed to \code{\link{stats::cor}}
+#' @param sep character separating peak intensities within an isotope pattern
+#' @param ret_min logical; if `TRUE` and >2 scans are present, the minimum 
+#' pairwise correlation is returned.  Otherwise the entire matrix is retuned
+#' @param file file path for the QC summary 
+#' @param pars a message describing the quality control parameters used
+#' @param totals data frame giving counts at each stage of the procedure 
+#' @param bad data frame giving counts of reverse reads and contaminants
+#' @param sampseq data frame with counts from the sample-sequence level checks
+#' @param seq data frame with counts from the sequence level checks
+#' @param N total number of samples
+#' @param file character value specifying a file path for the quality control 
+#' summary
+#'
+#' @seealso
+#' \code{\link{.checkMsmsDataQuality}}
+#' 
+.countSampPepProt <- function(x, name){
+  x <- with(x, data.frame(c(
+    samples = length(Raw.file),
+    peptides = length(unique(Sequence)),
+    proteins = length(unique(unlist(sapply(Proteins, strsplit, ";")))),
+    protein_groups = length(unique(Proteins))
+  )))
+  names(x) = name
+  x
+}
+.isotopePatternCorrelation <- function(x, method = "spearman", 
+                                       sep = ";", ret_min = TRUE){
+  if(length(x) == 1) return(NA)
+  y <- strsplit(na.omit(x), sep)
+  if(length(y) <= 1) return(NA)
+  L <- sapply(y, length)
+  for(i in 1:length(y)){
+    y[[i]] <- as.numeric(y[[i]])
+    if(L[i] < max(L)) y[[i]][(L[i]+1):max(L)] <- 0
+  }
+  X <- do.call(cbind, y)
+  R <- cor(X, method = method)
+  # if(any(is.na(R))) browser()
+  if(ret_min){
+    R[which.min(abs(R))]
+  } else {
+    R
+  }
+}
+.getDesignCases <<- function(samples, formula, ids){
+  getDesignGroups(samples, formula)[match(ids, samples$File)]
+}
+.getGSS <- function(ids, samples, formula){
+  n <- groupedSampleSize(dplyr::filter(samples, File %in% ids), formula)
+  if(length(n) == 1) n else(max(n[-1]))
+}
+.writeQcSummary <<- function(pars, totals, bad, sampseq, seq, N){
+  totals <- cbind(Level = rownames(totals), totals)
+  sampseq <- dplyr::summarize(
+    sampseq, 
+    `m = 0` = sum(m == 0),
+    `m = 1` = sum(m == 1),
+    `r < 0` = sum(r < 0, na.rm = TRUE),
+    `r in (0,0.6]` = sum(r >= 0 & r < 0.6, na.rm = TRUE),
+    `r in (0.6,0.8]` = sum(r >= 0.6 & r < 0.8, na.rm = TRUE),
+    `r in (0.8,0.9]` = sum(r >= 0.8 & r < 0.9, na.rm = TRUE),
+    `r in (0.9,0.99]` = sum(r >= 0.9 & r < 0.99, na.rm = TRUE),
+    `r >= 0.99` = sum(r >= 0.99, na.rm = TRUE)
+  ) %>% 
+    dplyr::bind_rows(., dplyr::bind_cols(
+      data.frame(Raw.file = c("Total (n)", "Total (%)", "Total (% | m > 1)")), 
+      dplyr::bind_rows(
+        colSums(.[,-1]), 
+        round(100 * colSums(.[,-1])/sum(.[,-1]), 2),
+        round(100 * colSums(.[,-(1:3)])/sum(.[,-(1:3)]), 2)
+      )
+    ))
+  seq <- seq %>% count(n, name = "Count") %>% 
+    dplyr::mutate(`%` = round(100 * Count/sum(Count), 2)) 
+  
+  report <- list(
+    `quality_control` =  c(
+      paste(rep("-", 80), collapse = ""), "QUALITY CONTROL SUMMARY:", 
+      paste(rep("-", 80), collapse = ""), "", pars, 
+      "","Sample, Peptide, and Protein counts after checks:",
+      paste(rep("-", 49), collapse = "")
+    ),
+    totals = totals,
+    header2 = c(
+      "","Counts of bad reads flagged by MaxQuant:", 
+      paste(rep("-", 40), collapse = "")
+    ),
+    bad = t(bad),
+    header3 = c(
+      "","Sample-sequence level (scans/sequence/sample):", 
+      "[m = number of isotope patterns, r = correlation among patterns if m > 1]",
+      paste(rep("-", 73), collapse = "")
+    ),
+    sampseq = sampseq,
+    header4 = c(
+      "", paste0("Sequence level (samples/sequence, of possible ", N, "):"), 
+      "[number of peptides that have patterns in n samples]",
+      paste(rep("-", 52), collapse = "")
+    ),
+    seq
+  )
+  return(report)
+}
+
+.writeQcAddendum <<- function(results, notrun){
+  ingroups <- with(
+    results$metadata, 
+    unique(sapply(strsplit(Protein[Group], ";"), "[[", 1))
+  )
+  c(
+    "", 
+    "Final protein counts:",
+    paste0("FSR estimation was attempted for ", 
+           sum(!results$metadata$Group), " uniquely identified proteins"),
+    paste0("and ", sum(results$metadata$Group), 
+           " protein groups that include an additional ", length(ingroups),
+           " possible proteins"),
+    paste0("(any given protein in a group may or may ",
+           "not be present in the sample)."),
+    paste0("Estimation succeeded for ", 
+           with(results$metadata, sum(Convergence & !Group)), 
+           " uniquely identified proteins and ",
+           with(results$metadata, sum(Convergence & Group)), 
+           " protein groups."),
+    paste0("Convergence failed in ", sum(!results$metadata$Convergence),
+           " attempts."), 
+    paste0("FSR estimation was not attempted for ", length(notrun[[1]]), 
+           " proteins or protein groups that appear in the MaxQuant"),
+    "results but failed to meet quality control standards.", "",
+    "A list of unestimated proteins is available under \"not_run\"."
+  )
+}
+
+#' Internal functions for FSR simulation
+#' 
+#' @description
+#' These functions are used to generate simulated datasets (`.genFsrSim`),
+#' to analyze them (`.fitFsrSim`), and to evaluate error statistics for the
+#' simulation (`.appendErrorStats`).
+#' 
+#' @details
+#' These are internal functions. They are not intended to be called directly 
+#' by users.  
+#' 
+#' `.genFsrSim` expects a data frame `X` that includes (at a minimum) columns
+#' for `Subject`, `Protein`, `Sequence`, `Rate`, `Delta`, `Time`, `Count`, 
+#' `Trap_factor`, and `Noise_threshold`, as produced internally by 
+#' \code{\link{simFsr}}.  These columns define the parameters that 
+#' \code{\link{simulateIsotopePattern}} needs to generate an observed isotope 
+#' distribution (OID) for each row in `X`.  `.genFsrSim` also expects a
+#' named parameter list, `pars`, that must include entries for `bwe_mean`, 
+#' `bwe_sd`, `maxpeaks`, `baserate` and `delta`. The last two entries should
+#' correspond to the `Rate` and `Detla` values in `X`.  
+#' 
+#' Simulation proceds as follows:
+#' \enumerate{
+#'   \item{A body water enrichment (BWE) value is drawn for each unique 
+#'   `Subject` (i.e., biological replicate) in `X`, where 
+#'   $BWE \sim N(bwe_mean, bwe_sd)$.}
+#'   \item{Theoretical baseline and asymptotic distributions are generated for 
+#'   each row in `X` based on the `Sequence` and `BWE` (to reduce computation, 
+#'   theoretical distributions are first generated for each unique combination 
+#'   `Sequence` and `BWE`, and then looked up for each row). Theoretical 
+#'   distributions are stored in `X` as a list column named `Theory`.}
+#'   \item{An OID is simulated for each row in `X` based on its `Sequence`, 
+#'   theortical distributions, and `BWE`, `Time`, true fractional synthesis rate 
+#'   (` = Rate + Delta`), and observation model parameters (`maxpeaks`, `Count`,
+#'    `Trap_factor`, and `Noise_threshold`). The simulated OID is appended to
+#'    `X` as a column named `Isotope.pattern` and stored as a character vector 
+#'    containing semi-colon (;) separated peak values for each row (this mimics
+#'    the format used by MaxQuant).}
+#'    \item{A modified version of `X` with simulated OID and theoretical 
+#'    distributions is returned.}
+#' }
+#' 
+#' `.fitFsrSim` expects the data frame generated by `.genFsrSim` and parameter 
+#' list `pars`. It determines whether any `Delta != 0` exist, and generates a
+#' model formula as appropriate (if nonzero `Delta`s are present, there must 
+#' also be a column in `X` called `Treatment` to distinguish "treatment" and
+#' "control" groups (TODO: Generate this automatically inside `.fitFsrSim`)).
+#' `X` is then segmented into peptide and sample tables, mimicking input from 
+#' MaxQuant, and then passed to \code{\link{.doMaxQuantFsr}} for model fitting.  
+#' Finally, `.appendErrorStats` is called to append the true rates (or rate 
+#' contrasts), and estimation error, along with an indicator reporting whether 
+#' the true value falls inside the estimated confidence interval.  
+#' 
+#' @param X a data frame (see Details)
+#' @param pars a named list of simulation parameters (see Details)
+#' @param ... additional arguments passed to \code{\link{.doMaxQuantFsr}}
+#' @param x a list of estimation results returned by 
+#' \code{\link{.doMaxQuantFsr}}
+#' 
+#' @result
+#' 
+#' @seealso
+#' 
+.genFsrSim <<- function(X, pars, ...){
+  `%>%` <- magrittr::`%>%`
+  enrichment <- function(id, mean, sd){
+    uid <- unique(id)
+    setNames(rnorm(length(uid), mean, sd), uid)[id]
+  }
+  X <- dplyr::mutate(X, BWE = enrichment(Subject, pars$bwe_mean, pars$bwe_sd))
+  useq <- unique(X$Sequence)
+  ubwe <- unique(X$BWE)
+  THEORY <- peptidesToIsotopePatterns(useq, label = list(H2 = ubwe), 
+                                      nMax = pars$maxpeaks)
+  getTheory <- function(sequence, bwe) THEORY[[sequence]][[bwe]]
+  X <- dplyr::rowwise(X) %>%
+    dplyr::mutate(
+      Theory = list(getTheory(Sequence, match(BWE, ubwe))),
+      Isotope.pattern = simulateIsotopePattern(
+        theory = Theory, 
+        model = pars$model,
+        rate = Rate + Delta, 
+        time = Time,
+        count = Count,
+        trap_factor = Trap_factor,
+        noise_threshold = Noise_threshold, 
+        collapse = ";"
+      )      
+    ) %>%
+    dplyr::ungroup()
+  X
+}
+.fitFsrSim <<- function(X, pars, ...){ 
+  `%>%` <- magrittr::`%>%`
+  if(any(X$Delta != 0)){
+    formula <- ~Treatment
+  } else {
+    formula <- ~1
+  }
+  pep <- dplyr::transmute(X, Raw.file = File, Proteins = Proteins, 
+                          Sequence = Sequence, Isotope.pattern = Isotope.pattern)
+  smpl <- dplyr::select(X, File, Subject, BWE, Time, Treatment) %>%
+    dplyr::slice(match(unique(File), File))
+  res <- .doMaxQuantFsr(x = pep, formula = formula, sample_data = smpl, 
+                        minseq = 1, mMax = pars$maxpeaks, model = pars$model, 
+                        contrasts = list(), level = 0.95, plot = FALSE, ...)
+  res <- .appendErrorStats(res, pars)
+  res
+}
+.appendErrorStats <<- function(x, pars){
+  tables <- c("rates","tests")
+  for(what in tables){
+    if(!what %in% names(x)) next
+    truth <- switch(
+      what,
+      rates = with(pars, baserate + delta),
+      tests = with(pars, diff(baserate + delta))
+    )
+    x[[what]]$truth <- truth
+    x[[what]]$error <- x[[what]]$estimate - truth
+    x[[what]]$in_ci <- x[[what]]$`2.5%` < truth & truth <= x[[what]]$`97.5%`
+  }
+  return(x)
+}
+
+#' Internal functions for isotope pattern calculations
+#' 
+#' @description
+#' These functions are used to generate theoretical isotope patterns from 
+#' peptide sequences and data on isotopic molar fractions.
+#' 
+#' @details
+#' These are internal functions. They are not intended to be called directly 
+#' by users.  
+#' 
+#' \describe{
+#'   \item{`.atomsToPattern`}{organizes the calculation of baseline and labeled 
+#'  mass isotope patterns for a given peptide sequence.}
+#'  \item{`.labeledEIP`}{implements the actual calculation of baseline and 
+#'  enriched elemental isotope distributions, and then combines them to return
+#'  molecular isotope distributions.}
+#'  \item{`.truncateElementalIsotopePattern`}{calculates the multinomial isotope 
+#'  distribution for a given element based on a given molar fraction and number 
+#'  of atoms, and truncates its tail. The theoretical patterns include n + 1 
+#'  peaks (n = the number of atoms), but most higher-order peaks are too small 
+#'  to be detected. Given a maximum of `nMax` peaks of interest, the function 
+#'  truncates the higher-order peaks (index > `nMax`). This greatly reduces the 
+#'  sample space dimension for the resulting joint distribution in downstream 
+#'  calculations, while preserving the relative probabilities of the remaining 
+#'  peaks. This is important because `.calcPattern` has complexity on the order
+#'  of O(n^N), where n is the average number of peaks/element and N is the 
+#'  number of elements.}
+#'  \item{`.calcPattern`}{calculates the mass isotopomer distribution for a 
+#'  compound with (possibly) labeled and unlabeled components. Given a set of 
+#'  N isotope pattern vectors for N elements in a compound, where vector i 
+#'  describes the probability for each of K nominal mass differences from the 
+#'  monoisotope due to element i, the sample space for potential total mass 
+#'  differences over the compound is described by a pair of tensors.  The first
+#'  (`.MASSPATTERN`) describes the probabilities of disjoint outcomes and 
+#'  results from a cumulative outer product of the pattern vectors. The second
+#'  (`.MASSDELTA`) describes the corresponding numbers of extra neutrons in 
+#'  the compound, and results from a cumulative outer sum of the pattern 
+#'  labels. Summation of the probabilities over each delta value yields the 
+#'  compound-level mass isotopomer pattern.}
+#'  \item{`.getElementCount`}{counts the number of atoms of each element in a
+#'  molecular formula}
+#' }
+#'  
+#' 
+#' @param x a character vector providing an amino acid sequence (one amino acid
+#' per element) (`.atomsToPattern`) or a vector of molecular formulas 
+#' (`.getElementCount`)
+#' @param m a numeric vector of peak indices starting at 0
+#' @param label a named list of isotopic labels with the form 
+#' `list(isotope = enriched_fraction_value)`
+#' @param isotopes a character vector identifying isotopes to be used in 
+#' calculations. Values must occur in `rownames(fractions)`.
+#' @param aatable data frame of amino acid properties; see 
+#' \code{\link{amino_acids}}. Row names must identify the amino acid by
+#' symbol ("A" for alanine, "R" for arginine, etc.), and variables must include
+#' a `"formula"` providing the molecular composition.
+#' @param fractions data frame of stable heavy isotopes; see 
+#' \code{\link{stable_isotopes}}. Row names must identify the isotopes (e.g., 
+#' "C13") and variables must include `element` and `molar_fraction`.  In 
+#' standard usage, the second variable is added by the calling wrapper, 
+#' `peptidesToIsotopePatterns`.
+#' @param lbl an element from `label`
+#' @param aatable a dta frame of amino acid properties
+#' @param elem a character value identifying a target element (e.g., "C")
+#' @param n a named numeric vector giving the elemental composition of the 
+#' target peptide (as illustration, methane would be `c(C = 1, H = 4)`).
+#' @param nMax the number of isotopic peaks to include in the calculation 
+#' (= max(m) + 1) 
+#' @param unlabeled a named list of numeric vectors providing the element-wise
+#' mass isotope distributions for an unlabeled peptide
+#' @param labeled a named list of numeric vectors providing the element-wise
+#' mass isotope distributions for an isotopically labeled peptide
+#' @param pos a matrix-list specifying the text position of a given elemental 
+#'  symbol in a molecular formula
+#' @param i row indices for `pos`
+#' @param j column indices for `pos`
+#' 
+.atomsToPattern <- function(x, m, label, isotopes, aatable, fractions){
+  if(all(length(label)>0, any(!names(label) %in% isotopes))){
+    isotopes <- c(label, isotopes)
+  }
+  if(any((badiso <- !isotopes %in% rownames(fractions)))){
+    stop(paste0(
+      "The following isotopes are missing from enrichment data: \n",
+      paste(isotopes[badiso], collapse = ", ")
+    ))
+  }
+  nMax <- max(m)+1
+  elements <- unique(gsub("\\d", "", isotopes))
+  elements <- setNames(elements, elements)
+  n0 <- colSums(countAtoms(aatable[x,"formula"])[, elements, drop = FALSE])
+  n0 <- n0[n0 > 0]
+  base_pm0 <- lapply(elements[names(n0)], .truncateElementalIsotopePattern,
+                     n = n0, fractions = fractions, nMax = nMax)
+  
+  if(length(label) > 0){
+    environment(.labeledEIP) <- environment()
+    apply(as.data.frame(label), 1, .labeledEIP, aatable=aatable[x,,drop = FALSE])
+  } else {
+    list(
+      baseline = .calcPattern(m, base_pm0),
+      asymptotic = NULL
+    )
+  }
+}
+.labeledEIP <- function(lbl, aatable){ 
+  elems <- unique(gsub("\\d", "", names(lbl)))
+  elems <- setNames(elems, elems)
+  nL <- colSums(aatable[x, elems, drop = FALSE]) 
+  n0[elems] <- n0[elems] - nL
+  pm0 <- lapply(elements[names(n0)], .truncateElementalIsotopePattern, 
+                n = n0, fractions = fractions, nMax = nMax)
+  isos <- names(lbl)
+  fractions[isos, "molar_fraction"] <- lbl[]
+  pmL <- lapply(elems, .truncateElementalIsotopePattern, n = nL,
+                fractions = fractions, nMax = nMax)
+  list(
+    baseline = .calcPattern(m, base_pm0),
+    asymptotic = .calcPattern(m, pm0, pmL)
+  )
+}
+.truncateElementalIsotopePattern <- function(elem, n, fractions, nMax, 
+                                             warn = 0.05){
+  with(subset(fractions, element == elem), {
+    pe <- elementalIsotopomerPattern(n[elem], molar_fraction, delta)
+    # pe[nMax + 1] <- sum(pe[-(1:nMax)])
+    if((sumpe <- sum(pe[-(1:nMax)])) > warn){
+      
+      # browser()
+      
+      warning(paste0("Total truncated probability for ", elem, " is ", 
+                     round(sumpe, 3), ".\n Consider a larger nMax."))
+    }
+    pe[1:nMax]
+  })
+}
+.calcPattern <- function(m, unlabeled = list(), labeled = list()){
+  .MASSPATTERN <- NULL
+  if(length(unlabeled) > 0){
+    .MASSPATTERN <- unlabeled[[1]]
+    .MASSDELTA <- as.numeric(names(unlabeled[[1]]))
+    if(length(unlabeled) > 1){
+      for(i in 2:length(unlabeled)){
+        .MASSPATTERN <- outer(.MASSPATTERN, unlabeled[[i]], "*")  # use sum(log())?
+        .MASSDELTA <- outer(.MASSDELTA, as.numeric(names(unlabeled[[i]])), "+")
+      }
+    }
+  }
+  if(length(labeled) > 0){
+    if(length(.MASSPATTERN) == 0){
+      .MASSPATTERN <- labeled[[1]]
+      .MASSDELTA <- as.numeric(names(labeled[[1]]))
+    }
+    for(i in 1:length(labeled)){
+      .MASSPATTERN <- outer(.MASSPATTERN, labeled[[i]], "*")  # use sum(log())?
+      .MASSDELTA <- outer(.MASSDELTA, as.numeric(names(labeled[[i]])), "+")
+    }
+  }
+  P <- aggregate(c(.MASSPATTERN), list(c(.MASSDELTA)), sum)
+  Pm <- setNames(P[,2], P[,1])[as.character(m)]
+  Pm/sum(Pm)
+}
+.getElementCount <- Vectorize(function(i,j, pos, x){
+  ans <- 0
+  for(v in pos[i,j][[1]][,2] + 1){
+    if(!grepl("\\D", substr(x[i],v, v))){
+      for(vv in v+0:4){
+        if(grepl("\\D", substr(x[i],v, vv))){
+          break
+        }
+      }
+      ans <- as.numeric(substr(x[i], v, vv - 1))
+    } else {
+      next
+    }
+  }
+  ans
+}, c("i","j"))
+
+## Simulation Functions --------------------------------------------------------
+#' Simulate an experiment measuring fractional synthesis rate (FSR)
+#' 
+#' @description
+#' Runs `n` replicate simulations of an FSR experiment using a specified 
+#' experimental design, set of proteins/peptides, and noise parameters. 
+#' 
+#' @details
+#' Given a user-specified set of protein-peptide pairs, study design, true
+#' rate parameters, and noise parameters, `simFsr` generates `n` replicate
+#' simulations for the specified experiment and then summarizes the resulting 
+#' error statistics. Optionally, it can also return the simulated datasets,  
+#' either with the analysis results or by themselves.  
+#' 
+#' \strong{Arguments}
+#' 
+#' Inputs for the simulation are as follows:
+#' \describe{
+#'   \item{Proteins and peptides}{are defined by `sequences`, which expects a 
+#'   named list of character vectors.  Each component is a name-value pair in
+#'   which the name identifies a protein or protein group, and the value is
+#'   a vector of peptide sequences associated with that protein.} 
+#'   \item{The experimental design}{is determined by the arguments `nreps`, 
+#'   `ntimes`, `delta`, and `repeated`.  The number of time points is set by 
+#'   `ntimes`, with values `1:ntimes`.  If `repeated = FALSE`, the simulation 
+#'   uses a simple randomized design with `nreps` cases for each treatment at 
+#'   each time point. Alternatively, if `repeated = TRUE`, measurements are
+#'   repeated for each subject at each time point.  Setting `delta = 0` yields
+#'   a single-sample design in which FSR is estimated for each protein in a 
+#'   single population.  Otherwise, an independent two-sample design is used,
+#'   and the analysis includes a test for the null hypothesis of no difference 
+#'   between the populations.}
+#'   \item{True, average rate parameters}{are determined by `baserate` and 
+#'   `delta`, where `baserate` defines the FSR for the control condition and 
+#'   `delta` defines the true effect of the treatment. In other words, the FSR 
+#'   for the treatment condition is defined as `baserate + delta`.}
+#'   \item{Average body water enrichment (BWE)}{in the study population is 
+#'   controlled by `bwe_mean`}
+#'   \item{Noise components}{include `bwe_sd`, `rate_sd`, `count`, `trap_factor`,
+#'   and `noise threshold`. Each unique subject (i.e., biological replicate) has
+#'   a BWE value drawn from $N(`bwe_mean`, `bwe_sd`)$, and an individual
+#'   fractional synthesis rate drawn independently for each protein from 
+#'   $N(`mu`, `rate_sd`)$, where `mu = baserate` or `baserate + delta`, 
+#'   depending on treatment.  Th arguments `count`, `trap_factor`, and 
+#'   `noise_threshold` parameterize a censored multinomial-(quasi-)Poisson
+#'   observation model for the observed isotope patterns in the simulated
+#'   data. See \code{\link{simulateIsotopePattern}} for details.}
+#' }
+#' In addition to the arguments listed above, `simFsr` accepts several control
+#' arguments that modify outputs or influence processing:
+#' \describe{
+#'   \item{`model`}{determines the rate model used to generate the data. See 
+#'   \code{\link{simulateIsotopePattern}} for additional details.}
+#'   \item{`maxpeaks`}{determines the number of peaks in the theoretical
+#'   baseline and asymptotic isotope distributions.  It should be large 
+#'   enough that the baseline distribution approaches 100% coverage.}
+#'   \item{`ncore`}{sets the number of processing cores to be used. If 
+#'   `ncore > 1`, simulation and analysis are parallelized using the
+#'   \code{\link{parallel}} and \code{\link{pbapply}} packages.}
+#'   \item{`seed`}{sets an overall random number seed for the simulation.}
+#'   \item{`analyze`}{determines whether fitting will occur.  If analyze is 
+#'   `FALSE`, only the simulated data will be returned (assuming 
+#'   `data = TRUE`).}
+#'   \item{`data`}{controls whether or not the simulated data themselves are 
+#'   returned.}
+#'   \item{`...`}{additional arguments may be passed to `.genFsrSim` or
+#'   `.fitFsrSim`, and from them on to downstream function calls.}
+#' }
+#' 
+#' \strong{Processing}
+#' 
+#' Processing proceeds as follows:
+#' \enumerate{
+#'   \item{`sequences` is parsed to generate vector of protein names}
+#'   \item{A data frame, `X` is generated that includes one row for each 
+#'   sample-sequence combination, where a "sample" is a physical specimen 
+#'   (i.e., one biological replicate at one time point). The data frame 
+#'   includes all of the information needed to determine treatment values and 
+#'   synthesis rates for each row.}
+#'   \item{Proteins, treatments ("control" vs "treatment" as determined by 
+#'   `X$Delta` == 0 or `delta`), subject id's, and file names (i.e., imaginary  
+#'   MS data files) are appended to `X`.}
+#'   \item{Arguments are collected into a list object named `pars` to be passed
+#'   to `.genFsrSim` and `.fitFsrSim`.}
+#'   \item{The random number generator and parallel processing culster are 
+#'   initialized, if needed. If `ncore > 1` and `seed = NULL`, `seed` is first
+#'   set by a call to \code{\link{Sys.time()}}. Then RNG streams on individual 
+#'   processing cores are initialized by a call to 
+#'   \code{\link{parallel::clusterSetRNGStream}}. When `simFsr` exits (i.e.,
+#'   \code{\link{on.exit}}), the parallel processing cluseter is automatically
+#'   stopped and (if `ncore == 1`) the random number seed is restored to its
+#'   original value.}
+#'   \item{Depending on whether `data == TRUE`, `.genFsrSim` and `.fitFsrSim`
+#'   are called sequentially (via \code{\link{pbapply::pbreplicate}} and 
+#'   \code{\link{pbapply::pbsapply}}) or through a single call to 
+#'   \code{\link{pbapply::pbreplicate}}.  The later option discards datasets
+#'   as they are used, so it minimizes memory requirements and communications
+#'   between processes.}
+#'   \item{The results are modified to include a simulation ID number, summary
+#'   tables, and metadata about the simulation settings, and then returned.}
+#' }
+#' 
+#' @param .source the path to source file for the fsr analysis functions 
+#' (i.e., this file). This is used to read the files into child processes 
+#' during parallel processing if `ncore > 1`. Defaults to the name of the 
+#' published file in current working directory. 
+
+#' 
+#' @result
+#' A named list with up to 6 components:
+#' \describe{
+#'   \item{`$rates`}{rate estimates for each protein in each replicate simulation}
+#'   \item{`$tests`}{contrast estimates for each protein in each simulation (not
+#'   included if `delta = 0`)}
+#'   \item{`$summary`}{summary statistics for each protein, across the set of
+#'    simulations}
+#'   \item{`$overall`}{summary statistics aggregated across proteins and 
+#'   simulations}
+#'   \item{`$metadata`}{information about the simulation parameters, including
+#'   argument values and the random number seed}
+#'   \item{`$data`}{if requested, a list of the datasets from all 'n' replicate
+#'    simulations (excluded if `data = FALSE`)}
+#' }
+#' 
+#' @seealso
+#' \code{\link{.genFsrSim}} for data simulation details and 
+#' \code{\link{.fitFsrSim}} for model fitting.
+#' 
+#' @examples
+#' \dontrun{
+#'   # Run single simulation of one protein with 2 peptides, measured
+#'   # at a single time point with 3 reps and 2 treatments, assuming 
+#'   # no inter-individual variation in rate or BWE. 
+#'   fsr <- simFsr(
+#'     n = 1, 
+#'     sequences = list(P07724 = c("CCSGSLVER", "CCTLPEDQR")), 
+#'     nreps = 3, ntimes = 1, baserate = 0.5, delta = 0.2, 
+#'     bwe_mean = 0.02, bwe_sd = 0, rate_sd = 0, model = "e1c", 
+#'     count = 1e6, trap_factor = 1, noise_threshold = 100, 
+#'     repeated = FALSE, maxpeaks = 10, ncore = 1, 
+#'     seed=12345
+#'     )
+#' }
+#' #' \dontrun{
+#'   # Returning only the simulated datasets. 
+#'   fsr <- simFsr(
+#'     n = 1, 
+#'     sequences = list(P07724 = c("CCSGSLVER", "CCTLPEDQR")), 
+#'     nreps = 3, ntimes = 1, baserate = 0.5, delta = 0.2, 
+#'     bwe_mean = 0.02, bwe_sd = 0, rate_sd = 0, model = "e1c", 
+#'     count = 1e6, trap_factor = 1, noise_threshold = 100, 
+#'     repeated = FALSE, maxpeaks = 10, ncore = 1, 
+#'     analyze = FALSE, data = TRUE, 
+#'     seed=12345
+#'     )
+#' }
+#' 
+simFsr <<- function(n, sequences, nreps, ntimes, baserate, delta, bwe_mean, 
+                    bwe_sd, rate_sd, count, trap_factor, noise_threshold, 
+                    repeated = FALSE, unbalanced = 0, model = "e1c", maxpeaks, 
+                    ncore = 1, seed = NULL, analyze = TRUE, data = FALSE, 
+                    .source = "./Beckett_et_al_2026_fsr_functions.R", ...){
+  `%>%` <- magrittr::`%>%`
+  proteins <- do.call(c, lapply(names(sequences), function(i){
+    setNames(rep(i, length(sequences[[i]])), sequences[[i]])
+  }))
+  delta <- unique(c(0, delta))
+  X <- expand.grid(
+    Sequence = unlist(sequences),
+    Rep = 1:nreps,
+    Rate = baserate,
+    Delta = delta,
+    Time = 1:ntimes,
+    Count = count,
+    Trap_factor = trap_factor,
+    Noise_threshold = noise_threshold,
+    stringsAsFactors = FALSE
+  )
+  if(unbalanced > 1 | unbalanced < 0 | !is.numeric(unbalanced)){
+    stop("unbalanced should be a proportion of sequence reads to drop.")
+  }
+  if(unbalanced > 0){
+    X <- X[-sample(nrow(X))[1:(nrow(X)*unbalanced)],] # goes to floor
+  }
+  X <- dplyr::mutate(
+    X, 
+    Proteins = proteins[unlist(Sequence)],
+    Treatment = c("Control","Treatment")[(Delta != 0) + 1],
+    Subject = if(repeated){
+      paste(Treatment, Rep, sep = "_")
+    } else {
+      paste(Treatment, Rep, Time, sep = "_")
+    },
+    File = paste0(Subject, "_", Time)
+  )
+  pars <- list(model = model, baserate = baserate, delta = delta, 
+               bwe_mean = bwe_mean, bwe_sd = bwe_sd, rate_sd, count = count,
+               trap_factor = trap_factor, noise_threshold = noise_threshold,
+               maxpeaks = maxpeaks, nreps = nreps, ntimes = ntimes, 
+               repeated = repeated)
+  if(is.null(ncore)|ncore == 1){
+    cl <- NULL
+    if(!is.null(seed)){
+      rs <- .Random.seed
+      set.seed(seed)
+      on.exit(.Random.seed <<- rs)
+    }
+  } else {
+    cl <- parallel::makeCluster(ncore)
+    on.exit(parallel::stopCluster(cl))
+    seed <- ifelse(is.null(seed), as.numeric(Sys.time()), seed)
+    parallel::clusterSetRNGStream(cl, seed)
+    parallel::clusterCall(cl, source, file = .source)
+    parallel::clusterCall(cl, library, package = "dplyr", character.only = TRUE)
+  }
+  if(data){
+    message("Simulating datasets:")
+    DATA <- pbapply::pbreplicate(n, .genFsrSim(X, pars, ...), 
+                                 simplify = FALSE, cl = cl)
+    if(analyze){
+      message("Running analyses:")
+      RES <- pbapply::pbsapply(DATA, .fitFsrSim, pars = pars, ..., cl = cl)
+    } else {
+      return(DATA)
+    }
+  } else {
+    if(!analyze){
+      warning("analyze = FALSE and DATA = FALSE --> Nothing to do!")
+      return(invisible())
+    }
+    message("Running simulations:")
+    DATA = NULL
+    RES <- pbapply::pbreplicate(n, {
+      X <- .genFsrSim(X, pars, ...)
+      .fitFsrSim(X, pars, ...)
+    }, cl = cl)
+  }
+  suppressMessages({
+    RES <- apply(RES, 1, do.call, what = rbind)   
+    RES$rates <- dplyr::mutate(
+      RES$rates, 
+      simId = rep(1:n, each = length(delta) * length(sequences))
+    ) %>% 
+      dplyr::select(c(10,1:9))
+    S1 <- dplyr::summarize(
+      dplyr::group_by(RES$rates, Protein, parameter),
+      type = "estimate",
+      coverage = sum(in_ci, na.rm = TRUE)/sum(!is.na(in_ci)),
+      mean = mean(error, na.rm = TRUE),
+      sd = sd(error, na.rm = TRUE),
+      as.data.frame(t(quantile(error, c(0,0.1,0.25,0.5,0.75,0.9,1), 
+                               na.rm = TRUE)))
+    ) %>% dplyr::ungroup()
+    if(!is.null(RES$tests)){
+      RES$tests <- dplyr::mutate(
+        RES$tests,
+        simId = rep(1:n, each = length(sequences))
+      ) %>% 
+        dplyr::select(c(13,1:12))
+      S2 <- dplyr::summarize(
+        dplyr::group_by(RES$tests, Protein, parameter),
+        type = "contrast",
+        coverage = sum(in_ci, na.rm = TRUE)/sum(!is.na(in_ci)),
+        mean = mean(error, na.rm = TRUE),
+        sd = sd(error, na.rm = TRUE),
+        as.data.frame(t(quantile(error, c(0,0.1,0.25,0.5,0.75,0.9,1), 
+                                 na.rm = TRUE))),
+        reject = sum(p.value <= 0.05, na.rm = TRUE)/sum(!is.na(p.value))
+      ) %>%
+        dplyr::ungroup() 
+    } else {
+      S2 <- NULL
+      S1$reject <- NA
+    }
+    RES$summary <- dplyr::bind_rows(S1, S2)
+    RES$overall <- dplyr::summarize(
+      dplyr::group_by(RES$summary, type),
+      mean_coverage = mean(coverage),
+      sd_coverage = sd(coverage),
+      setNames(as.data.frame(t(range(coverage))), 
+               c("min_coverage","max_coverage")),
+      mean_reject = mean(reject),
+      sd_reject = sd(reject),
+      setNames(as.data.frame(t(range(reject))), 
+               c("min_reject","max_reject"))
+    ) %>% dplyr::ungroup()
+    RES$metadata <- list(ncore = ncore, random.seed = seed, 
+                         data = data, pars = pars)
+    RES$datasets <- DATA
+  })
+  return(RES)
+}
+
+# A cluster version of simFsr (WARNING: This has not been extensively tested)
+simFsrCluster <- function(run_id, rep_num, N, seed, sequences, nreps, ntimes, 
+                          baserate, delta, bwe_mean, bwe_sd, noise, maxpeaks, 
+                          repeated = FALSE, model = "e1c"){
+  `%>%` <- magrittr::`%>%`
+  proteins <- do.call(c, lapply(names(sequences), function(i){
+    setNames(rep(i, length(sequences[[i]])), sequences[[i]])
+  }))
+  delta <- unique(c(0, delta))
+  X <- expand.grid(
+    Sequence = unlist(sequences),
+    Rep = 1:nreps,
+    Rate = baserate,
+    Delta = delta,
+    Time = 1:ntimes,
+    Noise = noise,
+    stringsAsFactors = FALSE
+  )
+  X <- dplyr::mutate(
+    X, 
+    Proteins = proteins[unlist(Sequence)],
+    Treatment = c("Control","Treatment")[(Delta != 0) + 1],
+    Subject = if(repeated){
+      paste(Treatment, Rep, sep = "_")
+    } else {
+      paste(Treatment, Rep, Time, sep = "_")
+    },
+    File = paste0(Subject, "_", Time)
+  )
+  pars <- list(run_id = run_id, rep_num = rep_num, seed = seed, model = model, 
+               baserate = baserate, delta = delta, bwe_mean = bwe_mean, 
+               bwe_sd = bwe_sd, noise = noise, maxpeaks = maxpeaks, 
+               nreps = nreps, ntimes = ntimes, repeated = repeated)
+  set.seed(seed)
+  set.seed(round(runif(N, -1e9, 1e9))[j])
+  X <- .genFsrSim(X, pars)
+  RES <- .fitFsrSim(X, pars)
+  OUT <- list(pars = pars, res = RES)
+  save(OUT, file = paste0("fsr_sim_result_", run_id, "_", rep_num))
+  return(0)
+}
+
+#' Simulate observed isotope distributions (OIDs) for a given peptide sequence
+#' 
+#' @description
+#' Simulates one collection of OIDs for a set of rates and time points.
+#' 
+#' @details
+#' OID peak intensities are simulated by a censored multinomial-quasi-Poisson  
+#' error process, with the target distribution defined as a mixture of 
+#' theoretical baseline (without heavy isotope enrichment) and asymptotically 
+#' enriched isotope patterns. Mixture weights (labeled `frac_new` in the source  
+#' code) are determined by the synthesis `model`, `rate` parameter, and 
+#' sampling `time`.
+#' 
+#' @param theory a named list with components `$baseline` and `$asymptotic`
+#' each containing an n sample x m peaks matrix of thoretical isotope patterns
+#' @param model character value defining the synthesis model to be used (only 
+#' `"e1c"` is currently recognized, for "exponential-one-compartment")
+#' @param rate a numeric vector of fractional synthesis rates (length n)
+#' @param time a numeric vector of times (in arbitrary units)
+#' @param count peptide abundance in the sample
+#' @param trap_factor ion trap effect on Poisson noise in peak height; the 
+#' standard deviation of the noise is reduced by a factor `1/trap_factor`
+#' @param noise_threshold minimum intensity value at which a peak is recognized
+#' as a signal; peaks `< noise_threshold` are reset to 0.
+#' @param collapse logical; if `TRUE` the result is returned as a character 
+#' vector with peak intensities separated by semicolons (;). Otherwise, a
+#' numeric matrix is returned.
+#' 
+#' @result
+#' A vector or matrix of simulated isotope patterns, depending on the value of 
+#' `collapse`.
+#' 
+#' @seealso
+#' \code{\link{simFsr}} for usage context.  
+#' \code{\link{peptidesToIsotopePatterns}} for the generation of theoretical
+#' distributions.
+#' 
+simulateIsotopePattern <<- function(theory, model = "e1c", rate = 0.5, time = 1, 
+                                    count = 1e6, trap_factor = 1,
+                                    noise_threshold = 100, collapse = FALSE){
+  frac_new <- switch(
+    model,
+    e1c = 1 - exp(-rate %o% time),
+    paste0(quote(model), " is not a recognized model.")
+  )
+  obsv <- real <- with(theory, (1-frac_new) %x% baseline + frac_new %x% asymptotic)
+  lambda <- rmultinom(1, count, real) 
+  obsv[] <- (rpois(length(lambda), lambda) - lambda)/trap_factor + lambda
+  obsv[obsv < noise_threshold] <- 0
+  y <- t(obsv)/colSums(obsv)
+  if(!isFALSE(collapse)){
+    apply(y, 1, function(yy) paste(yy[yy > 0], collapse = collapse))
+  } else {
+    y
+  }
+}
+
+#' Generate an FSR simulation experiment using a Latin hypercube design
+#' 
+#' @description
+#' Uses a Latin hypercube design to parameterize a set of simulations that can 
+#' examine the effects of sample size (`nreps`), the number of time points 
+#' (`ntimes`), base fractional synthesis rate  (`baserate`), treatment effects 
+#' (`delta`), BWE, protein abundance (`count`), and mass spectrometer 
+#' performance () on FSR estimation.
+#' 
+#' @details
+#' A unit Latin hypercube is generated for `n` simulations using an optimal 
+#' design (see \code{\link{lhs::optimum}} for details), and then rescaled as
+#' determined by the function arguments. For convenience, a random number seed
+#' is also generated for each simulation run, so that results can be replicated
+#' if desired.
+#' 
+#' @param n number of simulation runs desired
+#' @param nreps length 2 numeric vector defining bounds for the number of 
+#' sample replicates per simulated experiment
+#' @param ntimes length 2 numeric vector defining bounds for the number of time
+#' points per simulated experiment
+#' @param baserate length 2 numeric vector defining bounds for the baseline
+#' (control) fractional synthesis rate (FSR)
+#' @param delta length 2 numeric vector defining bounds for the treatment 
+#' effect on FSR
+#' @param bwe_mean length 2 numeric vector defining bounds for the mean body 
+#' water enrichment (BWE) per simulation
+#' @param bwe_sd length 2 numeric vector defining bounds for the standard 
+#' deviation of BWE in each simulated experiment
+#' @param count length 2 numeric vector defining bounds for protein abundance
+#' @param trap_factor length 2 numeric vector defining bounds for the ion trap
+#' effect on peak noise
+#' @param seed optional numeric value used to seed the generation of random 
+#' number seeds. By default, seeds are drawn from a uniform distribution that
+#' is seeded based on a call to \code{\link{Sys.time()}}.
+#' 
+#' @result
+#' A data frame providing inputs for a simulation experiment. 
+#' 
+#' @seealso
+#' \code{\link{lhs::optimum}} for more on Latin hypercubes, \code{\link{simFsr}}
+#' for details on simulation procedures.
+#' 
+buildHyperCube <- function(n, nreps = c(3, 10), ntimes = c(1, 5), 
+                           baserate = c(0.1, 1.9), delta = c(0.5, 2), 
+                           bwe_mean = c(0.02, 0.02), bwe_sd = c(0, 0.005), 
+                           count = c(10e4, 10e7), trap_factor = c(1,1), 
+                           seed = NULL){
+  H <- lhs::optimumLHS(n, 8)
+  colnames(H) <- c("nreps", "ntimes", "baserate", "delta",
+                   "bwe_mean", "bwe_sd", "count", "trap_factor")
+  H <- as.data.frame(H)
+  H$nreps <- round(nreps[1] - 0.5 + H$nreps * (diff(nreps) + 1))
+  H$ntimes <- round(ntimes[1] - 0.5 + H$ntimes * (diff(ntimes) + 1))
+  H$baserate <- baserate[1] + H$baserate * diff(baserate)
+  H$delta <- delta[1] + H$delta * diff(delta)
+  H$bwe_mean <- bwe_mean[1] + H$bwe_mean * diff(bwe_mean)
+  H$bwe_sd <- bwe_sd[1] + H$bwe_sd * diff(bwe_sd)
+  H$count <- count[1] + H$count * diff(count)
+  H$trap_factor <- trap_factor[1] + H$trap_factor * diff(trap_factor)
+  rs <- .Random.seed
+  on.exit(.Random.seed <<- rs)
+  seed <- ifelse(is.null(seed), as.numeric(Sys.time()), seed)
+  set.seed(seed)
+  H$seed <- round(runif(nrow(H), -1e9, 1e9))
+  H
+}
+
+## Data sets -------------------------------------------------------------------
+#' Amino acids
+#' 
+#' A table of the 20 amino acids with their symbols, names, chemical formulae,
+#' and number of substitutable hydrogen atoms. Columns are included for other 
+#' elements as well, but no values have been supplied (this may be changed in 
+#' future updates).
+#'   
+#' @format A data frame with 20 rows and 13 variables:
+#' \describe{
+#'   \item{symbol}{symbol for the amino acid}
+#'   \item{name}{name of the amino acid}
+#'   \item{formula}{chemical formula}
+#'   \item{C}{number of substitutable carbon atoms}
+#'   \item{H}{number of substitutable hydrogen atoms}
+#'   \item{N}{number of substitutable nitrogen atoms}
+#'   \item{O}{number of substitutable oxygen atoms}
+#'   \item{P}{number of substitutable phosphorous atoms}
+#'   \item{S}{number of substitutable sulfur atoms}
+#'   \item{F}{number of substitutable florine atoms}
+#'   \item{Cl}{number of substitutable chlorine atoms}
+#'   \item{Br}{number of substitutable bromine atoms}
+#'   \item{I}{number of substitutable iodine atoms}
+#' }
+#' 
+amino_acids <- data.frame(
+  stringsAsFactors = FALSE,
+  symbol = c("A","R","N","D","C","E","Q","G","H","I",
+             "L","K","M","F","P","S","T","Y","W","V"),
+  name = c("Alanine","Arginine","Asparagine","Aspartate",
+           "Cysteine","Glutamate","Glutamine","Glycine",
+           "Histidine","Isoleucine","Leucine","Lysine",
+           "Methionine","Phenylalanine","Proline","Serine",
+           "Threonine","Tyrosine","Tryptophan","Valine"),
+  formula = c("C3H7N1O2","C6H14N4O2","C4H8N2O3","C4H7N1O4",
+              "C3H7N1O2S1","C5H9N1O4","C5H10N2O3","C2H5N1O2",
+              "C6H9N3O2","C6H13N1O2","C6H13N1O2","C6H14N2O2",
+              "C5H11N1O2S1","C9H11N1O2","C5H9N1O2","C3H7N1O3",
+              "C4H9N1O3","C9H11N1O3","C11H12N2O2","C5H11N1O2"),
+  C = NA,
+  H = c(4,3,2,2,2,4,4,2,3,1,1,1,1,0,3,3,0,0,0,1),
+  N = NA, 
+  O = NA, 
+  P = NA, 
+  S = NA, 
+  F = NA, 
+  Cl = NA, 
+  Br = NA, 
+  I = NA,
+  row.names = c("A","R","N","D","C","E","Q","G","H","I",
+                "L","K","M","F","P","S","T","Y","W","V")
+)
+
+#' Stable heavy isotopes
+#' 
+#' A table of stable isotopes for common elements in biological compounds,
+#' including molar fractions, nominal mass differences (= extra neutron
+#' counts), and exact masses. Monoisotopes are \strong{not} included in this 
+#' table but may be added in the future. 
+#' 
+#' Exact mass values follow NIST. Because references differ on the exact values 
+#' for molar fractions of some isotopes, this table includes values from both 
+#' NIST and IUPAC. Users should be aware that these numbers represent global 
+#' averages and may not be representative for the background abundances in a 
+#' given sample due to geographic variation, changes in isotope abundance over 
+#' time, and  fractionation by biological or industrial processes. If it is 
+#' feasible to do so, users should apply values specific to their study system 
+#' instead of values from this table. 
+#'   
+#' @format A data frame with 10 rows and 4 variables:
+#' \describe{
+#'   \item{element}{atomic symbol}
+#'   \item{number}{mass number}
+#'   \item{delta}{number of extra neutrons, relative to the monoisotope}
+#'   \item{mass}{relative atomic mass, where C12 = 12}
+#'   \item{nist}{average proportional molar abundance reported by NIST}
+#'   \item{iupac}{average proportional molar abundance reported by IUPAC; 
+#'   midpoints are reported for isotopes where the database contains a range 
+#'   of values}
+#' }
+#' 
+#' @references
+#' Coursey, J.S., Schwab, D.J., Tsai, J.J., and Dragoset, R.A. 
+#' \emph{\link[
+#' https://www.nist.gov/pml/atomic-weights-and-isotopic-compositions-relative-atomic-masses
+#' ]{Atomic weights and isotopic compositions with relative atomic masses.} 
+#' U.S. National Institutes of Standards and Technology. (accessed 10 August 
+#' 2023)
+#' 
+#' CIAAW. \emph{\link[https://www.ciaaw.org/isotopic-abundances.htm]{Isotopic 
+#' compositions of the elements 2021}. International Union of Pure and Applied 
+#' Chemistry. (accessed 10 August 2023) 
+#' 
+stable_isotopes <- data.frame(
+  element = c("C","H","N","O","O","S","S","S","Cl","Br"),
+  number = c(13,2,15,17,18,33,34,36,37,81),
+  delta = c(1,1,1,1,2,1,2,3,2,2), # nominal mass difference from monoisotope
+  mass = c(
+    C = 13.00335483507,     # Carbon 13
+    H = 2.01410177812,      # Hydrogen 2 (Deuterium)
+    N = 15.00010889888,     # Nitrogen 15
+    O17 = 16.99913175650,   # Oxygen 17
+    O18 = 17.99915961286,   # Oxygen 18
+    # P = 0,                      # Phosphorous has no stable heavy isotopes
+    S33 = 32.9714589098,   # Sulfur 33
+    S34 = 33.967867004,   # Sulfur 34
+    S36 = 35.96708071,   # Sulfur 36
+    # F = 0,                      # Florine has no stable heavy isotopes
+    Cl = 36.965902602,                  # Chlorine 37
+    Br = 80.9162897                   # Bromine 81
+    # I = 0                       # Iodine has no stable heavy isotopes
+  ),
+  nist = c(
+    C = 0.01070,   # 0.011078,    # Carbon 13
+    H = 0.000115,   # 0.00015574,  # Hydrogen 2 (Deuterium)
+    N = 0.00364,   # 0.003663,    # Nitrogen 15
+    O17 = 0.00038, # 0.0003790,   # Oxygen 17
+    O18 = 0.00205, # 0.0020004,   # Oxygen 18
+    # P = 0,                      # Phosphorous has no stable heavy isotopes
+    S33 = 0.0075,  # 0.0419599,   # Sulfur 33
+    S34 = 0.0425,  # 0.0074869,   # Sulfur 34
+    S36 = 0.0001,  # 0.0001458,   # Sulfur 36
+    # F = 0,                      # Florine has no stable heavy isotopes
+    Cl = 0.2424,                  # Chlorine 37
+    Br = 0.4931                   # Bromine 81
+    # I = 0                       # Iodine has no stable heavy isotopes
+  ),
+  iupac = c(
+    C = 0.01060,     # 0.011078,    # Carbon 13
+    H = 0.000145,    # 0.00015574,  # Hydrogen 2 (Deuterium)
+    N = 0.003795,    # 0.003663,    # Nitrogen 15
+    O17 = 0.0003835, # 0.0003790,   # Oxygen 17
+    O18 = 0.002045,  # 0.0020004,   # Oxygen 18
+    # P = 0,                      # Phosphorous has no stable heavy isotopes
+    S33 = 0.00763,   # 0.0419599,   # Sulfur 33
+    S34 = 0.04365,   # 0.0074869,   # Sulfur 34
+    S36 = 0.000158,  # 0.0001458,   # Sulfur 36
+    # F = 0,                      # Florine has no stable heavy isotopes
+    Cl = 0.242,                  # Chlorine 37
+    Br = 0.4935                   # Bromine 81
+    # I = 0                       # Iodine has no stable heavy isotopes
+  ),
+  row.names = paste0(
+    c("C","H","N","O","O","S","S","S","Cl","Br"),
+    c(13,2,15,17,18,33,34,36,37,81)
+  )
+)
\ No newline at end of file
diff --git a/Beckett_et_al_2026_fsr_run.R b/Beckett_et_al_2026_fsr_run.R
new file mode 100644
index 0000000..dbc9838
--- /dev/null
+++ b/Beckett_et_al_2026_fsr_run.R
@@ -0,0 +1,41 @@
+# Settings used to estimate fractional synthesis rates in Beckett et al. 
+# (2026) The impact of high fat diet on global protein abundance and 
+# fractional synthetic rate in liver and mammary gland of peak lactation 
+# ICR mice.
+
+source("fsr_functions.R")
+
+liverfsr <- maxQuantFsr(
+  mq_folder = "./liver",     
+  sample_data = NULL,  
+  formula = ~ Diet,
+  msms_count = 0.5,    
+  scan_corr = 0.8,     
+  use_single_scans = TRUE,  
+  corr_method = "spearman", 
+  minSeq = 1,      
+  model = "e1c",       
+  level = 0.95,        
+  plot = FALSE,        
+  save = "xlsx",
+  ncore = 5,
+  unique_only = FALSE
+)
+
+mammfsr <- maxQuantFsr(
+  mq_folder = "./mammary_gland",     
+  sample_data = NULL,  
+  formula = ~ Diet,
+  msms_count = 0.5,    
+  scan_corr = 0.8,     
+  use_single_scans = TRUE,  
+  corr_method = "spearman", 
+  minSeq = 1,      
+  model = "e1c",       
+  level = 0.95,        
+  plot = FALSE,        
+  save = "xlsx",
+  ncore = 10,
+  unique_only = FALSE
+)
+