Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
DataExploration-AAPF/calculate_repeatability.r
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
184 lines (142 sloc)
5.78 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# calculate_repeatability.r | |
library(dplyr) | |
library(lme4) | |
#library(rptR) | |
# Input long format data | |
# BXXXX, Round 1 | |
path.rgb.long <- ("../AAPF_Bayer_2021_to_2024/2_R_compatible_table/round1_2021_2022/df_rgb_long_round1.csv") | |
path.hsi.long <- ("../AAPF_Bayer_2021_to_2024/2_R_compatible_table/round1_2021_2022/df_hsi_long_round1.csv") | |
path.rpt <- ("./rpt_no_public_round1.csv") | |
# BXXXX Round 2 | |
#path.rgb.long <- ("./df_rgb_long.csv") | |
#path.hsi.long <- ("./df_hsi_long.csv") | |
#path.xray.long <- ("./df_xray_long.csv") | |
#path.rpt <- ("./rpt_no_public_round2.csv") | |
# Define varieties that is excluded for repeatability measurement | |
exclude.variety <-c("P1", "P2", "P3", "P4") | |
# Formula used to evaluate repeatability | |
case <- 1 | |
# Case 1: Y_ij = u + H_i + R_j + e_ij | |
# Y_ij: phenotypic measurement of the ith hybrid in the jh rep | |
# u: grand mean | |
# H_i: random effect of the ith hybrid | |
# R_j: fixed effect of the jth replicate | |
# e_ij: residual error for the ith hybrid in the jth rep. | |
# Repeatability(H^2): Var(hybrid)^2 / {Var(hybrid)^2 + Var(error)^2/rep} | |
# Set formula based on case number | |
if (case==1){ | |
# When number of varieties, reps (experiment id) > 5 | |
formula <- as.formula("value ~ 1 + (1|VARIETY) + (1|EXP.ID)") | |
} | |
# Load RGB and HSI data in long format | |
message("Loading data...") | |
df.rgb <- read.csv(path.rgb.long) | |
df.hsi <- read.csv(path.hsi.long) | |
#df.xray <- read.csv(path.xray.long) | |
# Add sensor column for clarity | |
df.rgb$sensor <- 'RGB' | |
df.hsi$sensor <- 'HSI' | |
#df.xray$sensor <- 'XRAY' | |
# Exclude varieties, as needed | |
if (length(exclude.variety)>0){ | |
df.rgb <- df.rgb %>% filter(!VARIETY %in% exclude.variety) | |
df.hsi <- df.hsi %>% filter(!VARIETY %in% exclude.variety) | |
#df.xray <- df.xray %>% filter(!VARIETY %in% exclude.variety) | |
} | |
# Combine RGB, HSI, and XRAY data | |
#df <- do.call("rbind", list(df.rgb, df.hsi, df.xray)) | |
df <- do.call("rbind", list(df.rgb, df.hsi)) | |
# Create a dataframe to record repeatability | |
message("Finding unique conditions to evaluate repeatability...") | |
if (case==1){ | |
df.rpt <- unique(df[, c('TREATMENT', | |
'GROWTH_STAGE', | |
'View', | |
'frame_nr', | |
'variable', | |
'sensor')]) | |
} | |
# Sort output dataframe. This line can help increase data search efficiency | |
if (case==1){ | |
df.rpt <- df.rpt[order(df.rpt$TREATMENT), ] | |
} | |
# Add columns to record results | |
if (case==1){ | |
df.rpt$var.variety <- NA | |
df.rpt$var.exp.id <- NA | |
df.rpt$var.residual <- NA | |
df.rpt$grand.mean <- NA | |
df.rpt$n.rep <- NA | |
df.rpt$repeatability <- NA | |
} | |
# Calculate repeatability | |
message('Calculating repeatability...') | |
for(i in 1:nrow(df.rpt)){ | |
if (i %% 100 == 0){ | |
message(paste0("(", i, "/", nrow(df.rpt), ")", sep="")) | |
} | |
# Get subset of data under specific condition to evaluate repeatability | |
row <- df.rpt[i,] | |
# Create a subset of data, this can improve data search efficiency | |
att.search <- row[1,1] | |
# Get reduced dataframe if TREATMENT is new or updated | |
if(i==1){ | |
df.search <- df[which(df[ ,colnames(row[1])]==att.search), ] | |
} else if (i>1 & att.search!=att.prev){ | |
df.search <- df[which(df[ ,colnames(row[1])]==att.search), ] | |
} | |
# Subset data by condition by which repeatability is measured | |
if (case==1){ | |
df.temp <- df.search[which(df.search$GROWTH_STAGE==row$GROWTH_STAGE & | |
df.search$View==row$View & | |
df.search$frame_nr==row$frame_nr & | |
df.search$variable==row$variable), ] | |
} | |
# Check if there are too many missing values | |
num.na <- sum(is.na(df.temp$value)) | |
if (nrow(df.temp) == 0){ | |
att.prev <- att.search | |
message(paste0("(", i, "/", nrow(df.rpt), "): No data", sep="")) | |
next | |
} | |
if (num.na/nrow(df.temp) > 0.3){ | |
att.prev <- att.search | |
message(paste0("(", i, "/", nrow(df.rpt), "): Insufficient data", sep="")) | |
next | |
} | |
# Select valid data | |
df.temp <- df.temp[!is.na(df.temp$value) & is.finite(df.temp$value), ] | |
# Evaluate repeatability | |
if (case==1){ | |
model <- lmer(formula=as.formula(formula), data=df.temp) | |
df.cor <- data.frame(VarCorr(model)) | |
var.variety <- df.cor[df.cor$grp=="VARIETY", "vcov"] | |
var.exp.id <- df.cor[df.cor$grp=="EXP.ID", "vcov"] | |
var.residual <- df.cor[df.cor$grp=="Residual", "vcov"] | |
if (var.variety==0 & var.exp.id==0 & var.residual==0) next | |
grand.mean <- summary(model)$coefficients[[1]] | |
n.rep <- summary(model)$ngrps["EXP.ID"] | |
rpt <- var.variety / (var.variety + var.residual/n.rep) | |
#rpt.temp <- rptR::rpt(formula, grname=c("VARIETY", "EXP.ID"), | |
# data=df.temp, datatype="Gaussian", nboot=0, npermut=0) | |
} | |
#df.rpt.temp <- as.data.frame(VarCorr(rpt.temp$mod)) | |
# Save data | |
if (case==1){ | |
#df.rpt$rpt[i] <- rpt.temp$R["VARIETY"] | |
#df.rpt$sdcor.VARIETY[i] <- df.rpt.temp[df.rpt.temp$grp=="VARIETY", 'sdcor'] | |
#df.rpt$sdcor.EXP.ID[i] <- df.rpt.temp[df.rpt.temp$grp=="EXP.ID", 'sdcor'] | |
#df.rpt$sdcor.Residual[i] <- df.rpt.temp[df.rpt.temp$grp=="Residual", 'sdcor'] | |
df.rpt$var.variety[i] <- var.variety | |
df.rpt$var.exp.id[i] <- var.exp.id | |
df.rpt$var.residual[i] <- var.residual | |
df.rpt$grand.mean[i] <- grand.mean | |
df.rpt$n.rep[i] <- n.rep | |
df.rpt$repeatability[i] <- rpt | |
} | |
att.prev <- att.search | |
} | |
# Export repeatability table | |
message('\nExporting repeatability measurements...') | |
write.csv(apply(df.rpt, 2, as.character), path.rpt, row.names=F, na='NA') | |
# EOF |