Skip to content
Permalink
main
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
# calculate_repeatability.r
library(dplyr)
library(lme4)
#library(rptR)
# Input long format data
# BXXXX, Round 1
path.rgb.long <- ("../AAPF_Bayer_2021_to_2024/2_R_compatible_table/round1_2021_2022/df_rgb_long_round1.csv")
path.hsi.long <- ("../AAPF_Bayer_2021_to_2024/2_R_compatible_table/round1_2021_2022/df_hsi_long_round1.csv")
path.rpt <- ("./rpt_no_public_round1.csv")
# BXXXX Round 2
#path.rgb.long <- ("./df_rgb_long.csv")
#path.hsi.long <- ("./df_hsi_long.csv")
#path.xray.long <- ("./df_xray_long.csv")
#path.rpt <- ("./rpt_no_public_round2.csv")
# Define varieties that is excluded for repeatability measurement
exclude.variety <-c("P1", "P2", "P3", "P4")
# Formula used to evaluate repeatability
case <- 1
# Case 1: Y_ij = u + H_i + R_j + e_ij
# Y_ij: phenotypic measurement of the ith hybrid in the jh rep
# u: grand mean
# H_i: random effect of the ith hybrid
# R_j: fixed effect of the jth replicate
# e_ij: residual error for the ith hybrid in the jth rep.
# Repeatability(H^2): Var(hybrid)^2 / {Var(hybrid)^2 + Var(error)^2/rep}
# Set formula based on case number
if (case==1){
# When number of varieties, reps (experiment id) > 5
formula <- as.formula("value ~ 1 + (1|VARIETY) + (1|EXP.ID)")
}
# Load RGB and HSI data in long format
message("Loading data...")
df.rgb <- read.csv(path.rgb.long)
df.hsi <- read.csv(path.hsi.long)
#df.xray <- read.csv(path.xray.long)
# Add sensor column for clarity
df.rgb$sensor <- 'RGB'
df.hsi$sensor <- 'HSI'
#df.xray$sensor <- 'XRAY'
# Exclude varieties, as needed
if (length(exclude.variety)>0){
df.rgb <- df.rgb %>% filter(!VARIETY %in% exclude.variety)
df.hsi <- df.hsi %>% filter(!VARIETY %in% exclude.variety)
#df.xray <- df.xray %>% filter(!VARIETY %in% exclude.variety)
}
# Combine RGB, HSI, and XRAY data
#df <- do.call("rbind", list(df.rgb, df.hsi, df.xray))
df <- do.call("rbind", list(df.rgb, df.hsi))
# Create a dataframe to record repeatability
message("Finding unique conditions to evaluate repeatability...")
if (case==1){
df.rpt <- unique(df[, c('TREATMENT',
'GROWTH_STAGE',
'View',
'frame_nr',
'variable',
'sensor')])
}
# Sort output dataframe. This line can help increase data search efficiency
if (case==1){
df.rpt <- df.rpt[order(df.rpt$TREATMENT), ]
}
# Add columns to record results
if (case==1){
df.rpt$var.variety <- NA
df.rpt$var.exp.id <- NA
df.rpt$var.residual <- NA
df.rpt$grand.mean <- NA
df.rpt$n.rep <- NA
df.rpt$repeatability <- NA
}
# Calculate repeatability
message('Calculating repeatability...')
for(i in 1:nrow(df.rpt)){
if (i %% 100 == 0){
message(paste0("(", i, "/", nrow(df.rpt), ")", sep=""))
}
# Get subset of data under specific condition to evaluate repeatability
row <- df.rpt[i,]
# Create a subset of data, this can improve data search efficiency
att.search <- row[1,1]
# Get reduced dataframe if TREATMENT is new or updated
if(i==1){
df.search <- df[which(df[ ,colnames(row[1])]==att.search), ]
} else if (i>1 & att.search!=att.prev){
df.search <- df[which(df[ ,colnames(row[1])]==att.search), ]
}
# Subset data by condition by which repeatability is measured
if (case==1){
df.temp <- df.search[which(df.search$GROWTH_STAGE==row$GROWTH_STAGE &
df.search$View==row$View &
df.search$frame_nr==row$frame_nr &
df.search$variable==row$variable), ]
}
# Check if there are too many missing values
num.na <- sum(is.na(df.temp$value))
if (nrow(df.temp) == 0){
att.prev <- att.search
message(paste0("(", i, "/", nrow(df.rpt), "): No data", sep=""))
next
}
if (num.na/nrow(df.temp) > 0.3){
att.prev <- att.search
message(paste0("(", i, "/", nrow(df.rpt), "): Insufficient data", sep=""))
next
}
# Select valid data
df.temp <- df.temp[!is.na(df.temp$value) & is.finite(df.temp$value), ]
# Evaluate repeatability
if (case==1){
model <- lmer(formula=as.formula(formula), data=df.temp)
df.cor <- data.frame(VarCorr(model))
var.variety <- df.cor[df.cor$grp=="VARIETY", "vcov"]
var.exp.id <- df.cor[df.cor$grp=="EXP.ID", "vcov"]
var.residual <- df.cor[df.cor$grp=="Residual", "vcov"]
if (var.variety==0 & var.exp.id==0 & var.residual==0) next
grand.mean <- summary(model)$coefficients[[1]]
n.rep <- summary(model)$ngrps["EXP.ID"]
rpt <- var.variety / (var.variety + var.residual/n.rep)
#rpt.temp <- rptR::rpt(formula, grname=c("VARIETY", "EXP.ID"),
# data=df.temp, datatype="Gaussian", nboot=0, npermut=0)
}
#df.rpt.temp <- as.data.frame(VarCorr(rpt.temp$mod))
# Save data
if (case==1){
#df.rpt$rpt[i] <- rpt.temp$R["VARIETY"]
#df.rpt$sdcor.VARIETY[i] <- df.rpt.temp[df.rpt.temp$grp=="VARIETY", 'sdcor']
#df.rpt$sdcor.EXP.ID[i] <- df.rpt.temp[df.rpt.temp$grp=="EXP.ID", 'sdcor']
#df.rpt$sdcor.Residual[i] <- df.rpt.temp[df.rpt.temp$grp=="Residual", 'sdcor']
df.rpt$var.variety[i] <- var.variety
df.rpt$var.exp.id[i] <- var.exp.id
df.rpt$var.residual[i] <- var.residual
df.rpt$grand.mean[i] <- grand.mean
df.rpt$n.rep[i] <- n.rep
df.rpt$repeatability[i] <- rpt
}
att.prev <- att.search
}
# Export repeatability table
message('\nExporting repeatability measurements...')
write.csv(apply(df.rpt, 2, as.character), path.rpt, row.names=F, na='NA')
# EOF