diff --git a/calculate_repeatability.r b/calculate_repeatability.r index 0009a51..fd63ee4 100644 --- a/calculate_repeatability.r +++ b/calculate_repeatability.r @@ -1,13 +1,8 @@ # calculate_repeatability.r -# -# value ~ 1|EXP.ID + VARIETY + TREATMENT + GROWTH_STAGE -# value ~ 1|EXP.ID + VARIETY + TREATMENT Seth's model -# value ~ 1|EXP.ID + VARIETY Current model - library(dplyr) library(lme4) -library(heritability) +library(rptR) @@ -19,14 +14,23 @@ path.rpt <- ("./rpt_no_public.csv") # Define varieties that is excluded for repeatability measurement exclude.variety <-c("P1", "P2", "P3", "P4") -# List of "factor (categorical)" variables -factor.col <- c('EXP.ID', - 'POT_BARCODE', - 'TREATMENT', - 'VARIETY', - 'GROWTH_STAGE', - 'View', - 'frame_nr') +# Formula used to evaluate repeatability +case <- 1 + +# Set formula based on case number +if (case==1){ + # When number of varieties, reps (experiment id) > 5 + formula <- as.formula("value ~ (1|VARIETY) + (1|EXP.ID)") +} + + + + + + + + + # Load RGB and HSI data in long format message("Loading data...") @@ -42,41 +46,54 @@ if (length(exclude.variety)>0){ # Combine RGB and HSI data df <- rbind(df.rgb, df.hsi) -# Set factor columns -df[, factor.col] <- lapply(df[, factor.col], factor) - # Create a dataframe to record repeatability message("Finding unique conditions to evaluate repeatability...") -df.rpt <- unique(df[, c('TREATMENT', - 'GROWTH_STAGE', - 'View', - 'frame_nr', - 'variable')]) -df.rpt <- df.rpt[order(df.rpt$TREATMENT, - df.rpt$GROWTH_STAGE, - df.rpt$View, - df.rpt$frame_nr, - df.rpt$variable), ] -df.rpt$repeatability <- NA -df.rpt$gen.variance <- NA -df.rpt$res.variance <- NA +if (case==1){ + df.rpt <- unique(df[, c('TREATMENT', + 'GROWTH_STAGE', + 'View', + 'frame_nr', + 'variable')]) +} + +# Sort output dataframe. This line can help increase data search efficiency +if (case==1){ + df.rpt <- df.rpt[order(df.rpt$TREATMENT), ] +} +# Add columns to record results +if (case==1){ + df.rpt$rpt <- NA + df.rpt$sdcor.VARIETY <- NA + df.rpt$sdcor.EXP.ID <- NA + df.rpt$sdcor.Residual <- NA +} + + # Calculate repeatability message('Calculating repeatability...') for(i in 1:nrow(df.rpt)){ + + # Get subset of data under specific condition to evaluate repeatability row <- df.rpt[i,] + + # Create a subset of data, this can improve data search efficiency + att.search <- row[1,1] + + if(i==1){ + df.search <- df[which(df[ ,colnames(row[1])]==att.search), ] + } else if (i>1 & att.search!=att.prev){ + df.search <- df[which(df[ ,colnames(row[1])]==att.search), ] + } + # Subset data by condition by which repeatability is measured - message(paste(row$TREATMENT, - row$GROWTH_STAGE, - row$View, - row$frame_nr, - row$variable, - " (", i, "/", nrow(df.rpt), ")", sep=" ")) - df.temp <- df[which(df$TREATMENT==row$TREATMENT & - df$GROWTH_STAGE==row$GROWTH_STAGE & - df$View==row$View & - df$frame_nr==row$frame_nr & - df$variable==row$variable), ] + message(paste0("(", i, "/", nrow(df.rpt), ")", sep="")) + if (case==1){ + df.temp <- df.search[which(df.search$GROWTH_STAGE==row$GROWTH_STAGE & + df.search$View==row$View & + df.search$frame_nr==row$frame_nr & + df.search$variable==row$variable), ] + } # Count NA values and continue if excessive num.na <- sum(is.na(df.temp$value)) @@ -85,22 +102,28 @@ for(i in 1:nrow(df.rpt)){ # Select valid data df.temp <- df.temp[!is.na(df.temp$value) & is.finite(df.temp$value), ] - # Evaluate repeatability (line repeatability=True) - rpt <- repeatability(df.temp$value, df.temp$VARIETY, line.repeatability=T, - covariates.frame = data.frame()) + # Evaluate repeatability + if (case==1){ + rpt.temp <- rptR::rpt(formula, grname=c("VARIETY", "EXP.ID"), + data=df.temp, datatype="Gaussian", nboot=0, npermut=0) + } + df.rpt.temp <- as.data.frame(VarCorr(rpt.temp$mod)) # Save data - df.rpt$repeatability[i] <- rpt$repeatability - df.rpt$gen.variance[i] <- rpt$gen.variance - df.rpt$res.variance[i] <- rpt$res.variance + if (case==1){ + df.rpt$rpt[i] <- rpt.temp$R["VARIETY"] + df.rpt$sdcor.VARIETY[i] <- df.rpt.temp[df.rpt.temp$grp=="VARIETY", 'sdcor'] + df.rpt$sdcor.EXP.ID[i] <- df.rpt.temp[df.rpt.temp$grp=="EXP.ID", 'sdcor'] + df.rpt$sdcor.Residual[i] <- df.rpt.temp[df.rpt.temp$grp=="Residual", 'sdcor'] + } + att.prev <- att.search } - # Export repeatability table message('\nExporting repeatability measurements...') -write.csv(df.rpt, path.rpt, row.names=F) +write.csv(apply(df.rpt, 2, as.character), path.rpt, row.names=F) diff --git a/display_repeatability.r b/display_repeatability.r new file mode 100644 index 0000000..37e3daf --- /dev/null +++ b/display_repeatability.r @@ -0,0 +1,47 @@ +library(dplyr) +library(ggplot2) + + +# File paths to Input and output data +path.rpt <- ("./rpt_no_public.csv") +path.plot.overall <- ("./rpt_overall.png") +path.plot.sideall <- ("./rpt_sideall.png") + + + + + +# Load repeatability results +df.rpt <- read.csv(path.rpt) + +# Exclude results with NA +df.rpt <- df.rpt[which(!is.na(df.rpt$rpt) & is.finite(df.rpt$rpt)), ] + +# Select data with View==SideAll +df.rpt.sideall <- df.rpt[which(df.rpt$View=="SideAll"),] + +# Simplify attributes under df.rpt$View +df.rpt$View <- replace(df.rpt$View, + df.rpt$View %in% c("SideAll", "SideAverage"), + "Side") + +# Plot results (overall data) +p1 <- ggplot(df.rpt, aes(x=View, y=rpt)) + geom_boxplot() + + facet_grid(rows=vars(GROWTH_STAGE), cols=vars(TREATMENT)) + + labs(y = "Repeatability, Input: all RGB and HSI data") +png(path.plot.overall) +print(p1) +dev.off() + +# Plot results (sideall data) +df.rpt.sideall$frame_nr <- as.factor(df.rpt.sideall$frame_nr) +p2 <- ggplot(df.rpt.sideall, aes(x=frame_nr, y=rpt)) + geom_boxplot() + + facet_grid(rows=vars(GROWTH_STAGE), cols=vars(TREATMENT)) + + labs(y = "Repeatability, Input: RGB data from different vewing angles") +png(path.plot.sideall) +print(p2) +dev.off() + + + +# EOF diff --git a/display_variance.r b/display_variance.r index 0b46dbc..0b95b96 100644 --- a/display_variance.r +++ b/display_variance.r @@ -1,55 +1,97 @@ +library(dplyr) library(ggplot2) -# TODO -# calc rpt without public hybrid -# order hybrid a b c ... -# yaxis label <- variable -# Input long format data + +# Path to input and output data path.rgb.long <- ("./df_rgb_long.csv") path.hsi.long <- ("./df_hsi_long.csv") -path.rpt <- ("./rpt.csv") +path.rpt <- ("./rpt_no_public.csv") +# Variables demonstrating highest to n-th highest repeatability will be displayed +# in each combination of (TREATMENT, GROWTH_STAGE) +n <- 5 + +# List of varieties to be excluded, if any; otherwise, exclude.variety as "c()" +exclude.variety <-c("P1", "P2", "P3", "P4") -# Load RGB and HSI data in long format -message("Loading data...") -df.rgb <- read.csv(path.rgb.long) -df.hsi <- read.csv(path.hsi.long) -df.rpt <- read.csv(path.rpt) -# Combine RGB and HSI data -df <- rbind(df.rgb, df.hsi) -# Sort repeatability by desending order -message("Sorting repeatability data...") -df.rpt <- df.rpt[!is.na(df.rpt$repeatability),] -df.rpt.sort <- df.rpt[order(df.rpt$repeatability, decreasing=T), ] -n <- 10 -for (i in seq(1,10)){ - row <- df.rpt.sort[i,] - df.temp <- df[which(df$TREATMENT==row$TREATMENT & - df$GROWTH_STAGE==row$GROWTH_STAGE & - df$View==row$View & - df$frame_nr==row$frame_nr & - df$variable==row$variable), ] - p = ggplot(df.temp, aes(x=VARIETY, y=value)) + - geom_boxplot() - path.plot <- paste0('./variance_', i, '_', row$TREATMENT, '_', row$GROWTH_STAGE, '_', row$View, '_', row$frame_nr, '_', row$variable, '.png') - png(path.plot) - print(p) - dev.off() +# Load RGB and HSI data in long format +message("Loading data...") +df.rgb <- read.csv(path.rgb.long) +df.hsi <- read.csv(path.hsi.long) +df.rpt <- read.csv(path.rpt) + +# Exclude varieties, as needed +if (length(exclude.variety)>0){ + df.rgb <- df.rgb %>% filter(!VARIETY %in% exclude.variety) + df.hsi <- df.hsi %>% filter(!VARIETY %in% exclude.variety) } +# Combine RGB and HSI data +df <- rbind(df.rgb, df.hsi) +# Simplify attributes under df.rpt$View +df.rpt$View <- replace(df.rpt$View, + df.rpt$View %in% c("SideAll", "SideAverage"), + "Side") +df$View <- replace(df$View, + df$View %in% c("SideAll", "SideAverage"), + "Side") + +# Choose valid repeatability measurements +df.rpt <- df.rpt[!is.na(df.rpt$rpt),] + +for (treatment in unique(df.rpt$TREATMENT)){ + for (growth.stage in unique(df.rpt$GROWTH_STAGE)){ + print(paste0("Generating plots for ", + treatment, ", ", growth.stage, " case...")) + + # Subset data by treatment and growth stage + df.rpt.temp <- df.rpt[which(df.rpt$TREATMENT==treatment & + df.rpt$GROWTH_STAGE==growth.stage), ] + df.rpt.temp <- df.rpt.temp[order(df.rpt.temp$rpt, decreasing=T), ] + + + for (i in seq(1, n)){ + # Gather measurements from a variable with n-th highest repeatability + row <- df.rpt.temp[i,] + df.temp <- df[which(df$TREATMENT==row$TREATMENT & + df$GROWTH_STAGE==row$GROWTH_STAGE & + df$View==row$View & + df$frame_nr==row$frame_nr & + df$variable==row$variable), ] + # Draw box plot + p = ggplot(df.temp, aes(x=VARIETY, y=value)) + + ggtitle(paste0("Repeatability=", round(row$rpt, 3))) + + geom_boxplot() + + ylab(paste("Variable: ", + row$TREATMENT, row$GROWTH_STAGE, + row$View, row$frame_nr, row$variable, + sep='_')) + path.plot <- paste0(paste('./variance', + row$TREATMENT, row$GROWTH_STAGE, + i, + row$View, row$frame_nr, row$variable, + sep='_'), '.png') + # Save plot + png(path.plot) + print(p) + dev.off() + } + } +} +# EOF diff --git a/import_and_reshape.r b/import_and_reshape.r index a86b911..8da0fdc 100644 --- a/import_and_reshape.r +++ b/import_and_reshape.r @@ -93,7 +93,7 @@ path.hsi.long <- ("./df_hsi_long.csv") # 390 20-1 45 2024-03-08 10:30:00 4 #---------------------------------------------------------- df.unwanted.rgb <- data.frame( - EXP_ID=c(374, 374, 374, 374, 396), + EXP_ID =c( 374, 374, 374, 374, 396), TREATMENT=c("20 -1", "50-3", "50-3", "50-4", "20-1"), SCAN_DATETIME_BY15MIN=c("2023-10-30 11:15:00", "2023-09-28 09:15:00", @@ -334,6 +334,7 @@ print(count(df.rgb.perscan, `EXP ID`, TREATMENT, DFP, SCAN_DATETIME_BY15MIN)) message('\nPlease check [`EXP ID`, TREATMENT, SCAN_DATETIME_BY15MIN]') message('to exclude data from incomplete scan session...') message('Check df.unwanted.rgb in DataExplore.r to exclude such records...\n') +if (!askYesNo('Proceed? (y/n)?')) stop()