From bf1801d760a0498099224c80782c485760675f4a Mon Sep 17 00:00:00 2001
From: Sungchan Oh <oh231@login03.negishi.rcac.purdue.edu>
Date: Fri, 5 Jul 2024 18:47:13 -0400
Subject: [PATCH] Calculate repeatability

---
 calculate_repeatability.r | 102 +++++++++++++++++++++++++++
 import_and_reshape.r      | 144 ++++++++++++++------------------------
 2 files changed, 155 insertions(+), 91 deletions(-)
 create mode 100644 calculate_repeatability.r

diff --git a/calculate_repeatability.r b/calculate_repeatability.r
new file mode 100644
index 0000000..41a57cc
--- /dev/null
+++ b/calculate_repeatability.r
@@ -0,0 +1,102 @@
+# calculate_repeatability.r
+
+library(lme4)
+library(heritability)
+
+
+
+# Input long format data
+path.rgb.long <- ("./df_rgb_long.csv")
+path.hsi.long <- ("./df_hsi_long.csv")
+path.rpt      <- ("./rpt.csv")
+
+factor.col <- c('EXP.ID', 
+                'POT_BARCODE',
+                'TREATMENT', 
+                'VARIETY', 
+                'GROWTH_STAGE',
+                'View',
+                'frame_nr')
+
+
+# Load RGB and HSI data in long format
+message("Loading data...")
+df.rgb <- read.csv(path.rgb.long)
+df.hsi <- read.csv(path.hsi.long)
+
+# Combine RGB and HSI data
+df <- rbind(df.rgb, df.hsi)
+
+# Set factor columns
+df[, factor.col] <- lapply(df[, factor.col], factor)
+
+# Create a dataframe to record repeatability
+message("Finding unique conditions to evaluate repeatability...")
+df.rpt <- unique(df[, c('TREATMENT', 
+                        'GROWTH_STAGE', 
+                        'View', 
+                        'frame_nr', 
+                        'variable')])
+df.rpt <- df.rpt[order(df.rpt$TREATMENT, 
+                       df.rpt$GROWTH_STAGE, 
+                       df.rpt$View, 
+                       df.rpt$frame_nr, 
+                       df.rpt$variable), ]
+df.rpt$repeatability <- NA
+df.rpt$gen.variance  <- NA
+df.rpt$res.variance  <- NA
+
+# Calculate repeatability
+message('Calculating repeatability...')
+for(i in 1:nrow(df.rpt)){
+    row <- df.rpt[i,]
+    # Subset data by condition by which repeatability is measured
+    message(paste(row$TREATMENT, 
+                  row$GROWTH_STAGE, 
+                  row$View, 
+                  row$frame_nr, 
+                  row$variable, 
+                  " (", i, "/", nrow(df.rpt), ")" sep=" "))
+    df.temp <- df[which(df$TREATMENT==row$TREATMENT & 
+                        df$GROWTH_STAGE==row$GROWTH_STAGE & 
+                        df$View==row$View & 
+                        df$frame_nr==row$frame_nr & 
+                        df$variable==row$variable), ]
+
+    # Count NA values and continue if excessive
+    num.na <- sum(is.na(df.temp$value))
+    if (num.na/nrow(df.temp) > 0.3) next
+
+    # Select valid data
+    df.temp <- df.temp[!is.na(df.temp$value) & is.finite(df.temp$value), ]
+
+    # Evaluate repeatability (line repeatability=True)
+    rpt <- repeatability(df.temp$value, df.temp$VARIETY, line.repeatability=T,
+                         covariates.frame = data.frame())
+
+    # Save data
+    df.rpt$repeatability[i] <- rpt$repeatability
+    df.rpt$gen.variance[i]  <- rpt$gen.variance
+    df.rpt$res.variance[i]  <- rpt$res.variance
+}
+
+
+
+
+# Export repeatability table
+message('\nExporting repeatability measurements...')
+write.csv(df.rpt, path.rpt, row.names=F)
+
+
+
+
+# factor(EXP.ID, frame_nr, VARIETY, TREATMENT)
+# value ~ 1|EXP.ID + VARIETY + TREATMENT + GROWTH_STAGE
+# value ~ 1|EXP.ID + VARIETY + TREATMENT                Seth
+# value ~ 1|EXP.ID + VARIETY                            Current
+
+
+
+
+
+# EOF
diff --git a/import_and_reshape.r b/import_and_reshape.r
index a8b5fe7..a86b911 100644
--- a/import_and_reshape.r
+++ b/import_and_reshape.r
@@ -1,7 +1,12 @@
-# DataExplore.r 
-# Explore AAPF data products and get some insight
-# Currently, developed for RGB and HSI masterfiles (.xlsx)
+# import_and_reshape.r
+# 
+# This R script import RGB and HSI masterfiles (tables)
+# produced from Ag Alumni Seed Phenotyping Facility (AAPF), Purdue,
+# then convert the data into long format compatible for 
+# data analysis using various R libraries and functions
+
 
+# Import libraries
 library(dplyr)
 library(readxl)
 library(data.table)
@@ -34,6 +39,7 @@ c("/depot/smarterag/data/HSI/Master files/Hyperspectral_data_AAPF_experiment_374
 "/depot/smarterag/data/HSI/Master files/Hyperspectral_data_AAPF_experiment_396.xlsx",
 "/depot/smarterag/data/HSI/Master files/Hyperspectral_data_AAPF_experiment_401.xlsx")
 
+# Output filenames for converted long tables
 path.rgb.long <- ("./df_rgb_long.csv")
 path.hsi.long <- ("./df_hsi_long.csv")
 
@@ -228,30 +234,55 @@ col.factor <- c("EXP ID", "VARIETY", "TREATMENT",
 
 
 
-# Create empty rgb dataframe
+# Create empty RGB dataframe
 mat <- matrix(ncol=0, nrow=0) 
 df.rgb <- data.frame(mat) 
 
-# Import rgb data
+# Import RGB data
 for (path.rgb in paths.rgb){
     for (tab.rgb in excel_sheets(path = path.rgb)){
         if (tab.rgb=="PPEW") next
         if (tab.rgb=="Information") next
-
         # Read a rgb worksheet
         temp <- read_excel(path.rgb, sheet=tab.rgb)
-
         # Remove hand-made columns
         cols <- grepl(regex.col.rgb, as.character(colnames(temp)))
         temp <- temp[cols]
-
         # Remove unused columns
         temp <- temp[,!names(temp) %in% nouse.col.rgb]
-
         # Simplify values under "View" column
         # Side{Bottom|Small|Full|Tall} to Side{Average|All}
-        if (tolower(tab.rgb) =="side average") temp["View"] <- "SideAverage"
-        if (tolower(tab.rgb) =="side all")     temp["View"] <- "SideAll"
+        if (tolower(tab.rgb) == "side average") temp["View"] <- "SideAverage"
+        if (tolower(tab.rgb) == "side all")     temp["View"] <- "SideAll"
+        # Re-index frame_nr with the largest plant volume in the side view
+        #
+        # Replace values under frame_nr (0, 1, 2, ..., 11)
+        # starting from the default pot orientation
+        # to new indexes (also 0, 1, 2, ..., 11) 
+        # starting from the direction with the largest plant volume from side
+        #print(temp$frame_nr)
+        list.sideall <- unique(temp[, c('POT_BARCODE', 'SCAN_DATE', 'SCAN_TIME')])
+        if (tolower(tab.rgb) == "side all"){
+            for(i in 1:nrow(list.sideall)){
+                row <- list.sideall[i,]
+
+                # Get plant surface area from side view
+                where <- vector(length=12)
+                for (f in seq(1,12)){
+                    where[f] <- which(temp$POT_BARCODE==row$POT_BARCODE &
+                                      temp$SCAN_DATE==row$SCAN_DATE &
+                                      temp$SCAN_TIME==row$SCAN_TIME &
+                                      temp$frame_nr==(f-1))
+                }
+                temp.surface <- temp[where, "Surface"]
+                # Find direction with largest plant surface area
+                max.f <- which.max(temp.surface$Surface)
+                # Update frame_nr (0: direction with largest plant surface)
+                for (f in seq(1,12)){
+                    temp[where[f], "frame_nr"] <- ((f-1)-max.f)  %% 12 
+                }
+            }
+        }
 
         # Find id columns that does not exist 
         cols.to.add <-id.vars[!id.vars %in% names(temp)]
@@ -284,17 +315,13 @@ message('Succeeded importing RGB data...')
 
 
 
-
-
-# Overview 1: Show summary of rgb data
+# Overview 1: Show summary of RGB data
 df.rgb.perscan <- df.rgb[which(df.rgb$View=="Top" & df.rgb$variable=="Surface"), ]
 message('\nRGB Data Acquisiton Summary (Overview 1)\n')
 print(count(df.rgb.perscan, `EXP ID`, TREATMENT, DFP))
 
 
 
-
-
 # Overview 2: Round down scan date and time down to nearest 15 minute and display
 format <- "%Y-%m-%d %H:%M:%S"
 df.rgb.perscan["SCAN_DATETIME"] = 
@@ -310,9 +337,7 @@ message('Check df.unwanted.rgb in DataExplore.r to exclude such records...\n')
 
 
 
-
-
-# Overview 3: Drop unwanted rgb data
+# Overview 3: Drop unwanted RGB data
 df.rgb["SCAN_DATETIME"] = 
     as.POSIXct(paste(as.Date(df.rgb$SCAN_DATE), df.rgb$SCAN_TIME), format=format)
 df.rgb["SCAN_DATETIME_BY15MIN"] = 
@@ -329,8 +354,6 @@ print(count(df.rgb.perscan, `EXP ID`, TREATMENT, DFP))
 
 
 
-
-
 # Overview 4: Convert treatment attributes
 for(i in 1:nrow(df.convert.treatment)){
     row <- df.convert.treatment[i,]
@@ -342,8 +365,6 @@ print(count(df.rgb.perscan, `EXP ID`, TREATMENT, DFP))
 
 
 
-
-
 # Overview 5: DFP is always smaller than 365 (to prevent case with wrong year input)
 df.rgb$DFP = df.rgb$DFP %% 365
 df.rgb.perscan <- df.rgb[which(df.rgb$View=="Top" & df.rgb$variable=="Surface"), ]
@@ -352,8 +373,6 @@ print(count(df.rgb.perscan, `EXP ID`, TREATMENT, DFP))
 
 
 
-
-
 # Overview 6: Add growth stage attribute
 df.rgb$GROWTH_STAGE <- NA
 for(i in 1:nrow(df.growth.stage)){
@@ -368,17 +387,13 @@ print(count(df.rgb.perscan, `EXP ID`, TREATMENT, DFP, GROWTH_STAGE))
 
 
 
-
-
 # Drop unused columns
 df.rgb <- df.rgb[ , !(names(df.rgb) %in% drops.col.rgb)]
-
 # Change column order
 df.rgb <- df.rgb[, rgb.col.order]
 df.rgb[col.factor] <- lapply(df.rgb[col.factor], factor)
-
-# Export rgb dataframe
-message('\nExporting rgb dataframe (long format)...')
+# Export RGB dataframe
+message('\nExporting RGB dataframe (long format)...')
 write.csv(df.rgb, path.rgb.long, row.names=F)
 
 
@@ -390,44 +405,35 @@ write.csv(df.rgb, path.rgb.long, row.names=F)
 
 
 
-# Create empty hsi dataframe
+# Create empty HSI dataframe
 mat <- matrix(ncol=0, nrow=0) 
 df.hsi <- data.frame(mat) 
 
-# Import hsi data
+# Import HSI data
 for (path.hsi in paths.hsi){
     for (tab.hsi in excel_sheets(path = path.hsi)){
         if (tab.hsi=="PPEW") next
         if (tab.hsi=="Information") next
-
-        # Read a hsi worksheet
+        # Read a HSI worksheet
         temp <- read_excel(path.hsi, sheet=tab.hsi)
-
         # Remove erroneous ghost column
         temp <- select(temp, -starts_with("..."))
-
         # Remove hand-made columns
         cols <- grepl(regex.col.hsi, as.character(colnames(temp)))
         temp <- temp[cols]
-
         # Remove unused columns
         temp <- temp[,!names(temp) %in% nouse.col.hsi]
-
         # Add "View" column
         temp["View"] <- NA
         if (grepl("side", tolower(tab.hsi), fixed=T)==T) temp["View"] <- "Side"
         if (grepl("top",  tolower(tab.hsi), fixed=T)==T) temp["View"] <- "Top"
-
         # Add "frame_nr" column
         temp["frame_nr"] = -1
-
         # Change table into long format
         temp.long <- reshape2::melt(temp, id.vars = id.vars, 
                                     variable.name = "variable")
-
         # Combine rows from different worksheets
         df.hsi <- rbind(df.hsi, temp.long)
-
         ## Debugging
         #print(paste(path.hsi, " ", tab.hsi))
         #print(unique(temp$View))
@@ -446,8 +452,6 @@ print(count(df.hsi.perscan, `EXP ID`, TREATMENT, DFP))
 
 
 
-
-
 # Overview 2: Round down scan date and time down to nearest 15 minute and display
 format <- "%Y-%m-%d %H:%M:%S"
 df.hsi.perscan["SCAN_DATETIME"] = 
@@ -463,9 +467,7 @@ message('Check df.unwanted.hsi in DataExplore.r to exclude such records...\n')
 
 
 
-
-
-# Overview 3: Drop unwanted hsi data
+# Overview 3: Drop unwanted HSI data
 df.hsi["SCAN_DATETIME"] = 
     as.POSIXct(paste(as.Date(df.hsi$SCAN_DATE), df.hsi$SCAN_TIME), format=format)
 df.hsi["SCAN_DATETIME_BY15MIN"] = 
@@ -482,8 +484,6 @@ print(count(df.hsi.perscan, `EXP ID`, TREATMENT, DFP))
 
 
 
-
-
 # Overview 4: Convert treatment attributes
 for(i in 1:nrow(df.convert.treatment)){
     row <- df.convert.treatment[i,]
@@ -495,8 +495,6 @@ print(count(df.hsi.perscan, `EXP ID`, TREATMENT, DFP))
 
 
 
-
-
 # Overview 5: DFP is always smaller than 365 (to prevent case with wrong year input)
 df.hsi$DFP = df.hsi$DFP %% 365
 df.hsi.perscan <- df.hsi[which(df.hsi$View=="Top" & df.hsi$variable=="NDVI_mean"), ]
@@ -505,8 +503,6 @@ print(count(df.hsi.perscan, `EXP ID`, TREATMENT, DFP))
 
 
 
-
-
 # Overview 6: Add growth stage attribute
 df.hsi$GROWTH_STAGE <- NA
 for(i in 1:nrow(df.growth.stage)){
@@ -521,49 +517,15 @@ print(count(df.hsi.perscan, `EXP ID`, TREATMENT, DFP, GROWTH_STAGE))
 
 
 
-
-
 # Drop unused columns
 df.hsi <- df.hsi[ , !(names(df.hsi) %in% drops.col.hsi)]
-
 # Change column order
 df.hsi <- df.hsi[, hsi.col.order]
 df.hsi[col.factor] <- lapply(df.hsi[col.factor], factor)
-
-# Export hsi dataframe
-message('\nExporting hsi dataframe (long format)...')
+# Export HSI dataframe
+message('\nExporting HSI dataframe (long format)...')
 write.csv(df.hsi, path.hsi.long, row.names=F)
 
 
 
-
-
-
-
-
-# Combine rgb and hsi data using df <- rbind(df.rgb, df.hsi)
-
-
-
-# frame_nr 0-11 (normal) , 0-23 for one scan
-# For "RGB-SideAll", change column names Frame0-11 to major, major+30...
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-# Check input variables with repeatability and ANOVA
-# For "RGB-SideAll", visualize importance along the side angle
-
-# Visualize feature importance
-
-# RGB-visualization
+# EOF