diff --git a/DataExplore.r b/DataExplore.r index 01cfb98..a8b5fe7 100644 --- a/DataExplore.r +++ b/DataExplore.r @@ -35,6 +35,7 @@ c("/depot/smarterag/data/HSI/Master files/Hyperspectral_data_AAPF_experiment_374 "/depot/smarterag/data/HSI/Master files/Hyperspectral_data_AAPF_experiment_401.xlsx") path.rgb.long <- ("./df_rgb_long.csv") +path.hsi.long <- ("./df_hsi_long.csv") @@ -49,14 +50,15 @@ path.rgb.long <- ("./df_rgb_long.csv") # (e.g., a handful of measurements in an enough time period). # 3. Review this list and decide which measurements you want # to exclude from your analysis. -# 4. Look for a variable named df.unwanted below. +# 4. Look for df.unwanted.rgb (RGB) or df.unwanted.hsi (HSI) below. # This variable is a list where you can specify the unwanted # measurements you identified in step 3. -# 5. Fill in the list under df.unwanted with the attributes of the -# measurements you want to exclude. The number of items in the list will -# depend on your specific experiment and the output from DataExplore.r. +# 5. Fill in df.unwanted.rgb and/or df.unwanted.hsi with the attributes +# of the measurements you want to exclude. The number of items in +# the list will depend on your specific experiment and the output +# from DataExplore.r. 
# -# Unwanted data example: +# Unwanted data example (RGB): #---------------------------------------------------------- # EXP ID TREATMENT DFP SCAN_DATETIME_BY15MIN NUM_SCANS #---------------------------------------------------------- @@ -77,7 +79,14 @@ path.rgb.long <- ("./df_rgb_long.csv") # 5 # 396 20-1 55 2024-04-29 12:00:00 1 #---------------------------------------------------------- -df.unwanted <- data.frame( +# Unwanted data example (HSI): +#---------------------------------------------------------- +# EXP ID TREATMENT DFP SCAN_DATETIME_BY30MIN NUM_SCANS +#---------------------------------------------------------- +# 1 +# 390 20-1 45 2024-03-08 10:30:00 4 +#---------------------------------------------------------- +df.unwanted.rgb <- data.frame( EXP_ID=c(374, 374, 374, 374, 396), TREATMENT=c("20 -1", "50-3", "50-3", "50-4", "20-1"), SCAN_DATETIME_BY15MIN=c("2023-10-30 11:15:00", @@ -86,6 +95,12 @@ df.unwanted <- data.frame( "2023-09-28 09:45:00", "2024-04-29 12:00:00"), stringsAsFactors=FALSE) +df.unwanted.hsi <- data.frame( + EXP_ID=c(390), + TREATMENT=c("20-1"), + SCAN_DATETIME_BY30MIN=c("2024-03-08 10:30:00"), + stringsAsFactors=FALSE) + # List descriptive treatment info @@ -189,11 +204,16 @@ id.vars <- c("EXP ID", "POT_BARCODE", "SCAN_TIME", "SCAN_DATE", # Columns not exported drops.col.rgb <- c("SCAN_TIME", "SCAN_DATE", "SCAN_DATETIME", "SCAN_DATETIME_BY15MIN") +drops.col.hsi <- c("SCAN_TIME", "SCAN_DATE", + "SCAN_DATETIME", "SCAN_DATETIME_BY30MIN") # Order of columns for exported dataframe rgb.col.order <- c("EXP ID", "POT_BARCODE", "TREATMENT", "VARIETY", "DFP", "GROWTH_STAGE", "View", "frame_nr", "variable", "value") +hsi.col.order <- c("EXP ID", "POT_BARCODE", "TREATMENT", "VARIETY", + "DFP", "GROWTH_STAGE", "View", "frame_nr", + "variable", "value") # Columns used as factors col.factor <- c("EXP ID", "VARIETY", "TREATMENT", @@ -202,6 +222,12 @@ col.factor <- c("EXP ID", "VARIETY", "TREATMENT", + + + + + + # Create empty rgb dataframe mat <- 
matrix(ncol=0, nrow=0) df.rgb <- data.frame(mat) @@ -260,6 +286,110 @@ message('Succeeded importing RGB data...') +# Overview 1: Show summary of rgb data +df.rgb.perscan <- df.rgb[which(df.rgb$View=="Top" & df.rgb$variable=="Surface"), ] +message('\nRGB Data Acquisiton Summary (Overview 1)\n') +print(count(df.rgb.perscan, `EXP ID`, TREATMENT, DFP)) + + + + + +# Overview 2: Round scan date and time to the nearest 15 minutes and display +format <- "%Y-%m-%d %H:%M:%S" +df.rgb.perscan["SCAN_DATETIME"] = + as.POSIXct(paste(as.Date(df.rgb.perscan$SCAN_DATE), df.rgb.perscan$SCAN_TIME), + format=format) +df.rgb.perscan["SCAN_DATETIME_BY15MIN"] = + lubridate::round_date(df.rgb.perscan$SCAN_DATETIME, "15 minutes") +message('\nRGB Data Acquisiton Summary by Date and Time (Overview 2)\n') +print(count(df.rgb.perscan, `EXP ID`, TREATMENT, DFP, SCAN_DATETIME_BY15MIN)) +message('\nPlease check [`EXP ID`, TREATMENT, SCAN_DATETIME_BY15MIN]') +message('to exclude data from incomplete scan session...') +message('Check df.unwanted.rgb in DataExplore.r to exclude such records...\n') + + + + + +# Overview 3: Drop unwanted rgb data (no-op when df.unwanted.rgb has no rows) +df.rgb["SCAN_DATETIME"] = + as.POSIXct(paste(as.Date(df.rgb$SCAN_DATE), df.rgb$SCAN_TIME), format=format) +df.rgb["SCAN_DATETIME_BY15MIN"] = + lubridate::round_date(df.rgb$SCAN_DATETIME, "15 minutes") +for(i in seq_len(nrow(df.unwanted.rgb))){ + row <- df.unwanted.rgb[i,] + df.rgb <- df.rgb %>% filter(!(`EXP ID` == row$EXP_ID & + TREATMENT == row$TREATMENT & + SCAN_DATETIME_BY15MIN == row$SCAN_DATETIME_BY15MIN)) +} +df.rgb.perscan <- df.rgb[which(df.rgb$View=="Top" & df.rgb$variable=="Surface"), ] +message('\nRGB Data Acquisiton Summary (Unwanted Data Filtered, Overview 3)\n') +print(count(df.rgb.perscan, `EXP ID`, TREATMENT, DFP)) + + + + + +# Overview 4: Convert treatment attributes +for(i in 1:nrow(df.convert.treatment)){ + row <- df.convert.treatment[i,] + df.rgb$TREATMENT[df.rgb$TREATMENT==row$from] <- row$to +} +df.rgb.perscan <- 
df.rgb[which(df.rgb$View=="Top" & df.rgb$variable=="Surface"), ] +message('\nRGB Data Acquisiton Summary (Treatment Updated, Overview 4)\n') +print(count(df.rgb.perscan, `EXP ID`, TREATMENT, DFP)) + + + + + +# Overview 5: DFP is always smaller than 365 (to prevent case with wrong year input) +df.rgb$DFP = df.rgb$DFP %% 365 +df.rgb.perscan <- df.rgb[which(df.rgb$View=="Top" & df.rgb$variable=="Surface"), ] +message('\nRGB Data Acquisiton Summary (DFP Always Less Than 365, Overview 5)\n') +print(count(df.rgb.perscan, `EXP ID`, TREATMENT, DFP)) + + + + + +# Overview 6: Add growth stage attribute +df.rgb$GROWTH_STAGE <- NA +for(i in 1:nrow(df.growth.stage)){ + row <- df.growth.stage[i,] + df.rgb$GROWTH_STAGE[(df.rgb$`EXP ID` == row$EXP_ID) & + (df.rgb$TREATMENT == row$TREATMENT) & + (df.rgb$DFP == row$DFP)] <- row$GROWTH_STAGE +} +df.rgb.perscan <- df.rgb[which(df.rgb$View=="Top" & df.rgb$variable=="Surface"), ] +message('\nRGB Data Acquisiton Summary (Growth Stage Added, Overview 6)\n') +print(count(df.rgb.perscan, `EXP ID`, TREATMENT, DFP, GROWTH_STAGE)) + + + + + +# Drop unused columns +df.rgb <- df.rgb[ , !(names(df.rgb) %in% drops.col.rgb)] + +# Change column order +df.rgb <- df.rgb[, rgb.col.order] +df.rgb[col.factor] <- lapply(df.rgb[col.factor], factor) + +# Export rgb dataframe (long format) to path.rgb.long +message('\nExporting rgb dataframe (long format)...') +write.csv(df.rgb, path.rgb.long, row.names=FALSE) + + + + + + + + + + # Create empty hsi dataframe mat <- matrix(ncol=0, nrow=0) df.hsi <- data.frame(mat) @@ -309,105 +439,108 @@ message('Succeeded importing HSI data...') +# Overview 1: Show summary of hsi data +df.hsi.perscan <- df.hsi[which(df.hsi$View=="Top" & df.hsi$variable=="NDVI_mean"), ] +message('\nHSI Data Acquisiton Summary (Overview 1)\n') +print(count(df.hsi.perscan, `EXP ID`, TREATMENT, DFP)) -# Get unique rgb measurement -df.rgb.perscan <- df.rgb[which(df.rgb$View=="Top" & df.rgb$variable=="Surface"), ] -# Show summary of rgb data -message('\nRGB Data Acquisiton Summary 
(Overview 1)\n') -print(count(df.rgb.perscan, `EXP ID`, TREATMENT, DFP)) -# Round down scan date and time down to nearest 15 minute and display +# Overview 2: Round scan date and time to the nearest 30 minutes and display format <- "%Y-%m-%d %H:%M:%S" -df.rgb.perscan["SCAN_DATETIME"] = - as.POSIXct(paste(as.Date(df.rgb.perscan$SCAN_DATE), df.rgb.perscan$SCAN_TIME), +df.hsi.perscan["SCAN_DATETIME"] = + as.POSIXct(paste(as.Date(df.hsi.perscan$SCAN_DATE), df.hsi.perscan$SCAN_TIME), format=format) -df.rgb.perscan["SCAN_DATETIME_BY15MIN"] = - lubridate::round_date(df.rgb.perscan$SCAN_DATETIME, "15 minutes") -message('\nRGB Data Acquisiton Summary by Date and Time (Overview 2)\n') -print(count(df.rgb.perscan, `EXP ID`, TREATMENT, DFP, SCAN_DATETIME_BY15MIN)) -message('\nPlease check [`EXP ID`, TREATMENT, SCAN_DATETIME_BY15MIN]') +df.hsi.perscan["SCAN_DATETIME_BY30MIN"] = + lubridate::round_date(df.hsi.perscan$SCAN_DATETIME, "30 minutes") +message('\nHSI Data Acquisiton Summary by Date and Time (Overview 2)\n') +print(count(df.hsi.perscan, `EXP ID`, TREATMENT, DFP, SCAN_DATETIME_BY30MIN)) +message('\nPlease check [`EXP ID`, TREATMENT, SCAN_DATETIME_BY30MIN]') message('to exclude data from incomplete scan session...') -message('Check df.unwanted in DataExplore.r to exclude such records...\n') +message('Check df.unwanted.hsi in DataExplore.r to exclude such records...\n') -# Drop unwanted rgb data -df.rgb["SCAN_DATETIME"] = - as.POSIXct(paste(as.Date(df.rgb$SCAN_DATE), df.rgb$SCAN_TIME), format=format) -df.rgb["SCAN_DATETIME_BY15MIN"] = - lubridate::round_date(df.rgb$SCAN_DATETIME, "15 minutes") -for(i in 1:nrow(df.unwanted)){ - row <- df.unwanted[i,] - df.rgb <- df.rgb %>% filter(!(`EXP ID` == row$EXP_ID & + +# Overview 3: Drop unwanted hsi data (no-op when df.unwanted.hsi has no rows) +df.hsi["SCAN_DATETIME"] = + as.POSIXct(paste(as.Date(df.hsi$SCAN_DATE), df.hsi$SCAN_TIME), format=format) +df.hsi["SCAN_DATETIME_BY30MIN"] = + lubridate::round_date(df.hsi$SCAN_DATETIME, "30 minutes") +for(i in 
seq_len(nrow(df.unwanted.hsi))){ + row <- df.unwanted.hsi[i,] + df.hsi <- df.hsi %>% filter(!(`EXP ID` == row$EXP_ID & TREATMENT == row$TREATMENT & - SCAN_DATETIME_BY15MIN == row$SCAN_DATETIME_BY15MIN)) + SCAN_DATETIME_BY30MIN == row$SCAN_DATETIME_BY30MIN)) } -df.rgb.perscan <- df.rgb[which(df.rgb$View=="Top" & df.rgb$variable=="Surface"), ] -message('\nRGB Data Acquisiton Summary (Unwanted Data Filtered, Overview 3)\n') -print(count(df.rgb.perscan, `EXP ID`, TREATMENT, DFP)) +df.hsi.perscan <- df.hsi[which(df.hsi$View=="Top" & df.hsi$variable=="NDVI_mean"), ] +message('\nHSI Data Acquisiton Summary (Unwanted Data Filtered, Overview 3)\n') +print(count(df.hsi.perscan, `EXP ID`, TREATMENT, DFP)) -# Convert treatment attributes +# Overview 4: Convert treatment attributes for(i in 1:nrow(df.convert.treatment)){ row <- df.convert.treatment[i,] - df.rgb$TREATMENT[df.rgb$TREATMENT==row$from] <- row$to + df.hsi$TREATMENT[df.hsi$TREATMENT==row$from] <- row$to } -df.rgb.perscan <- df.rgb[which(df.rgb$View=="Top" & df.rgb$variable=="Surface"), ] -message('\nRGB Data Acquisiton Summary (Treatment Updated, Overview 4)\n') -print(count(df.rgb.perscan, `EXP ID`, TREATMENT, DFP)) +df.hsi.perscan <- df.hsi[which(df.hsi$View=="Top" & df.hsi$variable=="NDVI_mean"), ] +message('\nHSI Data Acquisiton Summary (Treatment Updated, Overview 4)\n') +print(count(df.hsi.perscan, `EXP ID`, TREATMENT, DFP)) -# DFP is always smaller than 365 (to prevent case with wrong year input) -df.rgb$DFP = df.rgb$DFP %% 365 -df.rgb.perscan <- df.rgb[which(df.rgb$View=="Top" & df.rgb$variable=="Surface"), ] -message('\nRGB Data Acquisiton Summary (DFP Always Less Than 365, Overview 5)\n') -print(count(df.rgb.perscan, `EXP ID`, TREATMENT, DFP)) +# Overview 5: DFP is always smaller than 365 (to prevent case with wrong year input) +df.hsi$DFP = df.hsi$DFP %% 365 +df.hsi.perscan <- df.hsi[which(df.hsi$View=="Top" & df.hsi$variable=="NDVI_mean"), ] +message('\nHSI Data Acquisiton Summary (DFP Always Less Than 365, 
Overview 5)\n') +print(count(df.hsi.perscan, `EXP ID`, TREATMENT, DFP)) -# Add growth stage attribute -df.rgb$GROWTH_STAGE <- NA +# Overview 6: Add growth stage attribute +df.hsi$GROWTH_STAGE <- NA for(i in 1:nrow(df.growth.stage)){ row <- df.growth.stage[i,] - df.rgb$GROWTH_STAGE[(df.rgb$`EXP ID` == row$EXP_ID) & - (df.rgb$TREATMENT == row$TREATMENT) & - (df.rgb$DFP == row$DFP)] <- row$GROWTH_STAGE + df.hsi$GROWTH_STAGE[(df.hsi$`EXP ID` == row$EXP_ID) & + (df.hsi$TREATMENT == row$TREATMENT) & + (df.hsi$DFP == row$DFP)] <- row$GROWTH_STAGE } -df.rgb.perscan <- df.rgb[which(df.rgb$View=="Top" & df.rgb$variable=="Surface"), ] -message('\nRGB Data Acquisiton Summary (Growth Stage Added, Overview 6)\n') -print(count(df.rgb.perscan, `EXP ID`, TREATMENT, DFP, GROWTH_STAGE)) +df.hsi.perscan <- df.hsi[which(df.hsi$View=="Top" & df.hsi$variable=="NDVI_mean"), ] +message('\nHSI Data Acquisiton Summary (Growth Stage Added, Overview 6)\n') +print(count(df.hsi.perscan, `EXP ID`, TREATMENT, DFP, GROWTH_STAGE)) # Drop unused columns -df.rgb <- df.rgb[ , !(names(df.rgb) %in% drops.col.rgb)] +df.hsi <- df.hsi[ , !(names(df.hsi) %in% drops.col.hsi)] # Change column order -df.rgb <- df.rgb[, rgb.col.order] -df.rgb[col.factor] <- lapply(df.rgb[col.factor], factor) +df.hsi <- df.hsi[, hsi.col.order] +df.hsi[col.factor] <- lapply(df.hsi[col.factor], factor) + +# Export hsi dataframe (long format) to path.hsi.long +message('\nExporting hsi dataframe (long format)...') +write.csv(df.hsi, path.hsi.long, row.names=FALSE) + + + -# Export dataframe -message('\nExporting rgb dataframe (long format)...') -write.csv(df.rgb, path.rgb.long, row.names=F) -# Repeat for df.hsi # Combine rgb and hsi data using df <- rbind(df.rgb, df.hsi)