-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Sungchan Oh
committed
Jun 29, 2024
1 parent
79f9558
commit 9c10a43
Showing
3 changed files
with
184 additions
and
52 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,172 @@ | ||
# DataExplore.r | ||
# Explore AAPF data products and get some insight | ||
# Currently, developed for RGB and HSI masterfiles (.xlsx) | ||
|
||
library(dplyr) | ||
library(readxl) | ||
library(data.table) | ||
|
||
|
||
# Configuration ----

# Workbooks to import (one or more paths per modality)
paths.rgb <- "../RGB_Bayer_2.xlsx"
paths.hsi <- "../HS_Bayer_2.xlsx"

# Regular expressions describing the columns of a generic master file.
# Each pattern is an alternation ("a|b|c") that is matched against the
# worksheet column names; columns that match nothing are dropped by the
# import loops as hand-made additions.
regex.col.rgb <- paste(
  c(
    "Filename", "EXP ID", "POT_BARCODE", "VARIETY",
    "TREATMENT", "SCAN_TIME", "SCAN_DATE", "DFP", "View",
    "frame_nr", "Width", "Height", "Surface", "Angle",
    "Convex_hull", "Roundness", "Center_of_mass_distance",
    "Center_of_mass_x", "Center_of_mass_y",
    "Hue", "Saturation", "Intensity", "Fluorescence",
    "[HSVF][[:digit:]]{1,3}"
  ),
  collapse = "|"
)
regex.col.hsi <- paste(
  c(
    "EXP ID", "POT_BARCODE", "VARIETY", "TREATMENT",
    "SCAN_TIME", "SCAN_DATE", "DFP",
    "[[:alnum:]]_+(mean|max|min|std|p[[:digit:]]{1,2})",
    "[[:digit:]]{3,4}(\\.[[:digit:]]{1,15})?"
  ),
  collapse = "|"
)

# Columns that are removed after the pattern filter because the analysis
# does not use them
nouse.col.rgb <- c("Filename", "SCAN_TIME", "SCAN_DATE")
nouse.col.hsi <- c(
  "Filename-VNIR-SIDE", "Filename-VNIR-TOP",
  "Filename-SWIR-SIDE", "Filename-SWIR-TOP",
  "SCAN_TIME", "SCAN_DATE"
)

# Identifier columns kept as-is when the tables are melted to long format
id.vars <- c(
  "EXP ID", "POT_BARCODE", "VARIETY",
  "TREATMENT", "DFP", "View", "frame_nr"
)
|
||
|
||
|
||
|
||
|
||
# Import RGB master files ----
# Every worksheet except "PPEW" is read, reduced to the generic columns,
# completed with the identifier columns and melted to long format.
# The per-sheet tables are collected in a list and combined once after the
# loop, so the growing table is not re-copied for every worksheet.
frames.rgb <- list()

for (path.rgb in paths.rgb) {
  for (tab.rgb in excel_sheets(path = path.rgb)) {
    if (tab.rgb == "PPEW") next

    # Read a rgb worksheet
    temp <- read_excel(path.rgb, sheet = tab.rgb)

    # Remove hand-made columns (names that match no generic pattern)
    cols <- grepl(regex.col.rgb, colnames(temp))
    temp <- temp[cols]

    # Remove unused columns
    temp <- temp[, !names(temp) %in% nouse.col.rgb]

    # Simplify values under "View" column
    # Side{Bottom|Small|Full|Tall} to Side{Average|All}
    sheet.key <- tolower(tab.rgb)
    if (sheet.key == "side average") temp["View"] <- "SideAverage"
    if (sheet.key == "side all") temp["View"] <- "SideAll"

    # Fill in identifier columns the worksheet does not provide:
    # "View" can only be derived for the "top" sheet, "frame_nr" gets the
    # placeholder -1
    cols.to.add <- setdiff(id.vars, names(temp))
    if ("View" %in% cols.to.add && sheet.key == "top") {
      temp["View"] <- "Top"
    }
    if ("frame_nr" %in% cols.to.add) {
      temp["frame_nr"] <- -1
    }

    # melt() cannot run without every identifier column; fail here with a
    # message that names the offending workbook and sheet
    cols.missing <- setdiff(id.vars, names(temp))
    if (length(cols.missing) > 0) {
      stop(
        "Sheet '", tab.rgb, "' of '", path.rgb,
        "' lacks identifier column(s): ",
        paste(cols.missing, collapse = ", "),
        call. = FALSE
      )
    }

    # Change table into long format
    temp.long <- reshape2::melt(temp, id.vars = id.vars,
                                variable.name = "variable")

    # Keep the long table; all sheets are combined after the loop
    frames.rgb[[length(frames.rgb) + 1L]] <- temp.long

    # Debugging
    #print(paste(path.rgb, " ", tab.rgb))
    #print(unique(temp$View))
    #print(unique(temp$frame_nr))
    #print(unique(temp.long$variable))
  }
}

# Combine rows from different worksheets (empty data frame if none were read)
df.rgb <- if (length(frames.rgb) > 0) {
  do.call(rbind, frames.rgb)
} else {
  data.frame()
}
message("Succeeded importing RGB data...")
|
||
|
||
|
||
# Import HSI master files ----
# Every worksheet except "PPEW" is read, reduced to the generic columns,
# given "View" and "frame_nr" identifier columns and melted to long format.
# The per-sheet tables are collected in a list and combined once after the
# loop, so the growing table is not re-copied for every worksheet.
frames.hsi <- list()

for (path.hsi in paths.hsi) {
  for (tab.hsi in excel_sheets(path = path.hsi)) {
    if (tab.hsi == "PPEW") next

    # Read a hsi worksheet
    temp <- read_excel(path.hsi, sheet = tab.hsi)

    # Remove erroneous ghost columns (names starting with "...")
    temp <- select(temp, -starts_with("..."))

    # Remove hand-made columns (names that match no generic pattern)
    cols <- grepl(regex.col.hsi, colnames(temp))
    temp <- temp[cols]

    # Remove unused columns
    temp <- temp[, !names(temp) %in% nouse.col.hsi]

    # Add "View" column derived from the sheet name; it stays NA when the
    # name contains neither "side" nor "top", and "top" wins if both occur
    sheet.key <- tolower(tab.hsi)
    temp["View"] <- NA
    if (grepl("side", sheet.key, fixed = TRUE)) temp["View"] <- "Side"
    if (grepl("top", sheet.key, fixed = TRUE)) temp["View"] <- "Top"

    # Add "frame_nr" column (placeholder value)
    temp["frame_nr"] <- -1

    # melt() cannot run without every identifier column; fail here with a
    # message that names the offending workbook and sheet
    cols.missing <- setdiff(id.vars, names(temp))
    if (length(cols.missing) > 0) {
      stop(
        "Sheet '", tab.hsi, "' of '", path.hsi,
        "' lacks identifier column(s): ",
        paste(cols.missing, collapse = ", "),
        call. = FALSE
      )
    }

    # Change table into long format
    temp.long <- reshape2::melt(temp, id.vars = id.vars,
                                variable.name = "variable")

    # Keep the long table; all sheets are combined after the loop
    frames.hsi[[length(frames.hsi) + 1L]] <- temp.long

    ## Debugging
    #print(paste(path.hsi, " ", tab.hsi))
    #print(unique(temp$View))
    #print(unique(temp$frame_nr))
    #print(unique(temp.long$variable))
  }
}

# Combine rows from different worksheets (empty data frame if none were read)
df.hsi <- if (length(frames.hsi) > 0) {
  do.call(rbind, frames.hsi)
} else {
  data.frame()
}
message("Succeeded importing HSI data...")
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
# Merge RGB and HSI data | ||
|
||
# For "RGB-SideAll", change column names Frame0-11 to major, major+30... | ||
|
||
# Change DFPs to growth stage | ||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
# Check input variables with repeatability and ANOVA | ||
# For "RGB-SideAll", visualize importance along the side angle | ||
|
||
# Visualize feature importance | ||
|
||
# RGB-visualization |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters