Skip to content

Commit

Permalink
Load masterfile
Browse files Browse the repository at this point in the history
  • Loading branch information
Sungchan Oh committed Jun 29, 2024
1 parent 79f9558 commit 9c10a43
Show file tree
Hide file tree
Showing 3 changed files with 184 additions and 52 deletions.
172 changes: 172 additions & 0 deletions DataExplore.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
# DataExplore.r
# Explore AAPF data products and get some insight
# Currently, developed for RGB and HSI masterfiles (.xlsx)

library(dplyr)
library(readxl)
library(data.table)


# Locations of the masterfile workbooks (.xlsx), relative to the working
# directory. Each entry is a character vector so further workbooks can be
# appended; the import loops below iterate over every element.
paths.rgb <- c(
  "../RGB_Bayer_2.xlsx"
)
paths.hsi <- c(
  "../HS_Bayer_2.xlsx"
)





# Regular expressions describing the column names found in the generic
# master files. Each pattern is an alternation ("a|b|c") of literal column
# names plus a few character-class patterns. The patterns are not anchored,
# so when they are used with grepl() a match anywhere in a column name counts.

# RGB: literal column names, followed by the histogram-bin columns
# (one of H, S, V, F followed by one to three digits).
regex.col.rgb <- paste(
  c(
    "Filename", "EXP ID", "POT_BARCODE", "VARIETY",
    "TREATMENT", "SCAN_TIME", "SCAN_DATE", "DFP", "View",
    "frame_nr", "Width", "Height", "Surface", "Angle",
    "Convex_hull", "Roundness", "Center_of_mass_distance",
    "Center_of_mass_x", "Center_of_mass_y",
    "Hue", "Saturation", "Intensity", "Fluorescence",
    "[HSVF][[:digit:]]{1,3}"
  ),
  collapse = "|"
)

# HSI: literal column names, then "<alnum>_<stat>" summary columns
# (mean/max/min/std/pNN), then three- or four-digit numbers with an optional
# decimal part.
regex.col.hsi <- paste(
  c(
    "EXP ID", "POT_BARCODE", "VARIETY", "TREATMENT",
    "SCAN_TIME", "SCAN_DATE", "DFP",
    "[[:alnum:]]_+(mean|max|min|std|p[[:digit:]]{1,2})",
    "[[:digit:]]{3,4}(\\.[[:digit:]]{1,15})?"
  ),
  collapse = "|"
)

# Columns that are dropped from every worksheet after the generic-column
# filter has been applied (exact name match via %in%).
nouse.col.rgb <- c(
  "Filename",
  "SCAN_TIME",
  "SCAN_DATE"
)
nouse.col.hsi <- c(
  "Filename-VNIR-SIDE",
  "Filename-VNIR-TOP",
  "Filename-SWIR-SIDE",
  "Filename-SWIR-TOP",
  "SCAN_TIME",
  "SCAN_DATE"
)

# Identifier columns: these are kept as-is (id.vars) when a worksheet is
# melted to long format; every other column becomes a variable/value pair.
id.vars <- c(
  "EXP ID",
  "POT_BARCODE",
  "VARIETY",
  "TREATMENT",
  "DFP",
  "View",
  "frame_nr"
)





# Import RGB masterfiles ----
# For every workbook in paths.rgb, read each worksheet (except "PPEW"), keep
# only the columns matched by regex.col.rgb, drop the columns listed in
# nouse.col.rgb, fill in identifier columns that are missing, melt the sheet
# to long format, and stack all sheets into df.rgb.

# Long-format pieces are collected in a list and bound once after the loops.
# Calling rbind() on a growing data frame inside the loop copies every
# previously imported row again for each additional worksheet.
parts.rgb <- list()

for (path.rgb in paths.rgb) {
  for (tab.rgb in excel_sheets(path = path.rgb)) {
    if (tab.rgb == "PPEW") next

    # Read a rgb worksheet
    temp <- read_excel(path.rgb, sheet = tab.rgb)

    # Remove hand-made columns (keep only names matched by regex.col.rgb)
    cols <- grepl(regex.col.rgb, as.character(colnames(temp)))
    temp <- temp[cols]

    # Remove unused columns
    temp <- temp[, !names(temp) %in% nouse.col.rgb]

    # Simplify values under "View" column
    # Side{Bottom|Small|Full|Tall} to Side{Average|All}
    if (tolower(tab.rgb) == "side average") temp["View"] <- "SideAverage"
    if (tolower(tab.rgb) == "side all") temp["View"] <- "SideAll"

    # Find id columns that do not exist in this worksheet
    cols.to.add <- setdiff(id.vars, names(temp))

    # Assign values to the id columns that did not exist
    # TODO: only "View" (on the "top" sheet) and "frame_nr" can be filled in
    if ("View" %in% cols.to.add && tolower(tab.rgb) == "top") {
      temp["View"] <- "Top"
    }
    if ("frame_nr" %in% cols.to.add) {
      # -1 is the placeholder used for sheets without a frame number
      temp["frame_nr"] <- -1
    }

    # Fail early, naming the sheet, when an identifier column is still
    # missing; otherwise melt() stops without saying which sheet is at fault.
    missing.ids <- setdiff(id.vars, names(temp))
    if (length(missing.ids) > 0) {
      stop(
        "Worksheet '", tab.rgb, "' of '", path.rgb,
        "' lacks identifier column(s): ",
        paste(missing.ids, collapse = ", "),
        call. = FALSE
      )
    }

    # Change table into long format
    temp.long <- reshape2::melt(temp, id.vars = id.vars,
                                variable.name = "variable")

    # Keep the rows of this worksheet for the final bind
    parts.rgb[[length(parts.rgb) + 1L]] <- temp.long

    # Debugging
    #print(paste(path.rgb, " ", tab.rgb))
    #print(unique(temp$View))
    #print(unique(temp$frame_nr))
    #print(unique(temp.long$variable))
  }
}

# Combine rows from different worksheets; an empty data frame results when no
# worksheet was imported.
df.rgb <- if (length(parts.rgb) > 0) {
  do.call(rbind, parts.rgb)
} else {
  data.frame()
}
message("Succeeded importing RGB data...")



# Import HSI masterfiles ----
# For every workbook in paths.hsi, read each worksheet (except "PPEW"), drop
# columns whose names start with "...", keep only the columns matched by
# regex.col.hsi, drop the columns listed in nouse.col.hsi, add the "View" and
# "frame_nr" identifier columns, melt the sheet to long format, and stack all
# sheets into df.hsi.

# Long-format pieces are collected in a list and bound once after the loops.
# Calling rbind() on a growing data frame inside the loop copies every
# previously imported row again for each additional worksheet.
parts.hsi <- list()

for (path.hsi in paths.hsi) {
  for (tab.hsi in excel_sheets(path = path.hsi)) {
    if (tab.hsi == "PPEW") next

    # Read a hsi worksheet
    temp <- read_excel(path.hsi, sheet = tab.hsi)

    # Remove erroneous ghost columns (names starting with "...")
    temp <- select(temp, -starts_with("..."))

    # Remove hand-made columns (keep only names matched by regex.col.hsi)
    cols <- grepl(regex.col.hsi, as.character(colnames(temp)))
    temp <- temp[cols]

    # Remove unused columns
    temp <- temp[, !names(temp) %in% nouse.col.hsi]

    # Add "View" column, derived from the worksheet name. A character NA is
    # used as the placeholder so the column has the same type on every sheet.
    # "top" is tested last, so it wins if a name contains both words.
    sheet.name <- tolower(tab.hsi)
    temp["View"] <- NA_character_
    if (grepl("side", sheet.name, fixed = TRUE)) temp["View"] <- "Side"
    if (grepl("top", sheet.name, fixed = TRUE)) temp["View"] <- "Top"

    # Add "frame_nr" column; -1 is the placeholder for "no frame number"
    temp["frame_nr"] <- -1

    # Fail early, naming the sheet, when an identifier column is still
    # missing; otherwise melt() stops without saying which sheet is at fault.
    missing.ids <- setdiff(id.vars, names(temp))
    if (length(missing.ids) > 0) {
      stop(
        "Worksheet '", tab.hsi, "' of '", path.hsi,
        "' lacks identifier column(s): ",
        paste(missing.ids, collapse = ", "),
        call. = FALSE
      )
    }

    # Change table into long format
    temp.long <- reshape2::melt(temp, id.vars = id.vars,
                                variable.name = "variable")

    # Keep the rows of this worksheet for the final bind
    parts.hsi[[length(parts.hsi) + 1L]] <- temp.long

    ## Debugging
    #print(paste(path.hsi, " ", tab.hsi))
    #print(unique(temp$View))
    #print(unique(temp$frame_nr))
    #print(unique(temp.long$variable))
  }
}

# Combine rows from different worksheets; an empty data frame results when no
# worksheet was imported.
df.hsi <- if (length(parts.hsi) > 0) {
  do.call(rbind, parts.hsi)
} else {
  data.frame()
}
message("Succeeded importing HSI data...")










# Merge RGB and HSI data

# For "RGB-SideAll", change column names Frame0-11 to major, major+30...

# Change DFPs to growth stage







# Check input variables with repeatability and ANOVA
# For "RGB-SideAll", visualize importance along the side angle

# Visualize feature importance

# RGB-visualization
15 changes: 12 additions & 3 deletions DataGuide_AAPF_RGB.md
Original file line number Diff line number Diff line change
Expand Up @@ -112,17 +112,26 @@ Both the master data file (.xlsx) and individual measurement files (.csv) share

| Column Head | Description |
|----------------------------------------|-------------------------------------------------------------------------------------------------|
| `frame_nr` | Frame number from 0-11 for side view data |
| `Filename` | Input spreadsheet filename (.csv) |
| `EXP ID` | Experiment number within AAPF |
| `POT_BARCODE` | Unique identifier for the plant pot |
| `VARIETY` | Variety assigned in PPEW |
| `TREATMENT` | Treatment applied to plant |
| `SCAN_TIME` | Scan start time |
| `SCAN_DATE` | Scan start date |
| `DFP` | Age of plant in days from planting at time of imaging |
| `Angle` | Side view angle where the vegetation is most dispersed |
| `View` | TOP FRAME: Ignore this row<br>TOP AVG: Data from the top view<br>Side{Bottom, Small, Top, Full} FRAME 0 to 11: Data from various side view angles (0, 30, ..., 330 degrees)<br>Side{Bottom, Small, Top, Full} AVG: Average of Side view FRAME data |
| `frame_nr` | Frame number from 0-11 for side view data |
| `Width`, `Height` | Dimensions of the smallest enclosing rectangle of the vegetation segments |
| `Surface` | Area of the vegetation within the pixel-wise boundary of vegetation segments |
| `Convex hull` | Area of the convex hull measured by the smallest enclosing polygon of the vegetation segments |
| `Roundedness` | Roundness of the pixel-wise boundary of vegetation segments (❗ calculation method unclear) |
| `Roundness` | Roundness of the pixel-wise boundary of vegetation segments (❗ calculation method unclear) |
| `Center_of_mass_distance` | ❗ Unclear meaning |
| `Center_of_mass_x`, `Center_of_mass_y` | Coordinates of the center of mass point |
| `Hue`, `Saturation`, `Intensity`       | Average values for these color properties in the image |
| `Fluorescence` | Average fluorescence within the pixel-wise boundary of vegetation segments |
| `H###`, `S##`, `V##` | Frequency of pixels with specific hue (###: 0-359), saturation (##: 0-99), and value (##: 0-99)<br>Check the definitions of hue, saturation, and value in [this link](https://changingminds.org/explanations/perception/visual/hsl.htm) |
| `F##` | Frequency of pixels with a specific fluorescence value (##: 0-99) |
| `View` | TOP FRAME: Ignore this row<br>TOP AVG: Data from the top view<br>Side{Bottom, Small, Top, Full} FRAME 0 to 11: Data from various side view angles (0, 30, ..., 330 degrees)<br>Side{Bottom, Small, Top, Full} AVG: Average of Side view FRAME data |


49 changes: 0 additions & 49 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,52 +3,3 @@ Exploratory data analysis for AAPF dataset (RGB, HSI, XRAY)



# Change xlsx files to csv files

# Load csv files
# Merge DFPs? range->text
# Replace DFPs with growth stage if available


# RGB, HSI
# Check input variables with repeatability and ANOVA



# RGB-visualization



# Column header
# frame_nr: frame number
# Angle: side view angle when vegetation is mostly dispersed along the plane
# Width: width measured by green box in MES file (see 2.1)
# Height: height measured by green box in MES file (see 2.1)
# Surface: vegetation area measured by red polygon (see 2.3)
# Convex hull: convex hull area measured by blue polygon (see 2.2)
# Roundedness: roundedness measured by red polygon (unsure, see 2.3)
# Center_of_mass_distance: unclear
# Center_of_mass_x: x coordinate of center of mass point (see 2.4)
# Center_of_mass_y: y coordinate of center of mass point (see 2.4)
# Hue: average hue value
# Saturation: average saturation value
# Intensity: average intensity value
# Fluorescence: average fluorescence value
# H##: frequency of pixels with a hue value of ## (##: 0-359)
# S##: frequency of pixels with a saturation value of ## (##: 0-99)
# V##: frequency of pixels with an intensity value of ## (##: 0-99)
# F##: frequency of pixels with a fluorescence value of ## (##: 0-99)
#
# Row index (view)
# TOP FRAME: Ignore this row
# TOP AVG: Top view
# SideBottom FRAME 0 to 11: Side view data from various angles (0, 30, ..., 330 degree)
# SideBottom AVG: average of SideBottom FRAME data








0 comments on commit 9c10a43

Please sign in to comment.