Skip to content

Commit

Permalink
Load HSI data and convert to long format
Browse files Browse the repository at this point in the history
  • Loading branch information
Sungchan Oh committed Jul 4, 2024
1 parent 9d8681b commit 2e399bf
Showing 1 changed file with 190 additions and 57 deletions.
247 changes: 190 additions & 57 deletions DataExplore.r
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ c("/depot/smarterag/data/HSI/Master files/Hyperspectral_data_AAPF_experiment_374
"/depot/smarterag/data/HSI/Master files/Hyperspectral_data_AAPF_experiment_401.xlsx")

path.rgb.long <- ("./df_rgb_long.csv")
path.hsi.long <- ("./df_hsi_long.csv")



Expand All @@ -49,14 +50,15 @@ path.rgb.long <- ("./df_rgb_long.csv")
# (e.g., a handful of measurements in an enough time period).
# 3. Review this list and decide which measurements you want
# to exclude from your analysis.
# 4. Look for a variable named df.unwanted below.
# 4. Look for a variable named df.unwanted.{rgb|hsi} below.
# This variable is a list where you can specify the unwanted
# measurements you identified in step 3.
# 5. Fill in the list under df.unwanted with the attributes of the
# measurements you want to exclude. The number of items in the list will
# depend on your specific experiment and the output from DataExplore.r.
# 5. Fill in the list under df.unwanted.{rgb|hsi} with the attributes
# of the measurements you want to exclude. The number of items in
# the list will depend on your specific experiment and the output
# from DataExplore.r.
#
# Unwanted data example:
# Unwanted data example (RGB):
#----------------------------------------------------------
# EXP ID TREATMENT DFP SCAN_DATETIME_BY15MIN NUM_SCANS
#----------------------------------------------------------
Expand All @@ -77,7 +79,14 @@ path.rgb.long <- ("./df_rgb_long.csv")
# 5
# 396 20-1 55 2024-04-29 12:00:00 1
#----------------------------------------------------------
df.unwanted <- data.frame(
# Unwanted data example (HSI):
#----------------------------------------------------------
# EXP ID TREATMENT DFP SCAN_DATETIME_BY30MIN NUM_SCANS
#----------------------------------------------------------
# 1
# 390 20-1 45 2024-03-08 10:30:00 4
#----------------------------------------------------------
df.unwanted.rgb <- data.frame(
EXP_ID=c(374, 374, 374, 374, 396),
TREATMENT=c("20 -1", "50-3", "50-3", "50-4", "20-1"),
SCAN_DATETIME_BY15MIN=c("2023-10-30 11:15:00",
Expand All @@ -86,6 +95,12 @@ df.unwanted <- data.frame(
"2023-09-28 09:45:00",
"2024-04-29 12:00:00"),
stringsAsFactors=FALSE)
df.unwanted.hsi <- data.frame(
EXP_ID=c(390),
TREATMENT=c("20-1"),
SCAN_DATETIME_BY30MIN=c("2024-03-08 10:30:00"),
stringsAsFactors=FALSE)



# List descriptive treatment info
Expand Down Expand Up @@ -189,11 +204,16 @@ id.vars <- c("EXP ID", "POT_BARCODE", "SCAN_TIME", "SCAN_DATE",
# Helper columns that are stripped from each dataframe before export.
# The RGB and HSI lists differ only in the name of the time-bin column.
drops.col.rgb <- c(
  "SCAN_TIME",
  "SCAN_DATE",
  "SCAN_DATETIME",
  "SCAN_DATETIME_BY15MIN"
)
drops.col.hsi <- c(
  "SCAN_TIME",
  "SCAN_DATE",
  "SCAN_DATETIME",
  "SCAN_DATETIME_BY30MIN"
)

# Columns kept in the exported dataframes, in export order.
# HSI uses the same layout as RGB.
rgb.col.order <- c(
  "EXP ID", "POT_BARCODE", "TREATMENT", "VARIETY",
  "DFP", "GROWTH_STAGE", "View", "frame_nr",
  "variable", "value"
)
hsi.col.order <- rgb.col.order

# Columns used as factors
col.factor <- c("EXP ID", "VARIETY", "TREATMENT",
Expand All @@ -202,6 +222,12 @@ col.factor <- c("EXP ID", "VARIETY", "TREATMENT",









# Create empty rgb dataframe
mat <- matrix(ncol=0, nrow=0)
df.rgb <- data.frame(mat)
Expand Down Expand Up @@ -260,6 +286,110 @@ message('Succeeded importing RGB data...')



# Overview 1: Summarise RGB acquisitions per experiment, treatment and DFP.
# Only rows with View "Top" and variable "Surface" are counted
# (presumably one such row per scan; verify against the import step).
df.rgb.perscan <- df.rgb[with(df.rgb, which(View == "Top" & variable == "Surface")), ]
message('\nRGB Data Acquisiton Summary (Overview 1)\n')
df.rgb.perscan %>%
  count(`EXP ID`, TREATMENT, DFP) %>%
  print()





# Overview 2: Bin every scan timestamp into a 15-minute slot and show the
# number of scans per slot. Note that lubridate::round_date() rounds to the
# nearest slot boundary, not strictly down.
# `format` is kept as a script-level variable; later steps reuse it.
format <- "%Y-%m-%d %H:%M:%S"
df.rgb.perscan[["SCAN_DATETIME"]] <- as.POSIXct(
  paste(as.Date(df.rgb.perscan$SCAN_DATE), df.rgb.perscan$SCAN_TIME),
  format = format
)
df.rgb.perscan[["SCAN_DATETIME_BY15MIN"]] <- lubridate::round_date(
  df.rgb.perscan[["SCAN_DATETIME"]],
  unit = "15 minutes"
)
message('\nRGB Data Acquisiton Summary by Date and Time (Overview 2)\n')
df.rgb.perscan %>%
  count(`EXP ID`, TREATMENT, DFP, SCAN_DATETIME_BY15MIN) %>%
  print()
message('\nPlease check [`EXP ID`, TREATMENT, SCAN_DATETIME_BY15MIN]')
message('to exclude data from incomplete scan session...')
message('Check df.unwanted.rgb in DataExplore.r to exclude such records...\n')





# Overview 3: Drop unwanted rgb data
# Every record is tagged with its 15-minute time bin, then each row of
# df.unwanted.rgb removes the records matching its
# (EXP_ID, TREATMENT, SCAN_DATETIME_BY15MIN) triple.
df.rgb[["SCAN_DATETIME"]] <- as.POSIXct(
  paste(as.Date(df.rgb$SCAN_DATE), df.rgb$SCAN_TIME),
  format = format
)
df.rgb[["SCAN_DATETIME_BY15MIN"]] <- lubridate::round_date(
  df.rgb$SCAN_DATETIME, "15 minutes"
)
# seq_len() gives an empty loop when df.unwanted.rgb has no rows.
# 1:nrow() would iterate over c(1, 0); the all-NA row picked up at i = 1
# makes the filter condition NA for every record, and filter() drops
# NA rows, so the whole dataframe would be emptied.
for (i in seq_len(nrow(df.unwanted.rgb))) {
  row <- df.unwanted.rgb[i, ]
  df.rgb <- df.rgb %>%
    filter(!(`EXP ID` == row$EXP_ID &
               TREATMENT == row$TREATMENT &
               SCAN_DATETIME_BY15MIN == row$SCAN_DATETIME_BY15MIN))
}
df.rgb.perscan <- df.rgb[which(df.rgb$View == "Top" & df.rgb$variable == "Surface"), ]
message('\nRGB Data Acquisiton Summary (Unwanted Data Filtered, Overview 3)\n')
print(count(df.rgb.perscan, `EXP ID`, TREATMENT, DFP))





# Overview 4: Convert treatment attributes
# Each row of df.convert.treatment maps one raw TREATMENT label (`from`)
# to its replacement (`to`).
# seq_len() keeps the loop from running at all when the mapping table is
# empty (1:nrow() would iterate over c(1, 0)).
for (i in seq_len(nrow(df.convert.treatment))) {
  row <- df.convert.treatment[i, ]
  df.rgb$TREATMENT[df.rgb$TREATMENT == row$from] <- row$to
}
df.rgb.perscan <- df.rgb[which(df.rgb$View == "Top" & df.rgb$variable == "Surface"), ]
message('\nRGB Data Acquisiton Summary (Treatment Updated, Overview 4)\n')
print(count(df.rgb.perscan, `EXP ID`, TREATMENT, DFP))





# Overview 5: Wrap DFP with modulo 365 so it is always below 365
# (guards against records entered with the wrong year).
df.rgb[["DFP"]] <- df.rgb[["DFP"]] %% 365
df.rgb.perscan <- df.rgb[with(df.rgb, which(View == "Top" & variable == "Surface")), ]
message('\nRGB Data Acquisiton Summary (DFP Always Less Than 365, Overview 5)\n')
df.rgb.perscan %>%
  count(`EXP ID`, TREATMENT, DFP) %>%
  print()





# Overview 6: Add growth stage attribute
# GROWTH_STAGE starts as NA and is filled in for records whose
# (EXP ID, TREATMENT, DFP) matches a row of df.growth.stage; records
# without a match keep NA.
df.rgb$GROWTH_STAGE <- NA
# seq_len() skips the loop entirely when df.growth.stage is empty
# (1:nrow() would iterate over c(1, 0)).
for (i in seq_len(nrow(df.growth.stage))) {
  row <- df.growth.stage[i, ]
  df.rgb$GROWTH_STAGE[(df.rgb$`EXP ID` == row$EXP_ID) &
                        (df.rgb$TREATMENT == row$TREATMENT) &
                        (df.rgb$DFP == row$DFP)] <- row$GROWTH_STAGE
}
df.rgb.perscan <- df.rgb[which(df.rgb$View == "Top" & df.rgb$variable == "Surface"), ]
message('\nRGB Data Acquisiton Summary (Growth Stage Added, Overview 6)\n')
print(count(df.rgb.perscan, `EXP ID`, TREATMENT, DFP, GROWTH_STAGE))





# Drop unused columns (scan date/time helper columns listed in drops.col.rgb)
df.rgb <- df.rgb[, !(names(df.rgb) %in% drops.col.rgb)]

# Keep the export columns in the order given by rgb.col.order and convert
# the columns named in col.factor to factors
df.rgb <- df.rgb[, rgb.col.order]
df.rgb[col.factor] <- lapply(df.rgb[col.factor], factor)

# Export rgb dataframe
# FALSE is spelled out because the shorthand F is an ordinary variable
# that can be reassigned.
message('\nExporting rgb dataframe (long format)...')
write.csv(df.rgb, path.rgb.long, row.names = FALSE)










# Create empty hsi dataframe
mat <- matrix(ncol=0, nrow=0)
df.hsi <- data.frame(mat)
Expand Down Expand Up @@ -309,105 +439,108 @@ message('Succeeded importing HSI data...')



# Overview 1: Summarise HSI acquisitions per experiment, treatment and DFP.
# Only rows with View "Top" and variable "NDVI_mean" are counted
# (presumably one such row per scan; verify against the import step).
df.hsi.perscan <- df.hsi[with(df.hsi, which(View == "Top" & variable == "NDVI_mean")), ]
message('\nHSI Data Acquisiton Summary (Overview 1)\n')
df.hsi.perscan %>%
  count(`EXP ID`, TREATMENT, DFP) %>%
  print()



# Get unique rgb measurement
df.rgb.perscan <- df.rgb[which(df.rgb$View=="Top" & df.rgb$variable=="Surface"), ]

# Show summary of rgb data
message('\nRGB Data Acquisiton Summary (Overview 1)\n')
print(count(df.rgb.perscan, `EXP ID`, TREATMENT, DFP))

# Round down scan date and time down to nearest 15 minute and display
# Overview 2: Round scan date and time to the nearest 30 minutes and display
format <- "%Y-%m-%d %H:%M:%S"
df.rgb.perscan["SCAN_DATETIME"] =
as.POSIXct(paste(as.Date(df.rgb.perscan$SCAN_DATE), df.rgb.perscan$SCAN_TIME),
df.hsi.perscan["SCAN_DATETIME"] =
as.POSIXct(paste(as.Date(df.hsi.perscan$SCAN_DATE), df.hsi.perscan$SCAN_TIME),
format=format)
df.rgb.perscan["SCAN_DATETIME_BY15MIN"] =
lubridate::round_date(df.rgb.perscan$SCAN_DATETIME, "15 minutes")
message('\nRGB Data Acquisiton Summary by Date and Time (Overview 2)\n')
print(count(df.rgb.perscan, `EXP ID`, TREATMENT, DFP, SCAN_DATETIME_BY15MIN))
message('\nPlease check [`EXP ID`, TREATMENT, SCAN_DATETIME_BY15MIN]')
df.hsi.perscan["SCAN_DATETIME_BY30MIN"] =
lubridate::round_date(df.hsi.perscan$SCAN_DATETIME, "30 minutes")
message('\nHSI Data Acquisiton Summary by Date and Time (Overview 2)\n')
print(count(df.hsi.perscan, `EXP ID`, TREATMENT, DFP, SCAN_DATETIME_BY30MIN))
message('\nPlease check [`EXP ID`, TREATMENT, SCAN_DATETIME_BY30MIN]')
message('to exclude data from incomplete scan session...')
message('Check df.unwanted in DataExplore.r to exclude such records...\n')
message('Check df.unwanted.hsi in DataExplore.r to exclude such records...\n')




# Drop unwanted rgb data
df.rgb["SCAN_DATETIME"] =
as.POSIXct(paste(as.Date(df.rgb$SCAN_DATE), df.rgb$SCAN_TIME), format=format)
df.rgb["SCAN_DATETIME_BY15MIN"] =
lubridate::round_date(df.rgb$SCAN_DATETIME, "15 minutes")
for(i in 1:nrow(df.unwanted)){
row <- df.unwanted[i,]
df.rgb <- df.rgb %>% filter(!(`EXP ID` == row$EXP_ID &

# Overview 3: Drop unwanted hsi data
# Every record is tagged with its 30-minute time bin, then each row of
# df.unwanted.hsi removes the records matching its
# (EXP_ID, TREATMENT, SCAN_DATETIME_BY30MIN) triple.
# The bin column is named SCAN_DATETIME_BY30MIN so that it agrees with
# df.unwanted.hsi and with the helper columns listed in drops.col.hsi.
df.hsi[["SCAN_DATETIME"]] <- as.POSIXct(
  paste(as.Date(df.hsi$SCAN_DATE), df.hsi$SCAN_TIME),
  format = format
)
df.hsi[["SCAN_DATETIME_BY30MIN"]] <- lubridate::round_date(
  df.hsi$SCAN_DATETIME, "30 minutes"
)
# seq_len() gives an empty loop when df.unwanted.hsi has no rows.
# 1:nrow() would iterate over c(1, 0); the all-NA row picked up at i = 1
# makes the filter condition NA for every record, and filter() drops
# NA rows, so the whole dataframe would be emptied.
for (i in seq_len(nrow(df.unwanted.hsi))) {
  row <- df.unwanted.hsi[i, ]
  df.hsi <- df.hsi %>%
    filter(!(`EXP ID` == row$EXP_ID &
               TREATMENT == row$TREATMENT &
               SCAN_DATETIME_BY30MIN == row$SCAN_DATETIME_BY30MIN))
}
df.rgb.perscan <- df.rgb[which(df.rgb$View=="Top" & df.rgb$variable=="Surface"), ]
message('\nRGB Data Acquisiton Summary (Unwanted Data Filtered, Overview 3)\n')
print(count(df.rgb.perscan, `EXP ID`, TREATMENT, DFP))
df.hsi.perscan <- df.hsi[which(df.hsi$View=="Top" & df.hsi$variable=="NDVI_mean"), ]
message('\nHSI Data Acquisiton Summary (Unwanted Data Filtered, Overview 3)\n')
print(count(df.hsi.perscan, `EXP ID`, TREATMENT, DFP))





# Convert treatment attributes
# Overview 4: Convert treatment attributes
for(i in 1:nrow(df.convert.treatment)){
row <- df.convert.treatment[i,]
df.rgb$TREATMENT[df.rgb$TREATMENT==row$from] <- row$to
df.hsi$TREATMENT[df.hsi$TREATMENT==row$from] <- row$to
}
df.rgb.perscan <- df.rgb[which(df.rgb$View=="Top" & df.rgb$variable=="Surface"), ]
message('\nRGB Data Acquisiton Summary (Treatment Updated, Overview 4)\n')
print(count(df.rgb.perscan, `EXP ID`, TREATMENT, DFP))
df.hsi.perscan <- df.hsi[which(df.hsi$View=="Top" & df.hsi$variable=="NDVI_mean"), ]
message('\nHSI Data Acquisiton Summary (Treatment Updated, Overview 4)\n')
print(count(df.hsi.perscan, `EXP ID`, TREATMENT, DFP))





# DFP is always smaller than 365 (to prevent case with wrong year input)
df.rgb$DFP = df.rgb$DFP %% 365
df.rgb.perscan <- df.rgb[which(df.rgb$View=="Top" & df.rgb$variable=="Surface"), ]
message('\nRGB Data Acquisiton Summary (DFP Always Less Than 365, Overview 5)\n')
print(count(df.rgb.perscan, `EXP ID`, TREATMENT, DFP))
# Overview 5: Wrap DFP with modulo 365 so it is always below 365
# (guards against records entered with the wrong year).
df.hsi[["DFP"]] <- df.hsi[["DFP"]] %% 365
df.hsi.perscan <- df.hsi[with(df.hsi, which(View == "Top" & variable == "NDVI_mean")), ]
message('\nHSI Data Acquisiton Summary (DFP Always Less Than 365, Overview 5)\n')
df.hsi.perscan %>%
  count(`EXP ID`, TREATMENT, DFP) %>%
  print()





# Add growth stage attribute
df.rgb$GROWTH_STAGE <- NA
# Overview 6: Add growth stage attribute
df.hsi$GROWTH_STAGE <- NA
for(i in 1:nrow(df.growth.stage)){
row <- df.growth.stage[i,]
df.rgb$GROWTH_STAGE[(df.rgb$`EXP ID` == row$EXP_ID) &
(df.rgb$TREATMENT == row$TREATMENT) &
(df.rgb$DFP == row$DFP)] <- row$GROWTH_STAGE
df.hsi$GROWTH_STAGE[(df.hsi$`EXP ID` == row$EXP_ID) &
(df.hsi$TREATMENT == row$TREATMENT) &
(df.hsi$DFP == row$DFP)] <- row$GROWTH_STAGE
}
df.rgb.perscan <- df.rgb[which(df.rgb$View=="Top" & df.rgb$variable=="Surface"), ]
message('\nRGB Data Acquisiton Summary (Growth Stage Added, Overview 6)\n')
print(count(df.rgb.perscan, `EXP ID`, TREATMENT, DFP, GROWTH_STAGE))
df.hsi.perscan <- df.hsi[which(df.hsi$View=="Top" & df.hsi$variable=="NDVI_mean"), ]
message('\nHSI Data Acquisiton Summary (Growth Stage Added, Overview 6)\n')
print(count(df.hsi.perscan, `EXP ID`, TREATMENT, DFP, GROWTH_STAGE))





# Drop unused columns
df.rgb <- df.rgb[ , !(names(df.rgb) %in% drops.col.rgb)]
df.hsi <- df.hsi[ , !(names(df.hsi) %in% drops.col.hsi)]

# Change column order
df.rgb <- df.rgb[, rgb.col.order]
df.rgb[col.factor] <- lapply(df.rgb[col.factor], factor)
df.hsi <- df.hsi[, hsi.col.order]
df.hsi[col.factor] <- lapply(df.hsi[col.factor], factor)

# Export hsi dataframe
# Writes the long-format HSI table to path.hsi.long without row names.
# FALSE is spelled out because the shorthand F is an ordinary variable
# that can be reassigned.
message('\nExporting hsi dataframe (long format)...')
write.csv(df.hsi, path.hsi.long, row.names = FALSE)




# Export dataframe
message('\nExporting rgb dataframe (long format)...')
write.csv(df.rgb, path.rgb.long, row.names=F)




# Repeat for df.hsi
# Combine rgb and hsi data using df <- rbind(df.rgb, df.hsi)


Expand Down

0 comments on commit 2e399bf

Please sign in to comment.