Skip to content

Commit

Permalink
Display repeatability, variance
Browse files Browse the repository at this point in the history
  • Loading branch information
Sungchan Oh committed Jul 10, 2024
1 parent d301e1c commit 7cffa02
Show file tree
Hide file tree
Showing 4 changed files with 194 additions and 81 deletions.
121 changes: 72 additions & 49 deletions calculate_repeatability.r
Original file line number Diff line number Diff line change
@@ -1,13 +1,8 @@
# calculate_repeatability.r
#
# value ~ 1|EXP.ID + VARIETY + TREATMENT + GROWTH_STAGE
# value ~ 1|EXP.ID + VARIETY + TREATMENT Seth's model
# value ~ 1|EXP.ID + VARIETY Current model


library(dplyr)
library(lme4)
library(heritability)
library(rptR)



Expand All @@ -19,14 +14,23 @@ path.rpt <- ("./rpt_no_public.csv")
# Define varieties that is excluded for repeatability measurement
exclude.variety <-c("P1", "P2", "P3", "P4")

# List of "factor (categorical)" variables
factor.col <- c('EXP.ID',
'POT_BARCODE',
'TREATMENT',
'VARIETY',
'GROWTH_STAGE',
'View',
'frame_nr')
# Formula used to evaluate repeatability
case <- 1

# Set formula based on case number
if (case==1){
# When number of varieties, reps (experiment id) > 5
formula <- as.formula("value ~ (1|VARIETY) + (1|EXP.ID)")
}










# Load RGB and HSI data in long format
message("Loading data...")
Expand All @@ -42,41 +46,54 @@ if (length(exclude.variety)>0){
# Combine RGB and HSI data
df <- rbind(df.rgb, df.hsi)

# Set factor columns
df[, factor.col] <- lapply(df[, factor.col], factor)

# Create a dataframe to record repeatability
message("Finding unique conditions to evaluate repeatability...")
df.rpt <- unique(df[, c('TREATMENT',
'GROWTH_STAGE',
'View',
'frame_nr',
'variable')])
df.rpt <- df.rpt[order(df.rpt$TREATMENT,
df.rpt$GROWTH_STAGE,
df.rpt$View,
df.rpt$frame_nr,
df.rpt$variable), ]
df.rpt$repeatability <- NA
df.rpt$gen.variance <- NA
df.rpt$res.variance <- NA
if (case==1){
df.rpt <- unique(df[, c('TREATMENT',
'GROWTH_STAGE',
'View',
'frame_nr',
'variable')])
}

# Sort output dataframe. This line can help increase data search efficiency
if (case==1){
df.rpt <- df.rpt[order(df.rpt$TREATMENT), ]
}
# Add columns to record results
if (case==1){
df.rpt$rpt <- NA
df.rpt$sdcor.VARIETY <- NA
df.rpt$sdcor.EXP.ID <- NA
df.rpt$sdcor.Residual <- NA
}



# Calculate repeatability
message('Calculating repeatability...')
for(i in 1:nrow(df.rpt)){

# Get subset of data under specific condition to evaluate repeatability
row <- df.rpt[i,]

# Create a subset of data, this can improve data search efficiency
att.search <- row[1,1]

if(i==1){
df.search <- df[which(df[ ,colnames(row[1])]==att.search), ]
} else if (i>1 & att.search!=att.prev){
df.search <- df[which(df[ ,colnames(row[1])]==att.search), ]
}

# Subset data by condition by which repeatability is measured
message(paste(row$TREATMENT,
row$GROWTH_STAGE,
row$View,
row$frame_nr,
row$variable,
" (", i, "/", nrow(df.rpt), ")", sep=" "))
df.temp <- df[which(df$TREATMENT==row$TREATMENT &
df$GROWTH_STAGE==row$GROWTH_STAGE &
df$View==row$View &
df$frame_nr==row$frame_nr &
df$variable==row$variable), ]
message(paste0("(", i, "/", nrow(df.rpt), ")", sep=""))
if (case==1){
df.temp <- df.search[which(df.search$GROWTH_STAGE==row$GROWTH_STAGE &
df.search$View==row$View &
df.search$frame_nr==row$frame_nr &
df.search$variable==row$variable), ]
}

# Count NA values and continue if excessive
num.na <- sum(is.na(df.temp$value))
Expand All @@ -85,22 +102,28 @@ for(i in 1:nrow(df.rpt)){
# Select valid data
df.temp <- df.temp[!is.na(df.temp$value) & is.finite(df.temp$value), ]

# Evaluate repeatability (line repeatability=True)
rpt <- repeatability(df.temp$value, df.temp$VARIETY, line.repeatability=T,
covariates.frame = data.frame())
# Evaluate repeatability
if (case==1){
rpt.temp <- rptR::rpt(formula, grname=c("VARIETY", "EXP.ID"),
data=df.temp, datatype="Gaussian", nboot=0, npermut=0)
}
df.rpt.temp <- as.data.frame(VarCorr(rpt.temp$mod))

# Save data
df.rpt$repeatability[i] <- rpt$repeatability
df.rpt$gen.variance[i] <- rpt$gen.variance
df.rpt$res.variance[i] <- rpt$res.variance
if (case==1){
df.rpt$rpt[i] <- rpt.temp$R["VARIETY"]
df.rpt$sdcor.VARIETY[i] <- df.rpt.temp[df.rpt.temp$grp=="VARIETY", 'sdcor']
df.rpt$sdcor.EXP.ID[i] <- df.rpt.temp[df.rpt.temp$grp=="EXP.ID", 'sdcor']
df.rpt$sdcor.Residual[i] <- df.rpt.temp[df.rpt.temp$grp=="Residual", 'sdcor']
}
att.prev <- att.search
}




# Export repeatability table
message('\nExporting repeatability measurements...')
write.csv(df.rpt, path.rpt, row.names=F)
write.csv(apply(df.rpt, 2, as.character), path.rpt, row.names=F)



Expand Down
47 changes: 47 additions & 0 deletions display_repeatability.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
library(dplyr)
library(ggplot2)


# File paths to Input and output data
path.rpt <- ("./rpt_no_public.csv")
path.plot.overall <- ("./rpt_overall.png")
path.plot.sideall <- ("./rpt_sideall.png")





# Load repeatability results
df.rpt <- read.csv(path.rpt)

# Exclude results with NA
df.rpt <- df.rpt[which(!is.na(df.rpt$rpt) & is.finite(df.rpt$rpt)), ]

# Select data with View==SideAll
df.rpt.sideall <- df.rpt[which(df.rpt$View=="SideAll"),]

# Simplify attributes under df.rpt$View
df.rpt$View <- replace(df.rpt$View,
df.rpt$View %in% c("SideAll", "SideAverage"),
"Side")

# Plot results (overall data)
p1 <- ggplot(df.rpt, aes(x=View, y=rpt)) + geom_boxplot() +
facet_grid(rows=vars(GROWTH_STAGE), cols=vars(TREATMENT)) +
labs(y = "Repeatability, Input: all RGB and HSI data")
png(path.plot.overall)
print(p1)
dev.off()

# Plot results (sideall data)
df.rpt.sideall$frame_nr <- as.factor(df.rpt.sideall$frame_nr)
p2 <- ggplot(df.rpt.sideall, aes(x=frame_nr, y=rpt)) + geom_boxplot() +
facet_grid(rows=vars(GROWTH_STAGE), cols=vars(TREATMENT)) +
labs(y = "Repeatability, Input: RGB data from different vewing angles")
png(path.plot.sideall)
print(p2)
dev.off()



# EOF
104 changes: 73 additions & 31 deletions display_variance.r
Original file line number Diff line number Diff line change
@@ -1,55 +1,97 @@
library(dplyr)
library(ggplot2)

# TODO
# calc rpt without public hybrid
# order hybrid a b c ...
# yaxis label <- variable


# Input long format data


# Path to input and output data
path.rgb.long <- ("./df_rgb_long.csv")
path.hsi.long <- ("./df_hsi_long.csv")
path.rpt <- ("./rpt.csv")
path.rpt <- ("./rpt_no_public.csv")

# Variables demonstrating highest to n-th highest repeatability will be displayed
# in each combination of (TREATMENT, GROWTH_STAGE)
n <- 5

# List of varieties to be excluded, if any; otherwise, exclude.variety as "c()"
exclude.variety <-c("P1", "P2", "P3", "P4")


# Load RGB and HSI data in long format
message("Loading data...")
df.rgb <- read.csv(path.rgb.long)
df.hsi <- read.csv(path.hsi.long)
df.rpt <- read.csv(path.rpt)

# Combine RGB and HSI data
df <- rbind(df.rgb, df.hsi)



# Sort repeatability by desending order
message("Sorting repeatability data...")
df.rpt <- df.rpt[!is.na(df.rpt$repeatability),]
df.rpt.sort <- df.rpt[order(df.rpt$repeatability, decreasing=T), ]




n <- 10
for (i in seq(1,10)){
row <- df.rpt.sort[i,]
df.temp <- df[which(df$TREATMENT==row$TREATMENT &
df$GROWTH_STAGE==row$GROWTH_STAGE &
df$View==row$View &
df$frame_nr==row$frame_nr &
df$variable==row$variable), ]
p = ggplot(df.temp, aes(x=VARIETY, y=value)) +
geom_boxplot()
path.plot <- paste0('./variance_', i, '_', row$TREATMENT, '_', row$GROWTH_STAGE, '_', row$View, '_', row$frame_nr, '_', row$variable, '.png')
png(path.plot)
print(p)
dev.off()
# Load RGB and HSI data in long format
message("Loading data...")
df.rgb <- read.csv(path.rgb.long)
df.hsi <- read.csv(path.hsi.long)
df.rpt <- read.csv(path.rpt)

# Exclude varieties, as needed
if (length(exclude.variety)>0){
df.rgb <- df.rgb %>% filter(!VARIETY %in% exclude.variety)
df.hsi <- df.hsi %>% filter(!VARIETY %in% exclude.variety)
}

# Combine RGB and HSI data
df <- rbind(df.rgb, df.hsi)

# Simplify attributes under df.rpt$View
df.rpt$View <- replace(df.rpt$View,
df.rpt$View %in% c("SideAll", "SideAverage"),
"Side")
df$View <- replace(df$View,
df$View %in% c("SideAll", "SideAverage"),
"Side")

# Choose valid repeatability measurements
df.rpt <- df.rpt[!is.na(df.rpt$rpt),]

for (treatment in unique(df.rpt$TREATMENT)){
for (growth.stage in unique(df.rpt$GROWTH_STAGE)){
print(paste0("Generating plots for ",
treatment, ", ", growth.stage, " case..."))

# Subset data by treatment and growth stage
df.rpt.temp <- df.rpt[which(df.rpt$TREATMENT==treatment &
df.rpt$GROWTH_STAGE==growth.stage), ]
df.rpt.temp <- df.rpt.temp[order(df.rpt.temp$rpt, decreasing=T), ]


for (i in seq(1, n)){
# Gather measurements from a variable with n-th highest repeatability
row <- df.rpt.temp[i,]
df.temp <- df[which(df$TREATMENT==row$TREATMENT &
df$GROWTH_STAGE==row$GROWTH_STAGE &
df$View==row$View &
df$frame_nr==row$frame_nr &
df$variable==row$variable), ]
# Draw box plot
p = ggplot(df.temp, aes(x=VARIETY, y=value)) +
ggtitle(paste0("Repeatability=", round(row$rpt, 3))) +
geom_boxplot() +
ylab(paste("Variable: ",
row$TREATMENT, row$GROWTH_STAGE,
row$View, row$frame_nr, row$variable,
sep='_'))
path.plot <- paste0(paste('./variance',
row$TREATMENT, row$GROWTH_STAGE,
i,
row$View, row$frame_nr, row$variable,
sep='_'), '.png')
# Save plot
png(path.plot)
print(p)
dev.off()
}
}
}



# EOF
3 changes: 2 additions & 1 deletion import_and_reshape.r
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ path.hsi.long <- ("./df_hsi_long.csv")
# 390 20-1 45 2024-03-08 10:30:00 4
#----------------------------------------------------------
df.unwanted.rgb <- data.frame(
EXP_ID=c(374, 374, 374, 374, 396),
EXP_ID =c( 374, 374, 374, 374, 396),
TREATMENT=c("20 -1", "50-3", "50-3", "50-4", "20-1"),
SCAN_DATETIME_BY15MIN=c("2023-10-30 11:15:00",
"2023-09-28 09:15:00",
Expand Down Expand Up @@ -334,6 +334,7 @@ print(count(df.rgb.perscan, `EXP ID`, TREATMENT, DFP, SCAN_DATETIME_BY15MIN))
message('\nPlease check [`EXP ID`, TREATMENT, SCAN_DATETIME_BY15MIN]')
message('to exclude data from incomplete scan session...')
message('Check df.unwanted.rgb in DataExplore.r to exclude such records...\n')
if (!askYesNo('Proceed? (y/n)?')) stop()



Expand Down

0 comments on commit 7cffa02

Please sign in to comment.