Skip to content

Commit

Permalink
Incorporated XRAY data
Browse files Browse the repository at this point in the history
  • Loading branch information
Sungchan Oh committed Jul 30, 2024
1 parent c26f5a9 commit 54df941
Show file tree
Hide file tree
Showing 5 changed files with 185 additions and 38 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
# Output figures
*.png

# Animated gif
*.gif

# History files
.Rhistory
.Rapp.history
Expand Down
46 changes: 33 additions & 13 deletions calculate_repeatability.r
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,14 @@ library(rptR)
# Input long format data
path.rgb.long <- ("./df_rgb_long.csv")
path.hsi.long <- ("./df_hsi_long.csv")
path.xray.long <- ("./df_xray_long.csv")
path.rpt <- ("./rpt_no_public.csv")

# Define varieties that is excluded for repeatability measurement
exclude.variety <-c("P1", "P2", "P3", "P4")

# Formula used to evaluate repeatability
case <- 1
case <- 1 # Currently only one case exists, more cases to be added

# Set formula based on case number
if (case==1){
Expand All @@ -34,17 +35,24 @@ if (case==1){

# Load RGB and HSI data in long format
message("Loading data...")
df.rgb <- read.csv(path.rgb.long)
df.hsi <- read.csv(path.hsi.long)
df.rgb <- read.csv(path.rgb.long)
df.hsi <- read.csv(path.hsi.long)
df.xray <- read.csv(path.xray.long)

# Add sensor column for clarity
df.rgb$sensor <- 'RGB'
df.hsi$sensor <- 'HSI'
df.xray$sensor <- 'XRAY'

# Exclude varieties, as needed
if (length(exclude.variety)>0){
df.rgb <- df.rgb %>% filter(!VARIETY %in% exclude.variety)
df.hsi <- df.hsi %>% filter(!VARIETY %in% exclude.variety)
df.rgb <- df.rgb %>% filter(!VARIETY %in% exclude.variety)
df.hsi <- df.hsi %>% filter(!VARIETY %in% exclude.variety)
df.xray <- df.xray %>% filter(!VARIETY %in% exclude.variety)
}

# Combine RGB and HSI data
df <- rbind(df.rgb, df.hsi)
# Combine RGB, HSI, and XRAY data
df <- do.call("rbind", list(df.rgb, df.hsi, df.xray))

# Create a dataframe to record repeatability
message("Finding unique conditions to evaluate repeatability...")
Expand All @@ -53,7 +61,8 @@ if (case==1){
'GROWTH_STAGE',
'View',
'frame_nr',
'variable')])
'variable',
'sensor')])
}

# Sort output dataframe. This line can help increase data search efficiency
Expand All @@ -73,31 +82,42 @@ if (case==1){
# Calculate repeatability
message('Calculating repeatability...')
for(i in 1:nrow(df.rpt)){
if (i %% 100 == 0){
message(paste0("(", i, "/", nrow(df.rpt), ")", sep=""))
}

# Get subset of data under specific condition to evaluate repeatability
row <- df.rpt[i,]

# Create a subset of data, this can improve data search efficiency
att.search <- row[1,1]

# Get reduced dataframe if TREATMENT is new or updated
if(i==1){
df.search <- df[which(df[ ,colnames(row[1])]==att.search), ]
} else if (i>1 & att.search!=att.prev){
df.search <- df[which(df[ ,colnames(row[1])]==att.search), ]
}

# Subset data by condition by which repeatability is measured
message(paste0("(", i, "/", nrow(df.rpt), ")", sep=""))
if (case==1){
df.temp <- df.search[which(df.search$GROWTH_STAGE==row$GROWTH_STAGE &
df.search$View==row$View &
df.search$frame_nr==row$frame_nr &
df.search$variable==row$variable), ]
}

# Count NA values and continue if excessive
# Check if there are too many missing values
num.na <- sum(is.na(df.temp$value))
if (num.na/nrow(df.temp) > 0.3) next
if (nrow(df.temp) == 0){
att.prev <- att.search
message(paste0("(", i, "/", nrow(df.rpt), "): No data", sep=""))
next
}
if (num.na/nrow(df.temp) > 0.3){
att.prev <- att.search
message(paste0("(", i, "/", nrow(df.rpt), "): Insufficient data", sep=""))
next
}

# Select valid data
df.temp <- df.temp[!is.na(df.temp$value) & is.finite(df.temp$value), ]
Expand All @@ -123,7 +143,7 @@ for(i in 1:nrow(df.rpt)){

# Export repeatability table
message('\nExporting repeatability measurements...')
write.csv(apply(df.rpt, 2, as.character), path.rpt, row.names=F)
write.csv(apply(df.rpt, 2, as.character), path.rpt, row.names=F, na='NA')



Expand Down
100 changes: 75 additions & 25 deletions display_cluster.r
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
library(dplyr)
library(reshape2)
library(Rtsne)
library(magick)



Expand All @@ -12,23 +13,43 @@ path.rpt <- ("./rpt_no_public.csv")
# List of varieties to be excluded, if any; otherwise, exclude.variety as "c()"
exclude.variety <-c("P1", "P2", "P3", "P4")

## Number of phenotypes demonstrating highest to n-th highest repeatability
## across all TREATMENT and GROWTH_STAGE
#n <- 30


case <- 1

drops.1 <- c("View", "frame_nr", "variable")
# Number of phenotypes for analysis
# phenotypes with 1st to n-th highest phenotypes are used
n.rpt <- 30

# Clustering (tSNE) parameters
list.pp <- c(5, 10, 20, 40, 50) # Perplexity [5, 50]
list.iter <- seq(100, 500, by=5) # Number of iteration [,1000]



case <- 1

# Columns to delete (1)
drops.1 <- c("View", "frame_nr", "variable")

if (case==1){
# Columns to delete (2)
drops.2 <- c("EXP.ID", "POT_BARCODE", "TREATMENT", "DFP", "GROWTH_STAGE")
# Define the order of variety for displaying
variety.order = c("A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L",
"M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X",
"Y", "Z", "AA", "BB", "CC", "DD", "EE", "FF", "GG", "HH",
"II", "JJ", "KK", "LL", "MM", "NN")
}










# Load RGB and HSI data in long format
message("Loading data...")
df.rgb <- read.csv(path.rgb.long)
Expand All @@ -44,19 +65,21 @@ if (length(exclude.variety)>0){
# Combine RGB and HSI data
df <- rbind(df.rgb, df.hsi)

# Rename variables
# Create a new variable column by concatenating View, frame_nr, variable
df$variable.concat <- paste(df$View, df$frame_nr, df$variable, sep="_")

# Remove columns used to define variables
# Remove View, frame_nr, variable column
df <- df[ , !(names(df) %in% drops.1)]

# Reshape data
form <- paste0("EXP.ID+POT_BARCODE+TREATMENT+VARIETY",
"+DFP+GROWTH_STAGE ~ variable.concat")
df <- reshape2::dcast(df, as.formula(form), value.var="value")



# Order varieties for displaying
if (!is.null(variety.order)){
df$VARIETY <- factor(df$VARIETY, levels = variety.order)
}



Expand All @@ -74,23 +97,36 @@ for (treatment in unique(df$TREATMENT)){
# Remove unnecessary columns for clustering
df.temp <- df.temp[ , !(names(df.temp) %in% drops.2)]

# Select complete columns (no NaN, infinite) for analysis
df.temp <- do.call(data.frame,
lapply(df.temp,
function(x) replace(x, is.infinite(x),NA)))
# Subset repeatability data by treatment and growth stage
df.rpt.temp <- df.rpt[which(df.rpt$TREATMENT==treatment &
df.rpt$GROWTH_STAGE==growth.stage), ]
df.rpt.temp <- df.rpt.temp[order(df.rpt.temp$rpt, decreasing=T), ]
df.rpt.temp <- df.rpt.temp[1:n.rpt, ]

# Create a new variable column by concatenating View, frame_nr, variable
df.rpt.temp$variable.concat <- paste(df.rpt.temp$View,
df.rpt.temp$frame_nr,
df.rpt.temp$variable, sep="_")

# Select variables (phenotypes) with high repeatability
if (case==1){
df.temp <- df.temp[, c("VARIETY", df.rpt.temp$variable.concat)]
}

# Select complete columns for analysis (exclude NaN, Inf, -Inf)
df.temp[sapply(df.temp, is.infinite)] <- NA
df.temp <- df.temp[ , colSums(is.na(df.temp))==0]
if (nrow(df.temp)<5) next

# Scale data
for (c in 2:ncol(df.temp)) df.temp[, c] <- scale(df.temp[,c])


if (case==1){

if (case==1){
# Set label as factor (VARIETY)
df.temp$VARIETY <- as.factor(df.temp$VARIETY)

# TODO only for byr
df.temp$VARIETY <- factor(df.temp$VARIETY, levels = c("A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "AA", "BB", "CC", "DD", "EE", "FF", "GG", "HH", "II", "JJ", "KK", "LL", "MM", "NN"))

# Get training data
num.train <- 0.8 * nrow(df.temp)
set.seed(1)
Expand All @@ -100,17 +136,19 @@ for (treatment in unique(df$TREATMENT)){
# T-SNE
colors = rainbow(length(unique(df.temp$VARIETY)))
names(colors) = unique(df.temp$VARIETY)
for (pp in c(5, 10, 20, 40)){ ## pp in [5,50]
for (iter in c(10, 15, 20, 30, 40, 50, 100, 200, 500, 1000, 2000, 5000)){
for (pp in list.pp){
list.img <- c()
for (iter in list.iter){
set.seed(1)
tsne <- Rtsne(train[,-1],
dims=2,
perplexity=pp,
verbose=F,
max_iter=iter)
dims=2, perplexity=pp, max_iter=iter,
verbose=F, check_duplicates = FALSE)

# Visuzlize clusters
png(paste0("./tsne_", treatment, "_", growth.stage, "_",
pp, "_", iter, ".png"))
fn <- paste0(paste("./tsne", treatment, growth.stage,
pp, iter, sep="_"), ".png")
list.img <- c(list.img, fn)
png(fn)
par(mgp=c(2.5,1,0))
plot(tsne$Y, t='n',
main=paste("tSNE", treatment, growth.stage, pp, iter),
Expand All @@ -120,6 +158,18 @@ for (treatment in unique(df$TREATMENT)){
text(tsne$Y, labels=train$VARIETY, col=colors[train$VARIETY])
dev.off()
}

# Create animated gif file
imgs <- lapply(list.img, image_read)
joined.img <- image_join(imgs)
animated.img <- image_animate(joined.img, fps = 10)
fn.gif <- paste0(paste("./tsne", treatment, growth.stage, pp,
sep="_"), ".gif")
image_write(image = animated.img, path=fn.gif)
message(paste0("Exported ", fn.gif))

# Delete png files
for (f in list.img) unlink(f)
}
}
}
Expand Down
File renamed without changes.
74 changes: 74 additions & 0 deletions reshape_xray.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@

library(dplyr)
path.xray.out <- './df_xray_long.csv'

# Columns to be retrieved from rgb data and added to xray data
col.add <- c('POT_BARCODE', 'TREATMENT', 'VARIETY', 'DFP', 'GROWTH_STAGE')

# File path to rgb dataframe to retrieve TREATMENT, VARIETY, DFP, GROWTH_STAGE
path.rgb.long <- './df_rgb_long.csv'

# File path pattern containing xray results
pattern <- paste0('/depot/smarterag/data/XRAY/',
'Experiment_Bayer2/',
'Experiment_*/',
'kimimaro_out_200/',
'*.txt')

# Get file paths containing individual phenotyping results
files <- Sys.glob(file.path(pattern))

# Open individual files and merge them into a dataframe
xray <- data.frame()
for (f in files){
tmp <- read.csv(f)
xray <- rbind(xray, tmp)
}




# Change column names
names(xray)[names(xray)=='experiment_ID'] <- 'EXP ID'
names(xray)[names(xray)=='plant_ID'] <- 'POT_BARCODE'
names(xray)[names(xray)=='RESOLUTION'] <- 'View'

# Remove unnecessary columns
xray <- subset(xray, select=-c(pot_height))




# Load rgb dataframe (long format table)
rgb.long <- read.csv(path.rgb.long)

# Get attributes per each POT_BARCODE (TREATMENT, VARIEY, DFP, GRWOTH_STAGE)
attributes <- rgb.long %>%
group_by(POT_BARCODE) %>%
summarise(TREATMENT=names(which.max(table(TREATMENT))),
VARIETY=names(which.max(table(VARIETY))),
DFP=names(which.max(table(DFP))),
GROWTH_STAGE=names(which.max(table(GROWTH_STAGE)))) %>%

ungroup()

# Add more attributes for consistency with rgb and hsi data
#attributes$SCAN_TIME <- -1
#attributes$SCAN_DATE <- -1
attributes$frame_nr <- -1

# Add the attributes to xray data
xray <- left_join(xray, attributes, by = 'POT_BARCODE')

# Reshape xray dataframe
id.vars.xray <- c("EXP ID", "POT_BARCODE", "VARIETY", "TREATMENT", "DFP",
"GROWTH_STAGE", "View", "frame_nr")

xray.long <- reshape2::melt(xray, id.vars = id.vars.xray,
variable.name = "variable")

# Export dataframe
write.csv(xray.long, path.xray.out, row.names=F)


# EOF

0 comments on commit 54df941

Please sign in to comment.