Skip to content

Commit

Permalink
Display cluster
Browse files Browse the repository at this point in the history
  • Loading branch information
Sungchan Oh committed Jul 11, 2024
1 parent 7cffa02 commit c26f5a9
Showing 1 changed file with 133 additions and 0 deletions.
133 changes: 133 additions & 0 deletions display_cluster.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
library(dplyr)
library(reshape2)
library(Rtsne)



# Paths to the CSV files read by this script
path.rgb.long <- "./df_rgb_long.csv"
path.hsi.long <- "./df_hsi_long.csv"
path.rpt <- "./rpt_no_public.csv"

# Varieties to exclude from the analysis; set to c() to keep every variety
exclude.variety <- c("P1", "P2", "P3", "P4")

## Number of phenotypes demonstrating highest to n-th highest repeatability
## across all TREATMENT and GROWTH_STAGE
#n <- 30

# Analysis case selector; only case 1 is implemented in this script
case <- 1

# Columns that are pasted together into variable.concat and then dropped
drops.1 <- c("View", "frame_nr", "variable")

# Columns removed from each subset before clustering. Fail fast on an
# unsupported case instead of hitting "object 'drops.2' not found" later.
if (case == 1) {
  drops.2 <- c("EXP.ID", "POT_BARCODE", "TREATMENT", "DFP", "GROWTH_STAGE")
} else {
  stop("Unsupported case: ", case, " (only case 1 is implemented)",
       call. = FALSE)
}



# Load RGB and HSI data in long format
message("Loading data...")
df.rgb <- read.csv(path.rgb.long)
df.hsi <- read.csv(path.hsi.long)
# NOTE(review): df.rpt is read here but not referenced elsewhere in the
# visible script -- confirm whether it is still needed.
df.rpt <- read.csv(path.rpt)

# Exclude varieties, as needed
if (length(exclude.variety) > 0) {
  df.rgb <- df.rgb %>% filter(!VARIETY %in% exclude.variety)
  df.hsi <- df.hsi %>% filter(!VARIETY %in% exclude.variety)
}

# Combine RGB and HSI data (rbind requires both tables to share columns)
df <- rbind(df.rgb, df.hsi)

# Build one variable name per (View, frame_nr, variable) combination
df$variable.concat <- paste(df$View, df$frame_nr, df$variable, sep = "_")

# Remove columns used to define variables
df <- df[, !(names(df) %in% drops.1), drop = FALSE]

# Identifier columns that define one row of the wide table
id.cols <- c("EXP.ID", "POT_BARCODE", "TREATMENT", "VARIETY",
             "DFP", "GROWTH_STAGE")

# dcast() falls back to length() when several rows map to the same output
# cell, which would silently replace measurements with counts. Surface it.
if (anyDuplicated(df[, c(id.cols, "variable.concat"), drop = FALSE]) > 0) {
  warning("Duplicate rows for the same identifier/variable combination; ",
          "dcast() will aggregate them with length() instead of keeping ",
          "the measured values.", call. = FALSE)
}

# Reshape data from long to wide: one column per variable.concat
form <- paste0(paste(id.cols, collapse = "+"), " ~ variable.concat")
df <- reshape2::dcast(df, as.formula(form), value.var = "value")








# For every TREATMENT x GROWTH_STAGE combination, run t-SNE on a training
# subset of the wide table and write one labelled scatter plot (PNG) per
# (perplexity, max_iter) setting.
for (treatment in unique(df$TREATMENT)) {
  for (growth.stage in unique(df$GROWTH_STAGE)) {
    print(paste0("Generating plots for ",
                 treatment, ", ", growth.stage, " case..."))

    # Subset data by treatment and growth stage
    df.temp <- df[which(df$TREATMENT == treatment &
                          df$GROWTH_STAGE == growth.stage), ]

    # Remove unnecessary columns for clustering
    df.temp <- df.temp[, !(names(df.temp) %in% drops.2), drop = FALSE]

    # Select complete columns (no NA/NaN, infinite) for analysis
    df.temp <- do.call(data.frame,
                       lapply(df.temp,
                              function(x) replace(x, is.infinite(x), NA)))
    # drop = FALSE keeps a data frame even if a single column survives
    df.temp <- df.temp[, colSums(is.na(df.temp)) == 0, drop = FALSE]
    if (nrow(df.temp) < 5) next

    if (case == 1) {

      # The label column is removed above when it contains NA; without a
      # label or without any feature column there is nothing to plot.
      if (!("VARIETY" %in% names(df.temp)) || ncol(df.temp) < 2) {
        warning("Skipping ", treatment, ", ", growth.stage,
                ": no complete VARIETY label or feature columns",
                call. = FALSE)
        next
      }

      # Set label as factor (VARIETY).
      # TODO only for byr: preferred ordering of the variety codes
      known.levels <- c("A", "B", "C", "D", "E", "F", "G", "H", "I", "J",
                        "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T",
                        "U", "V", "W", "X", "Y", "Z", "AA", "BB", "CC", "DD",
                        "EE", "FF", "GG", "HH", "II", "JJ", "KK", "LL", "MM",
                        "NN")
      present <- unique(as.character(df.temp$VARIETY))
      # Keep the preferred order for known codes and append any other code
      # so that unexpected varieties are not turned into NA labels.
      variety.levels <- c(known.levels[known.levels %in% present],
                          sort(setdiff(present, known.levels)))
      df.temp$VARIETY <- factor(as.character(df.temp$VARIETY),
                                levels = variety.levels)

      # Get training data (80% of the rows, fixed seed for reproducibility)
      num.train <- floor(0.8 * nrow(df.temp))
      set.seed(1)
      rows <- sample(seq_len(nrow(df.temp)), num.train)
      train <- df.temp[rows, , drop = FALSE]

      # Feature columns: everything except the label, selected by name
      features <- train[, names(train) != "VARIETY", drop = FALSE]

      # One colour per variety, keyed by variety name
      colors <- setNames(rainbow(length(variety.levels)), variety.levels)

      # T-SNE
      for (pp in c(5, 10, 20, 40)) { ## pp in [5,50]
        # Rtsne() stops when nrow(X) - 1 < 3 * perplexity; skip such
        # settings instead of aborting the whole run.
        if (nrow(features) - 1 < 3 * pp) {
          message("Skipping perplexity ", pp, ": only ", nrow(features),
                  " training rows")
          next
        }
        for (iter in c(10, 15, 20, 30, 40, 50, 100, 200,
                       500, 1000, 2000, 5000)) {
          tsne <- Rtsne(features,
                        dims = 2,
                        perplexity = pp,
                        verbose = FALSE,
                        max_iter = iter)

          # Visualize clusters
          png(paste0("./tsne_", treatment, "_", growth.stage, "_",
                     pp, "_", iter, ".png"))
          # Always close the device, even if plotting fails
          tryCatch({
            par(mgp = c(2.5, 1, 0))
            plot(tsne$Y, type = "n",
                 main = paste("tSNE", treatment, growth.stage, pp, iter),
                 xlab = "tSNE dimension 1",
                 ylab = "tSNE dimension 2",
                 cex.main = 2, cex.lab = 1.5)
            # Index colours by variety NAME: indexing with the factor itself
            # would use its integer level codes and pick the wrong colours.
            text(tsne$Y, labels = train$VARIETY,
                 col = colors[as.character(train$VARIETY)])
          }, finally = dev.off())
        }
      }
    }
  }
}






# EOF

0 comments on commit c26f5a9

Please sign in to comment.