Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
DataExploration-AAPF/display_cluster.r
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
183 lines (127 sloc)
5.67 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(dplyr) | |
library(reshape2) | |
library(Rtsne) | |
library(magick) | |
# ---- Configuration ----

# Paths to the input data: long-format RGB and HSI measurements, and the
# per-phenotype repeatability table.
path.rgb.long <- "./df_rgb_long.csv"
path.hsi.long <- "./df_hsi_long.csv"
path.rpt <- "./rpt_no_public.csv"

# Varieties to exclude from the analysis; use c() to keep every variety.
exclude.variety <- c("P1", "P2", "P3", "P4")

# Number of phenotypes for analysis: the n.rpt phenotypes with the highest
# repeatability are used.
n.rpt <- 30

# Clustering (tSNE) parameters
list.pp <- c(5, 10, 20, 40, 50)     # Perplexity [5, 50]
list.iter <- seq(100, 500, by = 5)  # Number of iterations [, 1000]

# Analysis case selector; only case 1 is configured in this script.
case <- 1

# Columns to delete (1): merged into a single "variable.concat" key before
# the data are reshaped to wide format.
drops.1 <- c("View", "frame_nr", "variable")

# Neutral defaults so that the code further down, which reads these two
# objects unconditionally, does not fail with "object not found" when
# `case` is set to something other than 1.
drops.2 <- character(0)
variety.order <- NULL

if (case == 1) {
  # Columns to delete (2): identifier columns that must not enter clustering.
  drops.2 <- c("EXP.ID", "POT_BARCODE", "TREATMENT", "DFP", "GROWTH_STAGE")
  # Define the order of variety for displaying
  variety.order <- c("A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L",
                     "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X",
                     "Y", "Z", "AA", "BB", "CC", "DD", "EE", "FF", "GG", "HH",
                     "II", "JJ", "KK", "LL", "MM", "NN")
}
# ---- Load and reshape ----

# Load RGB and HSI data in long format, plus the repeatability table
message("Loading data...")
df.rgb <- read.csv(path.rgb.long)
df.hsi <- read.csv(path.hsi.long)
df.rpt <- read.csv(path.rpt)

# Exclude varieties, as needed
if (length(exclude.variety) > 0) {
  df.rgb <- df.rgb %>% filter(!VARIETY %in% exclude.variety)
  df.hsi <- df.hsi %>% filter(!VARIETY %in% exclude.variety)
}

# Combine RGB and HSI data (both tables must share the same columns)
df <- rbind(df.rgb, df.hsi)

# Create a new variable column by concatenating View, frame_nr, variable,
# then remove the three source columns
df$variable.concat <- paste(df$View, df$frame_nr, df$variable, sep = "_")
df <- df[, !(names(df) %in% drops.1), drop = FALSE]

# Reshape data to wide format: one row per pot/date, one column per
# concatenated variable
form <- paste0("EXP.ID+POT_BARCODE+TREATMENT+VARIETY",
               "+DFP+GROWTH_STAGE ~ variable.concat")
df <- reshape2::dcast(df, as.formula(form), value.var = "value")

# Order varieties for displaying.
# exists() guards against configurations that never define variety.order.
if (exists("variety.order") && !is.null(variety.order)) {
  # factor() maps every value that is not among `levels` to NA without any
  # notice, so report such varieties explicitly before converting.
  unlisted <- setdiff(unique(as.character(df$VARIETY)), variety.order)
  unlisted <- unlisted[!is.na(unlisted)]
  if (length(unlisted) > 0) {
    warning("Varieties missing from variety.order become NA: ",
            paste(unlisted, collapse = ", "), call. = FALSE)
  }
  df$VARIETY <- factor(df$VARIETY, levels = variety.order)
}
# ---- tSNE animations ----
#
# For every treatment x growth-stage combination:
#   1. subset the wide phenotype table and keep the n.rpt most repeatable
#      phenotypes (case 1),
#   2. drop phenotype columns that are incomplete or constant, z-score the rest,
#   3. sample 80% of the rows and run Rtsne for every perplexity in list.pp and
#      every iteration count in list.iter, writing one png per run,
#   4. join the pngs of each perplexity into an animated gif
#      (./tsne_<treatment>_<growth stage>_<perplexity>.gif) and delete the pngs.
for (treatment in unique(df$TREATMENT)) {
  for (growth.stage in unique(df$GROWTH_STAGE)) {
    print(paste0("Generating plots for ",
                 treatment, ", ", growth.stage, " case..."))

    # Subset data by treatment and growth stage
    in.group <- which(df$TREATMENT == treatment &
                        df$GROWTH_STAGE == growth.stage)
    df.temp <- df[in.group, , drop = FALSE]

    # Remove unnecessary columns for clustering
    df.temp <- df.temp[, !(names(df.temp) %in% drops.2), drop = FALSE]

    # Subset repeatability data by treatment and growth stage and keep the
    # most repeatable phenotypes. head() is used instead of 1:n.rpt because
    # indexing past the last row would append all-NA rows.
    rpt.group <- which(df.rpt$TREATMENT == treatment &
                         df.rpt$GROWTH_STAGE == growth.stage)
    df.rpt.temp <- df.rpt[rpt.group, , drop = FALSE]
    df.rpt.temp <- df.rpt.temp[order(df.rpt.temp$rpt, decreasing = TRUE), ,
                               drop = FALSE]
    df.rpt.temp <- head(df.rpt.temp, n.rpt)

    # Create a new variable column by concatenating View, frame_nr, variable
    df.rpt.temp$variable.concat <- paste(df.rpt.temp$View,
                                         df.rpt.temp$frame_nr,
                                         df.rpt.temp$variable, sep = "_")

    # Select variables (phenotypes) with high repeatability
    if (case == 1) {
      wanted <- df.rpt.temp$variable.concat
      absent <- setdiff(wanted, names(df.temp))
      if (length(absent) > 0) {
        message("Ignoring ", length(absent),
                " phenotype(s) not present in the image data: ",
                paste(absent, collapse = ", "))
      }
      df.temp <- df.temp[, c("VARIETY", intersect(wanted, names(df.temp))),
                         drop = FALSE]
    }

    # Select complete columns for analysis (exclude NA, NaN, Inf, -Inf).
    # Only phenotype columns are screened; the VARIETY label is always kept
    # so that it stays the first column.
    feature.cols <- setdiff(names(df.temp), "VARIETY")
    complete <- vapply(df.temp[feature.cols],
                       function(x) is.numeric(x) && all(is.finite(x)),
                       logical(1))
    feature.cols <- feature.cols[complete]
    df.temp <- df.temp[, c("VARIETY", feature.cols), drop = FALSE]

    if (nrow(df.temp) < 5) next

    # Constant columns would turn into NaN when scaled (division by sd = 0),
    # which Rtsne cannot process, so they are removed first.
    varying <- vapply(df.temp[feature.cols],
                      function(x) sd(x) > 0,
                      logical(1))
    feature.cols <- feature.cols[varying]
    if (length(feature.cols) == 0) {
      message("No usable phenotype columns for ", treatment, ", ",
              growth.stage, "; skipping.")
      next
    }
    df.temp <- df.temp[, c("VARIETY", feature.cols), drop = FALSE]

    # Scale data (z-score every phenotype column)
    df.temp[feature.cols] <- lapply(df.temp[feature.cols],
                                    function(x) as.numeric(scale(x)))

    if (case == 1) {
      # Set label as factor (VARIETY)
      df.temp$VARIETY <- as.factor(df.temp$VARIETY)

      # Get training data: a reproducible 80% sample of the rows
      num.train <- floor(0.8 * nrow(df.temp))
      set.seed(1)
      rows <- sample(seq_len(nrow(df.temp)), num.train)
      train <- df.temp[rows, , drop = FALSE]

      # One colour per variety actually present. The lookup goes through the
      # variety NAME: indexing with the factor itself would use its integer
      # codes, which do not line up with `colors` when levels are unused.
      varieties <- levels(droplevels(df.temp$VARIETY))
      colors <- rainbow(length(varieties))
      names(colors) <- varieties
      label.col <- colors[as.character(train$VARIETY)]

      # Rtsne requires 3 * perplexity <= nrow - 1
      max.pp <- (nrow(train) - 1) / 3

      # T-SNE
      for (pp in list.pp) {
        if (pp > max.pp) {
          message("Skipping perplexity ", pp, " for ", treatment, ", ",
                  growth.stage, ": too large for ", nrow(train),
                  " training rows.")
          next
        }

        list.img <- character(length(list.iter))
        for (i in seq_along(list.iter)) {
          iter <- list.iter[i]

          # Same seed for every run so frames differ only by iteration count
          set.seed(1)
          tsne <- Rtsne(train[, feature.cols, drop = FALSE],
                        dims = 2, perplexity = pp, max_iter = iter,
                        verbose = FALSE, check_duplicates = FALSE)

          # Visualize clusters
          fn <- paste0(paste("./tsne", treatment, growth.stage,
                             pp, iter, sep = "_"), ".png")
          list.img[i] <- fn
          png(fn)
          # Close the device even if plotting fails, so no device is leaked
          tryCatch({
            par(mgp = c(2.5, 1, 0))
            plot(tsne$Y, type = "n",
                 main = paste("tSNE", treatment, growth.stage, pp, iter),
                 xlab = "tSNE dimension 1",
                 ylab = "tSNE dimension 2",
                 cex.main = 2, cex.lab = 1.5)
            text(tsne$Y, labels = train$VARIETY, col = label.col)
          }, finally = dev.off())
        }

        # Create animated gif file
        imgs <- lapply(list.img, image_read)
        joined.img <- image_join(imgs)
        animated.img <- image_animate(joined.img, fps = 10)
        fn.gif <- paste0(paste("./tsne", treatment, growth.stage, pp,
                               sep = "_"), ".gif")
        image_write(image = animated.img, path = fn.gif)
        message(paste0("Exported ", fn.gif))

        # Delete png files
        unlink(list.img)
      }
    }
  }
}
# EOF