-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Sungchan Oh
committed
Jul 11, 2024
1 parent
7cffa02
commit c26f5a9
Showing
1 changed file
with
133 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,133 @@ | ||
| library(dplyr) | ||
| library(reshape2) | ||
| library(Rtsne) | ||
|
|
||
|
|
||
|
|
||
# ---- Configuration ----------------------------------------------------------

# Input CSV files (all three are read with read.csv() further down)
path.rgb.long <- "./df_rgb_long.csv"
path.hsi.long <- "./df_hsi_long.csv"
path.rpt <- "./rpt_no_public.csv"

# Varieties to leave out of the analysis; use c() to keep every variety
exclude.variety <- c("P1", "P2", "P3", "P4")

## Number of phenotypes demonstrating highest to n-th highest repeatability
## across all TREATMENT and GROWTH_STAGE (currently disabled)
#n <- 30

# Analysis variant selector; only case 1 is defined in this script
case <- 1

# Columns that are merged into `variable.concat` and then removed
drops.1 <- c("View", "frame_nr", "variable")

# Identifier columns removed before clustering; defined for case 1 only
if (case == 1) {
  drops.2 <- c(
    "EXP.ID", "POT_BARCODE", "TREATMENT", "DFP", "GROWTH_STAGE"
  )
}
|
|
||
|
|
||
|
|
||
# ---- Load, filter, combine and reshape --------------------------------------

# Read the long-format RGB and HSI tables and the repeatability table.
# `df.rpt` is loaded here but not referenced again in this script.
message("Loading data...")
df.rgb <- read.csv(file = path.rgb.long)
df.hsi <- read.csv(file = path.hsi.long)
df.rpt <- read.csv(file = path.rpt)

# Drop rows belonging to excluded varieties (skipped when the list is empty)
if (length(exclude.variety) > 0) {
  df.rgb <- dplyr::filter(df.rgb, !(VARIETY %in% exclude.variety))
  df.hsi <- dplyr::filter(df.hsi, !(VARIETY %in% exclude.variety))
}

# Stack RGB and HSI measurements into a single long table
df <- rbind(df.rgb, df.hsi)

# Build one identifier per measured variable: <View>_<frame_nr>_<variable>
df$variable.concat <- paste(df$View, df$frame_nr, df$variable, sep = "_")

# The three source columns are now redundant
keep.col <- !(names(df) %in% drops.1)
df <- df[, keep.col]

# Long -> wide: one row per pot/time point, one column per variable.
# NOTE(review): if the id columns plus variable.concat do not identify a
# single value, dcast() falls back to counting rows (length) -- confirm the
# keys are unique in the input data.
id.cols <- c(
  "EXP.ID", "POT_BARCODE", "TREATMENT", "VARIETY", "DFP", "GROWTH_STAGE"
)
form <- paste(paste(id.cols, collapse = "+"), "~ variable.concat")
df <- reshape2::dcast(df, as.formula(form), value.var = "value")
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
# ---- t-SNE plots per TREATMENT x GROWTH_STAGE -------------------------------
#
# For every (treatment, growth stage) subset of `df`, an 80 % random sample of
# the rows is embedded with t-SNE for a grid of perplexity / max_iter values.
# Each embedding is written to "./tsne_<treatment>_<stage>_<pp>_<iter>.png",
# with points drawn as VARIETY labels coloured per variety.
#
# Reads from the enclosing script: `df`, `drops.2`, `case`.

# Preferred label order (A-Z, then AA-NN), originally hard-coded for the "byr"
# data set. Varieties not in this list are appended in sorted order instead of
# being turned into NA.
variety.order <- c(LETTERS, paste0(LETTERS[1:14], LETTERS[1:14]))

perplexities <- c(5, 10, 20, 40)  ## pp in [5,50]
max.iters <- c(10, 15, 20, 30, 40, 50, 100, 200, 500, 1000, 2000, 5000)

for (treatment in unique(df$TREATMENT)) {
  for (growth.stage in unique(df$GROWTH_STAGE)) {
    print(paste0("Generating plots for ",
                 treatment, ", ", growth.stage, " case..."))

    # Subset data by treatment and growth stage
    df.temp <- df[which(df$TREATMENT == treatment &
                          df$GROWTH_STAGE == growth.stage), ]

    # Remove unnecessary columns for clustering
    df.temp <- df.temp[, !(names(df.temp) %in% drops.2), drop = FALSE]

    # Select complete columns (no NaN, infinite) for analysis.
    # drop = FALSE keeps a data frame even if a single column survives.
    df.temp <- do.call(data.frame,
                       lapply(df.temp,
                              function(x) replace(x, is.infinite(x), NA)))
    df.temp <- df.temp[, colSums(is.na(df.temp)) == 0, drop = FALSE]
    if (nrow(df.temp) < 5) next

    if (case == 1) {

      # Need the label column plus at least one complete feature column
      feature.cols <- setdiff(names(df.temp), "VARIETY")
      if (!("VARIETY" %in% names(df.temp)) || length(feature.cols) == 0) {
        message("Skipping ", treatment, ", ", growth.stage,
                ": no complete VARIETY or feature columns.")
        next
      }

      # Set label as factor (VARIETY), restricted to the varieties present
      present <- unique(as.character(df.temp$VARIETY))
      variety.levels <- c(intersect(variety.order, present),
                          sort(setdiff(present, variety.order)))
      df.temp$VARIETY <- factor(df.temp$VARIETY, levels = variety.levels)

      # Get training data (80 % of rows, fixed seed for reproducibility)
      num.train <- floor(0.8 * nrow(df.temp))
      set.seed(1)
      rows <- sample(seq_len(nrow(df.temp)), num.train)
      train <- df.temp[rows, , drop = FALSE]

      # One colour per variety, keyed by variety name. The lookup must use
      # character names: indexing with a factor would use its integer codes.
      colors <- rainbow(length(variety.levels))
      names(colors) <- variety.levels
      point.labels <- as.character(train$VARIETY)
      point.colors <- colors[point.labels]

      # T-SNE
      for (pp in perplexities) {

        # Rtsne stops with an error unless nrow - 1 >= 3 * perplexity
        if (nrow(train) - 1 < 3 * pp) {
          message("Skipping perplexity ", pp, " for ", treatment, ", ",
                  growth.stage, ": only ", nrow(train), " training rows.")
          next
        }

        for (iter in max.iters) {
          tsne <- Rtsne(train[, feature.cols, drop = FALSE],
                        dims = 2,
                        perplexity = pp,
                        verbose = FALSE,
                        max_iter = iter)

          # Visualize clusters
          png(paste0("./tsne_", treatment, "_", growth.stage, "_",
                     pp, "_", iter, ".png"))
          par(mgp = c(2.5, 1, 0))
          plot(tsne$Y, type = "n",
               main = paste("tSNE", treatment, growth.stage, pp, iter),
               xlab = "tSNE dimension 1",
               ylab = "tSNE dimension 2",
               cex.main = 2, cex.lab = 1.5)
          text(tsne$Y, labels = point.labels, col = point.colors)
          dev.off()
        }
      }
    }
  }
}
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
| # EOF |