Skip to content

Commit

Permalink
Ensembl ID to gene name
Browse files Browse the repository at this point in the history
  • Loading branch information
Zhongli Jiang committed May 1, 2022
1 parent 0ea5df5 commit 23401c4
Showing 1 changed file with 5 additions and 1 deletion.
6 changes: 5 additions & 1 deletion script/adj/protein_coding_gtex.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,10 @@ names(POS) <-c("chr","start","end","gene")
# Keep only the GTF rows whose gene_id appears in column 4 of POS.
gtf_cut <- gtf[which(gtf$gene_id %in% as.matrix(POS[, 4])), ]
# Flag rows whose column 12 equals "protein_coding" and whose column 7 equals
# "gene".
# NOTE(review): presumably column 12 is the gene biotype and column 7 the
# feature type -- confirm against how `gtf` was loaded.
idx_pc <- (gtf_cut[ ,12]=="protein_coding")&(gtf_cut[ ,7]=="gene")

# NOTE(review): this assignment is overwritten by the gene-name version below
# before POS is read again, so it does not affect the final POS.
POS <- unique(gtf_cut[idx_pc, c(10, 1, 2, 3)])
# Previous version, keyed by Ensembl ID (column 10), kept for reference:
#POS <- unique(gtf_cut[idx_pc, c(10, 1, 2, 3)])
# Current version, keyed by gene name (column 13). Columns 1-3 are carried
# along and renamed to chr/start/end below; duplicated rows are dropped.
POS <- unique(gtf_cut[idx_pc, c(13, 1, 2, 3)])
names(POS) <- c("gene","chr","start","end")
## 16762 protein coding genes

Expand All @@ -30,6 +33,7 @@ distance=POS$end-POS$start
# Positions whose `distance` is at most 2.3e6. `which()` returns integer
# positions and leaves out any comparison that evaluates to NA.
idx <- which(distance <= 2.3e6)

# Apply the same selection to the columns of both data objects.
# NOTE(review): presumably their columns line up one-to-one with the rows of
# POS -- verify against the code that builds them.
data_rmpc <- data_rmpc[, idx]
data <- data[, idx]

# Apply the selection to the rows of the gene table (16761 genes left).
POS <- POS[idx, ]
# Prefix the values in column 2 (the "chr" column) with "chr".
POS[, 2] <- paste0("chr", POS[, 2])

Expand Down

0 comments on commit 23401c4

Please sign in to comment.