forked from SuiYinG2000/Signature-Correlation
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathGSEA_GeneSets_ID2Symbol.Rmd
141 lines (99 loc) · 4.01 KB
/
GSEA_GeneSets_ID2Symbol.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
---
title: "GSEA_GeneSets_Normalize2Symbol"
author: "Botao Wang"
date: "2022-09-26"
output: html_document
params:
  workdir: "E:\\PostGraduate\\Botao\\Code_Verification\\SignatureGenes_GO-PW_Correlation"
  libdir: "E:\\Code_Example\\R\\GeneSignature\\GeneSignature-Pathways_Correlation\\lib"
  resultsdir: ".\\results\\"
  picdir: ".\\pics\\"
  GOGeneset: ".\\data\\c5.go.v2022.1.Hs.entrez.gmt"
  PWGeneset: ".\\data\\c2.cp.v2022.1.Hs.entrez.gmt"
  GeneNameAnno: ".\\data\\Homo_sapiens.gene_info.csv"
---
```{r setup, include=FALSE}
# Setup chunk: load knitr, configure chunk output, point knitr at the project
# directory and load the project's helper functions.

# rm(list = ls())
# .rs.restartR()
library("knitr")
# library("limma")
# library("ComplexHeatmap")
# library("circlize")

# Show the code alongside its results in the rendered HTML.
knitr::opts_chunk$set(echo = TRUE)

# Fail early with a clear message: every data/result path in the parameters
# is relative to this directory, so nothing below can work without it.
if (is.null(params$workdir) || !dir.exists(params$workdir)) {
  stop(
    "Working directory does not exist: ", params$workdir,
    call. = FALSE
  )
}

# Use the project directory as the working directory for the chunks.
# NOTE(review): per the knitr documentation a root.dir change applies to the
# chunks that follow, so the getwd() below may still report the old directory.
knitr::opts_knit$set(root.dir = params$workdir)
getwd()

# Load the project helper functions (presumably including readGMT(), which the
# next chunk calls - confirm against the sourced file).
# The parameter is spelled out in full: `params$lib` only resolved to `libdir`
# through `$` partial matching, which breaks silently as soon as another
# parameter starting with "lib" is added.
source(
  file.path(params$libdir, "GeneSignature-Pathways_Correlation_Function.R")
)
```
## Prepare data
```{r Preparation}
# Read in the GO and pathway gene sets from their GMT files.
# readGMT() is not defined in this document; it is presumably provided by the
# helper file sourced in the setup chunk.
goGeneSet <- readGMT(params$GOGeneset)
pathwayGeneSet <- readGMT(params$PWGeneset)

# Read in the gene name annotation table. Column names are kept exactly as
# they appear in the CSV (check.names = FALSE; spelled out because `F` is an
# ordinary variable that can be reassigned).
geneNameAnno <- read.csv(params$GeneNameAnno, check.names = FALSE)

# The conversion step looks IDs up in `GeneID` and reads names from `Symbol`,
# so stop here with a clear message if either column is missing.
missingAnnoCols <- setdiff(c("GeneID", "Symbol"), names(geneNameAnno))
if (length(missingAnnoCols) > 0) {
  stop(
    "Gene annotation file lacks required column(s): ",
    paste(missingAnnoCols, collapse = ", "),
    call. = FALSE
  )
}
```
## Gene Name Conversion
```{r GeneName Conversion}
#===============================================================================
#---------------------------- Gene ID Conversion -------------------------------
#===============================================================================

# Replace every gene ID in a list of gene sets by its gene symbol.
#
# Arguments:
#   gene_sets  list with one vector of gene IDs per gene set.
#   anno       data frame with the columns `GeneID` and `Symbol`.
#   label      text inserted into the completion message ("GO", "Pathway").
#
# Returns `gene_sets` with each ID replaced by the `Symbol` of the first
# annotation row whose `GeneID` equals it. IDs are compared as character
# strings, which is how the previous `==` comparison behaved whenever the
# two sides had different types.
#
# IDs without a usable annotation entry are left unchanged and reported in a
# warning. The per-gene `which()` lookup used before returned integer(0) for
# such IDs, which made the element assignment fail and aborted the whole run.
convert_ids_to_symbols <- function(gene_sets, anno, label) {
  message("提示:正在将ID统一规范为Symbol:")

  anno_ids <- as.character(anno$GeneID)
  anno_symbols <- as.character(anno$Symbol)

  converted <- gene_sets
  n_sets <- length(gene_sets)
  n_converted <- 0L
  n_unmapped <- 0L

  progress_bar <- txtProgressBar(style = 3)
  # seq_along() instead of 1:length(): an empty list yields no iterations.
  for (i in seq_along(gene_sets)) {
    genes <- gene_sets[[i]]
    # One vectorised lookup per gene set instead of one table scan per gene.
    symbols <- anno_symbols[match(as.character(genes), anno_ids)]
    mapped <- !is.na(symbols)
    genes[mapped] <- symbols[mapped]
    converted[[i]] <- genes

    n_converted <- n_converted + sum(mapped)
    n_unmapped <- n_unmapped + sum(!mapped)
    setTxtProgressBar(progress_bar, i / n_sets)
  }
  close(progress_bar)

  if (n_unmapped > 0) {
    warning(
      n_unmapped, " gene ID(s) in the ", label,
      " gene sets have no symbol in the annotation and were left unchanged",
      call. = FALSE
    )
  }
  # Report the real number of gene sets; the old counter started at 1 and was
  # incremented after every set, so it printed one more than were processed.
  message(paste(
    paste0("完成!", label, "共转换了"), n_sets,
    "个基因集,共转换了", n_converted, "个基因ID"
  ))
  converted
}

# GO Gene-Sets ID Convert to Symbol
#===============================================================================
goGeneSet_new <- convert_ids_to_symbols(goGeneSet, geneNameAnno, "GO")

# Pathway Gene-Sets ID Convert to Symbol
#===============================================================================
pathwayGeneSet_new <- convert_ids_to_symbols(
  pathwayGeneSet, geneNameAnno, "Pathway"
)
```
```{r save result}
# Write a list of gene sets to `path`, one gene set per line: the element's
# name followed by its genes, all separated by tabs.
#
# Arguments:
#   gene_sets  list of gene vectors, as built in the conversion chunk.
#   path       output file; it is overwritten if it already exists.
#
# Returns `path` invisibly.
#
# The lines are built without touching `gene_sets`. The previous loop appended
# "\n" to the last gene of every set in the in-memory list, so the objects
# were corrupted after saving and every re-run of the chunk stacked another
# newline onto them. An empty gene set also lost its line terminator.
write_gene_sets <- function(gene_sets, path) {
  # Create the results directory on first use instead of failing on open.
  dir.create(dirname(path), recursive = TRUE, showWarnings = FALSE)

  gmt_lines <- vapply(
    seq_along(gene_sets),
    function(i) {
      paste(c(names(gene_sets)[i], gene_sets[[i]]), collapse = "\t")
    },
    character(1)
  )
  # A single write replaces one append-mode file open per gene set.
  writeLines(gmt_lines, con = path)
  invisible(path)
}

# save go Gene set
#===============================================================================
goGeneSet_normalized_file <- file.path(
  params$workdir, params$resultsdir, "goGeneSet_normalized.gmt"
)
write_gene_sets(goGeneSet_new, goGeneSet_normalized_file)

# save pathway Gene set
#===============================================================================
pathwayGeneSet_normalized_file <- file.path(
  params$workdir, params$resultsdir, "pathwayGeneSet_normalized.gmt"
)
write_gene_sets(pathwayGeneSet_new, pathwayGeneSet_normalized_file)
```