-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path12_gene_tad.py
executable file
·69 lines (51 loc) · 2.14 KB
/
12_gene_tad.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#!/usr/bin/python3
#
# This file is part of Progesterone pipeline.
#
# Progesterone pipeline is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Progesterone pipeline is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Progesterone pipeline. If not, see <https://www.gnu.org/licenses/>.
#
# The single input file is from endometrial microvascular endothelial cells,
# from the Job Dekker lab: https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE105710
# The (bed) file with TADs can be found at
# https://www.encodeproject.org/experiments/ENCSR551IPY/
# (under processed data).
from utils.utils import *
from utils.mysqldb import *
#########################################
def main(gene_name: str = "Hand2", assembly: str = "hg19",
         external_exp_id: str = "ENCFF633ORE",
         conf_file: str = "/home/ivana/.mysql_conf") -> None:
    """Print the coordinates of a gene and of the TAD region found for it.

    The defaults reproduce the original hard-coded run, so calling
    ``main()`` with no arguments behaves as before.

    Args:
        gene_name: name of the gene passed to ``get_gene_coords``.
        assembly: genome assembly label passed to ``get_gene_coords`` and
            echoed in the output.
        external_exp_id: external id of the TAD data file, resolved to an
            xref id with ``get_xref_id``.
        conf_file: path to the MySQL configuration file handed to
            ``connect_to_mysql``.

    Raises:
        SystemExit: with status 1 when a prerequisite file is missing.
    """
    # Imported locally so the function does not depend on the star imports
    # at the top of the file happening to re-export these modules.
    import os
    import sys

    for prerequisite in [conf_file]:
        if os.path.exists(prerequisite):
            continue
        print(prerequisite, "not found")
        # Non-zero status, so that callers can tell the run failed.
        sys.exit(1)

    db = connect_to_mysql(conf_file)
    try:
        cursor = db.cursor()
        try:
            switch_to_db(cursor, 'progesterone')

            # find xref_id for the experimental data file
            tad_file_xref_id = get_xref_id(db, cursor, external_exp_id)

            # find gene coordinates
            [chromosome, strand, min_start, max_end] = get_gene_coords(
                db, cursor, gene_name, assembly)

            # finally, use that info to find the TAD
            [tad_start, tad_end] = get_tad_region(
                db, cursor, tad_file_xref_id, chromosome, min_start, max_end)
        finally:
            # Release the cursor even when one of the lookups raises.
            cursor.close()
    finally:
        # Likewise for the connection itself.
        db.close()

    print("{} {} {}:{}:{}-{}".format(gene_name, strand, assembly, chromosome,
                                     min_start, max_end))
    # The length is reported with both end coordinates counted (hence the +1).
    print("TAD containing %s region: %s:%s:%d-%d length %d" % (
        gene_name, assembly, chromosome,
        tad_start, tad_end, tad_end - tad_start + 1))
#########################################
# Run the lookup only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()