-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path01_chromosome_lengths_from_UCSC.py
executable file
·80 lines (64 loc) · 2.58 KB
/
01_chromosome_lengths_from_UCSC.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#!/usr/bin/python3
#
# This file is part of Progesterone pipeline.
#
# Progesterone pipeline is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Progesterone pipeline is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Progesterone pipeline. If not, see <https://www.gnu.org/licenses/>.
#
# mysql --user=genome --host=genome-mysql.cse.ucsc.edu -A
# -A skips auto rehash
from utils.mysqldb import *
import os
#########################################
def main():
    """Copy chromosome lengths from UCSC into the local 'progesterone' database.

    For each assembly (hg18, hg19, mm9) the ``chromInfo`` table is read from
    the UCSC MySQL server. Every chromosome whose name contains no underscore
    is handed to ``store_or_update`` for the local ``regions`` table as a
    region running from 1 to the chromosome size, together with the xref id
    that ``store_xref`` returns for the UCSC Genome Browser publication.

    Returns:
        True once the run is over -- including the case where a UCSC query
        failed and the remaining assemblies were skipped.

    Raises:
        SystemExit: with status 1 if either MySQL configuration file is
            missing.
    """
    # Local import keeps this function self-contained; closing() only
    # requires that the wrapped object has a close() method.
    from contextlib import closing

    # The UCSC Genome Browser database: 2019 update.: pubmed id 30407534
    pubmed_id = '30407534'

    local_conf_file = "/home/ivana/.mysql_conf"
    ucsc_conf_file = "/home/ivana/.ucsc_mysql_conf"
    for dependency in [local_conf_file, ucsc_conf_file]:
        if not os.path.exists(dependency):
            print(dependency, "not found")
            # A non-zero status lets shells and pipelines see the failure.
            # SystemExit is raised directly because the bare exit() helper is
            # injected by the site module and is not always available.
            raise SystemExit(1)

    # Assembly name -> species; also defines which assemblies are processed.
    species = {'hg18': 'human', 'hg19': 'human', 'mm9': 'mouse'}

    # closing() guarantees close() is called even if a query or a store
    # raises. The objects are deliberately not used as context managers
    # themselves, since that behavior is driver-specific.
    with closing(connect_to_mysql(local_conf_file)) as local_db, \
            closing(local_db.cursor()) as local_cursor:
        # autocommit is on by default, except when it is not
        search_db(local_cursor, "set autocommit=1")
        switch_to_db(local_cursor, 'progesterone')

        # store reference info
        xref_id = store_xref(local_cursor, 'pubmed', pubmed_id)

        # Note: you should have the skip-auto-rehash option in
        # .ucsc_mysql_conf. It is the equivalent of -A on the mysql command
        # line and means no autocompletion, which makes mysql start up much
        # faster.
        with closing(connect_to_mysql(ucsc_conf_file)) as ucsc_db, \
                closing(ucsc_db.cursor()) as ucsc_cursor:
            for assembly, organism in species.items():
                qry = "select chrom, size from %s.chromInfo" % assembly
                rows = search_db(ucsc_cursor, qry)
                if not rows or 'Error' in rows[0][0]:
                    # Re-run the query verbosely so that the problem is
                    # reported, then give up on the remaining assemblies.
                    search_db(ucsc_cursor, qry, verbose=True)
                    break
                for chrom, size in rows:
                    if '_' in chrom:
                        continue  # we don't want to get _too_ general here
                    fixed_fields = {
                        'species': organism,
                        'chromosome': chrom,
                        'assembly': assembly,
                        'rtype': 'chromosome',
                    }
                    update_fields = {
                        'rfrom': 1,
                        'rto': int(size),
                        'xref_id': xref_id,
                    }
                    store_or_update(local_cursor, 'regions',
                                    fixed_fields, update_fields)

    return True
#########################################
# Run the import only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()