-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmindist.py
executable file
·182 lines (159 loc) · 6.09 KB
/
mindist.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
#!/usr/bin/env python3
"""
DESCRIPTION
Calculate distances between all atoms
in selection1 and selection2, then make a
distance object for the smallest distance.
If you want to calculate the shortest distance for a very
large selection, consider installing anaconda (https://www.anaconda.com/).
Anaconda provides numpy which is needed for faster calculations.
ARGUMENTS
selection1 : selection string for atoms in the first selection
selection2 : selection string for atoms in the selection selection
n : number of distances to calculate (default: 1)
t : threshold of number of atoms if using non-numpy calculation (default: 500)
unique : only one distance per residue (default: 0 (0=no, 1=yes))
EXAMPLE
mindist chain A, chain D
mindist chain A, chain D, n=10, unique=1
"""
author = "Matthijs J. Tadema, MSc (2020)"
version = 20201213
from pymol import cmd
import warnings
warnings.simplefilter('error', ResourceWarning)
try:
import numpy as np
except:
warnings.warn("Using mindist without numpy; limit your selections to few atoms.")
def gen_index(selection) -> int:
"convert the selection to a list of unique indices"
model = cmd.get_model(selection)
for at in model.atom:
yield int(at.index)
def gen_pairs(selection1, selection2, *, t=500) -> tuple:
"Exhaustively generate index pairs for all atoms in selection"
set_selection1 = set(list(gen_index(selection1)))
set_selection2 = set(list(gen_index(selection2)))
# Check if not selecting too many atoms
l1 = len(set_selection1)
l2 = len(set_selection2)
l = sum([l1, l2])
if l > t:
msg = f"Using slow (non numpy) implementation with too many atoms ({l}> {t}). Consider selecting fewer atoms or installing numpy/conda."
warnings.warn(msg, ResourceWarning)
# ensure that all pairs are unique
diff_selection2 = set_selection1.difference(set_selection2)
for i in set_selection1:
for j in set_selection2:
yield (i, j)
def get_distances(selection1, selection2, **kwargs) -> float:
"Calculate distances for all atoms in selection"
for at1, at2 in gen_pairs(selection1, selection2, **kwargs):
d = float(cmd.get_distance(f"index {at1}", f"index {at2}"))
yield (at1, at2), d
def get_mindist(selection1, selection2, **kwargs):
"""
Terribly slow implementation.
Warn if selecting too many atoms
t: int, threshold value
"""
raise NotImplementedError("Have to fix this again")
all_distances = {pair: d for pair, d in get_distances(selection1, selection2, **kwargs)}
sortkey = lambda x: all_distances[x]
min_pairs = sorted(all_distances.keys(), key=sortkey)
for min_pair in min_pairs:
yield min_pair
class Selection:
"""
Generate indices and an object name
"""
def __init__(self, sele_str):
"""
Make a sele_str object to keep some stuff together
"""
index = []
coord = []
obj = cmd.get_object_list(sele_str)
if len(obj) != 1:
raise NotImplementedError("Selection should be in a single object")
for at in cmd.get_model(sele_str).atom:
x, y, z = [float(n) for n in at.coord]
coord.append((x, y, z))
index.append(int(at.index))
self.index = index
self.coord = coord
self.obj = obj[0]
def remove_dupes(self, sele):
"""
Remove duplicates
"""
tmp = []
# if the objects are different, there are no duplicates
if self.obj != sele.obj:
return
for i in self.index:
if not i in sele.index:
tmp.append(i)
self.index = tmp
def get_mindist_np(sele_1, sele_2):
"""
Calculate distances using numpy
"""
# Calculate distance matrix
coord_1 = np.array(sele_1.coord)
coord_2 = np.array(sele_2.coord)
matrix = np.subtract(coord_1[:,None], coord_2[None,:])
dm = np.linalg.norm(matrix, axis=2)
# Min index of the matrix
sort_index = np.argsort(dm, axis=None)
for idx in sort_index:
# Min indices unraveled
idx1, idx2 = np.unravel_index(idx, np.shape(dm))
# Back to atom indices
min_pair = (sele_1.index[idx1], sele_2.index[idx2])
yield min_pair
def idx_to_resi(idx: int) -> int:
"Get the residue number corresponding to the atom index"
model = cmd.get_model(f"index {idx}")
resi = model.atom[0].resi
resi = int(resi)
return resi
ir = idx_to_resi # abbrev
def mindist(selection1, selection2, n=1, t=500, unique=0, _legacy=0):
# Create selection objects
sele_1 = Selection(selection1)
sele_2 = Selection(selection2)
sele_2.remove_dupes(sele_1)
try:
if int(_legacy) != 0:
raise ImportError("Used for testing without numpy")
gen_mindist = get_mindist_np(sele_1, sele_2)
except (NameError, ImportError, NotImplementedError):
gen_mindist = get_mindist(selection1, selection2, t=500)
resi_pairs = [] # keep track of residue pairs in case we want to skip
count = 0
while count < int(n):
try:
min_pair = next(gen_mindist)
except StopIteration:
# it could be that < n pairs were selected
break
a, b = min_pair
resi_pair = {ir(a), ir(b)}
if unique == 1 and resi_pair in resi_pairs:
# skip if we want only unique residues
# distances are sorted anyway so we'll get the shortes distance
continue
else:
resi_pairs.append(resi_pair)
cmd.distance(f"mindist_{a}_{b}", f"index {a} and {sele_1.obj}", f"index {b} and {sele_2.obj}")
count += 1
mindist.__doc__ = __doc__
cmd.extend('mindist', mindist)
cmd.auto_arg[0]['mindist'] = [cmd.selection_sc, 'selection 1', ', ']
cmd.auto_arg[1]['mindist'] = [cmd.selection_sc, 'selection 2', ', ']
cmd.auto_arg[2]['mindist'] = ['', '#distances', '']
cmd.auto_arg[3]['mindist'] = ['', 'threshold', '']
# For some reason pymol doesn't want to add a 5th one...
#cmd.auto_arg[4]['mindist'] = ['', 'unique res', '']