-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmsa2qubo.py
186 lines (145 loc) · 5.53 KB
/
msa2qubo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
#!/usr/bin/env python3
"""msa2qubo.py: Converts a fasta file representing a multiple sequence alignment into a QUBO format file."""
import argparse
import sys
from Bio import SeqIO
from bvc import BVC
import weightings
__author__ = "Dan Mapleson, Luis Yanes, Katie Barr, Sophie Kirkwood and Tim Stitt"
__copyright__ = "Copyright 2016, Quantum MSA"
__credits__ = ["Dan Mapleson", "Luis Yanes", "Katie Barr",
"Sophie Kirkwood", "Tim Stitt"]
__license__ = "GPLv3"
__version__ = "0.0.1"
__maintainer__ = "Dan Mapleson,"
__email__ = "daniel.mapleson@earlham.ac.uk"
__status__ = "Prototype"
class Msa2Qubo:
def __init__(self, input, output, P, delta, l0, l1, l2, reduced=False, verbose=False, scoring = None):
self.data = []
self.input = input
self.output = output
self.P = P
self.delta = delta
self.l0 = l0
self.l1 = l1
self.l2 = l2
self.reduced = reduced
self.verbose = verbose
self.scoring = scoring
self.energy = 0.0
self.active = []
self.bvc = None
def run(self):
if self.l0 < self.l1:
print("-l0 must be larger than -l1")
exit(1)
if self.l1 <= 0.0:
print("-l1 must be larger than 0.0")
print(1)
if self.l2 <= 1.0:
print("-l2 must be > 1.0")
exit(1)
if self.delta <= 1.0:
print("-d must be > 1.0")
exit(1)
if self.P < 1:
print("-P must be >= 1")
exit(1)
weights = None
if self.scoring:
print("Loading " + self.scoring + " scoring matrix...", end="")
weights = weightings.loadScoring(self.scoring)
print(" done")
print("Loading input into memory...", end="")
handle = open(self.input, "rU")
records = list(SeqIO.parse(handle, "fasta"))
handle.close()
print(" done")
print()
print("Input:")
# Collect variables
self.bvc = BVC(P=self.P, d=self.delta, l0=self.l0, l1=self.l1, l2=self.l2, reduced=self.reduced, weights=weights)
for r in records:
self.bvc.add_record(r)
# Input will only be small so just output every sequence found
print(">" + r.id)
print(r.seq)
print()
# Print current state of variables
if self.verbose:
print(self.bvc)
if self.bvc.p() >= self.bvc.m():
print("P (max gap size) is too large. Please ensure that p (bits per gap) is significantly less than m (bits per position in solution space)")
exit(1)
# Create matrix
print()
print("Creating W matrix ...", end="")
sys.stdout.flush()
self.bvc.createW()
# print (m)
print(" done")
self.bvc.createBVMatrix(intmode=True, verbose=self.verbose)
if self.verbose:
self.bvc.printIntegerCoefficients()
print("Creating matrix of coefficients of binary variables ...", end="")
sys.stdout.flush()
self.bvc.createBVMatrix(verbose=self.verbose)
print(" done")
print("Number of active binary variables:", self.bvc.calcActiveBVs())
self.active = self.bvc.active
self.energy = -self.bvc.energy
print("Target energy:", self.energy)
# Write QUBO file to disk
print("Writing QUBO output to disk ...", end="")
sys.stdout.flush()
self.bvc.writeQUBO(self.output, self.input)
print(" done")
print("Output saved to:" + self.output)
sys.stdout.flush()
if self.verbose:
self.bvc.plotMatrix(self.output + ".png")
# Save settings file
print()
self.bvc.save_settings(self.output + ".settings")
print("Saved settings to: " + self.output + ".settings")
def main():
parser = argparse.ArgumentParser("Convert Fasta file containing multiple sequences to align, into QUBO format")
parser.add_argument("input", help="The file containing sequences to align (to be converted into QUBO)")
parser.add_argument("-o", "--output", required=True,
help="The output file, containing the QUBO representation of the MSA problem")
parser.add_argument("-P", type=int, default=1, help="The maximum gap size allowed in the MSA (will round up to nearest power of 2)")
parser.add_argument("-d", "--delta", type=float, default=2.0, help="Delta. The scaling factor to apply when converting products of 3BVs to 2BVs.")
parser.add_argument("-l0", "--invalid_position_penalty", type=float, default=10.0,
help="The penalty to apply for invalid sequence ordering (must be much larger than --gap_weighting)")
parser.add_argument("-l1", "--gap_penalty", type=float, default=0.1,
help="The penalty to apply to any detected gaps")
parser.add_argument("-l2", "--invalid_reward_penalty", type=float, default=100.0,
help="The penalty to apply to matches in different columns (must be greater than 1.0)")
parser.add_argument("-r", "--reduced", action='store_true', default=False,
help="Run in reduced mode, only E0 and E1 active")
parser.add_argument("-s", "--scoring", default=None,
help="The scoring system to use. Leave unset to use score of 1 for a match and 0 for mismatch. Available options: [BLOSUM62]")
parser.add_argument("-v", "--verbose", action='store_true', default=False, help="Display extra information")
args = parser.parse_args()
if args.invalid_position_penalty < args.gap_penalty:
print("-l0 must be larger than -l1")
exit(1)
if args.gap_penalty <= 0.0:
print("-l1 must be larger than 0.0")
print(1)
if args.invalid_reward_penalty <= 1.0:
print("-l2 must be > 1.0")
exit(1)
if args.delta <= 1.0:
print("-d must be > 1.0")
exit(1)
if args.P < 1:
print("-P must be >= 1")
exit(1)
m2q = Msa2Qubo(input=args.input, output=args.output, P=args.P, delta=args.delta, l0=args.position_weighting, l1=args.gap_weighting, l2=args.reward_weighting,
reduced=args.reduced, scoring=args.scoring, verbose=args.verbose)
m2q.run()
if __name__ == "__main__":
# stuff only to run when not called via 'import' here
main()