-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDna.cpp
244 lines (202 loc) · 6.86 KB
/
Dna.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
//
// Created by arrouan on 01/10/18.
//
#include "Dna.h"
#include <cassert>
Dna::Dna(int length, Threefry::Gen &&rng) : seq_(length) {
// Generate a random genome
for (int32_t i = 0; i < length; i++) {
seq_[i] = '0' + rng.random(NB_BASE);
}
}
int Dna::length() const {
return seq_.size();
}
void Dna::save(gzFile backup_file) {
int dna_length = length();
gzwrite(backup_file, &dna_length, sizeof(dna_length));
gzwrite(backup_file, seq_.data(), dna_length * sizeof(seq_[0]));
}
void Dna::load(gzFile backup_file) {
int dna_length;
gzread(backup_file, &dna_length, sizeof(dna_length));
char tmp_seq[dna_length];
gzread(backup_file, tmp_seq, dna_length * sizeof(tmp_seq[0]));
seq_ = std::vector<char>(tmp_seq, tmp_seq + dna_length);
}
void Dna::set(int pos, char c) {
seq_[pos] = c;
}
/**
* Remove the DNA inbetween pos_1 and pos_2
*
* @param pos_1
* @param pos_2
*/
void Dna::remove(int pos_1, int pos_2) {
assert(pos_1 >= 0 && pos_2 >= pos_1 && pos_2 <= seq_.size());
seq_.erase(seq_.begin() + pos_1, seq_.begin() + pos_2);
}
/**
* Insert a sequence of a given length at a given position into the DNA of the Organism
*
* @param pos : where to insert the sequence
* @param seq : the sequence itself
* @param seq_length : the size of the sequence
*/
void Dna::insert(int pos, std::vector<char> seq) {
// Insert sequence 'seq' at position 'pos'
assert(pos >= 0 && pos < seq_.size());
seq_.insert(seq_.begin() + pos, seq.begin(), seq.end());
}
/**
* Insert a sequence of a given length at a given position into the DNA of the Organism
*
* @param pos : where to insert the sequence
* @param seq : the sequence itself
* @param seq_length : the size of the sequence
*/
void Dna::insert(int pos, Dna *seq) {
// Insert sequence 'seq' at position 'pos'
assert(pos >= 0 && pos < seq_.size());
seq_.insert(seq_.begin() + pos, seq->seq_.begin(), seq->seq_.end());
}
void Dna::do_switch(int pos) {
if (seq_[pos] == '0') seq_[pos] = '1';
else seq_[pos] = '0';
}
void Dna::do_duplication(int pos_1, int pos_2, int pos_3) {
// Duplicate segment [pos_1; pos_2[ and insert the duplicate before pos_3
char *duplicate_segment = NULL;
int32_t seg_length;
if (pos_1 < pos_2) {
//
// pos_1 pos_2 -> 0-
// | | - -
// 0---------------------------------> - -
// =============== - - pos_1
// tmp (copy) - -
// ----- |
// pos_2 <-'
//
std::vector<char> seq_dupl =
std::vector<char>(seq_.begin() + pos_1, seq_.begin() + pos_2);
insert(pos_3, seq_dupl);
} else { // if (pos_1 >= pos_2)
// The segment to duplicate includes the origin of replication.
// The copying process will be done in two steps.
//
// ,->
// pos_2 pos_1 | -> 0-
// | | - - pos_2
// 0---------------------------------> pos_1 - -
// ====== ======= - -
// tmp2 tmp1 - -
// -----
//
//
std::vector<char>
seq_dupl = std::vector<char>(seq_.begin() + pos_1, seq_.end());
seq_dupl.insert(seq_dupl.end(), seq_.begin(), seq_.begin() + pos_2);
insert(pos_3, seq_dupl);
}
}
int Dna::promoter_at(int pos) {
int prom_dist[PROM_SIZE];
for (int motif_id = 0; motif_id < PROM_SIZE; motif_id++) {
int search_pos = pos + motif_id;
if (search_pos >= seq_.size())
search_pos -= seq_.size();
// Searching for the promoter
prom_dist[motif_id] =
PROM_SEQ[motif_id] == seq_[search_pos] ? 0 : 1;
}
// Computing if a promoter exists at that position
int dist_lead = prom_dist[0] +
prom_dist[1] +
prom_dist[2] +
prom_dist[3] +
prom_dist[4] +
prom_dist[5] +
prom_dist[6] +
prom_dist[7] +
prom_dist[8] +
prom_dist[9] +
prom_dist[10] +
prom_dist[11] +
prom_dist[12] +
prom_dist[13] +
prom_dist[14] +
prom_dist[15] +
prom_dist[16] +
prom_dist[17] +
prom_dist[18] +
prom_dist[19] +
prom_dist[20] +
prom_dist[21];
return dist_lead;
}
// Given a, b, c, d boolean variable and X random boolean variable,
// a terminator look like : a b c d X X !d !c !b !a
int Dna::terminator_at(int pos) {
int term_dist[TERM_STEM_SIZE];
for (int motif_id = 0; motif_id < TERM_STEM_SIZE; motif_id++) {
int right = pos + motif_id;
int left = pos + (TERM_SIZE - 1) - motif_id;
// loop back the dna inf needed
if (right >= length()) right -= length();
if (left >= length()) left -= length();
// Search for the terminators
term_dist[motif_id] = seq_[right] != seq_[left] ? 1 : 0;
}
int dist_term_lead = term_dist[0] +
term_dist[1] +
term_dist[2] +
term_dist[3];
return dist_term_lead;
}
bool Dna::shine_dal_start(int pos) {
bool start = false;
int t_pos, k_t;
for (int k = 0; k < SHINE_DAL_SIZE + CODON_SIZE; k++) {
k_t = k >= SHINE_DAL_SIZE ? k + SD_START_SPACER : k;
t_pos = pos + k_t;
if (t_pos >= seq_.size())
t_pos -= seq_.size();
if (seq_[t_pos] == SHINE_DAL_SEQ[k_t]) {
start = true;
} else {
start = false;
break;
}
}
return start;
}
bool Dna::protein_stop(int pos) {
bool is_protein;
int t_k;
for (int k = 0; k < CODON_SIZE; k++) {
t_k = pos + k;
if (t_k >= seq_.size())
t_k -= seq_.size();
if (seq_[t_k] == PROTEIN_END[k]) {
is_protein = true;
} else {
is_protein = false;
break;
}
}
return is_protein;
}
int Dna::codon_at(int pos) {
int value = 0;
int t_pos;
for (int i = 0; i < CODON_SIZE; i++) {
t_pos = pos + i;
if (t_pos >= seq_.size())
t_pos -= seq_.size();
if (seq_[t_pos] == '1')
value += 1 << (CODON_SIZE - i - 1);
}
return value;
}