-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathto_matrix.py
118 lines (79 loc) · 2.96 KB
/
to_matrix.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import numpy as np
from phylo2vec.base.to_vector import (
_build_vector,
_find_cherries,
_order_cherries_no_parents,
)
def _reduce_with_bls(newick):
    """
    Reduce a Newick string with parent labels and branch lengths
    to an ancestry array and a branch-length array

    The innermost pair enclosed by the first closing parenthesis is
    collapsed repeatedly: its two children, its parent label and the two
    child branch lengths are recorded, then the parenthesised pair is cut
    out of the string so that the parent label takes its place.

    The reduction is iterative, so the number of internal nodes is not
    bounded by the interpreter's recursion limit.

    Parameters
    ----------
    newick : str
        Newick string with integer labels on leaves and internal nodes and
        a branch length after every child,
        e.g. "((0:0.1,1:0.2)3:0.3,2:0.4)4;".
        A trailing ";" and surrounding whitespace are optional.

    Returns
    -------
    ancestry : numpy.ndarray
        int16 array with one row [child1, child2, parent] per collapsed
        pair, in the order the closing parentheses appear in ``newick``
    bls : numpy.ndarray
        float16 array with one row [bl1, bl2] per collapsed pair
        (branch lengths of child1 and child2)

    Raises
    ------
    ValueError
        If a closing parenthesis has no matching opening parenthesis,
        if a pair does not contain exactly two children of the form
        "label:length", or if a label/length cannot be parsed as a number
    """
    newick = newick.strip()
    # Only drop the terminator when it is actually there; slicing off the
    # last character blindly would eat part of the root label otherwise.
    if newick.endswith(";"):
        newick = newick[:-1]

    ancestry = []
    bls = []

    close_idx = newick.find(")")
    while close_idx != -1:
        # The last "(" before the first ")" opens the innermost pair
        open_idx = newick.rfind("(", 0, close_idx)
        if open_idx == -1:
            raise ValueError(
                "Unbalanced parentheses in Newick string: "
                "')' without a matching '('"
            )

        child1, child2 = newick[open_idx + 1 : close_idx].split(",", 2)
        child1, bl1 = child1.split(":")
        child2, bl2 = child2.split(":")

        # The parent label directly follows the ")" and ends at the next
        # ",", ")" or ":" (whichever comes first)
        remainder = newick[close_idx + 1 :]
        parent = remainder.split(",", 1)[0].split(")", 1)[0].split(":")[0]

        ancestry.append([int(child1), int(child2), int(parent)])
        bls.append([float(bl1), float(bl2)])

        # Cut out "(child1,child2)"; the parent label now acts as a leaf
        newick = newick[:open_idx] + remainder
        # Nothing before open_idx contains ")", so resume the search there
        close_idx = newick.find(")", open_idx)

    return np.array(ancestry, dtype=np.int16), np.array(bls, dtype=np.float16)
def _reduce_no_parents_with_bls(newick):
    """
    Reduce a Newick string with branch lengths but without parent labels
    to an ancestry array and a branch-length array

    The innermost pair enclosed by the first closing parenthesis is
    collapsed repeatedly. As the string carries no parent labels, the
    larger of the two child labels is recorded as the parent, and the
    collapsed pair is replaced in the string by the smaller child label.

    The reduction is iterative, so the number of internal nodes is not
    bounded by the interpreter's recursion limit.

    Parameters
    ----------
    newick : str
        Newick string with integer leaf labels and a branch length after
        every child, e.g. "((0:0.1,1:0.2):0.3,2:0.4);".
        A trailing ";" and surrounding whitespace are optional.

    Returns
    -------
    ancestry : numpy.ndarray
        int16 array with one row [child1, child2, max(child1, child2)]
        per collapsed pair, in the order the closing parentheses appear
        in ``newick``
    bls : numpy.ndarray
        float16 array with one row [bl1, bl2] per collapsed pair
        (branch lengths of child1 and child2)

    Raises
    ------
    ValueError
        If a closing parenthesis has no matching opening parenthesis,
        if a pair does not contain exactly two children of the form
        "label:length", or if a label/length cannot be parsed as a number
    """
    newick = newick.strip()
    # Only drop the terminator when it is actually there; slicing off the
    # last character blindly would remove the root's ")" otherwise.
    if newick.endswith(";"):
        newick = newick[:-1]

    ancestry = []
    bls = []

    close_idx = newick.find(")")
    while close_idx != -1:
        # The last "(" before the first ")" opens the innermost pair
        open_idx = newick.rfind("(", 0, close_idx)
        if open_idx == -1:
            raise ValueError(
                "Unbalanced parentheses in Newick string: "
                "')' without a matching '('"
            )

        child1, child2 = newick[open_idx + 1 : close_idx].split(",", 2)
        child1, bl1 = child1.split(":")
        child2, bl2 = child2.split(":")
        child1 = int(child1)
        child2 = int(child2)

        ancestry.append([child1, child2, max(child1, child2)])
        # Parse to float here, as _reduce_with_bls does, so malformed
        # lengths fail at the offending pair
        bls.append([float(bl1), float(bl2)])

        # Splice by position so only this pair is rewritten, never another
        # span that happens to share the same text
        newick = (
            newick[:open_idx]
            + f"{min(child1, child2)}"
            + newick[close_idx + 1 :]
        )
        # Nothing before open_idx contains ")", so resume the search there
        close_idx = newick.find(")", open_idx)

    return np.array(ancestry, dtype=np.int16), np.array(bls, dtype=np.float16)
def to_matrix(newick):
    """
    Build a Phylo2Mat matrix from a Newick string that carries
    parent labels and branch lengths

    The string is reduced with ``_reduce_with_bls``; the resulting ancestry
    is passed through the base helpers ``_find_cherries`` and
    ``_build_vector``. The branch-length rows are re-indexed with the
    indices returned by ``_find_cherries`` before being placed next to
    the vector.

    Parameters
    ----------
    newick : str
        Newick string for a tree

    Returns
    -------
    m : numpy.ndarray
        Phylo2Mat matrix: the output of ``_build_vector`` as the first
        column, followed by the two branch-length columns
    """
    ancestry, branch_lengths = _reduce_with_bls(newick)
    cherries, row_order = _find_cherries(ancestry)

    # Reorder the branch lengths with the indices from _find_cherries
    ordered_bls = branch_lengths[row_order].astype(np.float16)

    vector = _build_vector(cherries)

    # Turn the vector into a single column and append the branch lengths
    return np.concatenate((vector[:, np.newaxis], ordered_bls), axis=1)
def to_matrix_no_parents(newick_no_parents):
    """
    Build a Phylo2Mat matrix from a Newick string that carries
    branch lengths but no parent labels

    The string is reduced with ``_reduce_no_parents_with_bls``; the
    resulting ancestry is passed through ``_order_cherries_no_parents``
    and ``_build_vector``. The branch-length rows are re-indexed with the
    indices returned by ``_order_cherries_no_parents`` before being placed
    next to the vector.

    Parameters
    ----------
    newick_no_parents : str
        Newick string for a tree, without parent labels

    Returns
    -------
    m : numpy.ndarray
        Matrix with the output of ``_build_vector`` as the first column,
        followed by the two branch-length columns
    """
    ancestry, branch_lengths = _reduce_no_parents_with_bls(newick_no_parents)
    cherries, row_order = _order_cherries_no_parents(ancestry)

    # Reorder the branch lengths with the indices from the cherry ordering
    ordered_bls = branch_lengths[row_order].astype(np.float16)

    vector = _build_vector(cherries)

    # Turn the vector into a single column and append the branch lengths
    return np.concatenate((vector[:, np.newaxis], ordered_bls), axis=1)