Skip to content

Commit

Permalink
updated
Browse files Browse the repository at this point in the history
  • Loading branch information
blackrim committed Feb 28, 2024
1 parent 038fc76 commit 49f6215
Show file tree
Hide file tree
Showing 2 changed files with 103 additions and 0 deletions.
55 changes: 55 additions & 0 deletions src/join_paftol_tax.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import sys
import node
import tree_reader
import tree_utils

def process_tax(t):
    """Placeholder hook for the taxonomy tree; currently does nothing.

    The argument ``t`` is accepted but never read, and the function always
    returns ``None``.
    """
    return None

def get_mrca_wnms(n, t):
    """Find the node of tree ``t`` that corresponds to the names in ``n``.

    When ``n`` holds exactly one name, the leaves of ``t`` are scanned and the
    first leaf whose ``label`` equals that name is returned; ``None`` is
    returned when no leaf matches. For any other number of names the lookup
    is delegated to ``tree_utils.get_mrca_wnms``.
    """
    if len(n) != 1:
        return tree_utils.get_mrca_wnms(n, t)
    wanted = n[0]
    return next((leaf for leaf in t.leaves() if leaf.label == wanted), None)

if __name__ == "__main__":
    # Regroup nodes of the taxonomy tree to follow the groupings found in the
    # PAFTOL tree, then print the modified taxonomy tree on stdout.
    if len(sys.argv) != 3:
        print("python", sys.argv[0], "paf tax")
        sys.exit(0)

    # Take the first item yielded by the reader for each input file.
    paf = next(tree_reader.read_tree_file_iter(sys.argv[1]))
    tax = next(tree_reader.read_tree_file_iter(sys.argv[2]))

    # Keep the full tip names in data["original_name"] and shorten the labels
    # used for matching: the last "_"-separated field of a PAFTOL tip and the
    # first "_"-separated field of a taxonomy tip.
    for tip in paf.leaves():
        tip.data["original_name"] = tip.label
        tip.label = tip.label.split("_")[-1]
    for tip in tax.leaves():
        tip.data["original_name"] = tip.label
        tip.label = tip.label.split("_")[0]

    count = 0
    for nd in paf.iternodes(order="postorder"):
        # Only nodes with at least two children describe a grouping.
        if len(nd.children) < 2:
            continue
        names = nd.lvsnms()
        print(names, file=sys.stderr)
        # Node in the taxonomy that will receive the new grouping.
        target = get_mrca_wnms(names, tax)

        # Taxonomy counterpart of each child; children with no match are
        # dropped.
        matched = []
        for child in nd.children:
            hit = get_mrca_wnms(child.lvsnms(), tax)
            if hit is not None:
                matched.append(hit)

        # Fewer than two matched children leaves nothing to group. Checking
        # "< 2" rather than "== 1" also stops a childless node from being
        # attached to the taxonomy when no child matched at all.
        if len(matched) < 2:
            continue

        grouped = node.Node()
        for sub in matched:
            # need to add here if it is non-monophyletic so that things get
            # sunk as a result
            sub.parent.remove_child(sub)
            grouped.add_child(sub)
        target.add_child(grouped)

        count += 1
        # NOTE(review): hard stop after 21 regrouped nodes; this looks like a
        # debugging limit -- confirm before relying on the output.
        if count == 21:
            break

    # NOTE(review): no trailing ";" is appended here -- confirm whether
    # get_newick_repr already terminates the newick string.
    print(tax.get_newick_repr(False))
48 changes: 48 additions & 0 deletions src/process_paftol.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import sys

import node
import tree_reader

def get_name(nm):
    """Normalise a tip name and drop its final "_"-separated field.

    Single quotes are removed and spaces become underscores; everything
    before the last underscore is then returned. A name that contains no
    underscore after normalisation yields the empty string.
    """
    cleaned = nm.replace("'", "")
    cleaned = cleaned.replace(" ", "_")
    head, _sep, _tail = cleaned.rpartition("_")
    return head

if __name__ == "__main__":
    # Rename the tips with get_name, collapse every clade whose leaves all
    # share one name into a single tip, remove single-child nodes, and print
    # the resulting tree on stdout followed by ";".
    if len(sys.argv) != 2:
        print("python", sys.argv[0], "tre")
        sys.exit(0)

    # Take the first item yielded by the reader.
    t = next(tree_reader.read_tree_file_iter(sys.argv[1]))
    for tip in t.leaves():
        tip.label = get_name(tip.label)

    # check for monophyly and sink
    going = True
    while going:
        going = False
        for nd in t.iternodes("postorder"):
            lvs = nd.leaves()
            names = set(nd.lvsnms())
            if len(names) != 1 or len(lvs) <= 1:
                continue
            parent = nd.parent
            if parent is None:
                # No parent to hang the replacement tip on (presumably the
                # root); skip instead of failing on a None parent.
                continue
            going = True
            # Detach the clade and put one tip carrying the shared name in
            # its place.
            parent.children.remove(nd)
            nd.parent = None
            replacement = node.Node()
            replacement.istip = True
            replacement.label = next(iter(names))
            parent.add_child(replacement)
            # The tree changed, so restart the traversal from scratch.
            break

    # Remove nodes that have exactly one child by attaching that child to
    # the node's parent. Passes repeat until one makes no change.
    going = True
    while going:
        going = False
        for nd in t.iternodes():
            if len(nd.children) != 1:
                continue
            parent = nd.parent
            if parent is None:
                # A single-child node without a parent (presumably the root)
                # cannot be spliced out this way; leave it in place.
                continue
            only_child = nd.children[0]
            parent.children.remove(nd)
            nd.parent = None
            parent.add_child(only_child)
            going = True
    print(t.get_newick_repr(False) + ";")

0 comments on commit 49f6215

Please sign in to comment.