From 49f62151fffef713b737d8bcbecfc0fc032ca71d Mon Sep 17 00:00:00 2001
From: blackrim
Date: Wed, 28 Feb 2024 13:06:29 -0500
Subject: [PATCH] updated

---
 src/join_paftol_tax.py | 76 ++++++++++++++++++++++++++++++++++++++++++
 src/process_paftol.py  | 48 +++++++++++++++++++++++++++
 2 files changed, 124 insertions(+)
 create mode 100644 src/join_paftol_tax.py
 create mode 100644 src/process_paftol.py

diff --git a/src/join_paftol_tax.py b/src/join_paftol_tax.py
new file mode 100644
index 0000000..661c0ac
--- /dev/null
+++ b/src/join_paftol_tax.py
@@ -0,0 +1,76 @@
+import sys
+import node
+import tree_reader
+import tree_utils
+
+
+def process_tax(t):
+    """Placeholder hook for the taxonomy tree; does nothing and returns None."""
+    return
+
+
+def get_mrca_wnms(n, t):
+    """Return the node of tree t that is the MRCA of the tip labels in n.
+
+    A one-element list is resolved to the first leaf whose label equals that
+    name, or None when no leaf matches. Any other list is handed to
+    tree_utils.get_mrca_wnms unchanged.
+    """
+    if len(n) == 1:
+        for leaf in t.leaves():
+            if leaf.label == n[0]:
+                return leaf
+        return None  # the name is not a tip of t
+    return tree_utils.get_mrca_wnms(n, t)
+
+
+if __name__ == "__main__":
+    if len(sys.argv) != 3:
+        print("python", sys.argv[0], "paf tax")
+        sys.exit(0)
+
+    paf = next(tree_reader.read_tree_file_iter(sys.argv[1]))
+    tax = next(tree_reader.read_tree_file_iter(sys.argv[2]))
+
+    # Give both trees a shared key: the last "_" field of every paf tip and
+    # the first "_" field of every tax tip. The full label is kept in .data.
+    for tip in paf.leaves():
+        tip.data["original_name"] = tip.label
+        tip.label = tip.label.split("_")[-1]
+    for tip in tax.leaves():
+        tip.data["original_name"] = tip.label
+        tip.label = tip.label.split("_")[0]
+
+    count = 0
+    for nd in paf.iternodes(order="postorder"):
+        if len(nd.children) < 2:
+            continue
+        names = nd.lvsnms()
+        print(names, file=sys.stderr)
+        anchor = get_mrca_wnms(names, tax)
+        if anchor is None:
+            # Nothing in tax to hang the new group on; skip before mutating.
+            continue
+        # Map each child clade of the paf node onto a node of the tax tree.
+        mapped = []
+        for child in nd.children:
+            hit = get_mrca_wnms(child.lvsnms(), tax)
+            if hit is not None:
+                mapped.append(hit)
+        # Fewer than two mapped children gives nothing to group; this also
+        # keeps an empty node from being attached when no child mapped.
+        if len(mapped) < 2:
+            continue
+        group = node.Node()
+        for hit in mapped:
+            # need to add here if it is non-monophyletic so that things get
+            # sunk as a result
+            hit.parent.remove_child(hit)
+            group.add_child(hit)
+        anchor.add_child(group)
+        count += 1
+        # NOTE(review): hard stop after 21 joins; looks like a debugging
+        # limit - confirm before relying on the output.
+        if count == 21:
+            break
+    print(tax.get_newick_repr(False))
diff --git a/src/process_paftol.py b/src/process_paftol.py
new file mode 100644
index 0000000..21340cc
--- /dev/null
+++ b/src/process_paftol.py
@@ -0,0 +1,48 @@
+import sys
+
+import node
+import tree_reader
+
+
+def get_name(nm):
+    """Return nm with quotes removed, spaces turned into "_", and the last
+    "_"-separated field dropped.
+
+    A name that contains no "_" after cleaning yields the empty string.
+    """
+    cleaned = nm.replace("'", "").replace(" ", "_")
+    return cleaned.rpartition("_")[0]
+
+
+if __name__ == "__main__":
+    if len(sys.argv) != 2:
+        print("python", sys.argv[0], "tre")
+        sys.exit(0)
+
+    t = next(tree_reader.read_tree_file_iter(sys.argv[1]))
+    for tip in t.leaves():
+        tip.label = get_name(tip.label)
+
+    # check for monophyly and sink: a clade whose tips all carry one name is
+    # replaced by a single tip with that name. The traversal restarts after
+    # every edit because the tree changed underneath the iterator.
+    going = True
+    while going:
+        going = False
+        for nd in t.iternodes("postorder"):
+            names = set(nd.lvsnms())
+            if len(names) != 1 or len(nd.leaves()) < 2:
+                continue
+            par = nd.parent
+            if par is None:
+                # The root has no parent to receive the replacement tip.
+                continue
+            par.children.remove(nd)
+            nd.parent = None
+            sunk = node.Node()
+            sunk.istip = True
+            sunk.label = names.pop()
+            par.add_child(sunk)
+            going = True
+            break
+
+    # Splice out nodes that have exactly one child by attaching that child to
+    # the grandparent. The root is left alone (it has no parent to attach
+    # to), and the traversal restarts after each edit, as above.
+    going = True
+    while going:
+        going = False
+        for nd in t.iternodes():
+            if len(nd.children) != 1 or nd.parent is None:
+                continue
+            par = nd.parent
+            only = nd.children[0]
+            par.children.remove(nd)
+            nd.parent = None
+            par.add_child(only)
+            going = True
+            break
+    print(t.get_newick_repr(False) + ";")