1 | #! /usr/bin/env python |
---|
2 | import math |
---|
3 | from ete2 import Tree |
---|
4 | |
---|
class erlang:
    """Upper-tail probability helper based on the Erlang distribution."""

    def __init__(self):
        pass

    def one_tail_test(self, rate, k, x):
        """Return sum_{n=0}^{k-1} exp(-rate*x) * (rate*x)**n / n!.

        This is the upper tail P(X > x) of an Erlang distribution with
        shape ``k`` and rate ``rate``.

        rate: estimated branching rate from reference tree
        k: node height (number of terms summed; 0 yields 0.0)
        x: placement branch length
        """
        mean = rate * x

        if mean <= 0:
            # log(mean) is undefined here, but exp(-mean) >= 1 cannot
            # underflow, so the plain term recurrence
            # t_{n+1} = t_n * mean / (n + 1) is safe.
            term = math.exp(-mean)
            p = 0.0
            for n in range(k):
                p += term
                term *= mean / (n + 1.0)
            return p

        # Evaluate every term in log space.  Computing n! and mean**n
        # separately overflows a float for n >= 171 (or for large means)
        # even though the term itself is a probability in [0, 1].
        log_mean = math.log(mean)
        return math.fsum(
            math.exp(n * log_mean - mean - math.lgamma(n + 1.0))
            for n in range(k)
        )
---|
17 | |
---|
class tree_param:
    """Derive branching statistics from a reference tree and its taxonomy."""

    def __init__(self, tree, origin_taxonomy):
        """Store the inputs; no parsing happens here.

        tree: rooted and branch labeled tree in newick format
        origin_taxonomy: a dictionary of leaf name and taxonomy ranks
        """
        self.taxonomy = origin_taxonomy
        self.tree = tree

    def get_speciation_rate(self):
        """Return (number of nodes) / (sum of branch lengths) after pruning.

        The tree is pruned so that each species (last taxonomy rank) is
        represented by a single leaf; leaves whose last rank is "-" are
        always kept.
        """
        seen_species = set()
        kept_leaves = []
        for leaf_name, ranks in self.taxonomy.items():
            species_rank = ranks[-1]
            if species_rank != "-":
                if species_rank in seen_species:
                    # this species already has a representative leaf
                    continue
                seen_species.add(species_rank)
            kept_leaves.append(leaf_name)

        pruned = Tree(self.tree)
        pruned.prune(kept_leaves, preserve_branch_length=True)

        node_count = 0.0
        total_length = 0.0
        for node in pruned.traverse(strategy="preorder"):
            node_count = node_count + 1.0
            total_length = total_length + node.dist
        return float(node_count) / float(total_length)

    def get_speciation_rate_fast(self):
        """Same statistic as get_speciation_rate(), without Tree.prune().

        The ETE2 prune() function is extremely slow on large trees, so this
        function does not use it and instead deletes "redundant"
        species-level leaves one by one.

        Raises ValueError when a redundant taxonomy entry has no matching
        leaf in the tree.
        """
        tree_root = Tree(self.tree)

        # index the leaves by name so duplicates can be located directly
        leaf_by_name = dict(
            (leaf.name, leaf)
            for leaf in tree_root.traverse(strategy="postorder")
            if leaf.is_leaf()
        )

        # drop every leaf whose species has already been seen
        seen_species = set()
        for leaf_name, ranks in self.taxonomy.items():
            species_rank = ranks[-1]
            if species_rank == "-":
                continue
            if species_rank not in seen_species:
                seen_species.add(species_rank)
                continue
            duplicate = leaf_by_name.get(leaf_name, None)
            if not duplicate:
                raise ValueError("Node names not found in the tree: " + leaf_name)
            duplicate.delete(preserve_branch_length=True)

        # walk what is left, counting nodes and accumulating branch lengths
        node_count = 0
        total_length = 0.0
        for node in tree_root.traverse(strategy="preorder"):
            total_length += node.dist
            node_count += 1

        # rate = number of nodes visited / sum of branch lengths
        return float(node_count) / float(total_length)

    def get_nodesheight(self):
        """Return a dict mapping each node's ``B`` attribute to its height.

        Only nodes that carry a ``B`` attribute are included.  The height is
        the second element of get_closest_leaf(topology_only=True), plus one
        (presumably the edge count to the nearest leaf; confirm against the
        ETE documentation).
        """
        tree_root = Tree(self.tree)
        heights = {}
        for node in tree_root.traverse(strategy="preorder"):
            if not hasattr(node, "B"):
                continue
            closest = node.get_closest_leaf(topology_only=True)
            heights[node.B] = closest[1] + 1
        return heights
---|
95 | |
---|
if __name__ == "__main__":
    # Smoke test: print the tail probability for one sample input.
    print("This is erlang.py main")
    el = erlang()
    # Parenthesised single-argument print emits the same output under the
    # Python 2 print statement and the Python 3 print function, and matches
    # the call style already used above.
    print(el.one_tail_test(rate=17, k=1, x=0.221977))
---|