| 1 | #! /usr/bin/env python |
|---|
| 2 | import math |
|---|
| 3 | from ete2 import Tree |
|---|
| 4 | |
|---|
class erlang:
    """Upper-tail probabilities of the Erlang distribution."""

    def __init__(self):
        pass

    def one_tail_test(self, rate, k, x):
        """Return P(X >= x) for an Erlang distribution with shape k and the given rate.

        rate: estimated branching rate from reference tree
        k: node height (integer shape parameter)
        x: placement branch length

        The value is sum_{n=0}^{k-1} exp(-rate*x) * (rate*x)**n / n!.
        Returns 0.0 when k <= 0 (empty sum).
        """
        if k <= 0:
            return 0.0

        # float() keeps the arithmetic in floating point even when both
        # rate and x are integers (Python 2 integer division).
        lam = float(rate) * x

        # Build each term from the previous one: term_n = term_{n-1} * lam / n.
        # This avoids float(math.factorial(n)), which raises OverflowError
        # for n >= 171, and avoids recomputing factorials and powers.
        term = math.exp(-lam)
        p = term
        for n in range(1, k):
            term = term * lam / n
            p = p + term
        return p
|---|
| 17 | |
|---|
class tree_param:
    """Derives rate and node-height parameters from a reference tree."""

    def __init__(self, tree, origin_taxonomy):
        """tree: rooted and branch labeled tree in newick format
        origin_taxonomy: a dictionary of leaf name and taxonomy ranks"""
        self.taxonomy = origin_taxonomy
        self.tree = tree

    def _nodes_per_branch_length(self, root):
        """Return (number of nodes) / (sum of node.dist) over a preorder traversal.

        Every traversed node is counted, including the node the traversal
        starts from. Raises ZeroDivisionError if the branch lengths sum to zero.
        """
        total_length = 0.0
        node_count = 0
        for node in root.traverse(strategy="preorder"):
            node_count += 1
            total_length += node.dist
        return float(node_count) / float(total_length)

    def get_speciation_rate(self):
        """Estimate the speciation rate after pruning duplicate species.

        A sequence is kept when its last rank is "-" or when it is the
        first one seen for its species. The tree is then pruned to the kept
        sequences with branch lengths preserved.
        """
        seen_species = set()
        kept_names = []
        for name, ranks in self.taxonomy.items():
            sp = ranks[-1]
            if sp != "-":
                if sp in seen_species:
                    # a sequence of this species is already kept
                    continue
                seen_species.add(sp)
            kept_names.append(name)

        root = Tree(self.tree)
        root.prune(kept_names, preserve_branch_length=True)
        return self._nodes_per_branch_length(root)

    def get_speciation_rate_fast(self):
        """Estimate the speciation rate without calling ETE2 prune().

        prune() is extremely slow on large trees, so this variant deletes
        the "redundant" species-level leaves one by one instead.

        Raises ValueError when a redundant sequence name has no matching
        leaf in the tree.
        """
        root = Tree(self.tree)

        # index the leaves by name so redundant ones can be looked up directly
        leaf_by_name = {
            node.name: node
            for node in root.traverse(strategy="postorder")
            if node.is_leaf()
        }

        # drop every sequence whose species has already been seen
        seen_species = set()
        for name, ranks in self.taxonomy.items():
            sp = ranks[-1]
            if sp == "-":
                continue
            if sp not in seen_species:
                seen_species.add(sp)
                continue
            leaf = leaf_by_name.get(name, None)
            if not leaf:
                raise ValueError("Node names not found in the tree: " + name)
            leaf.delete(preserve_branch_length=True)

        # sp_rate = number_of_nodes / sum_of_branch_lengths on the reduced tree
        return self._nodes_per_branch_length(root)

    def get_nodesheight(self):
        """Map each node's "B" attribute to its height.

        Height is the topological distance to the closest leaf plus one.
        Nodes without a "B" attribute are skipped.
        """
        heights = {}
        root = Tree(self.tree)
        for node in root.traverse(strategy="preorder"):
            if not hasattr(node, "B"):
                continue
            closest = node.get_closest_leaf(topology_only=True)
            heights[node.B] = closest[1] + 1
        return heights
|---|
| 95 | |
|---|
if __name__ == "__main__":
    # Smoke test: print the tail probability for one fixed example.
    print("This is erlang.py main")
    el = erlang()
    # Call form of print (as on the line above) so the script parses on
    # Python 3 as well; with a single argument it prints the same on Python 2.
    print(el.one_tail_test(rate = 17, k = 1, x = 0.221977))
|---|