1 | # -*- coding: utf-8 -*- |
---|
2 | # #START_LICENSE########################################################### |
---|
3 | # |
---|
4 | # |
---|
5 | # This file is part of the Environment for Tree Exploration program |
---|
6 | # (ETE). http://ete.cgenomics.org |
---|
7 | # |
---|
8 | # ETE is free software: you can redistribute it and/or modify it |
---|
9 | # under the terms of the GNU General Public License as published by |
---|
10 | # the Free Software Foundation, either version 3 of the License, or |
---|
11 | # (at your option) any later version. |
---|
12 | # |
---|
13 | # ETE is distributed in the hope that it will be useful, but WITHOUT |
---|
14 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
---|
15 | # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public |
---|
16 | # License for more details. |
---|
17 | # |
---|
18 | # You should have received a copy of the GNU General Public License |
---|
19 | # along with ETE. If not, see <http://www.gnu.org/licenses/>. |
---|
20 | # |
---|
21 | # |
---|
22 | # ABOUT THE ETE PACKAGE |
---|
23 | # ===================== |
---|
24 | # |
---|
25 | # ETE is distributed under the GPL copyleft license (2008-2011). |
---|
26 | # |
---|
27 | # If you make use of ETE in published work, please cite: |
---|
28 | # |
---|
29 | # Jaime Huerta-Cepas, Joaquin Dopazo and Toni Gabaldon. |
---|
30 | # ETE: a python Environment for Tree Exploration. Jaime BMC |
---|
31 | # Bioinformatics 2010,:24doi:10.1186/1471-2105-11-24 |
---|
32 | # |
---|
33 | # Note that extra references to the specific methods implemented in |
---|
34 | # the toolkit are available in the documentation. |
---|
35 | # |
---|
36 | # More info at http://ete.cgenomics.org |
---|
37 | # |
---|
38 | # |
---|
39 | # #END_LICENSE############################################################# |
---|
40 | __VERSION__="ete2-2.2rev1026" |
---|
41 | #START_LICENSE########################################################### |
---|
42 | # |
---|
43 | # Copyright (C) 2009 by Jaime Huerta Cepas. All rights reserved. |
---|
44 | # email: jhcepas@gmail.com |
---|
45 | # |
---|
46 | # This file is part of the Environment for Tree Exploration program (ETE). |
---|
47 | # http://ete.cgenomics.org |
---|
48 | # |
---|
49 | # ETE is free software: you can redistribute it and/or modify it |
---|
50 | # under the terms of the GNU General Public License as published by |
---|
51 | # the Free Software Foundation, either version 3 of the License, or |
---|
52 | # (at your option) any later version. |
---|
53 | # |
---|
54 | # ETE is distributed in the hope that it will be useful, |
---|
55 | # but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
56 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
57 | # GNU General Public License for more details. |
---|
58 | # |
---|
59 | # You should have received a copy of the GNU General Public License |
---|
60 | # along with ETE. If not, see <http://www.gnu.org/licenses/>. |
---|
61 | # |
---|
62 | # #END_LICENSE############################################################# |
---|
63 | import os |
---|
64 | import cPickle |
---|
65 | import random |
---|
66 | import copy |
---|
67 | from collections import deque |
---|
68 | import itertools |
---|
69 | from newick import read_newick, write_newick |
---|
70 | |
---|
71 | # the following imports are necessary to set fixed styles and faces |
---|
72 | #try: |
---|
73 | # from ete2.treeview.main import NodeStyle, _FaceAreas, FaceContainer, FACE_POSITIONS |
---|
74 | # from ete2.treeview.faces import Face |
---|
75 | #except ImportError: |
---|
76 | # TREEVIEW = False |
---|
77 | #else: |
---|
78 | # TREEVIEW = True |
---|
79 | |
---|
80 | __all__ = ["Tree", "TreeNode"] |
---|
81 | |
---|
82 | TREEVIEW = False |
---|
83 | DEFAULT_COMPACT = False |
---|
84 | DEFAULT_SHOWINTERNAL = False |
---|
85 | DEFAULT_DIST = 1.0 |
---|
86 | DEFAULT_SUPPORT = 1.0 |
---|
87 | DEFAULT_NAME = "NoName" |
---|
88 | |
---|
89 | class TreeError(Exception): |
---|
90 | """ |
---|
91 | A problem occurred during a TreeNode operation |
---|
92 | """ |
---|
93 | def __init__(self, value=''): |
---|
94 | self.value = value |
---|
95 | def __str__(self): |
---|
96 | return repr(self.value) |
---|
97 | |
---|
98 | class TreeNode(object): |
---|
99 | """ |
---|
100 | TreeNode (Tree) class is used to store a tree structure. A tree |
---|
101 | consists of a collection of TreeNode instances connected in a |
---|
102 | hierarchical way. Trees can be loaded from the New Hampshire Newick |
---|
103 | format (newick). |
---|
104 | |
---|
105 | :argument newick: Path to the file containing the tree or, alternatively, |
---|
106 | the text string containing the same information. |
---|
107 | |
---|
108 | :argument 0 format: subnewick format |
---|
109 | |
---|
110 | .. table:: |
---|
111 | |
---|
112 | ====== ============================================== |
---|
113 | FORMAT DESCRIPTION |
---|
114 | ====== ============================================== |
---|
115 | 0 flexible with support values |
---|
116 | 1 flexible with internal node names |
---|
117 | 2 all branches + leaf names + internal supports |
---|
118 | 3 all branches + all names |
---|
119 | 4 leaf branches + leaf names |
---|
120 | 5 internal and leaf branches + leaf names |
---|
121 | 6 internal branches + leaf names |
---|
122 | 7 leaf branches + all names |
---|
123 | 8 all names |
---|
124 | 9 leaf names |
---|
125 | 100 topology only |
---|
126 | ====== ============================================== |
---|
127 | |
---|
128 | :returns: a tree node object which represents the base of the tree. |
---|
129 | |
---|
130 | ** Examples: ** |
---|
131 | |
---|
132 | :: |
---|
133 | |
---|
134 | t1 = Tree() # creates an empty tree |
---|
135 | t2 = Tree('(A:1,(B:1,(C:1,D:1):0.5):0.5);') |
---|
136 | t3 = Tree('/home/user/myNewickFile.txt') |
---|
137 | """ |
---|
138 | |
---|
139 | def _get_dist(self): |
---|
140 | return self._dist |
---|
141 | def _set_dist(self, value): |
---|
142 | try: |
---|
143 | self._dist = float(value) |
---|
144 | except ValueError: |
---|
145 | raise |
---|
146 | |
---|
147 | def _get_support(self): |
---|
148 | return self._support |
---|
149 | def _set_support(self, value): |
---|
150 | try: |
---|
151 | self._support = float(value) |
---|
152 | except ValueError: |
---|
153 | raise |
---|
154 | |
---|
155 | def _get_up(self): |
---|
156 | return self._up |
---|
157 | def _set_up(self, value): |
---|
158 | if type(value) == type(self) or value is None: |
---|
159 | self._up = value |
---|
160 | else: |
---|
161 | raise ValueError("bad node_up type") |
---|
162 | |
---|
163 | def _get_children(self): |
---|
164 | return self._children |
---|
165 | def _set_children(self, value): |
---|
166 | if type(value) == list and \ |
---|
167 | len(set([type(n)==type(self) for n in value]))<2: |
---|
168 | self._children = value |
---|
169 | else: |
---|
170 | raise ValueError("bad children type") |
---|
171 | |
---|
172 | def _get_style(self): |
---|
173 | if self._img_style is None: |
---|
174 | self._set_style(None) |
---|
175 | |
---|
176 | return self._img_style |
---|
177 | |
---|
178 | def _set_style(self, value): |
---|
179 | self.set_style(value) |
---|
180 | |
---|
181 | #: Branch length distance to parent node. Default = 0.0 |
---|
182 | img_style = property(fget=_get_style, fset=_set_style) |
---|
183 | |
---|
184 | #: Branch length distance to parent node. Default = 0.0 |
---|
185 | dist = property(fget=_get_dist, fset=_set_dist) |
---|
186 | #: Branch support for current node |
---|
187 | support = property(fget=_get_support, fset=_set_support) |
---|
188 | #: Pointer to parent node |
---|
189 | up = property(fget=_get_up, fset=_set_up) |
---|
190 | #: A list of children nodes |
---|
191 | children = property(fget=_get_children, fset=_set_children) |
---|
192 | |
---|
193 | def _set_face_areas(self, value): |
---|
194 | if isinstance(value, _FaceAreas): |
---|
195 | self._faces = value |
---|
196 | else: |
---|
197 | raise ValueError("[%s] is not a valid FaceAreas instance" %type(value)) |
---|
198 | |
---|
199 | def _get_face_areas(self): |
---|
200 | if not hasattr(self, "_faces"): |
---|
201 | self._faces = _FaceAreas() |
---|
202 | return self._faces |
---|
203 | |
---|
204 | faces = property(fget=_get_face_areas, \ |
---|
205 | fset=_set_face_areas) |
---|
206 | |
---|
207 | def __init__(self, newick=None, format=0, dist=None, support=None, |
---|
208 | name=None): |
---|
209 | self._children = [] |
---|
210 | self._up = None |
---|
211 | self._dist = DEFAULT_DIST |
---|
212 | self._support = DEFAULT_SUPPORT |
---|
213 | self._img_style = None |
---|
214 | self.features = set([]) |
---|
215 | # Add basic features |
---|
216 | self.features.update(["dist", "support", "name"]) |
---|
217 | if dist is not None: |
---|
218 | self.dist = dist |
---|
219 | if support is not None: |
---|
220 | self.support = support |
---|
221 | |
---|
222 | self.name = name if name is not None else DEFAULT_NAME |
---|
223 | |
---|
224 | # Initialize tree |
---|
225 | if newick is not None: |
---|
226 | read_newick(newick, root_node = self, format=format) |
---|
227 | |
---|
228 | def __nonzero__(self): |
---|
229 | return True |
---|
230 | |
---|
231 | def __repr__(self): |
---|
232 | return "Tree node '%s' (%s)" %(self.name, hex(self.__hash__())) |
---|
233 | |
---|
234 | def __and__(self, value): |
---|
235 | """ This allows to execute tree&'A' to obtain the descendant node |
---|
236 | whose name is A""" |
---|
237 | value=str(value) |
---|
238 | try: |
---|
239 | first_match = self.iter_search_nodes(name=value).next() |
---|
240 | return first_match |
---|
241 | except StopIteration: |
---|
242 | raise ValueError, "Node not found" |
---|
243 | |
---|
244 | def __add__(self, value): |
---|
245 | """ This allows to sum two trees.""" |
---|
246 | # Should a make the sum with two copies of the original trees? |
---|
247 | if type(value) == self.__class__: |
---|
248 | new_root = self.__class__() |
---|
249 | new_root.add_child(self) |
---|
250 | new_root.add_child(value) |
---|
251 | return new_root |
---|
252 | else: |
---|
253 | raise ValueError, "Invalid node type" |
---|
254 | |
---|
255 | def __str__(self): |
---|
256 | """ Print tree in newick format. """ |
---|
257 | return self.get_ascii(compact=DEFAULT_COMPACT, \ |
---|
258 | show_internal=DEFAULT_SHOWINTERNAL) |
---|
259 | |
---|
260 | def __contains__(self, item): |
---|
261 | """ Check if item belongs to this node. The 'item' argument must |
---|
262 | be a node instance or its associated name.""" |
---|
263 | if isinstance(item, self.__class__): |
---|
264 | return item in set(self.get_descendants()) |
---|
265 | elif type(item)==str: |
---|
266 | return item in set([n.name for n in self.traverse()]) |
---|
267 | |
---|
268 | def __len__(self): |
---|
269 | """Node len returns number of children.""" |
---|
270 | return len(self.get_leaves()) |
---|
271 | |
---|
272 | def __iter__(self): |
---|
273 | """ Iterator over leaf nodes""" |
---|
274 | return self.iter_leaves() |
---|
275 | |
---|
276 | def add_feature(self, pr_name, pr_value): |
---|
277 | """ |
---|
278 | Add or update a node's feature. |
---|
279 | """ |
---|
280 | setattr(self, pr_name, pr_value) |
---|
281 | self.features.add(pr_name) |
---|
282 | |
---|
283 | def add_features(self, **features): |
---|
284 | """ |
---|
285 | Add or update several features. """ |
---|
286 | for fname, fvalue in features.iteritems(): |
---|
287 | setattr(self, fname, fvalue) |
---|
288 | self.features.add(fname) |
---|
289 | |
---|
290 | def del_feature(self, pr_name): |
---|
291 | """ |
---|
292 | Permanently deletes a node's feature. |
---|
293 | """ |
---|
294 | if hasattr(self, pr_name): |
---|
295 | delattr(self, pr_name) |
---|
296 | self.features.remove(pr_name) |
---|
297 | |
---|
298 | # Topology management |
---|
299 | def add_child(self, child=None, name=None, dist=None, support=None): |
---|
300 | """ |
---|
301 | Adds a new child to this node. If child node is not suplied |
---|
302 | as an argument, a new node instance will be created. |
---|
303 | |
---|
304 | :argument None child: the node instance to be added as a child. |
---|
305 | :argument None name: the name that will be given to the child. |
---|
306 | :argument None dist: the distance from the node to the child. |
---|
307 | :argument None support': the support value of child partition. |
---|
308 | |
---|
309 | :returns: The child node instance |
---|
310 | |
---|
311 | """ |
---|
312 | if child is None: |
---|
313 | child = self.__class__() |
---|
314 | |
---|
315 | if name is not None: |
---|
316 | child.name = name |
---|
317 | if dist is not None: |
---|
318 | child.dist = dist |
---|
319 | if support is not None: |
---|
320 | child.support = support |
---|
321 | |
---|
322 | self.children.append(child) |
---|
323 | child.up = self |
---|
324 | return child |
---|
325 | |
---|
326 | def remove_child(self, child): |
---|
327 | """ |
---|
328 | Removes a child from this node (parent and child |
---|
329 | nodes still exit but are no longer connected). |
---|
330 | """ |
---|
331 | try: |
---|
332 | self.children.remove(child) |
---|
333 | except ValueError, e: |
---|
334 | raise TreeError, e |
---|
335 | else: |
---|
336 | child.up = None |
---|
337 | return child |
---|
338 | |
---|
339 | def add_sister(self, sister=None, name=None, dist=None): |
---|
340 | """ |
---|
341 | Adds a sister to this node. If sister node is not supplied |
---|
342 | as an argument, a new TreeNode instance will be created and |
---|
343 | returned. |
---|
344 | """ |
---|
345 | if self.up == None: |
---|
346 | raise TreeError("A parent node is required to add a sister") |
---|
347 | else: |
---|
348 | return self.up.add_child(child=sister, name=name, dist=dist) |
---|
349 | |
---|
350 | def remove_sister(self, sister=None): |
---|
351 | """ |
---|
352 | Removes a node's sister node. It has the same effect as |
---|
353 | **`TreeNode.up.remove_child(sister)`** |
---|
354 | |
---|
355 | If a sister node is not supplied, the first sister will be deleted |
---|
356 | and returned. |
---|
357 | |
---|
358 | :argument sister: A node instance |
---|
359 | |
---|
360 | :return: The node removed |
---|
361 | """ |
---|
362 | sisters = self.get_sisters() |
---|
363 | if len(sisters)>0: |
---|
364 | if sister==None: |
---|
365 | sister = sisters.pop(0) |
---|
366 | return self.up.remove_child(sister) |
---|
367 | |
---|
368 | def delete(self, prevent_nondicotomic=True, preserve_branch_length=False): |
---|
369 | """ |
---|
370 | Deletes node from the tree structure. Notice that this method |
---|
371 | makes 'disappear' the node from the tree structure. This means |
---|
372 | that children from the deleted node are transferred to the |
---|
373 | next available parent. |
---|
374 | |
---|
375 | :param True prevent_nondicotomic: When True (default), delete |
---|
376 | function will be execute recursively to prevent single-child |
---|
377 | nodes. |
---|
378 | |
---|
379 | :param False preserve_branch_length: If True, branch lengths |
---|
380 | of the deleted nodes are transferred (summed up) to its |
---|
381 | parent's branch, thus keeping original distances among nodes. |
---|
382 | |
---|
383 | **Example:** |
---|
384 | |
---|
385 | :: |
---|
386 | |
---|
387 | / C |
---|
388 | root-| |
---|
389 | | / B |
---|
390 | \--- H | |
---|
391 | \ A |
---|
392 | |
---|
393 | > root.delete(H) will produce this structure: |
---|
394 | |
---|
395 | / C |
---|
396 | | |
---|
397 | root-|--B |
---|
398 | | |
---|
399 | \ A |
---|
400 | |
---|
401 | """ |
---|
402 | parent = self.up |
---|
403 | if parent: |
---|
404 | for ch in self.children: |
---|
405 | if preserve_branch_length: |
---|
406 | ch.dist += self.dist |
---|
407 | parent.add_child(ch) |
---|
408 | |
---|
409 | parent.remove_child(self) |
---|
410 | |
---|
411 | # Avoids parents with only one child |
---|
412 | if prevent_nondicotomic and parent and\ |
---|
413 | len(parent.children)<2: |
---|
414 | parent.delete(prevent_nondicotomic=False, |
---|
415 | preserve_branch_length=preserve_branch_length) |
---|
416 | |
---|
417 | def detach(self): |
---|
418 | """ |
---|
419 | Detachs this node (and all its descendants) from its parent |
---|
420 | and returns the referent to itself. |
---|
421 | |
---|
422 | Detached node conserves all its structure of descendants, and can |
---|
423 | be attached to another node through the 'add_child' function. This |
---|
424 | mechanism can be seen as a cut and paste. |
---|
425 | """ |
---|
426 | |
---|
427 | if self.up: |
---|
428 | self.up.children.remove(self) |
---|
429 | self.up = None |
---|
430 | return self |
---|
431 | |
---|
432 | def prune(self, nodes, preserve_branch_length=False): |
---|
433 | """Prunes the topology of a node in order to conserve only a |
---|
434 | selected list of leaf or internal nodes. The minimum number of |
---|
435 | internal nodes (the deepest as possible) are kept to conserve |
---|
436 | the topological relationship among the provided list of nodes. |
---|
437 | |
---|
438 | :var nodes: a list of node names or node objects that must be kept |
---|
439 | |
---|
440 | :param False preserve_branch_length: If True, branch lengths |
---|
441 | of the deleted nodes are transferred (summed up) to its |
---|
442 | parent's branch, thus keeping original distances among nodes. |
---|
443 | |
---|
444 | **Examples:** |
---|
445 | |
---|
446 | :: |
---|
447 | |
---|
448 | t = Tree("(((A:0.1, B:0.01):0.001, C:0.0001):1.0[&&NHX:name=I], (D:0.00001):0.000001[&&NHX:name=J]):2.0[&&NHX:name=root];") |
---|
449 | node_C = t.search_nodes(name="C")[0] |
---|
450 | t.prune(["A","D", node_C]) |
---|
451 | print t |
---|
452 | |
---|
453 | """ |
---|
454 | def cmp_nodes(x, y): |
---|
455 | # if several nodes are in the same path of two kept nodes, |
---|
456 | # only one should be maintained. This prioritize internal |
---|
457 | # nodes that are already in the to_keep list and then |
---|
458 | # deeper nodes (closer to the leaves). |
---|
459 | if x in to_keep: |
---|
460 | if y not in to_keep: |
---|
461 | return -1 |
---|
462 | elif y in to_keep: |
---|
463 | return 0 |
---|
464 | elif n2depth[x] > n2depth[y]: |
---|
465 | return -1 |
---|
466 | elif n2depth[x] < n2depth[y]: |
---|
467 | return 1 |
---|
468 | else: |
---|
469 | return 0 |
---|
470 | |
---|
471 | to_keep = set(_translate_nodes(self, *nodes)) |
---|
472 | start, node2path = self.get_common_ancestor(to_keep, get_path=True) |
---|
473 | |
---|
474 | # Calculate which kept nodes are visiting the same nodes in |
---|
475 | # their path to the common ancestor. |
---|
476 | n2count = {} |
---|
477 | n2depth = {} |
---|
478 | for seed, path in node2path.iteritems(): |
---|
479 | for visited_node in path: |
---|
480 | if visited_node not in n2depth: |
---|
481 | depth = visited_node.get_distance(start, topology_only=True) |
---|
482 | n2depth[visited_node] = depth |
---|
483 | if visited_node is not seed: |
---|
484 | n2count.setdefault(visited_node, set()).add(seed) |
---|
485 | |
---|
486 | # if several internal nodes are in the path of exactly the |
---|
487 | # same kept nodes, only one should be maintain. |
---|
488 | visitors2nodes = {} |
---|
489 | for node, visitors in n2count.iteritems(): |
---|
490 | # keep nodes connection at least two other nodes |
---|
491 | if len(visitors)>1: |
---|
492 | visitor_key = frozenset(visitors) |
---|
493 | visitors2nodes.setdefault(visitor_key, set()).add(node) |
---|
494 | for visitors, nodes in visitors2nodes.iteritems(): |
---|
495 | s = sorted(nodes, cmp_nodes) |
---|
496 | to_keep.add(s[0]) |
---|
497 | |
---|
498 | # Detach unvisited branches |
---|
499 | if start is not self: |
---|
500 | start.detach() |
---|
501 | for n in self.get_children(): |
---|
502 | n.detach() |
---|
503 | for n in start.get_children(): |
---|
504 | self.add_child(child=n) |
---|
505 | |
---|
506 | for n in [self]+self.get_descendants(): |
---|
507 | if n not in to_keep: |
---|
508 | n.delete(prevent_nondicotomic=False, |
---|
509 | preserve_branch_length=preserve_branch_length) |
---|
510 | |
---|
511 | def swap_children(self): |
---|
512 | """ |
---|
513 | Swaps current children order. |
---|
514 | """ |
---|
515 | if len(self.children)>1: |
---|
516 | self.children.reverse() |
---|
517 | |
---|
518 | def get_children(self): |
---|
519 | """ |
---|
520 | Returns an independent list of node's children. |
---|
521 | """ |
---|
522 | return [ch for ch in self.children] |
---|
523 | |
---|
524 | def get_sisters(self): |
---|
525 | """ |
---|
526 | Returns an indepent list of sister nodes. |
---|
527 | """ |
---|
528 | if self.up!=None: |
---|
529 | return [ch for ch in self.up.children if ch!=self] |
---|
530 | else: |
---|
531 | return [] |
---|
532 | |
---|
533 | def iter_leaves(self, is_leaf_fn=None): |
---|
534 | """ |
---|
535 | Returns an iterator over the leaves under this node. |
---|
536 | |
---|
537 | :argument None is_leaf_fn: See :func:`TreeNode.traverse` for |
---|
538 | documentation. |
---|
539 | """ |
---|
540 | for n in self.traverse(strategy="preorder", is_leaf_fn=is_leaf_fn): |
---|
541 | if not is_leaf_fn: |
---|
542 | if n.is_leaf(): |
---|
543 | yield n |
---|
544 | else: |
---|
545 | if is_leaf_fn(n): |
---|
546 | yield n |
---|
547 | |
---|
548 | def get_leaves(self, is_leaf_fn=None): |
---|
549 | """ |
---|
550 | Returns the list of terminal nodes (leaves) under this node. |
---|
551 | |
---|
552 | :argument None is_leaf_fn: See :func:`TreeNode.traverse` for |
---|
553 | documentation. |
---|
554 | """ |
---|
555 | return [n for n in self.iter_leaves(is_leaf_fn=is_leaf_fn)] |
---|
556 | |
---|
557 | def iter_leaf_names(self, is_leaf_fn=None): |
---|
558 | """ |
---|
559 | Returns an iterator over the leaf names under this node. |
---|
560 | |
---|
561 | :argument None is_leaf_fn: See :func:`TreeNode.traverse` for |
---|
562 | documentation. |
---|
563 | """ |
---|
564 | for n in self.iter_leaves(is_leaf_fn=is_leaf_fn): |
---|
565 | yield n.name |
---|
566 | |
---|
567 | def get_leaf_names(self, is_leaf_fn=None): |
---|
568 | """ |
---|
569 | Returns the list of terminal node names under the current |
---|
570 | node. |
---|
571 | |
---|
572 | :argument None is_leaf_fn: See :func:`TreeNode.traverse` for |
---|
573 | documentation. |
---|
574 | """ |
---|
575 | return [name for name in self.iter_leaf_names(is_leaf_fn=is_leaf_fn)] |
---|
576 | |
---|
577 | def iter_descendants(self, strategy="levelorder", is_leaf_fn=None): |
---|
578 | """ |
---|
579 | Returns an iterator over all descendant nodes. |
---|
580 | |
---|
581 | :argument None is_leaf_fn: See :func:`TreeNode.traverse` for |
---|
582 | documentation. |
---|
583 | """ |
---|
584 | for n in self.traverse(strategy=strategy, is_leaf_fn=is_leaf_fn): |
---|
585 | if n is not self: |
---|
586 | yield n |
---|
587 | |
---|
588 | def get_descendants(self, strategy="levelorder", is_leaf_fn=None): |
---|
589 | """ |
---|
590 | Returns a list of all (leaves and internal) descendant nodes. |
---|
591 | |
---|
592 | :argument None is_leaf_fn: See :func:`TreeNode.traverse` for |
---|
593 | documentation. |
---|
594 | """ |
---|
595 | return [n for n in self.iter_descendants(strategy=strategy, \ |
---|
596 | is_leaf_fn=is_leaf_fn)] |
---|
597 | |
---|
598 | def traverse(self, strategy="levelorder", is_leaf_fn=None): |
---|
599 | """ |
---|
600 | Returns an iterator to traverse the tree structure under this |
---|
601 | node. |
---|
602 | |
---|
603 | :argument "levelorder" strategy: set the way in which tree |
---|
604 | will be traversed. Possible values are: "preorder" (first |
---|
605 | parent and then children) 'postorder' (first children and |
---|
606 | the parent) and "levelorder" (nodes are visited in order |
---|
607 | from root to leaves) |
---|
608 | |
---|
609 | :argument None is_leaf_fn: If supplied, ``is_leaf_fn`` |
---|
610 | function will be used to interrogate nodes about if they |
---|
611 | are terminal or internal. ``is_leaf_fn`` function should |
---|
612 | receive a node instance as first argument and return True |
---|
613 | or False. Use this argument to traverse a tree by |
---|
614 | dynamically collapsing internal nodes matching |
---|
615 | ``is_leaf_fn``. |
---|
616 | """ |
---|
617 | if strategy=="preorder": |
---|
618 | return self._iter_descendants_preorder(is_leaf_fn=is_leaf_fn) |
---|
619 | elif strategy=="levelorder": |
---|
620 | return self._iter_descendants_levelorder(is_leaf_fn=is_leaf_fn) |
---|
621 | elif strategy=="postorder": |
---|
622 | return self._iter_descendants_postorder(is_leaf_fn=is_leaf_fn) |
---|
623 | |
---|
624 | def _iter_descendants_postorder_recursive(self, is_leaf_fn=None): |
---|
625 | """ |
---|
626 | Iterate over all desdecendant nodes. |
---|
627 | """ |
---|
628 | if not is_leaf_fn or not is_leaf_fn(self): |
---|
629 | for ch in self.children: |
---|
630 | for node in ch._iter_descendants_postorder(is_leaf_fn=is_leaf_fn): |
---|
631 | yield node |
---|
632 | yield self |
---|
633 | |
---|
634 | def iter_prepostorder(self, is_leaf_fn=None): |
---|
635 | """ |
---|
636 | Iterate over all nodes in a tree yielding every node in both |
---|
637 | pre and post order. Each iteration returns a postorder flag |
---|
638 | (True if node is being visited in postorder) and a node |
---|
639 | instance. |
---|
640 | """ |
---|
641 | to_visit = [self] |
---|
642 | if is_leaf_fn is not None: |
---|
643 | _leaf = is_leaf_fn |
---|
644 | else: |
---|
645 | _leaf = self.__class__.is_leaf |
---|
646 | |
---|
647 | while to_visit: |
---|
648 | node = to_visit.pop(-1) |
---|
649 | try: |
---|
650 | node = node[1] |
---|
651 | except TypeError: |
---|
652 | # PREORDER ACTIONS |
---|
653 | yield (False, node) |
---|
654 | if not _leaf(node): |
---|
655 | # ADD CHILDREN |
---|
656 | to_visit.extend(reversed(node.children + [[1, node]])) |
---|
657 | else: |
---|
658 | #POSTORDER ACTIONS |
---|
659 | yield (True, node) |
---|
660 | |
---|
661 | def _iter_descendants_postorder(self, is_leaf_fn=None): |
---|
662 | to_visit = [self] |
---|
663 | if is_leaf_fn is not None: |
---|
664 | _leaf = is_leaf_fn |
---|
665 | else: |
---|
666 | _leaf = self.__class__.is_leaf |
---|
667 | |
---|
668 | while to_visit: |
---|
669 | node = to_visit.pop(-1) |
---|
670 | try: |
---|
671 | node = node[1] |
---|
672 | except TypeError: |
---|
673 | # PREORDER ACTIONS |
---|
674 | if not _leaf(node): |
---|
675 | # ADD CHILDREN |
---|
676 | to_visit.extend(reversed(node.children + [[1, node]])) |
---|
677 | else: |
---|
678 | yield node |
---|
679 | else: |
---|
680 | #POSTORDER ACTIONS |
---|
681 | yield node |
---|
682 | |
---|
683 | def _iter_descendants_levelorder(self, is_leaf_fn=None): |
---|
684 | """ |
---|
685 | Iterate over all desdecendant nodes. |
---|
686 | """ |
---|
687 | tovisit = deque([self]) |
---|
688 | while len(tovisit)>0: |
---|
689 | node = tovisit.popleft() |
---|
690 | yield node |
---|
691 | if not is_leaf_fn or not is_leaf_fn(node): |
---|
692 | tovisit.extend(node.children) |
---|
693 | |
---|
694 | def _iter_descendants_preorder(self, is_leaf_fn=None): |
---|
695 | """ |
---|
696 | Iterator over all descendant nodes. |
---|
697 | """ |
---|
698 | to_visit = deque() |
---|
699 | node = self |
---|
700 | while node is not None: |
---|
701 | yield node |
---|
702 | if not is_leaf_fn or not is_leaf_fn(node): |
---|
703 | to_visit.extendleft(reversed(node.children)) |
---|
704 | try: |
---|
705 | node = to_visit.popleft() |
---|
706 | except: |
---|
707 | node = None |
---|
708 | |
---|
709 | def iter_ancestors(self): |
---|
710 | '''versionadded: 2.2 |
---|
711 | |
---|
712 | Iterates over the list of all ancestor nodes from current node |
---|
713 | to the current tree root. |
---|
714 | |
---|
715 | ''' |
---|
716 | node = self |
---|
717 | while node.up is not None: |
---|
718 | yield node.up |
---|
719 | node = node.up |
---|
720 | |
---|
721 | def get_ancestors(self): |
---|
722 | '''versionadded: 2.2 |
---|
723 | |
---|
724 | Returns the list of all ancestor nodes from current node to |
---|
725 | the current tree root. |
---|
726 | |
---|
727 | ''' |
---|
728 | return [n for n in self.iter_ancestors()] |
---|
729 | |
---|
730 | def describe(self): |
---|
731 | """ |
---|
732 | Prints general information about this node and its |
---|
733 | connections. |
---|
734 | """ |
---|
735 | if len(self.get_tree_root().children)==2: |
---|
736 | rooting = "Yes" |
---|
737 | elif len(self.get_tree_root().children)>2: |
---|
738 | rooting = "No" |
---|
739 | else: |
---|
740 | rooting = "Unknown" |
---|
741 | max_node, max_dist = self.get_farthest_leaf() |
---|
742 | cached_content = self.get_cached_content() |
---|
743 | print "Number of leaf nodes:\t%d" % len(cached_content[self]) |
---|
744 | print "Number of internal nodes:\t%d" % len(cached_content) |
---|
745 | print "Rooted:\t%s" %rooting |
---|
746 | print "Most distant node:\t%s" %max_node.name |
---|
747 | print "Max. distance:\t%f" %max_dist |
---|
748 | |
---|
749 | def write(self, features=None, outfile=None, format=0, is_leaf_fn=None, |
---|
750 | format_root_node=False): |
---|
751 | """ |
---|
752 | Returns the newick representation of current node. Several |
---|
753 | arguments control the way in which extra data is shown for |
---|
754 | every node: |
---|
755 | |
---|
756 | :argument features: a list of feature names to be exported |
---|
757 | using the Extended Newick Format (i.e. features=["name", |
---|
758 | "dist"]). Use an empty list to export all available features |
---|
759 | in each node (features=[]) |
---|
760 | |
---|
761 | :argument outfile: writes the output to a given file |
---|
762 | |
---|
763 | :argument format: defines the newick standard used to encode the |
---|
764 | tree. See tutorial for details. |
---|
765 | |
---|
766 | :argument False format_root_node: If True, it allows features |
---|
767 | and branch information from root node to be exported as a |
---|
768 | part of the newick text string. For newick compatibility |
---|
769 | reasons, this is False by default. |
---|
770 | |
---|
771 | :argument is_leaf_fn: See :func:`TreeNode.traverse` for |
---|
772 | documentation. |
---|
773 | |
---|
774 | **Example:** |
---|
775 | |
---|
776 | :: |
---|
777 | |
---|
778 | t.get_newick(features=["species","name"], format=1) |
---|
779 | |
---|
780 | """ |
---|
781 | |
---|
782 | nw = write_newick(self, features = features, format=format, |
---|
783 | is_leaf_fn=is_leaf_fn, |
---|
784 | format_root_node=format_root_node) |
---|
785 | if outfile is not None: |
---|
786 | open(outfile, "w").write(nw) |
---|
787 | else: |
---|
788 | return nw |
---|
789 | |
---|
790 | def get_tree_root(self): |
---|
791 | """ |
---|
792 | Returns the absolute root node of current tree structure. |
---|
793 | """ |
---|
794 | root = self |
---|
795 | while root.up is not None: |
---|
796 | root = root.up |
---|
797 | return root |
---|
798 | |
---|
799 | def get_common_ancestor(self, *target_nodes, **kargs): |
---|
800 | """ |
---|
801 | Returns the first common ancestor between this node and a given |
---|
802 | list of 'target_nodes'. |
---|
803 | |
---|
804 | **Examples:** |
---|
805 | |
---|
806 | :: |
---|
807 | |
---|
808 | t = tree.Tree("(((A:0.1, B:0.01):0.001, C:0.0001):1.0[&&NHX:name=common], (D:0.00001):0.000001):2.0[&&NHX:name=root];") |
---|
809 | A = t.get_descendants_by_name("A")[0] |
---|
810 | C = t.get_descendants_by_name("C")[0] |
---|
811 | common = A.get_common_ancestor(C) |
---|
812 | print common.name |
---|
813 | |
---|
814 | """ |
---|
815 | |
---|
816 | get_path = kargs.get("get_path", False) |
---|
817 | |
---|
818 | if len(target_nodes) == 1 and type(target_nodes[0]) \ |
---|
819 | in set([set, tuple, list, frozenset]): |
---|
820 | target_nodes = target_nodes[0] |
---|
821 | |
---|
822 | # Convert node names into node instances |
---|
823 | target_nodes = _translate_nodes(self, *target_nodes) |
---|
824 | |
---|
825 | # If only one node is provided, use self as the second target |
---|
826 | if type(target_nodes) != list: |
---|
827 | target_nodes = [target_nodes, self] |
---|
828 | elif len(target_nodes)==1: |
---|
829 | target_nodes = tree_nodes.append(self) |
---|
830 | |
---|
831 | n2path = {} |
---|
832 | reference = [] |
---|
833 | ref_node = None |
---|
834 | for n in target_nodes: |
---|
835 | current = n |
---|
836 | while current: |
---|
837 | n2path.setdefault(n, set()).add(current) |
---|
838 | if not ref_node: |
---|
839 | reference.append(current) |
---|
840 | current = current.up |
---|
841 | if not ref_node: |
---|
842 | ref_node = n |
---|
843 | |
---|
844 | common = None |
---|
845 | for n in reference: |
---|
846 | broken = False |
---|
847 | for node, path in n2path.iteritems(): |
---|
848 | if node is not ref_node and n not in path: |
---|
849 | broken = True |
---|
850 | break |
---|
851 | |
---|
852 | if not broken: |
---|
853 | common = n |
---|
854 | break |
---|
855 | if not common: |
---|
856 | raise ValueError("Nodes are not connected!") |
---|
857 | |
---|
858 | if get_path: |
---|
859 | return common, n2path |
---|
860 | else: |
---|
861 | return common |
---|
862 | |
---|
863 | def iter_search_nodes(self, **conditions): |
---|
864 | """ |
---|
865 | Search nodes in an interative way. Matches are being yield as |
---|
866 | they are being found. This avoids to scan the full tree |
---|
867 | topology before returning the first matches. Useful when |
---|
868 | dealing with huge trees. |
---|
869 | """ |
---|
870 | |
---|
871 | for n in self.traverse(): |
---|
872 | conditions_passed = 0 |
---|
873 | for key, value in conditions.iteritems(): |
---|
874 | if hasattr(n, key) and getattr(n, key) == value: |
---|
875 | conditions_passed +=1 |
---|
876 | if conditions_passed == len(conditions): |
---|
877 | yield n |
---|
878 | |
---|
879 | def search_nodes(self, **conditions): |
---|
880 | """ |
---|
881 | Returns the list of nodes matching a given set of conditions. |
---|
882 | |
---|
883 | **Example:** |
---|
884 | |
---|
885 | :: |
---|
886 | |
---|
887 | tree.search_nodes(dist=0.0, name="human") |
---|
888 | |
---|
889 | """ |
---|
890 | matching_nodes = [] |
---|
891 | for n in self.iter_search_nodes(**conditions): |
---|
892 | matching_nodes.append(n) |
---|
893 | return matching_nodes |
---|
894 | |
---|
895 | def get_leaves_by_name(self,name): |
---|
896 | """ |
---|
897 | Returns a list of leaf nodes matching a given name. |
---|
898 | """ |
---|
899 | return self.search_nodes(name=name, children=[]) |
---|
900 | |
---|
901 | def is_leaf(self): |
---|
902 | """ |
---|
903 | Return True if current node is a leaf. |
---|
904 | """ |
---|
905 | return len(self.children) == 0 |
---|
906 | |
---|
907 | def is_root(self): |
---|
908 | """ |
---|
909 | Returns True if current node has no parent |
---|
910 | """ |
---|
911 | if self.up is None: |
---|
912 | return True |
---|
913 | else: |
---|
914 | return False |
---|
915 | |
---|
916 | # ########################### |
---|
917 | # Distance related functions |
---|
918 | # ########################### |
---|
919 | def get_distance(self, target, target2=None, topology_only=False): |
---|
920 | """ |
---|
921 | Returns the distance between two nodes. If only one target is |
---|
922 | specified, it returns the distance bewtween the target and the |
---|
923 | current node. |
---|
924 | |
---|
925 | :argument target: a node within the same tree structure. |
---|
926 | |
---|
927 | :argument target2: a node within the same tree structure. If |
---|
928 | not specified, current node is used as target2. |
---|
929 | |
---|
930 | :argument False topology_only: If set to True, distance will |
---|
931 | refer to the number of nodes between target and target2. |
---|
932 | |
---|
933 | :returns: branch length distance between target and |
---|
934 | target2. If topology_only flag is True, returns the number |
---|
935 | of nodes between target and target2. |
---|
936 | |
---|
937 | """ |
---|
938 | |
---|
939 | if target2 is None: |
---|
940 | target2 = self |
---|
941 | root = self.get_tree_root() |
---|
942 | else: |
---|
943 | # is target node under current node? |
---|
944 | root = self |
---|
945 | |
---|
946 | target, target2 = _translate_nodes(root, target, target2) |
---|
947 | ancestor = root.get_common_ancestor(target, target2) |
---|
948 | if ancestor is None: |
---|
949 | raise TreeError, "Nodes are not connected" |
---|
950 | |
---|
951 | dist = 0.0 |
---|
952 | for n in [target2, target]: |
---|
953 | current = n |
---|
954 | while current != ancestor: |
---|
955 | if topology_only: |
---|
956 | if current!=target: |
---|
957 | dist += 1 |
---|
958 | else: |
---|
959 | dist += current.dist |
---|
960 | current = current.up |
---|
961 | return dist |
---|
962 | |
---|
963 | def get_farthest_node(self, topology_only=False): |
---|
964 | """ |
---|
965 | Returns the node's farthest descendant or ancestor node, and the |
---|
966 | distance to it. |
---|
967 | |
---|
968 | :argument False topology_only: If set to True, distance |
---|
969 | between nodes will be referred to the number of nodes |
---|
970 | between them. In other words, topological distance will be |
---|
971 | used instead of branch length distances. |
---|
972 | |
---|
973 | :return: A tuple containing the farthest node referred to the |
---|
974 | current node and the distance to it. |
---|
975 | |
---|
976 | """ |
---|
977 | # Init fasthest node to current farthest leaf |
---|
978 | farthest_node,farthest_dist = self.get_farthest_leaf(topology_only=topology_only) |
---|
979 | prev = self |
---|
980 | if topology_only: |
---|
981 | cdist = 0 |
---|
982 | else: |
---|
983 | cdist = prev.dist |
---|
984 | current = prev.up |
---|
985 | while current is not None: |
---|
986 | for ch in current.children: |
---|
987 | if ch != prev: |
---|
988 | if not ch.is_leaf(): |
---|
989 | fnode, fdist = ch.get_farthest_leaf(topology_only=topology_only) |
---|
990 | else: |
---|
991 | fnode = ch |
---|
992 | fdist = 0 |
---|
993 | if topology_only: |
---|
994 | fdist += 1.0 |
---|
995 | else: |
---|
996 | fdist += ch.dist |
---|
997 | if cdist+fdist > farthest_dist: |
---|
998 | farthest_dist = cdist + fdist |
---|
999 | farthest_node = fnode |
---|
1000 | prev = current |
---|
1001 | if topology_only: |
---|
1002 | cdist += 1 |
---|
1003 | else: |
---|
1004 | cdist += prev.dist |
---|
1005 | current = prev.up |
---|
1006 | return farthest_node, farthest_dist |
---|
1007 | |
---|
1008 | def get_farthest_leaf(self, topology_only=False): |
---|
1009 | """ |
---|
1010 | Returns node's farthest descendant node (which is always a leaf), and the |
---|
1011 | distance to it. |
---|
1012 | |
---|
1013 | :argument False topology_only: If set to True, distance |
---|
1014 | between nodes will be referred to the number of nodes |
---|
1015 | between them. In other words, topological distance will be |
---|
1016 | used instead of branch length distances. |
---|
1017 | |
---|
1018 | :return: A tuple containing the farthest leaf referred to the |
---|
1019 | current node and the distance to it. |
---|
1020 | """ |
---|
1021 | max_dist = 0.0 |
---|
1022 | max_node = None |
---|
1023 | if self.is_leaf(): |
---|
1024 | return self, 0.0 |
---|
1025 | else: |
---|
1026 | for ch in self.children: |
---|
1027 | node, d = ch.get_farthest_leaf(topology_only=topology_only) |
---|
1028 | if topology_only: |
---|
1029 | d += 1.0 |
---|
1030 | else: |
---|
1031 | d += ch.dist |
---|
1032 | if d>=max_dist: |
---|
1033 | max_dist = d |
---|
1034 | max_node = node |
---|
1035 | return max_node, max_dist |
---|
1036 | |
---|
1037 | def get_closest_leaf(self, topology_only=False): |
---|
1038 | """Returns node's closest descendant leaf and the distance to |
---|
1039 | it. |
---|
1040 | |
---|
1041 | :argument False topology_only: If set to True, distance |
---|
1042 | between nodes will be referred to the number of nodes |
---|
1043 | between them. In other words, topological distance will be |
---|
1044 | used instead of branch length distances. |
---|
1045 | |
---|
1046 | :return: A tuple containing the closest leaf referred to the |
---|
1047 | current node and the distance to it. |
---|
1048 | |
---|
1049 | """ |
---|
1050 | min_dist = None |
---|
1051 | min_node = None |
---|
1052 | if self.is_leaf(): |
---|
1053 | return self, 0.0 |
---|
1054 | else: |
---|
1055 | for ch in self.children: |
---|
1056 | node, d = ch.get_closest_leaf(topology_only=topology_only) |
---|
1057 | if topology_only: |
---|
1058 | d += 1.0 |
---|
1059 | else: |
---|
1060 | d += ch.dist |
---|
1061 | if min_dist is None or d<min_dist: |
---|
1062 | min_dist = d |
---|
1063 | min_node = node |
---|
1064 | return min_node, min_dist |
---|
1065 | |
---|
1066 | |
---|
1067 | |
---|
1068 | def get_midpoint_outgroup(self): |
---|
1069 | """ |
---|
1070 | Returns the node that divides the current tree into two distance-balanced |
---|
1071 | partitions. |
---|
1072 | """ |
---|
1073 | # Gets the farthest node to the current root |
---|
1074 | root = self.get_tree_root() |
---|
1075 | nA , r2A_dist = root.get_farthest_leaf() |
---|
1076 | nB , A2B_dist = nA.get_farthest_node() |
---|
1077 | |
---|
1078 | outgroup = nA |
---|
1079 | middist = A2B_dist / 2.0 |
---|
1080 | cdist = 0 |
---|
1081 | current = nA |
---|
1082 | while current is not None: |
---|
1083 | cdist += current.dist |
---|
1084 | if cdist > (middist): # Deja de subir cuando se pasa del maximo |
---|
1085 | break |
---|
1086 | else: |
---|
1087 | current = current.up |
---|
1088 | return current |
---|
1089 | |
---|
1090 | def populate(self, size, names_library=None, reuse_names=False, |
---|
1091 | random_branches=False, branch_range=(0,1), |
---|
1092 | support_range=(0,1)): |
---|
1093 | """ |
---|
1094 | Generates a random topology by populating current node. |
---|
1095 | |
---|
1096 | :argument None names_library: If provided, names library |
---|
1097 | (list, set, dict, etc.) will be used to name nodes. |
---|
1098 | |
---|
1099 | :argument False reuse_names: If True, node names will not be |
---|
1100 | necessarily unique, which makes the process a bit more |
---|
1101 | efficient. |
---|
1102 | |
---|
1103 | :argument False random_branches: If True, branch distances and support |
---|
1104 | values will be randomized. |
---|
1105 | |
---|
1106 | :argument (0,1) branch_range: If random_branches is True, this |
---|
1107 | range of values will be used to generate random distances. |
---|
1108 | |
---|
1109 | :argument (0,1) support_range: If random_branches is True, |
---|
1110 | this range of values will be used to generate random branch |
---|
1111 | support values. |
---|
1112 | |
---|
1113 | """ |
---|
1114 | NewNode = self.__class__ |
---|
1115 | |
---|
1116 | if len(self.children) > 1: |
---|
1117 | connector = NewNode() |
---|
1118 | for ch in self.get_children(): |
---|
1119 | ch.detach() |
---|
1120 | connector.add_child(child = ch) |
---|
1121 | root = NewNode() |
---|
1122 | self.add_child(child = connector) |
---|
1123 | self.add_child(child = root) |
---|
1124 | else: |
---|
1125 | root = self |
---|
1126 | |
---|
1127 | next = deque([root]) |
---|
1128 | for i in xrange(size-1): |
---|
1129 | if random.randint(0, 1): |
---|
1130 | p = next.pop() |
---|
1131 | else: |
---|
1132 | p = next.popleft() |
---|
1133 | |
---|
1134 | c1 = p.add_child() |
---|
1135 | c2 = p.add_child() |
---|
1136 | next.extend([c1, c2]) |
---|
1137 | if random_branches: |
---|
1138 | c1.dist = random.uniform(*branch_range) |
---|
1139 | c2.dist = random.uniform(*branch_range) |
---|
1140 | c1.support = random.uniform(*branch_range) |
---|
1141 | c2.support = random.uniform(*branch_range) |
---|
1142 | else: |
---|
1143 | c1.dist = 1.0 |
---|
1144 | c2.dist = 1.0 |
---|
1145 | c1.support = 1.0 |
---|
1146 | c2.support = 1.0 |
---|
1147 | |
---|
1148 | # next contains leaf nodes |
---|
1149 | charset = "abcdefghijklmnopqrstuvwxyz" |
---|
1150 | if names_library: |
---|
1151 | names_library = deque(names_library) |
---|
1152 | else: |
---|
1153 | avail_names = itertools.combinations_with_replacement(charset, 10) |
---|
1154 | for n in next: |
---|
1155 | if names_library: |
---|
1156 | if reuse_names: |
---|
1157 | tname = random.sample(names_library, 1)[0] |
---|
1158 | else: |
---|
1159 | tname = names_library.pop() |
---|
1160 | else: |
---|
1161 | tname = ''.join(avail_names.next()) |
---|
1162 | n.name = tname |
---|
1163 | |
---|
1164 | def set_outgroup(self, outgroup): |
---|
1165 | """ |
---|
1166 | Sets a descendant node as the outgroup of a tree. This function |
---|
1167 | can be used to root a tree or even an internal node. |
---|
1168 | |
---|
1169 | :argument outgroup: a node instance within the same tree |
---|
1170 | structure that will be used as a basal node. |
---|
1171 | |
---|
1172 | """ |
---|
1173 | |
---|
1174 | outgroup = _translate_nodes(self, outgroup) |
---|
1175 | |
---|
1176 | if self == outgroup: |
---|
1177 | raise ValueError, "Cannot set myself as outgroup" |
---|
1178 | |
---|
1179 | parent_outgroup = outgroup.up |
---|
1180 | |
---|
1181 | # Detects (sub)tree root |
---|
1182 | n = outgroup |
---|
1183 | while n.up is not self: |
---|
1184 | n = n.up |
---|
1185 | |
---|
1186 | # If outgroup is a child from root, but with more than one |
---|
1187 | # sister nodes, creates a new node to group them |
---|
1188 | |
---|
1189 | self.children.remove(n) |
---|
1190 | if len(self.children) != 1: |
---|
1191 | down_branch_connector = self.__class__() |
---|
1192 | down_branch_connector.dist = 0.0 |
---|
1193 | down_branch_connector.support = n.support |
---|
1194 | for ch in self.get_children(): |
---|
1195 | down_branch_connector.children.append(ch) |
---|
1196 | ch.up = down_branch_connector |
---|
1197 | self.children.remove(ch) |
---|
1198 | else: |
---|
1199 | down_branch_connector = self.children[0] |
---|
1200 | |
---|
1201 | # Connects down branch to myself or to outgroup |
---|
1202 | quien_va_ser_padre = parent_outgroup |
---|
1203 | if quien_va_ser_padre is not self: |
---|
1204 | # Parent-child swapping |
---|
1205 | quien_va_ser_hijo = quien_va_ser_padre.up |
---|
1206 | quien_fue_padre = None |
---|
1207 | buffered_dist = quien_va_ser_padre.dist |
---|
1208 | buffered_support = quien_va_ser_padre.support |
---|
1209 | |
---|
1210 | while quien_va_ser_hijo is not self: |
---|
1211 | quien_va_ser_padre.children.append(quien_va_ser_hijo) |
---|
1212 | quien_va_ser_hijo.children.remove(quien_va_ser_padre) |
---|
1213 | |
---|
1214 | buffered_dist2 = quien_va_ser_hijo.dist |
---|
1215 | buffered_support2 = quien_va_ser_hijo.support |
---|
1216 | quien_va_ser_hijo.dist = buffered_dist |
---|
1217 | quien_va_ser_hijo.support = buffered_support |
---|
1218 | buffered_dist = buffered_dist2 |
---|
1219 | buffered_support = buffered_support2 |
---|
1220 | |
---|
1221 | quien_va_ser_padre.up = quien_fue_padre |
---|
1222 | quien_fue_padre = quien_va_ser_padre |
---|
1223 | |
---|
1224 | quien_va_ser_padre = quien_va_ser_hijo |
---|
1225 | quien_va_ser_hijo = quien_va_ser_padre.up |
---|
1226 | |
---|
1227 | quien_va_ser_padre.children.append(down_branch_connector) |
---|
1228 | down_branch_connector.up = quien_va_ser_padre |
---|
1229 | quien_va_ser_padre.up = quien_fue_padre |
---|
1230 | |
---|
1231 | down_branch_connector.dist += buffered_dist |
---|
1232 | outgroup2 = parent_outgroup |
---|
1233 | parent_outgroup.children.remove(outgroup) |
---|
1234 | outgroup2.dist = 0 |
---|
1235 | |
---|
1236 | else: |
---|
1237 | outgroup2 = down_branch_connector |
---|
1238 | |
---|
1239 | outgroup.up = self |
---|
1240 | outgroup2.up = self |
---|
1241 | # outgroup is always the first children. Some function my |
---|
1242 | # trust on this fact, so do no change this. |
---|
1243 | self.children = [outgroup,outgroup2] |
---|
1244 | middist = (outgroup2.dist + outgroup.dist)/2 |
---|
1245 | outgroup.dist = middist |
---|
1246 | outgroup2.dist = middist |
---|
1247 | outgroup2.support = outgroup.support |
---|
1248 | |
---|
1249 | def unroot(self): |
---|
1250 | """ |
---|
1251 | Unroots current node. This function is expected to be used on |
---|
1252 | the absolute tree root node, but it can be also be applied to |
---|
1253 | any other internal node. It will convert a split into a |
---|
1254 | multifurcation. |
---|
1255 | """ |
---|
1256 | # if is rooted |
---|
1257 | if not self.is_root(): |
---|
1258 | print >>sys.stderr, "Warning. You are unrooting an internal node.!!" |
---|
1259 | if len(self.children)==2: |
---|
1260 | if not self.children[0].is_leaf(): |
---|
1261 | self.children[0].delete() |
---|
1262 | elif not self.children[1].is_leaf(): |
---|
1263 | self.children[1].delete() |
---|
1264 | else: |
---|
1265 | raise TreeError, "Cannot unroot a tree with only two leaves" |
---|
1266 | |
---|
1267 | def show(self, layout=None, tree_style=None, name="ETE"): |
---|
1268 | """ |
---|
1269 | Starts an interative session to visualize current node |
---|
1270 | structure using provided layout and TreeStyle. |
---|
1271 | |
---|
1272 | """ |
---|
1273 | try: |
---|
1274 | from ete2.treeview import drawer |
---|
1275 | except ImportError, e: |
---|
1276 | print "'treeview' module could not be loaded.\n",e |
---|
1277 | print "\n\n" |
---|
1278 | print e |
---|
1279 | else: |
---|
1280 | drawer.show_tree(self, layout=layout, |
---|
1281 | tree_style=tree_style, win_name=name) |
---|
1282 | |
---|
1283 | def render(self, file_name, layout=None, w=None, h=None, \ |
---|
1284 | tree_style=None, units="px", dpi=90): |
---|
1285 | """ |
---|
1286 | Renders the node structure as an image. |
---|
1287 | |
---|
1288 | :var file_name: path to the output image file. valid |
---|
1289 | extensions are .SVG, .PDF, .PNG |
---|
1290 | |
---|
1291 | :var layout: a layout function or a valid layout function name |
---|
1292 | |
---|
1293 | :var tree_style: a `TreeStyle` instance containing the image |
---|
1294 | properties |
---|
1295 | |
---|
1296 | :var px units: "px": pixels, "mm": millimeters, "in": inches |
---|
1297 | :var None h: height of the image in :attr:`units` |
---|
1298 | :var None w: weight of the image in :attr:`units` |
---|
1299 | :var 300 dpi: dots per inches. |
---|
1300 | |
---|
1301 | """ |
---|
1302 | |
---|
1303 | try: |
---|
1304 | from ete2.treeview import drawer |
---|
1305 | except ImportError, e: |
---|
1306 | print "'treeview' module could not be loaded.\n",e |
---|
1307 | print "\n\n" |
---|
1308 | print e |
---|
1309 | else: |
---|
1310 | return drawer.render_tree(self, file_name, w=w, h=h, |
---|
1311 | layout=layout, tree_style=tree_style, |
---|
1312 | units=units, dpi=dpi) |
---|
1313 | |
---|
1314 | def copy(self, method="cpickle"): |
---|
1315 | """.. versionadded: 2.1 |
---|
1316 | |
---|
1317 | Returns a copy of the current node. |
---|
1318 | |
---|
1319 | :var cpickle method: Protocol used to copy the node |
---|
1320 | structure. The following values are accepted: |
---|
1321 | |
---|
1322 | - "newick": Tree topology, node names, branch lengths and |
---|
1323 | branch support values will be copied by as represented in |
---|
1324 | the newick string (copy by newick string serialisation). |
---|
1325 | |
---|
1326 | - "newick-extended": Tree topology and all node features |
---|
1327 | will be copied based on the extended newick format |
---|
1328 | representation. Only node features will be copied, thus |
---|
1329 | excluding other node attributes. As this method is also |
---|
1330 | based on newick serialisation, features will be converted |
---|
1331 | into text strings when making the copy. |
---|
1332 | |
---|
1333 | - "cpickle": The whole node structure and its content is |
---|
1334 | cloned based on cPickle object serialisation (slower, but |
---|
1335 | recommended for full tree copying) |
---|
1336 | |
---|
1337 | - "deepcopy": The whole node structure and its content is |
---|
1338 | copied based on the standard "copy" Python functionality |
---|
1339 | (this is the slowest method but it allows to copy complex |
---|
1340 | objects even if attributes point to lambda functions, |
---|
1341 | etc.) |
---|
1342 | |
---|
1343 | """ |
---|
1344 | if method=="newick": |
---|
1345 | new_node = self.__class__(self.write(features=["name"], format_root_node=True)) |
---|
1346 | elif method=="newick-extended": |
---|
1347 | self.write(features=[], format_root_node=True) |
---|
1348 | new_node = self.__class__(self.write(features=[])) |
---|
1349 | elif method == "deepcopy": |
---|
1350 | parent = self.up |
---|
1351 | self.up = None |
---|
1352 | new_node = copy.deepcopy(self) |
---|
1353 | self.up = parent |
---|
1354 | elif method == "cpickle": |
---|
1355 | parent = self.up |
---|
1356 | self.up = None |
---|
1357 | new_node = cPickle.loads(cPickle.dumps(self, 2)) |
---|
1358 | self.up = parent |
---|
1359 | else: |
---|
1360 | raise ValuerError("Invalid copy method") |
---|
1361 | |
---|
1362 | return new_node |
---|
1363 | |
---|
1364 | def _asciiArt(self, char1='-', show_internal=True, compact=False, attributes=None): |
---|
1365 | """ |
---|
1366 | Returns the ASCII representation of the tree. |
---|
1367 | |
---|
1368 | Code based on the PyCogent GPL project. |
---|
1369 | """ |
---|
1370 | if not attributes: |
---|
1371 | attributes = ["name"] |
---|
1372 | node_name = ', '.join(map(str, [getattr(self, v) for v in attributes if hasattr(self, v)])) |
---|
1373 | |
---|
1374 | LEN = max(3, len(node_name) if not self.children or show_internal else 3) |
---|
1375 | PAD = ' ' * LEN |
---|
1376 | PA = ' ' * (LEN-1) |
---|
1377 | if not self.is_leaf(): |
---|
1378 | mids = [] |
---|
1379 | result = [] |
---|
1380 | for c in self.children: |
---|
1381 | if len(self.children) == 1: |
---|
1382 | char2 = '/' |
---|
1383 | elif c is self.children[0]: |
---|
1384 | char2 = '/' |
---|
1385 | elif c is self.children[-1]: |
---|
1386 | char2 = '\\' |
---|
1387 | else: |
---|
1388 | char2 = '-' |
---|
1389 | (clines, mid) = c._asciiArt(char2, show_internal, compact, attributes) |
---|
1390 | mids.append(mid+len(result)) |
---|
1391 | result.extend(clines) |
---|
1392 | if not compact: |
---|
1393 | result.append('') |
---|
1394 | if not compact: |
---|
1395 | result.pop() |
---|
1396 | (lo, hi, end) = (mids[0], mids[-1], len(result)) |
---|
1397 | prefixes = [PAD] * (lo+1) + [PA+'|'] * (hi-lo-1) + [PAD] * (end-hi) |
---|
1398 | mid = (lo + hi) / 2 |
---|
1399 | prefixes[mid] = char1 + '-'*(LEN-2) + prefixes[mid][-1] |
---|
1400 | result = [p+l for (p,l) in zip(prefixes, result)] |
---|
1401 | if show_internal: |
---|
1402 | stem = result[mid] |
---|
1403 | result[mid] = stem[0] + node_name + stem[len(node_name)+1:] |
---|
1404 | return (result, mid) |
---|
1405 | else: |
---|
1406 | return ([char1 + '-' + node_name], 0) |
---|
1407 | |
---|
1408 | def get_ascii(self, show_internal=True, compact=False, attributes=None): |
---|
1409 | """ |
---|
1410 | Returns a string containing an ascii drawing of the tree. |
---|
1411 | |
---|
1412 | :argument show_internal: includes internal edge names. |
---|
1413 | :argument compact: use exactly one line per tip. |
---|
1414 | |
---|
1415 | :param attributes: A list of node attributes to shown in the |
---|
1416 | ASCII representation. |
---|
1417 | |
---|
1418 | """ |
---|
1419 | (lines, mid) = self._asciiArt(show_internal=show_internal, |
---|
1420 | compact=compact, attributes=attributes) |
---|
1421 | return '\n'+'\n'.join(lines) |
---|
1422 | |
---|
1423 | def ladderize(self, direction=0): |
---|
1424 | """ |
---|
1425 | .. versionadded: 2.1 |
---|
1426 | |
---|
1427 | Sort the branches of a given tree (swapping children nodes) |
---|
1428 | according to the size of each partition. |
---|
1429 | |
---|
1430 | :: |
---|
1431 | |
---|
1432 | t = Tree("(f,((d, ((a,b),c)),e));") |
---|
1433 | |
---|
1434 | print t |
---|
1435 | |
---|
1436 | # |
---|
1437 | # /-f |
---|
1438 | # | |
---|
1439 | # | /-d |
---|
1440 | # ----| | |
---|
1441 | # | /---| /-a |
---|
1442 | # | | | /---| |
---|
1443 | # | | \---| \-b |
---|
1444 | # \---| | |
---|
1445 | # | \-c |
---|
1446 | # | |
---|
1447 | # \-e |
---|
1448 | |
---|
1449 | t.ladderize() |
---|
1450 | print t |
---|
1451 | |
---|
1452 | # /-f |
---|
1453 | # ----| |
---|
1454 | # | /-e |
---|
1455 | # \---| |
---|
1456 | # | /-d |
---|
1457 | # \---| |
---|
1458 | # | /-c |
---|
1459 | # \---| |
---|
1460 | # | /-a |
---|
1461 | # \---| |
---|
1462 | # \-b |
---|
1463 | |
---|
1464 | """ |
---|
1465 | |
---|
1466 | if not self.is_leaf(): |
---|
1467 | n2s = {} |
---|
1468 | for n in self.get_children(): |
---|
1469 | s = n.ladderize(direction=direction) |
---|
1470 | n2s[n] = s |
---|
1471 | |
---|
1472 | self.children.sort(lambda x,y: cmp(n2s[x], n2s[y])) |
---|
1473 | if direction == 1: |
---|
1474 | self.children.reverse() |
---|
1475 | size = sum(n2s.values()) |
---|
1476 | else: |
---|
1477 | size = 1 |
---|
1478 | |
---|
1479 | return size |
---|
1480 | |
---|
1481 | def sort_descendants(self): |
---|
1482 | """ |
---|
1483 | .. versionadded: 2.1 |
---|
1484 | |
---|
1485 | This function sort the branches of a given tree by |
---|
1486 | considerening node names. After the tree is sorted, nodes are |
---|
1487 | labeled using ascendent numbers. This can be used to ensure |
---|
1488 | that nodes in a tree with the same node names are always |
---|
1489 | labeled in the same way. Note that if duplicated names are |
---|
1490 | present, extra criteria should be added to sort nodes. |
---|
1491 | |
---|
1492 | Unique id is stored as a node._nid attribute |
---|
1493 | |
---|
1494 | """ |
---|
1495 | |
---|
1496 | node2content = self.get_cached_content() |
---|
1497 | def sort_by_content(x, y): |
---|
1498 | return cmp(str(sorted([i.name for i in node2content[x]])), |
---|
1499 | str(sorted([i.name for i in node2content[y]]))) |
---|
1500 | |
---|
1501 | for n in self.traverse(): |
---|
1502 | if not n.is_leaf(): |
---|
1503 | n.children.sort(sort_by_content) |
---|
1504 | return node2content |
---|
1505 | |
---|
1506 | def get_cached_content(self, store_attr=None, _store=None): |
---|
1507 | """ |
---|
1508 | .. versionadded: 2.2 |
---|
1509 | |
---|
1510 | Returns a dictionary pointing to the preloaded content of each |
---|
1511 | internal node under this tree. Such a dictionary is intended |
---|
1512 | to work as a cache for operations that require many traversal |
---|
1513 | operations. |
---|
1514 | |
---|
1515 | :param None store_attr: Specifies the node attribute that |
---|
1516 | should be cached (i.e. name, distance, etc.). When none, the |
---|
1517 | whole node instance is cached. |
---|
1518 | |
---|
1519 | :param _store: (internal use) |
---|
1520 | |
---|
1521 | """ |
---|
1522 | if _store is None: |
---|
1523 | _store = {} |
---|
1524 | |
---|
1525 | for ch in self.children: |
---|
1526 | ch.get_cached_content(store_attr=store_attr, _store=_store) |
---|
1527 | |
---|
1528 | if self.children: |
---|
1529 | val = set() |
---|
1530 | for ch in self.children: |
---|
1531 | val.update(_store[ch]) |
---|
1532 | _store[self] = val |
---|
1533 | else: |
---|
1534 | if store_attr is None: |
---|
1535 | val = self |
---|
1536 | else: |
---|
1537 | val = getattr(self, store_attr) |
---|
1538 | _store[self] = set([val]) |
---|
1539 | return _store |
---|
1540 | |
---|
1541 | def robinson_foulds(self, t2, attr_t1="name", attr_t2="name"): |
---|
1542 | """ |
---|
1543 | .. versionadded: 2.2 |
---|
1544 | |
---|
1545 | Returns the Robinson-Foulds symmetric distance between current |
---|
1546 | tree and a different tree instance. |
---|
1547 | |
---|
1548 | :param t2: target tree |
---|
1549 | |
---|
1550 | :param name attr_t1: Compare trees using a custom node |
---|
1551 | attribute as a node name. |
---|
1552 | |
---|
1553 | :param name attr_t2: Compare trees using a custom node |
---|
1554 | attribute as a node name in target tree. |
---|
1555 | |
---|
1556 | :returns: (symmetric distance, total partitions, common node |
---|
1557 | names, partitions in current tree, partitions in target tree) |
---|
1558 | |
---|
1559 | """ |
---|
1560 | |
---|
1561 | t1 = self |
---|
1562 | t1content = t1.get_cached_content() |
---|
1563 | t2content = t2.get_cached_content() |
---|
1564 | target_names = set([getattr(_n, attr_t1) for _n in t1content[t1]]) |
---|
1565 | ref_names = set([getattr(_n, attr_t2) for _n in t2content[t2]]) |
---|
1566 | common_names = target_names & ref_names |
---|
1567 | if len(common_names) < 2: |
---|
1568 | raise ValueError("Trees share less than 2 nodes") |
---|
1569 | |
---|
1570 | r1 = set([",".join(sorted([getattr(_c, attr_t1) for _c in cont |
---|
1571 | if getattr(_c, attr_t1) in common_names])) |
---|
1572 | for cont in t1content.values() if len(cont)>1]) |
---|
1573 | r2 = set([",".join(sorted([getattr(_c, attr_t2) for _c in cont |
---|
1574 | if getattr(_c, attr_t2) in common_names])) |
---|
1575 | for cont in t2content.values() if len(cont)>1]) |
---|
1576 | r1.discard("") |
---|
1577 | r2.discard("") |
---|
1578 | inters = r1.intersection(r2) |
---|
1579 | if len(r1) == len(r2): |
---|
1580 | rf = (len(r1) - len(inters)) * 2 |
---|
1581 | else : |
---|
1582 | rf = (len(r1) - len(inters)) + (len(r2) - len(inters)) |
---|
1583 | max_parts = len(r1) + len(r2) |
---|
1584 | return rf, max_parts, common_names, r1, r2 |
---|
1585 | |
---|
1586 | def get_partitions(self): |
---|
1587 | """ |
---|
1588 | .. versionadded: 2.1 |
---|
1589 | |
---|
1590 | It returns the set of all possible partitions under a |
---|
1591 | node. Note that current implementation is quite inefficient |
---|
1592 | when used in very large trees. |
---|
1593 | |
---|
1594 | t = Tree("((a, b), e);") |
---|
1595 | partitions = t.get_partitions() |
---|
1596 | |
---|
1597 | # Will return: |
---|
1598 | # a,b,e |
---|
1599 | # a,e |
---|
1600 | # b,e |
---|
1601 | # a,b |
---|
1602 | # e |
---|
1603 | # b |
---|
1604 | # a |
---|
1605 | """ |
---|
1606 | all_leaves = frozenset(self.get_leaf_names()) |
---|
1607 | all_partitions = set([all_leaves]) |
---|
1608 | for n in self.iter_descendants(): |
---|
1609 | p1 = frozenset(n.get_leaf_names()) |
---|
1610 | p2 = frozenset(all_leaves - p1) |
---|
1611 | all_partitions.add(p1) |
---|
1612 | all_partitions.add(p2) |
---|
1613 | return all_partitions |
---|
1614 | |
---|
1615 | def convert_to_ultrametric(self, tree_length, strategy="balanced"): |
---|
1616 | """ |
---|
1617 | .. versionadded: 2.1 |
---|
1618 | |
---|
1619 | Converts a tree to ultrametric topology (all leaves must have |
---|
1620 | the same distance to root). Note that, for visual inspection |
---|
1621 | of ultrametric trees, node.img_style["size"] should be set to |
---|
1622 | 0. |
---|
1623 | """ |
---|
1624 | |
---|
1625 | # pre-calculate how many splits remain under each node |
---|
1626 | node2max_depth = {} |
---|
1627 | for node in self.traverse("postorder"): |
---|
1628 | if not node.is_leaf(): |
---|
1629 | max_depth = max([node2max_depth[c] for c in node.children]) + 1 |
---|
1630 | node2max_depth[node] = max_depth |
---|
1631 | else: |
---|
1632 | node2max_depth[node] = 1 |
---|
1633 | node2dist = {self: 0.0} |
---|
1634 | tree_length = float(tree_length) |
---|
1635 | step = tree_length / node2max_depth[self] |
---|
1636 | for node in self.iter_descendants("levelorder"): |
---|
1637 | if strategy == "balanced": |
---|
1638 | node.dist = (tree_length - node2dist[node.up]) / node2max_depth[node] |
---|
1639 | node2dist[node] = node.dist + node2dist[node.up] |
---|
1640 | elif strategy == "fixed": |
---|
1641 | if not node.is_leaf(): |
---|
1642 | node.dist = step |
---|
1643 | else: |
---|
1644 | node.dist = tree_length - ((node2dist[node.up]) * step) |
---|
1645 | node2dist[node] = node2dist[node.up] + 1 |
---|
1646 | node.dist = node.dist |
---|
1647 | |
---|
1648 | def check_monophyly(self, values, target_attr, ignore_missing=False): |
---|
1649 | """ |
---|
1650 | Returns True if a given target attribute is monophyletic under |
---|
1651 | this node for the provided set of values. |
---|
1652 | |
---|
1653 | If not all values are represented in the current tree |
---|
1654 | structure, a ValueError exception will be raised to warn that |
---|
1655 | strict monophyly could never be reached (this behaviour can be |
---|
1656 | avoided by enabling the `ignore_missing` flag. |
---|
1657 | |
---|
1658 | :param values: a set of values for which monophyly is |
---|
1659 | expected. |
---|
1660 | |
---|
1661 | :param target_attr: node attribute being used to check |
---|
1662 | monophyly (i.e. species for species trees, names for gene |
---|
1663 | family trees, or any custom feature present in the tree). |
---|
1664 | |
---|
1665 | :param False ignore_missing: Avoid raising an Exception when |
---|
1666 | missing attributes are found. |
---|
1667 | """ |
---|
1668 | if type(values) != set: |
---|
1669 | values = set(values) |
---|
1670 | |
---|
1671 | # This is the only time I traverse the tree, then I use cached |
---|
1672 | # leaf content |
---|
1673 | n2leaves = self.get_cached_content() |
---|
1674 | # Locate leaves matching requested attribute values |
---|
1675 | targets = [leaf for leaf in n2leaves[self] |
---|
1676 | if getattr(leaf, target_attr) in values] |
---|
1677 | |
---|
1678 | # Raise an error if requested attribute values are not even present |
---|
1679 | if not ignore_missing: |
---|
1680 | missing_values = values - set([getattr(n, target_attr) for n |
---|
1681 | in targets]) |
---|
1682 | if missing_values: |
---|
1683 | raise ValueError("Expected '%s' value(s) not found: %s" %( |
---|
1684 | target_attr, ','.join(missing_values))) |
---|
1685 | |
---|
1686 | # Check monophyly with get_common_ancestor. Note that this |
---|
1687 | # step does not require traversing the tree again because |
---|
1688 | # targets are node instances instead of node names, and |
---|
1689 | # get_common_ancestor function is smart enough to detect it |
---|
1690 | # and avoid unnecessary traversing. |
---|
1691 | common = self.get_common_ancestor(targets) |
---|
1692 | observed = n2leaves[common] |
---|
1693 | foreign_leaves = [leaf for leaf in observed |
---|
1694 | if getattr(leaf, target_attr) not in values] |
---|
1695 | if not foreign_leaves: |
---|
1696 | return True, "monophyletic" |
---|
1697 | else: |
---|
1698 | # if the requested attribute is not monophyletic in this |
---|
1699 | # node, let's differentiate between poly and paraphyly. |
---|
1700 | poly_common = self.get_common_ancestor(foreign_leaves) |
---|
1701 | # if the common ancestor of all foreign leaves is self |
---|
1702 | # contained, we have a paraphyly. Otherwise, polyphyly. |
---|
1703 | polyphyletic = [leaf for leaf in poly_common if |
---|
1704 | getattr(leaf, target_attr) in values] |
---|
1705 | if polyphyletic: |
---|
1706 | return False, "polyphyletic" |
---|
1707 | else: |
---|
1708 | return False, "paraphyletic" |
---|
1709 | |
---|
1710 | def get_monophyletic(self, values, target_attr): |
---|
1711 | """ |
---|
1712 | .. versionadded:: 2.2 |
---|
1713 | |
---|
1714 | Returns a list of nodes matching the provided monophyly |
---|
1715 | criteria. For a node to be considered a match, all |
---|
1716 | `target_attr` values within and node, and exclusively them, |
---|
1717 | should be grouped. |
---|
1718 | |
---|
1719 | :param values: a set of values for which monophyly is |
---|
1720 | expected. |
---|
1721 | |
---|
1722 | :param target_attr: node attribute being used to check |
---|
1723 | monophyly (i.e. species for species trees, names for gene |
---|
1724 | family trees). |
---|
1725 | |
---|
1726 | """ |
---|
1727 | |
---|
1728 | if type(values) != set: |
---|
1729 | values = set(values) |
---|
1730 | |
---|
1731 | n2values = self.get_cached_content(store_attr=target_attr) |
---|
1732 | |
---|
1733 | is_monophyletic = lambda node: n2values[node] == values |
---|
1734 | for match in self.iter_leaves(is_leaf_fn=is_monophyletic): |
---|
1735 | if is_monophyletic(match): |
---|
1736 | yield match |
---|
1737 | |
---|
1738 | def resolve_polytomy(self, default_dist=0.0, default_support=0.0, |
---|
1739 | recursive=True): |
---|
1740 | """ |
---|
1741 | .. versionadded: 2.2 |
---|
1742 | |
---|
1743 | Resolve all polytomies under current node by creating an |
---|
1744 | arbitrary dicotomic structure among the affected nodes. This |
---|
1745 | function randomly modifies current tree topology and should |
---|
1746 | only be used for compatibility reasons (i.e. programs |
---|
1747 | rejecting multifurcated node in the newick representation). |
---|
1748 | |
---|
1749 | :param 0.0 default_dist: artificial branch distance of new |
---|
1750 | nodes. |
---|
1751 | |
---|
1752 | :param 0.0 default_support: artificial branch support of new |
---|
1753 | nodes. |
---|
1754 | |
---|
1755 | :param True recursive: Resolve any polytomy under this |
---|
1756 | node. When False, only current node will be checked and fixed. |
---|
1757 | """ |
---|
1758 | |
---|
1759 | |
---|
1760 | def _resolve(node): |
---|
1761 | if len(node.children) > 2: |
---|
1762 | children = list(node.children) |
---|
1763 | node.children = [] |
---|
1764 | next_node = root = node |
---|
1765 | for i in xrange(len(children)-2): |
---|
1766 | next_node = next_node.add_child() |
---|
1767 | next_node.dist = default_dist |
---|
1768 | next_node.support = default_support |
---|
1769 | |
---|
1770 | next_node = root |
---|
1771 | for ch in children: |
---|
1772 | next_node.add_child(ch) |
---|
1773 | if ch != children[-2]: |
---|
1774 | next_node = next_node.children[0] |
---|
1775 | target = [self] |
---|
1776 | if recursive: |
---|
1777 | target.extend([n for n in self.get_descendants()]) |
---|
1778 | for n in target: |
---|
1779 | _resolve(n) |
---|
1780 | |
---|
1781 | def add_face(self, face, column, position="branch-right"): |
---|
1782 | """ |
---|
1783 | .. versionadded: 2.1 |
---|
1784 | |
---|
1785 | Add a fixed face to the node. This type of faces will be |
---|
1786 | always attached to nodes, independently of the layout |
---|
1787 | function. |
---|
1788 | |
---|
1789 | :argument face: a Face or inherited instance |
---|
1790 | :argument column: An integer number starting from 0 |
---|
1791 | :argument "branch-right" position: Posible values are: |
---|
1792 | "branch-right", "branch-top", "branch-bottom", "float", |
---|
1793 | "aligned" |
---|
1794 | """ |
---|
1795 | |
---|
1796 | if not hasattr(self, "_faces"): |
---|
1797 | self._faces = _FaceAreas() |
---|
1798 | |
---|
1799 | if position not in FACE_POSITIONS: |
---|
1800 | raise ValueError("face position not in %s" %FACE_POSITIONS) |
---|
1801 | |
---|
1802 | if isinstance(face, Face): |
---|
1803 | getattr(self._faces, position).add_face(face, column=column) |
---|
1804 | else: |
---|
1805 | raise ValueError("not a Face instance") |
---|
1806 | |
---|
1807 | def set_style(self, node_style): |
---|
1808 | """ |
---|
1809 | .. versionadded: 2.1 |
---|
1810 | |
---|
1811 | Set 'node_style' as the fixed style for the current node. |
---|
1812 | """ |
---|
1813 | if TREEVIEW: |
---|
1814 | if node_style is None: |
---|
1815 | node_style = NodeStyle() |
---|
1816 | if type(node_style) is NodeStyle: |
---|
1817 | self._img_style = node_style |
---|
1818 | else: |
---|
1819 | raise ValueError("Treeview module is disabled") |
---|
1820 | |
---|
1821 | def phonehome(self): |
---|
1822 | from ete2 import _ph |
---|
1823 | _ph.call() |
---|
1824 | |
---|
1825 | def _translate_nodes(root, *nodes): |
---|
1826 | name2node = dict([ [n, None] for n in nodes if type(n) is str]) |
---|
1827 | for n in root.traverse(): |
---|
1828 | if n.name in name2node: |
---|
1829 | if name2node[n.name] is not None: |
---|
1830 | raise ValueError, "Ambiguous node name: "+str(n.name) |
---|
1831 | else: |
---|
1832 | name2node[n.name] = n |
---|
1833 | |
---|
1834 | if None in name2node.values(): |
---|
1835 | notfound = [key for key, value in name2node.iteritems() if value is None] |
---|
1836 | raise ValueError("Node names not found: "+str(notfound)) |
---|
1837 | |
---|
1838 | valid_nodes = [] |
---|
1839 | for n in nodes: |
---|
1840 | if type(n) is not str: |
---|
1841 | if type(n) is not root.__class__ : |
---|
1842 | raise ValueError, "Invalid target node: "+str(n) |
---|
1843 | else: |
---|
1844 | valid_nodes.append(n) |
---|
1845 | |
---|
1846 | valid_nodes.extend(name2node.values()) |
---|
1847 | if len(valid_nodes) == 1: |
---|
1848 | return valid_nodes[0] |
---|
1849 | else: |
---|
1850 | return valid_nodes |
---|
1851 | |
---|
1852 | def OLD_translate_nodes(root, *nodes): |
---|
1853 | target_nodes = [] |
---|
1854 | for n in nodes: |
---|
1855 | if type(n) is str: |
---|
1856 | mnodes = root.search_nodes(name=n) |
---|
1857 | if len(mnodes) == 0: |
---|
1858 | raise ValueError, "Node name not found: "+str(n) |
---|
1859 | elif len(mnodes)>1: |
---|
1860 | raise ValueError, "Ambiguous node name: "+str(n) |
---|
1861 | else: |
---|
1862 | target_nodes.append(mnodes[0]) |
---|
1863 | elif type(n) != root.__class__: |
---|
1864 | raise ValueError, "Invalid target node: "+str(n) |
---|
1865 | else: |
---|
1866 | target_nodes.append(n) |
---|
1867 | |
---|
1868 | if len(target_nodes) == 1: |
---|
1869 | return target_nodes[0] |
---|
1870 | else: |
---|
1871 | return target_nodes |
---|
1872 | |
---|
1873 | |
---|
1874 | # Alias |
---|
1875 | #: .. currentmodule:: ete2 |
---|
1876 | Tree = TreeNode |
---|
1877 | |
---|