| 1 | # -*- coding: utf-8 -*- |
|---|
| 2 | # #START_LICENSE########################################################### |
|---|
| 3 | # |
|---|
| 4 | # |
|---|
| 5 | # This file is part of the Environment for Tree Exploration program |
|---|
| 6 | # (ETE). http://ete.cgenomics.org |
|---|
| 7 | # |
|---|
| 8 | # ETE is free software: you can redistribute it and/or modify it |
|---|
| 9 | # under the terms of the GNU General Public License as published by |
|---|
| 10 | # the Free Software Foundation, either version 3 of the License, or |
|---|
| 11 | # (at your option) any later version. |
|---|
| 12 | # |
|---|
| 13 | # ETE is distributed in the hope that it will be useful, but WITHOUT |
|---|
| 14 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
|---|
| 15 | # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public |
|---|
| 16 | # License for more details. |
|---|
| 17 | # |
|---|
| 18 | # You should have received a copy of the GNU General Public License |
|---|
| 19 | # along with ETE. If not, see <http://www.gnu.org/licenses/>. |
|---|
| 20 | # |
|---|
| 21 | # |
|---|
| 22 | # ABOUT THE ETE PACKAGE |
|---|
| 23 | # ===================== |
|---|
| 24 | # |
|---|
| 25 | # ETE is distributed under the GPL copyleft license (2008-2011). |
|---|
| 26 | # |
|---|
| 27 | # If you make use of ETE in published work, please cite: |
|---|
| 28 | # |
|---|
| 29 | # Jaime Huerta-Cepas, Joaquin Dopazo and Toni Gabaldon. |
|---|
| 30 | # ETE: a python Environment for Tree Exploration. Jaime BMC |
|---|
| 31 | # Bioinformatics 2010,:24doi:10.1186/1471-2105-11-24 |
|---|
| 32 | # |
|---|
| 33 | # Note that extra references to the specific methods implemented in |
|---|
| 34 | # the toolkit are available in the documentation. |
|---|
| 35 | # |
|---|
| 36 | # More info at http://ete.cgenomics.org |
|---|
| 37 | # |
|---|
| 38 | # |
|---|
| 39 | # #END_LICENSE############################################################# |
|---|
| 40 | __VERSION__="ete2-2.2rev1026" |
|---|
| 41 | #START_LICENSE########################################################### |
|---|
| 42 | # |
|---|
| 43 | # Copyright (C) 2009 by Jaime Huerta Cepas. All rights reserved. |
|---|
| 44 | # email: jhcepas@gmail.com |
|---|
| 45 | # |
|---|
| 46 | # This file is part of the Environment for Tree Exploration program (ETE). |
|---|
| 47 | # http://ete.cgenomics.org |
|---|
| 48 | # |
|---|
| 49 | # ETE is free software: you can redistribute it and/or modify it |
|---|
| 50 | # under the terms of the GNU General Public License as published by |
|---|
| 51 | # the Free Software Foundation, either version 3 of the License, or |
|---|
| 52 | # (at your option) any later version. |
|---|
| 53 | # |
|---|
| 54 | # ETE is distributed in the hope that it will be useful, |
|---|
| 55 | # but WITHOUT ANY WARRANTY; without even the implied warranty of |
|---|
| 56 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|---|
| 57 | # GNU General Public License for more details. |
|---|
| 58 | # |
|---|
| 59 | # You should have received a copy of the GNU General Public License |
|---|
| 60 | # along with ETE. If not, see <http://www.gnu.org/licenses/>. |
|---|
| 61 | # |
|---|
| 62 | # #END_LICENSE############################################################# |
|---|
| 63 | import os |
|---|
| 64 | import cPickle |
|---|
| 65 | import random |
|---|
| 66 | import copy |
|---|
| 67 | from collections import deque |
|---|
| 68 | import itertools |
|---|
| 69 | from newick import read_newick, write_newick |
|---|
| 70 | |
|---|
| 71 | # the following imports are necessary to set fixed styles and faces |
|---|
| 72 | #try: |
|---|
| 73 | # from ete2.treeview.main import NodeStyle, _FaceAreas, FaceContainer, FACE_POSITIONS |
|---|
| 74 | # from ete2.treeview.faces import Face |
|---|
| 75 | #except ImportError: |
|---|
| 76 | # TREEVIEW = False |
|---|
| 77 | #else: |
|---|
| 78 | # TREEVIEW = True |
|---|
| 79 | |
|---|
| 80 | __all__ = ["Tree", "TreeNode"] |
|---|
| 81 | |
|---|
| 82 | TREEVIEW = False |
|---|
| 83 | DEFAULT_COMPACT = False |
|---|
| 84 | DEFAULT_SHOWINTERNAL = False |
|---|
| 85 | DEFAULT_DIST = 1.0 |
|---|
| 86 | DEFAULT_SUPPORT = 1.0 |
|---|
| 87 | DEFAULT_NAME = "NoName" |
|---|
| 88 | |
|---|
| 89 | class TreeError(Exception): |
|---|
| 90 | """ |
|---|
| 91 | A problem occurred during a TreeNode operation |
|---|
| 92 | """ |
|---|
| 93 | def __init__(self, value=''): |
|---|
| 94 | self.value = value |
|---|
| 95 | def __str__(self): |
|---|
| 96 | return repr(self.value) |
|---|
| 97 | |
|---|
| 98 | class TreeNode(object): |
|---|
| 99 | """ |
|---|
| 100 | TreeNode (Tree) class is used to store a tree structure. A tree |
|---|
| 101 | consists of a collection of TreeNode instances connected in a |
|---|
| 102 | hierarchical way. Trees can be loaded from the New Hampshire Newick |
|---|
| 103 | format (newick). |
|---|
| 104 | |
|---|
| 105 | :argument newick: Path to the file containing the tree or, alternatively, |
|---|
| 106 | the text string containing the same information. |
|---|
| 107 | |
|---|
| 108 | :argument 0 format: subnewick format |
|---|
| 109 | |
|---|
| 110 | .. table:: |
|---|
| 111 | |
|---|
| 112 | ====== ============================================== |
|---|
| 113 | FORMAT DESCRIPTION |
|---|
| 114 | ====== ============================================== |
|---|
| 115 | 0 flexible with support values |
|---|
| 116 | 1 flexible with internal node names |
|---|
| 117 | 2 all branches + leaf names + internal supports |
|---|
| 118 | 3 all branches + all names |
|---|
| 119 | 4 leaf branches + leaf names |
|---|
| 120 | 5 internal and leaf branches + leaf names |
|---|
| 121 | 6 internal branches + leaf names |
|---|
| 122 | 7 leaf branches + all names |
|---|
| 123 | 8 all names |
|---|
| 124 | 9 leaf names |
|---|
| 125 | 100 topology only |
|---|
| 126 | ====== ============================================== |
|---|
| 127 | |
|---|
| 128 | :returns: a tree node object which represents the base of the tree. |
|---|
| 129 | |
|---|
| 130 | ** Examples: ** |
|---|
| 131 | |
|---|
| 132 | :: |
|---|
| 133 | |
|---|
| 134 | t1 = Tree() # creates an empty tree |
|---|
| 135 | t2 = Tree('(A:1,(B:1,(C:1,D:1):0.5):0.5);') |
|---|
| 136 | t3 = Tree('/home/user/myNewickFile.txt') |
|---|
| 137 | """ |
|---|
| 138 | |
|---|
| 139 | def _get_dist(self): |
|---|
| 140 | return self._dist |
|---|
| 141 | def _set_dist(self, value): |
|---|
| 142 | try: |
|---|
| 143 | self._dist = float(value) |
|---|
| 144 | except ValueError: |
|---|
| 145 | raise |
|---|
| 146 | |
|---|
| 147 | def _get_support(self): |
|---|
| 148 | return self._support |
|---|
| 149 | def _set_support(self, value): |
|---|
| 150 | try: |
|---|
| 151 | self._support = float(value) |
|---|
| 152 | except ValueError: |
|---|
| 153 | raise |
|---|
| 154 | |
|---|
| 155 | def _get_up(self): |
|---|
| 156 | return self._up |
|---|
| 157 | def _set_up(self, value): |
|---|
| 158 | if type(value) == type(self) or value is None: |
|---|
| 159 | self._up = value |
|---|
| 160 | else: |
|---|
| 161 | raise ValueError("bad node_up type") |
|---|
| 162 | |
|---|
| 163 | def _get_children(self): |
|---|
| 164 | return self._children |
|---|
| 165 | def _set_children(self, value): |
|---|
| 166 | if type(value) == list and \ |
|---|
| 167 | len(set([type(n)==type(self) for n in value]))<2: |
|---|
| 168 | self._children = value |
|---|
| 169 | else: |
|---|
| 170 | raise ValueError("bad children type") |
|---|
| 171 | |
|---|
| 172 | def _get_style(self): |
|---|
| 173 | if self._img_style is None: |
|---|
| 174 | self._set_style(None) |
|---|
| 175 | |
|---|
| 176 | return self._img_style |
|---|
| 177 | |
|---|
| 178 | def _set_style(self, value): |
|---|
| 179 | self.set_style(value) |
|---|
| 180 | |
|---|
| 181 | #: Branch length distance to parent node. Default = 0.0 |
|---|
| 182 | img_style = property(fget=_get_style, fset=_set_style) |
|---|
| 183 | |
|---|
| 184 | #: Branch length distance to parent node. Default = 0.0 |
|---|
| 185 | dist = property(fget=_get_dist, fset=_set_dist) |
|---|
| 186 | #: Branch support for current node |
|---|
| 187 | support = property(fget=_get_support, fset=_set_support) |
|---|
| 188 | #: Pointer to parent node |
|---|
| 189 | up = property(fget=_get_up, fset=_set_up) |
|---|
| 190 | #: A list of children nodes |
|---|
| 191 | children = property(fget=_get_children, fset=_set_children) |
|---|
| 192 | |
|---|
| 193 | def _set_face_areas(self, value): |
|---|
| 194 | if isinstance(value, _FaceAreas): |
|---|
| 195 | self._faces = value |
|---|
| 196 | else: |
|---|
| 197 | raise ValueError("[%s] is not a valid FaceAreas instance" %type(value)) |
|---|
| 198 | |
|---|
| 199 | def _get_face_areas(self): |
|---|
| 200 | if not hasattr(self, "_faces"): |
|---|
| 201 | self._faces = _FaceAreas() |
|---|
| 202 | return self._faces |
|---|
| 203 | |
|---|
| 204 | faces = property(fget=_get_face_areas, \ |
|---|
| 205 | fset=_set_face_areas) |
|---|
| 206 | |
|---|
| 207 | def __init__(self, newick=None, format=0, dist=None, support=None, |
|---|
| 208 | name=None): |
|---|
| 209 | self._children = [] |
|---|
| 210 | self._up = None |
|---|
| 211 | self._dist = DEFAULT_DIST |
|---|
| 212 | self._support = DEFAULT_SUPPORT |
|---|
| 213 | self._img_style = None |
|---|
| 214 | self.features = set([]) |
|---|
| 215 | # Add basic features |
|---|
| 216 | self.features.update(["dist", "support", "name"]) |
|---|
| 217 | if dist is not None: |
|---|
| 218 | self.dist = dist |
|---|
| 219 | if support is not None: |
|---|
| 220 | self.support = support |
|---|
| 221 | |
|---|
| 222 | self.name = name if name is not None else DEFAULT_NAME |
|---|
| 223 | |
|---|
| 224 | # Initialize tree |
|---|
| 225 | if newick is not None: |
|---|
| 226 | read_newick(newick, root_node = self, format=format) |
|---|
| 227 | |
|---|
| 228 | def __nonzero__(self): |
|---|
| 229 | return True |
|---|
| 230 | |
|---|
| 231 | def __repr__(self): |
|---|
| 232 | return "Tree node '%s' (%s)" %(self.name, hex(self.__hash__())) |
|---|
| 233 | |
|---|
| 234 | def __and__(self, value): |
|---|
| 235 | """ This allows to execute tree&'A' to obtain the descendant node |
|---|
| 236 | whose name is A""" |
|---|
| 237 | value=str(value) |
|---|
| 238 | try: |
|---|
| 239 | first_match = self.iter_search_nodes(name=value).next() |
|---|
| 240 | return first_match |
|---|
| 241 | except StopIteration: |
|---|
| 242 | raise ValueError, "Node not found" |
|---|
| 243 | |
|---|
| 244 | def __add__(self, value): |
|---|
| 245 | """ This allows to sum two trees.""" |
|---|
| 246 | # Should a make the sum with two copies of the original trees? |
|---|
| 247 | if type(value) == self.__class__: |
|---|
| 248 | new_root = self.__class__() |
|---|
| 249 | new_root.add_child(self) |
|---|
| 250 | new_root.add_child(value) |
|---|
| 251 | return new_root |
|---|
| 252 | else: |
|---|
| 253 | raise ValueError, "Invalid node type" |
|---|
| 254 | |
|---|
| 255 | def __str__(self): |
|---|
| 256 | """ Print tree in newick format. """ |
|---|
| 257 | return self.get_ascii(compact=DEFAULT_COMPACT, \ |
|---|
| 258 | show_internal=DEFAULT_SHOWINTERNAL) |
|---|
| 259 | |
|---|
| 260 | def __contains__(self, item): |
|---|
| 261 | """ Check if item belongs to this node. The 'item' argument must |
|---|
| 262 | be a node instance or its associated name.""" |
|---|
| 263 | if isinstance(item, self.__class__): |
|---|
| 264 | return item in set(self.get_descendants()) |
|---|
| 265 | elif type(item)==str: |
|---|
| 266 | return item in set([n.name for n in self.traverse()]) |
|---|
| 267 | |
|---|
| 268 | def __len__(self): |
|---|
| 269 | """Node len returns number of children.""" |
|---|
| 270 | return len(self.get_leaves()) |
|---|
| 271 | |
|---|
| 272 | def __iter__(self): |
|---|
| 273 | """ Iterator over leaf nodes""" |
|---|
| 274 | return self.iter_leaves() |
|---|
| 275 | |
|---|
| 276 | def add_feature(self, pr_name, pr_value): |
|---|
| 277 | """ |
|---|
| 278 | Add or update a node's feature. |
|---|
| 279 | """ |
|---|
| 280 | setattr(self, pr_name, pr_value) |
|---|
| 281 | self.features.add(pr_name) |
|---|
| 282 | |
|---|
| 283 | def add_features(self, **features): |
|---|
| 284 | """ |
|---|
| 285 | Add or update several features. """ |
|---|
| 286 | for fname, fvalue in features.iteritems(): |
|---|
| 287 | setattr(self, fname, fvalue) |
|---|
| 288 | self.features.add(fname) |
|---|
| 289 | |
|---|
| 290 | def del_feature(self, pr_name): |
|---|
| 291 | """ |
|---|
| 292 | Permanently deletes a node's feature. |
|---|
| 293 | """ |
|---|
| 294 | if hasattr(self, pr_name): |
|---|
| 295 | delattr(self, pr_name) |
|---|
| 296 | self.features.remove(pr_name) |
|---|
| 297 | |
|---|
| 298 | # Topology management |
|---|
| 299 | def add_child(self, child=None, name=None, dist=None, support=None): |
|---|
| 300 | """ |
|---|
| 301 | Adds a new child to this node. If child node is not suplied |
|---|
| 302 | as an argument, a new node instance will be created. |
|---|
| 303 | |
|---|
| 304 | :argument None child: the node instance to be added as a child. |
|---|
| 305 | :argument None name: the name that will be given to the child. |
|---|
| 306 | :argument None dist: the distance from the node to the child. |
|---|
| 307 | :argument None support': the support value of child partition. |
|---|
| 308 | |
|---|
| 309 | :returns: The child node instance |
|---|
| 310 | |
|---|
| 311 | """ |
|---|
| 312 | if child is None: |
|---|
| 313 | child = self.__class__() |
|---|
| 314 | |
|---|
| 315 | if name is not None: |
|---|
| 316 | child.name = name |
|---|
| 317 | if dist is not None: |
|---|
| 318 | child.dist = dist |
|---|
| 319 | if support is not None: |
|---|
| 320 | child.support = support |
|---|
| 321 | |
|---|
| 322 | self.children.append(child) |
|---|
| 323 | child.up = self |
|---|
| 324 | return child |
|---|
| 325 | |
|---|
| 326 | def remove_child(self, child): |
|---|
| 327 | """ |
|---|
| 328 | Removes a child from this node (parent and child |
|---|
| 329 | nodes still exit but are no longer connected). |
|---|
| 330 | """ |
|---|
| 331 | try: |
|---|
| 332 | self.children.remove(child) |
|---|
| 333 | except ValueError, e: |
|---|
| 334 | raise TreeError, e |
|---|
| 335 | else: |
|---|
| 336 | child.up = None |
|---|
| 337 | return child |
|---|
| 338 | |
|---|
| 339 | def add_sister(self, sister=None, name=None, dist=None): |
|---|
| 340 | """ |
|---|
| 341 | Adds a sister to this node. If sister node is not supplied |
|---|
| 342 | as an argument, a new TreeNode instance will be created and |
|---|
| 343 | returned. |
|---|
| 344 | """ |
|---|
| 345 | if self.up == None: |
|---|
| 346 | raise TreeError("A parent node is required to add a sister") |
|---|
| 347 | else: |
|---|
| 348 | return self.up.add_child(child=sister, name=name, dist=dist) |
|---|
| 349 | |
|---|
| 350 | def remove_sister(self, sister=None): |
|---|
| 351 | """ |
|---|
| 352 | Removes a node's sister node. It has the same effect as |
|---|
| 353 | **`TreeNode.up.remove_child(sister)`** |
|---|
| 354 | |
|---|
| 355 | If a sister node is not supplied, the first sister will be deleted |
|---|
| 356 | and returned. |
|---|
| 357 | |
|---|
| 358 | :argument sister: A node instance |
|---|
| 359 | |
|---|
| 360 | :return: The node removed |
|---|
| 361 | """ |
|---|
| 362 | sisters = self.get_sisters() |
|---|
| 363 | if len(sisters)>0: |
|---|
| 364 | if sister==None: |
|---|
| 365 | sister = sisters.pop(0) |
|---|
| 366 | return self.up.remove_child(sister) |
|---|
| 367 | |
|---|
| 368 | def delete(self, prevent_nondicotomic=True, preserve_branch_length=False): |
|---|
| 369 | """ |
|---|
| 370 | Deletes node from the tree structure. Notice that this method |
|---|
| 371 | makes 'disappear' the node from the tree structure. This means |
|---|
| 372 | that children from the deleted node are transferred to the |
|---|
| 373 | next available parent. |
|---|
| 374 | |
|---|
| 375 | :param True prevent_nondicotomic: When True (default), delete |
|---|
| 376 | function will be execute recursively to prevent single-child |
|---|
| 377 | nodes. |
|---|
| 378 | |
|---|
| 379 | :param False preserve_branch_length: If True, branch lengths |
|---|
| 380 | of the deleted nodes are transferred (summed up) to its |
|---|
| 381 | parent's branch, thus keeping original distances among nodes. |
|---|
| 382 | |
|---|
| 383 | **Example:** |
|---|
| 384 | |
|---|
| 385 | :: |
|---|
| 386 | |
|---|
| 387 | / C |
|---|
| 388 | root-| |
|---|
| 389 | | / B |
|---|
| 390 | \--- H | |
|---|
| 391 | \ A |
|---|
| 392 | |
|---|
| 393 | > root.delete(H) will produce this structure: |
|---|
| 394 | |
|---|
| 395 | / C |
|---|
| 396 | | |
|---|
| 397 | root-|--B |
|---|
| 398 | | |
|---|
| 399 | \ A |
|---|
| 400 | |
|---|
| 401 | """ |
|---|
| 402 | parent = self.up |
|---|
| 403 | if parent: |
|---|
| 404 | for ch in self.children: |
|---|
| 405 | if preserve_branch_length: |
|---|
| 406 | ch.dist += self.dist |
|---|
| 407 | parent.add_child(ch) |
|---|
| 408 | |
|---|
| 409 | parent.remove_child(self) |
|---|
| 410 | |
|---|
| 411 | # Avoids parents with only one child |
|---|
| 412 | if prevent_nondicotomic and parent and\ |
|---|
| 413 | len(parent.children)<2: |
|---|
| 414 | parent.delete(prevent_nondicotomic=False, |
|---|
| 415 | preserve_branch_length=preserve_branch_length) |
|---|
| 416 | |
|---|
| 417 | def detach(self): |
|---|
| 418 | """ |
|---|
| 419 | Detachs this node (and all its descendants) from its parent |
|---|
| 420 | and returns the referent to itself. |
|---|
| 421 | |
|---|
| 422 | Detached node conserves all its structure of descendants, and can |
|---|
| 423 | be attached to another node through the 'add_child' function. This |
|---|
| 424 | mechanism can be seen as a cut and paste. |
|---|
| 425 | """ |
|---|
| 426 | |
|---|
| 427 | if self.up: |
|---|
| 428 | self.up.children.remove(self) |
|---|
| 429 | self.up = None |
|---|
| 430 | return self |
|---|
| 431 | |
|---|
| 432 | def prune(self, nodes, preserve_branch_length=False): |
|---|
| 433 | """Prunes the topology of a node in order to conserve only a |
|---|
| 434 | selected list of leaf or internal nodes. The minimum number of |
|---|
| 435 | internal nodes (the deepest as possible) are kept to conserve |
|---|
| 436 | the topological relationship among the provided list of nodes. |
|---|
| 437 | |
|---|
| 438 | :var nodes: a list of node names or node objects that must be kept |
|---|
| 439 | |
|---|
| 440 | :param False preserve_branch_length: If True, branch lengths |
|---|
| 441 | of the deleted nodes are transferred (summed up) to its |
|---|
| 442 | parent's branch, thus keeping original distances among nodes. |
|---|
| 443 | |
|---|
| 444 | **Examples:** |
|---|
| 445 | |
|---|
| 446 | :: |
|---|
| 447 | |
|---|
| 448 | t = Tree("(((A:0.1, B:0.01):0.001, C:0.0001):1.0[&&NHX:name=I], (D:0.00001):0.000001[&&NHX:name=J]):2.0[&&NHX:name=root];") |
|---|
| 449 | node_C = t.search_nodes(name="C")[0] |
|---|
| 450 | t.prune(["A","D", node_C]) |
|---|
| 451 | print t |
|---|
| 452 | |
|---|
| 453 | """ |
|---|
| 454 | def cmp_nodes(x, y): |
|---|
| 455 | # if several nodes are in the same path of two kept nodes, |
|---|
| 456 | # only one should be maintained. This prioritize internal |
|---|
| 457 | # nodes that are already in the to_keep list and then |
|---|
| 458 | # deeper nodes (closer to the leaves). |
|---|
| 459 | if x in to_keep: |
|---|
| 460 | if y not in to_keep: |
|---|
| 461 | return -1 |
|---|
| 462 | elif y in to_keep: |
|---|
| 463 | return 0 |
|---|
| 464 | elif n2depth[x] > n2depth[y]: |
|---|
| 465 | return -1 |
|---|
| 466 | elif n2depth[x] < n2depth[y]: |
|---|
| 467 | return 1 |
|---|
| 468 | else: |
|---|
| 469 | return 0 |
|---|
| 470 | |
|---|
| 471 | to_keep = set(_translate_nodes(self, *nodes)) |
|---|
| 472 | start, node2path = self.get_common_ancestor(to_keep, get_path=True) |
|---|
| 473 | |
|---|
| 474 | # Calculate which kept nodes are visiting the same nodes in |
|---|
| 475 | # their path to the common ancestor. |
|---|
| 476 | n2count = {} |
|---|
| 477 | n2depth = {} |
|---|
| 478 | for seed, path in node2path.iteritems(): |
|---|
| 479 | for visited_node in path: |
|---|
| 480 | if visited_node not in n2depth: |
|---|
| 481 | depth = visited_node.get_distance(start, topology_only=True) |
|---|
| 482 | n2depth[visited_node] = depth |
|---|
| 483 | if visited_node is not seed: |
|---|
| 484 | n2count.setdefault(visited_node, set()).add(seed) |
|---|
| 485 | |
|---|
| 486 | # if several internal nodes are in the path of exactly the |
|---|
| 487 | # same kept nodes, only one should be maintain. |
|---|
| 488 | visitors2nodes = {} |
|---|
| 489 | for node, visitors in n2count.iteritems(): |
|---|
| 490 | # keep nodes connection at least two other nodes |
|---|
| 491 | if len(visitors)>1: |
|---|
| 492 | visitor_key = frozenset(visitors) |
|---|
| 493 | visitors2nodes.setdefault(visitor_key, set()).add(node) |
|---|
| 494 | for visitors, nodes in visitors2nodes.iteritems(): |
|---|
| 495 | s = sorted(nodes, cmp_nodes) |
|---|
| 496 | to_keep.add(s[0]) |
|---|
| 497 | |
|---|
| 498 | # Detach unvisited branches |
|---|
| 499 | if start is not self: |
|---|
| 500 | start.detach() |
|---|
| 501 | for n in self.get_children(): |
|---|
| 502 | n.detach() |
|---|
| 503 | for n in start.get_children(): |
|---|
| 504 | self.add_child(child=n) |
|---|
| 505 | |
|---|
| 506 | for n in [self]+self.get_descendants(): |
|---|
| 507 | if n not in to_keep: |
|---|
| 508 | n.delete(prevent_nondicotomic=False, |
|---|
| 509 | preserve_branch_length=preserve_branch_length) |
|---|
| 510 | |
|---|
| 511 | def swap_children(self): |
|---|
| 512 | """ |
|---|
| 513 | Swaps current children order. |
|---|
| 514 | """ |
|---|
| 515 | if len(self.children)>1: |
|---|
| 516 | self.children.reverse() |
|---|
| 517 | |
|---|
| 518 | def get_children(self): |
|---|
| 519 | """ |
|---|
| 520 | Returns an independent list of node's children. |
|---|
| 521 | """ |
|---|
| 522 | return [ch for ch in self.children] |
|---|
| 523 | |
|---|
| 524 | def get_sisters(self): |
|---|
| 525 | """ |
|---|
| 526 | Returns an indepent list of sister nodes. |
|---|
| 527 | """ |
|---|
| 528 | if self.up!=None: |
|---|
| 529 | return [ch for ch in self.up.children if ch!=self] |
|---|
| 530 | else: |
|---|
| 531 | return [] |
|---|
| 532 | |
|---|
| 533 | def iter_leaves(self, is_leaf_fn=None): |
|---|
| 534 | """ |
|---|
| 535 | Returns an iterator over the leaves under this node. |
|---|
| 536 | |
|---|
| 537 | :argument None is_leaf_fn: See :func:`TreeNode.traverse` for |
|---|
| 538 | documentation. |
|---|
| 539 | """ |
|---|
| 540 | for n in self.traverse(strategy="preorder", is_leaf_fn=is_leaf_fn): |
|---|
| 541 | if not is_leaf_fn: |
|---|
| 542 | if n.is_leaf(): |
|---|
| 543 | yield n |
|---|
| 544 | else: |
|---|
| 545 | if is_leaf_fn(n): |
|---|
| 546 | yield n |
|---|
| 547 | |
|---|
| 548 | def get_leaves(self, is_leaf_fn=None): |
|---|
| 549 | """ |
|---|
| 550 | Returns the list of terminal nodes (leaves) under this node. |
|---|
| 551 | |
|---|
| 552 | :argument None is_leaf_fn: See :func:`TreeNode.traverse` for |
|---|
| 553 | documentation. |
|---|
| 554 | """ |
|---|
| 555 | return [n for n in self.iter_leaves(is_leaf_fn=is_leaf_fn)] |
|---|
| 556 | |
|---|
| 557 | def iter_leaf_names(self, is_leaf_fn=None): |
|---|
| 558 | """ |
|---|
| 559 | Returns an iterator over the leaf names under this node. |
|---|
| 560 | |
|---|
| 561 | :argument None is_leaf_fn: See :func:`TreeNode.traverse` for |
|---|
| 562 | documentation. |
|---|
| 563 | """ |
|---|
| 564 | for n in self.iter_leaves(is_leaf_fn=is_leaf_fn): |
|---|
| 565 | yield n.name |
|---|
| 566 | |
|---|
| 567 | def get_leaf_names(self, is_leaf_fn=None): |
|---|
| 568 | """ |
|---|
| 569 | Returns the list of terminal node names under the current |
|---|
| 570 | node. |
|---|
| 571 | |
|---|
| 572 | :argument None is_leaf_fn: See :func:`TreeNode.traverse` for |
|---|
| 573 | documentation. |
|---|
| 574 | """ |
|---|
| 575 | return [name for name in self.iter_leaf_names(is_leaf_fn=is_leaf_fn)] |
|---|
| 576 | |
|---|
| 577 | def iter_descendants(self, strategy="levelorder", is_leaf_fn=None): |
|---|
| 578 | """ |
|---|
| 579 | Returns an iterator over all descendant nodes. |
|---|
| 580 | |
|---|
| 581 | :argument None is_leaf_fn: See :func:`TreeNode.traverse` for |
|---|
| 582 | documentation. |
|---|
| 583 | """ |
|---|
| 584 | for n in self.traverse(strategy=strategy, is_leaf_fn=is_leaf_fn): |
|---|
| 585 | if n is not self: |
|---|
| 586 | yield n |
|---|
| 587 | |
|---|
| 588 | def get_descendants(self, strategy="levelorder", is_leaf_fn=None): |
|---|
| 589 | """ |
|---|
| 590 | Returns a list of all (leaves and internal) descendant nodes. |
|---|
| 591 | |
|---|
| 592 | :argument None is_leaf_fn: See :func:`TreeNode.traverse` for |
|---|
| 593 | documentation. |
|---|
| 594 | """ |
|---|
| 595 | return [n for n in self.iter_descendants(strategy=strategy, \ |
|---|
| 596 | is_leaf_fn=is_leaf_fn)] |
|---|
| 597 | |
|---|
| 598 | def traverse(self, strategy="levelorder", is_leaf_fn=None): |
|---|
| 599 | """ |
|---|
| 600 | Returns an iterator to traverse the tree structure under this |
|---|
| 601 | node. |
|---|
| 602 | |
|---|
| 603 | :argument "levelorder" strategy: set the way in which tree |
|---|
| 604 | will be traversed. Possible values are: "preorder" (first |
|---|
| 605 | parent and then children) 'postorder' (first children and |
|---|
| 606 | the parent) and "levelorder" (nodes are visited in order |
|---|
| 607 | from root to leaves) |
|---|
| 608 | |
|---|
| 609 | :argument None is_leaf_fn: If supplied, ``is_leaf_fn`` |
|---|
| 610 | function will be used to interrogate nodes about if they |
|---|
| 611 | are terminal or internal. ``is_leaf_fn`` function should |
|---|
| 612 | receive a node instance as first argument and return True |
|---|
| 613 | or False. Use this argument to traverse a tree by |
|---|
| 614 | dynamically collapsing internal nodes matching |
|---|
| 615 | ``is_leaf_fn``. |
|---|
| 616 | """ |
|---|
| 617 | if strategy=="preorder": |
|---|
| 618 | return self._iter_descendants_preorder(is_leaf_fn=is_leaf_fn) |
|---|
| 619 | elif strategy=="levelorder": |
|---|
| 620 | return self._iter_descendants_levelorder(is_leaf_fn=is_leaf_fn) |
|---|
| 621 | elif strategy=="postorder": |
|---|
| 622 | return self._iter_descendants_postorder(is_leaf_fn=is_leaf_fn) |
|---|
| 623 | |
|---|
| 624 | def _iter_descendants_postorder_recursive(self, is_leaf_fn=None): |
|---|
| 625 | """ |
|---|
| 626 | Iterate over all desdecendant nodes. |
|---|
| 627 | """ |
|---|
| 628 | if not is_leaf_fn or not is_leaf_fn(self): |
|---|
| 629 | for ch in self.children: |
|---|
| 630 | for node in ch._iter_descendants_postorder(is_leaf_fn=is_leaf_fn): |
|---|
| 631 | yield node |
|---|
| 632 | yield self |
|---|
| 633 | |
|---|
| 634 | def iter_prepostorder(self, is_leaf_fn=None): |
|---|
| 635 | """ |
|---|
| 636 | Iterate over all nodes in a tree yielding every node in both |
|---|
| 637 | pre and post order. Each iteration returns a postorder flag |
|---|
| 638 | (True if node is being visited in postorder) and a node |
|---|
| 639 | instance. |
|---|
| 640 | """ |
|---|
| 641 | to_visit = [self] |
|---|
| 642 | if is_leaf_fn is not None: |
|---|
| 643 | _leaf = is_leaf_fn |
|---|
| 644 | else: |
|---|
| 645 | _leaf = self.__class__.is_leaf |
|---|
| 646 | |
|---|
| 647 | while to_visit: |
|---|
| 648 | node = to_visit.pop(-1) |
|---|
| 649 | try: |
|---|
| 650 | node = node[1] |
|---|
| 651 | except TypeError: |
|---|
| 652 | # PREORDER ACTIONS |
|---|
| 653 | yield (False, node) |
|---|
| 654 | if not _leaf(node): |
|---|
| 655 | # ADD CHILDREN |
|---|
| 656 | to_visit.extend(reversed(node.children + [[1, node]])) |
|---|
| 657 | else: |
|---|
| 658 | #POSTORDER ACTIONS |
|---|
| 659 | yield (True, node) |
|---|
| 660 | |
|---|
| 661 | def _iter_descendants_postorder(self, is_leaf_fn=None): |
|---|
| 662 | to_visit = [self] |
|---|
| 663 | if is_leaf_fn is not None: |
|---|
| 664 | _leaf = is_leaf_fn |
|---|
| 665 | else: |
|---|
| 666 | _leaf = self.__class__.is_leaf |
|---|
| 667 | |
|---|
| 668 | while to_visit: |
|---|
| 669 | node = to_visit.pop(-1) |
|---|
| 670 | try: |
|---|
| 671 | node = node[1] |
|---|
| 672 | except TypeError: |
|---|
| 673 | # PREORDER ACTIONS |
|---|
| 674 | if not _leaf(node): |
|---|
| 675 | # ADD CHILDREN |
|---|
| 676 | to_visit.extend(reversed(node.children + [[1, node]])) |
|---|
| 677 | else: |
|---|
| 678 | yield node |
|---|
| 679 | else: |
|---|
| 680 | #POSTORDER ACTIONS |
|---|
| 681 | yield node |
|---|
| 682 | |
|---|
| 683 | def _iter_descendants_levelorder(self, is_leaf_fn=None): |
|---|
| 684 | """ |
|---|
| 685 | Iterate over all desdecendant nodes. |
|---|
| 686 | """ |
|---|
| 687 | tovisit = deque([self]) |
|---|
| 688 | while len(tovisit)>0: |
|---|
| 689 | node = tovisit.popleft() |
|---|
| 690 | yield node |
|---|
| 691 | if not is_leaf_fn or not is_leaf_fn(node): |
|---|
| 692 | tovisit.extend(node.children) |
|---|
| 693 | |
|---|
| 694 | def _iter_descendants_preorder(self, is_leaf_fn=None): |
|---|
| 695 | """ |
|---|
| 696 | Iterator over all descendant nodes. |
|---|
| 697 | """ |
|---|
| 698 | to_visit = deque() |
|---|
| 699 | node = self |
|---|
| 700 | while node is not None: |
|---|
| 701 | yield node |
|---|
| 702 | if not is_leaf_fn or not is_leaf_fn(node): |
|---|
| 703 | to_visit.extendleft(reversed(node.children)) |
|---|
| 704 | try: |
|---|
| 705 | node = to_visit.popleft() |
|---|
| 706 | except: |
|---|
| 707 | node = None |
|---|
| 708 | |
|---|
| 709 | def iter_ancestors(self): |
|---|
| 710 | '''versionadded: 2.2 |
|---|
| 711 | |
|---|
| 712 | Iterates over the list of all ancestor nodes from current node |
|---|
| 713 | to the current tree root. |
|---|
| 714 | |
|---|
| 715 | ''' |
|---|
| 716 | node = self |
|---|
| 717 | while node.up is not None: |
|---|
| 718 | yield node.up |
|---|
| 719 | node = node.up |
|---|
| 720 | |
|---|
| 721 | def get_ancestors(self): |
|---|
| 722 | '''versionadded: 2.2 |
|---|
| 723 | |
|---|
| 724 | Returns the list of all ancestor nodes from current node to |
|---|
| 725 | the current tree root. |
|---|
| 726 | |
|---|
| 727 | ''' |
|---|
| 728 | return [n for n in self.iter_ancestors()] |
|---|
| 729 | |
|---|
| 730 | def describe(self): |
|---|
| 731 | """ |
|---|
| 732 | Prints general information about this node and its |
|---|
| 733 | connections. |
|---|
| 734 | """ |
|---|
| 735 | if len(self.get_tree_root().children)==2: |
|---|
| 736 | rooting = "Yes" |
|---|
| 737 | elif len(self.get_tree_root().children)>2: |
|---|
| 738 | rooting = "No" |
|---|
| 739 | else: |
|---|
| 740 | rooting = "Unknown" |
|---|
| 741 | max_node, max_dist = self.get_farthest_leaf() |
|---|
| 742 | cached_content = self.get_cached_content() |
|---|
| 743 | print "Number of leaf nodes:\t%d" % len(cached_content[self]) |
|---|
| 744 | print "Number of internal nodes:\t%d" % len(cached_content) |
|---|
| 745 | print "Rooted:\t%s" %rooting |
|---|
| 746 | print "Most distant node:\t%s" %max_node.name |
|---|
| 747 | print "Max. distance:\t%f" %max_dist |
|---|
| 748 | |
|---|
| 749 | def write(self, features=None, outfile=None, format=0, is_leaf_fn=None, |
|---|
| 750 | format_root_node=False): |
|---|
| 751 | """ |
|---|
| 752 | Returns the newick representation of current node. Several |
|---|
| 753 | arguments control the way in which extra data is shown for |
|---|
| 754 | every node: |
|---|
| 755 | |
|---|
| 756 | :argument features: a list of feature names to be exported |
|---|
| 757 | using the Extended Newick Format (i.e. features=["name", |
|---|
| 758 | "dist"]). Use an empty list to export all available features |
|---|
| 759 | in each node (features=[]) |
|---|
| 760 | |
|---|
| 761 | :argument outfile: writes the output to a given file |
|---|
| 762 | |
|---|
| 763 | :argument format: defines the newick standard used to encode the |
|---|
| 764 | tree. See tutorial for details. |
|---|
| 765 | |
|---|
| 766 | :argument False format_root_node: If True, it allows features |
|---|
| 767 | and branch information from root node to be exported as a |
|---|
| 768 | part of the newick text string. For newick compatibility |
|---|
| 769 | reasons, this is False by default. |
|---|
| 770 | |
|---|
| 771 | :argument is_leaf_fn: See :func:`TreeNode.traverse` for |
|---|
| 772 | documentation. |
|---|
| 773 | |
|---|
| 774 | **Example:** |
|---|
| 775 | |
|---|
| 776 | :: |
|---|
| 777 | |
|---|
| 778 | t.get_newick(features=["species","name"], format=1) |
|---|
| 779 | |
|---|
| 780 | """ |
|---|
| 781 | |
|---|
| 782 | nw = write_newick(self, features = features, format=format, |
|---|
| 783 | is_leaf_fn=is_leaf_fn, |
|---|
| 784 | format_root_node=format_root_node) |
|---|
| 785 | if outfile is not None: |
|---|
| 786 | open(outfile, "w").write(nw) |
|---|
| 787 | else: |
|---|
| 788 | return nw |
|---|
| 789 | |
|---|
| 790 | def get_tree_root(self): |
|---|
| 791 | """ |
|---|
| 792 | Returns the absolute root node of current tree structure. |
|---|
| 793 | """ |
|---|
| 794 | root = self |
|---|
| 795 | while root.up is not None: |
|---|
| 796 | root = root.up |
|---|
| 797 | return root |
|---|
| 798 | |
|---|
| 799 | def get_common_ancestor(self, *target_nodes, **kargs): |
|---|
| 800 | """ |
|---|
| 801 | Returns the first common ancestor between this node and a given |
|---|
| 802 | list of 'target_nodes'. |
|---|
| 803 | |
|---|
| 804 | **Examples:** |
|---|
| 805 | |
|---|
| 806 | :: |
|---|
| 807 | |
|---|
| 808 | t = tree.Tree("(((A:0.1, B:0.01):0.001, C:0.0001):1.0[&&NHX:name=common], (D:0.00001):0.000001):2.0[&&NHX:name=root];") |
|---|
| 809 | A = t.get_descendants_by_name("A")[0] |
|---|
| 810 | C = t.get_descendants_by_name("C")[0] |
|---|
| 811 | common = A.get_common_ancestor(C) |
|---|
| 812 | print common.name |
|---|
| 813 | |
|---|
| 814 | """ |
|---|
| 815 | |
|---|
| 816 | get_path = kargs.get("get_path", False) |
|---|
| 817 | |
|---|
| 818 | if len(target_nodes) == 1 and type(target_nodes[0]) \ |
|---|
| 819 | in set([set, tuple, list, frozenset]): |
|---|
| 820 | target_nodes = target_nodes[0] |
|---|
| 821 | |
|---|
| 822 | # Convert node names into node instances |
|---|
| 823 | target_nodes = _translate_nodes(self, *target_nodes) |
|---|
| 824 | |
|---|
| 825 | # If only one node is provided, use self as the second target |
|---|
| 826 | if type(target_nodes) != list: |
|---|
| 827 | target_nodes = [target_nodes, self] |
|---|
| 828 | elif len(target_nodes)==1: |
|---|
| 829 | target_nodes = tree_nodes.append(self) |
|---|
| 830 | |
|---|
| 831 | n2path = {} |
|---|
| 832 | reference = [] |
|---|
| 833 | ref_node = None |
|---|
| 834 | for n in target_nodes: |
|---|
| 835 | current = n |
|---|
| 836 | while current: |
|---|
| 837 | n2path.setdefault(n, set()).add(current) |
|---|
| 838 | if not ref_node: |
|---|
| 839 | reference.append(current) |
|---|
| 840 | current = current.up |
|---|
| 841 | if not ref_node: |
|---|
| 842 | ref_node = n |
|---|
| 843 | |
|---|
| 844 | common = None |
|---|
| 845 | for n in reference: |
|---|
| 846 | broken = False |
|---|
| 847 | for node, path in n2path.iteritems(): |
|---|
| 848 | if node is not ref_node and n not in path: |
|---|
| 849 | broken = True |
|---|
| 850 | break |
|---|
| 851 | |
|---|
| 852 | if not broken: |
|---|
| 853 | common = n |
|---|
| 854 | break |
|---|
| 855 | if not common: |
|---|
| 856 | raise ValueError("Nodes are not connected!") |
|---|
| 857 | |
|---|
| 858 | if get_path: |
|---|
| 859 | return common, n2path |
|---|
| 860 | else: |
|---|
| 861 | return common |
|---|
| 862 | |
|---|
| 863 | def iter_search_nodes(self, **conditions): |
|---|
| 864 | """ |
|---|
| 865 | Search nodes in an interative way. Matches are being yield as |
|---|
| 866 | they are being found. This avoids to scan the full tree |
|---|
| 867 | topology before returning the first matches. Useful when |
|---|
| 868 | dealing with huge trees. |
|---|
| 869 | """ |
|---|
| 870 | |
|---|
| 871 | for n in self.traverse(): |
|---|
| 872 | conditions_passed = 0 |
|---|
| 873 | for key, value in conditions.iteritems(): |
|---|
| 874 | if hasattr(n, key) and getattr(n, key) == value: |
|---|
| 875 | conditions_passed +=1 |
|---|
| 876 | if conditions_passed == len(conditions): |
|---|
| 877 | yield n |
|---|
| 878 | |
|---|
| 879 | def search_nodes(self, **conditions): |
|---|
| 880 | """ |
|---|
| 881 | Returns the list of nodes matching a given set of conditions. |
|---|
| 882 | |
|---|
| 883 | **Example:** |
|---|
| 884 | |
|---|
| 885 | :: |
|---|
| 886 | |
|---|
| 887 | tree.search_nodes(dist=0.0, name="human") |
|---|
| 888 | |
|---|
| 889 | """ |
|---|
| 890 | matching_nodes = [] |
|---|
| 891 | for n in self.iter_search_nodes(**conditions): |
|---|
| 892 | matching_nodes.append(n) |
|---|
| 893 | return matching_nodes |
|---|
| 894 | |
|---|
| 895 | def get_leaves_by_name(self,name): |
|---|
| 896 | """ |
|---|
| 897 | Returns a list of leaf nodes matching a given name. |
|---|
| 898 | """ |
|---|
| 899 | return self.search_nodes(name=name, children=[]) |
|---|
| 900 | |
|---|
| 901 | def is_leaf(self): |
|---|
| 902 | """ |
|---|
| 903 | Return True if current node is a leaf. |
|---|
| 904 | """ |
|---|
| 905 | return len(self.children) == 0 |
|---|
| 906 | |
|---|
| 907 | def is_root(self): |
|---|
| 908 | """ |
|---|
| 909 | Returns True if current node has no parent |
|---|
| 910 | """ |
|---|
| 911 | if self.up is None: |
|---|
| 912 | return True |
|---|
| 913 | else: |
|---|
| 914 | return False |
|---|
| 915 | |
|---|
| 916 | # ########################### |
|---|
| 917 | # Distance related functions |
|---|
| 918 | # ########################### |
|---|
| 919 | def get_distance(self, target, target2=None, topology_only=False): |
|---|
| 920 | """ |
|---|
| 921 | Returns the distance between two nodes. If only one target is |
|---|
| 922 | specified, it returns the distance bewtween the target and the |
|---|
| 923 | current node. |
|---|
| 924 | |
|---|
| 925 | :argument target: a node within the same tree structure. |
|---|
| 926 | |
|---|
| 927 | :argument target2: a node within the same tree structure. If |
|---|
| 928 | not specified, current node is used as target2. |
|---|
| 929 | |
|---|
| 930 | :argument False topology_only: If set to True, distance will |
|---|
| 931 | refer to the number of nodes between target and target2. |
|---|
| 932 | |
|---|
| 933 | :returns: branch length distance between target and |
|---|
| 934 | target2. If topology_only flag is True, returns the number |
|---|
| 935 | of nodes between target and target2. |
|---|
| 936 | |
|---|
| 937 | """ |
|---|
| 938 | |
|---|
| 939 | if target2 is None: |
|---|
| 940 | target2 = self |
|---|
| 941 | root = self.get_tree_root() |
|---|
| 942 | else: |
|---|
| 943 | # is target node under current node? |
|---|
| 944 | root = self |
|---|
| 945 | |
|---|
| 946 | target, target2 = _translate_nodes(root, target, target2) |
|---|
| 947 | ancestor = root.get_common_ancestor(target, target2) |
|---|
| 948 | if ancestor is None: |
|---|
| 949 | raise TreeError, "Nodes are not connected" |
|---|
| 950 | |
|---|
| 951 | dist = 0.0 |
|---|
| 952 | for n in [target2, target]: |
|---|
| 953 | current = n |
|---|
| 954 | while current != ancestor: |
|---|
| 955 | if topology_only: |
|---|
| 956 | if current!=target: |
|---|
| 957 | dist += 1 |
|---|
| 958 | else: |
|---|
| 959 | dist += current.dist |
|---|
| 960 | current = current.up |
|---|
| 961 | return dist |
|---|
| 962 | |
|---|
| 963 | def get_farthest_node(self, topology_only=False): |
|---|
| 964 | """ |
|---|
| 965 | Returns the node's farthest descendant or ancestor node, and the |
|---|
| 966 | distance to it. |
|---|
| 967 | |
|---|
| 968 | :argument False topology_only: If set to True, distance |
|---|
| 969 | between nodes will be referred to the number of nodes |
|---|
| 970 | between them. In other words, topological distance will be |
|---|
| 971 | used instead of branch length distances. |
|---|
| 972 | |
|---|
| 973 | :return: A tuple containing the farthest node referred to the |
|---|
| 974 | current node and the distance to it. |
|---|
| 975 | |
|---|
| 976 | """ |
|---|
| 977 | # Init fasthest node to current farthest leaf |
|---|
| 978 | farthest_node,farthest_dist = self.get_farthest_leaf(topology_only=topology_only) |
|---|
| 979 | prev = self |
|---|
| 980 | if topology_only: |
|---|
| 981 | cdist = 0 |
|---|
| 982 | else: |
|---|
| 983 | cdist = prev.dist |
|---|
| 984 | current = prev.up |
|---|
| 985 | while current is not None: |
|---|
| 986 | for ch in current.children: |
|---|
| 987 | if ch != prev: |
|---|
| 988 | if not ch.is_leaf(): |
|---|
| 989 | fnode, fdist = ch.get_farthest_leaf(topology_only=topology_only) |
|---|
| 990 | else: |
|---|
| 991 | fnode = ch |
|---|
| 992 | fdist = 0 |
|---|
| 993 | if topology_only: |
|---|
| 994 | fdist += 1.0 |
|---|
| 995 | else: |
|---|
| 996 | fdist += ch.dist |
|---|
| 997 | if cdist+fdist > farthest_dist: |
|---|
| 998 | farthest_dist = cdist + fdist |
|---|
| 999 | farthest_node = fnode |
|---|
| 1000 | prev = current |
|---|
| 1001 | if topology_only: |
|---|
| 1002 | cdist += 1 |
|---|
| 1003 | else: |
|---|
| 1004 | cdist += prev.dist |
|---|
| 1005 | current = prev.up |
|---|
| 1006 | return farthest_node, farthest_dist |
|---|
| 1007 | |
|---|
| 1008 | def get_farthest_leaf(self, topology_only=False): |
|---|
| 1009 | """ |
|---|
| 1010 | Returns node's farthest descendant node (which is always a leaf), and the |
|---|
| 1011 | distance to it. |
|---|
| 1012 | |
|---|
| 1013 | :argument False topology_only: If set to True, distance |
|---|
| 1014 | between nodes will be referred to the number of nodes |
|---|
| 1015 | between them. In other words, topological distance will be |
|---|
| 1016 | used instead of branch length distances. |
|---|
| 1017 | |
|---|
| 1018 | :return: A tuple containing the farthest leaf referred to the |
|---|
| 1019 | current node and the distance to it. |
|---|
| 1020 | """ |
|---|
| 1021 | max_dist = 0.0 |
|---|
| 1022 | max_node = None |
|---|
| 1023 | if self.is_leaf(): |
|---|
| 1024 | return self, 0.0 |
|---|
| 1025 | else: |
|---|
| 1026 | for ch in self.children: |
|---|
| 1027 | node, d = ch.get_farthest_leaf(topology_only=topology_only) |
|---|
| 1028 | if topology_only: |
|---|
| 1029 | d += 1.0 |
|---|
| 1030 | else: |
|---|
| 1031 | d += ch.dist |
|---|
| 1032 | if d>=max_dist: |
|---|
| 1033 | max_dist = d |
|---|
| 1034 | max_node = node |
|---|
| 1035 | return max_node, max_dist |
|---|
| 1036 | |
|---|
| 1037 | def get_closest_leaf(self, topology_only=False): |
|---|
| 1038 | """Returns node's closest descendant leaf and the distance to |
|---|
| 1039 | it. |
|---|
| 1040 | |
|---|
| 1041 | :argument False topology_only: If set to True, distance |
|---|
| 1042 | between nodes will be referred to the number of nodes |
|---|
| 1043 | between them. In other words, topological distance will be |
|---|
| 1044 | used instead of branch length distances. |
|---|
| 1045 | |
|---|
| 1046 | :return: A tuple containing the closest leaf referred to the |
|---|
| 1047 | current node and the distance to it. |
|---|
| 1048 | |
|---|
| 1049 | """ |
|---|
| 1050 | min_dist = None |
|---|
| 1051 | min_node = None |
|---|
| 1052 | if self.is_leaf(): |
|---|
| 1053 | return self, 0.0 |
|---|
| 1054 | else: |
|---|
| 1055 | for ch in self.children: |
|---|
| 1056 | node, d = ch.get_closest_leaf(topology_only=topology_only) |
|---|
| 1057 | if topology_only: |
|---|
| 1058 | d += 1.0 |
|---|
| 1059 | else: |
|---|
| 1060 | d += ch.dist |
|---|
| 1061 | if min_dist is None or d<min_dist: |
|---|
| 1062 | min_dist = d |
|---|
| 1063 | min_node = node |
|---|
| 1064 | return min_node, min_dist |
|---|
| 1065 | |
|---|
| 1066 | |
|---|
| 1067 | |
|---|
| 1068 | def get_midpoint_outgroup(self): |
|---|
| 1069 | """ |
|---|
| 1070 | Returns the node that divides the current tree into two distance-balanced |
|---|
| 1071 | partitions. |
|---|
| 1072 | """ |
|---|
| 1073 | # Gets the farthest node to the current root |
|---|
| 1074 | root = self.get_tree_root() |
|---|
| 1075 | nA , r2A_dist = root.get_farthest_leaf() |
|---|
| 1076 | nB , A2B_dist = nA.get_farthest_node() |
|---|
| 1077 | |
|---|
| 1078 | outgroup = nA |
|---|
| 1079 | middist = A2B_dist / 2.0 |
|---|
| 1080 | cdist = 0 |
|---|
| 1081 | current = nA |
|---|
| 1082 | while current is not None: |
|---|
| 1083 | cdist += current.dist |
|---|
| 1084 | if cdist > (middist): # Deja de subir cuando se pasa del maximo |
|---|
| 1085 | break |
|---|
| 1086 | else: |
|---|
| 1087 | current = current.up |
|---|
| 1088 | return current |
|---|
| 1089 | |
|---|
| 1090 | def populate(self, size, names_library=None, reuse_names=False, |
|---|
| 1091 | random_branches=False, branch_range=(0,1), |
|---|
| 1092 | support_range=(0,1)): |
|---|
| 1093 | """ |
|---|
| 1094 | Generates a random topology by populating current node. |
|---|
| 1095 | |
|---|
| 1096 | :argument None names_library: If provided, names library |
|---|
| 1097 | (list, set, dict, etc.) will be used to name nodes. |
|---|
| 1098 | |
|---|
| 1099 | :argument False reuse_names: If True, node names will not be |
|---|
| 1100 | necessarily unique, which makes the process a bit more |
|---|
| 1101 | efficient. |
|---|
| 1102 | |
|---|
| 1103 | :argument False random_branches: If True, branch distances and support |
|---|
| 1104 | values will be randomized. |
|---|
| 1105 | |
|---|
| 1106 | :argument (0,1) branch_range: If random_branches is True, this |
|---|
| 1107 | range of values will be used to generate random distances. |
|---|
| 1108 | |
|---|
| 1109 | :argument (0,1) support_range: If random_branches is True, |
|---|
| 1110 | this range of values will be used to generate random branch |
|---|
| 1111 | support values. |
|---|
| 1112 | |
|---|
| 1113 | """ |
|---|
| 1114 | NewNode = self.__class__ |
|---|
| 1115 | |
|---|
| 1116 | if len(self.children) > 1: |
|---|
| 1117 | connector = NewNode() |
|---|
| 1118 | for ch in self.get_children(): |
|---|
| 1119 | ch.detach() |
|---|
| 1120 | connector.add_child(child = ch) |
|---|
| 1121 | root = NewNode() |
|---|
| 1122 | self.add_child(child = connector) |
|---|
| 1123 | self.add_child(child = root) |
|---|
| 1124 | else: |
|---|
| 1125 | root = self |
|---|
| 1126 | |
|---|
| 1127 | next = deque([root]) |
|---|
| 1128 | for i in xrange(size-1): |
|---|
| 1129 | if random.randint(0, 1): |
|---|
| 1130 | p = next.pop() |
|---|
| 1131 | else: |
|---|
| 1132 | p = next.popleft() |
|---|
| 1133 | |
|---|
| 1134 | c1 = p.add_child() |
|---|
| 1135 | c2 = p.add_child() |
|---|
| 1136 | next.extend([c1, c2]) |
|---|
| 1137 | if random_branches: |
|---|
| 1138 | c1.dist = random.uniform(*branch_range) |
|---|
| 1139 | c2.dist = random.uniform(*branch_range) |
|---|
| 1140 | c1.support = random.uniform(*branch_range) |
|---|
| 1141 | c2.support = random.uniform(*branch_range) |
|---|
| 1142 | else: |
|---|
| 1143 | c1.dist = 1.0 |
|---|
| 1144 | c2.dist = 1.0 |
|---|
| 1145 | c1.support = 1.0 |
|---|
| 1146 | c2.support = 1.0 |
|---|
| 1147 | |
|---|
| 1148 | # next contains leaf nodes |
|---|
| 1149 | charset = "abcdefghijklmnopqrstuvwxyz" |
|---|
| 1150 | if names_library: |
|---|
| 1151 | names_library = deque(names_library) |
|---|
| 1152 | else: |
|---|
| 1153 | avail_names = itertools.combinations_with_replacement(charset, 10) |
|---|
| 1154 | for n in next: |
|---|
| 1155 | if names_library: |
|---|
| 1156 | if reuse_names: |
|---|
| 1157 | tname = random.sample(names_library, 1)[0] |
|---|
| 1158 | else: |
|---|
| 1159 | tname = names_library.pop() |
|---|
| 1160 | else: |
|---|
| 1161 | tname = ''.join(avail_names.next()) |
|---|
| 1162 | n.name = tname |
|---|
| 1163 | |
|---|
| 1164 | def set_outgroup(self, outgroup): |
|---|
| 1165 | """ |
|---|
| 1166 | Sets a descendant node as the outgroup of a tree. This function |
|---|
| 1167 | can be used to root a tree or even an internal node. |
|---|
| 1168 | |
|---|
| 1169 | :argument outgroup: a node instance within the same tree |
|---|
| 1170 | structure that will be used as a basal node. |
|---|
| 1171 | |
|---|
| 1172 | """ |
|---|
| 1173 | |
|---|
| 1174 | outgroup = _translate_nodes(self, outgroup) |
|---|
| 1175 | |
|---|
| 1176 | if self == outgroup: |
|---|
| 1177 | raise ValueError, "Cannot set myself as outgroup" |
|---|
| 1178 | |
|---|
| 1179 | parent_outgroup = outgroup.up |
|---|
| 1180 | |
|---|
| 1181 | # Detects (sub)tree root |
|---|
| 1182 | n = outgroup |
|---|
| 1183 | while n.up is not self: |
|---|
| 1184 | n = n.up |
|---|
| 1185 | |
|---|
| 1186 | # If outgroup is a child from root, but with more than one |
|---|
| 1187 | # sister nodes, creates a new node to group them |
|---|
| 1188 | |
|---|
| 1189 | self.children.remove(n) |
|---|
| 1190 | if len(self.children) != 1: |
|---|
| 1191 | down_branch_connector = self.__class__() |
|---|
| 1192 | down_branch_connector.dist = 0.0 |
|---|
| 1193 | down_branch_connector.support = n.support |
|---|
| 1194 | for ch in self.get_children(): |
|---|
| 1195 | down_branch_connector.children.append(ch) |
|---|
| 1196 | ch.up = down_branch_connector |
|---|
| 1197 | self.children.remove(ch) |
|---|
| 1198 | else: |
|---|
| 1199 | down_branch_connector = self.children[0] |
|---|
| 1200 | |
|---|
| 1201 | # Connects down branch to myself or to outgroup |
|---|
| 1202 | quien_va_ser_padre = parent_outgroup |
|---|
| 1203 | if quien_va_ser_padre is not self: |
|---|
| 1204 | # Parent-child swapping |
|---|
| 1205 | quien_va_ser_hijo = quien_va_ser_padre.up |
|---|
| 1206 | quien_fue_padre = None |
|---|
| 1207 | buffered_dist = quien_va_ser_padre.dist |
|---|
| 1208 | buffered_support = quien_va_ser_padre.support |
|---|
| 1209 | |
|---|
| 1210 | while quien_va_ser_hijo is not self: |
|---|
| 1211 | quien_va_ser_padre.children.append(quien_va_ser_hijo) |
|---|
| 1212 | quien_va_ser_hijo.children.remove(quien_va_ser_padre) |
|---|
| 1213 | |
|---|
| 1214 | buffered_dist2 = quien_va_ser_hijo.dist |
|---|
| 1215 | buffered_support2 = quien_va_ser_hijo.support |
|---|
| 1216 | quien_va_ser_hijo.dist = buffered_dist |
|---|
| 1217 | quien_va_ser_hijo.support = buffered_support |
|---|
| 1218 | buffered_dist = buffered_dist2 |
|---|
| 1219 | buffered_support = buffered_support2 |
|---|
| 1220 | |
|---|
| 1221 | quien_va_ser_padre.up = quien_fue_padre |
|---|
| 1222 | quien_fue_padre = quien_va_ser_padre |
|---|
| 1223 | |
|---|
| 1224 | quien_va_ser_padre = quien_va_ser_hijo |
|---|
| 1225 | quien_va_ser_hijo = quien_va_ser_padre.up |
|---|
| 1226 | |
|---|
| 1227 | quien_va_ser_padre.children.append(down_branch_connector) |
|---|
| 1228 | down_branch_connector.up = quien_va_ser_padre |
|---|
| 1229 | quien_va_ser_padre.up = quien_fue_padre |
|---|
| 1230 | |
|---|
| 1231 | down_branch_connector.dist += buffered_dist |
|---|
| 1232 | outgroup2 = parent_outgroup |
|---|
| 1233 | parent_outgroup.children.remove(outgroup) |
|---|
| 1234 | outgroup2.dist = 0 |
|---|
| 1235 | |
|---|
| 1236 | else: |
|---|
| 1237 | outgroup2 = down_branch_connector |
|---|
| 1238 | |
|---|
| 1239 | outgroup.up = self |
|---|
| 1240 | outgroup2.up = self |
|---|
| 1241 | # outgroup is always the first children. Some function my |
|---|
| 1242 | # trust on this fact, so do no change this. |
|---|
| 1243 | self.children = [outgroup,outgroup2] |
|---|
| 1244 | middist = (outgroup2.dist + outgroup.dist)/2 |
|---|
| 1245 | outgroup.dist = middist |
|---|
| 1246 | outgroup2.dist = middist |
|---|
| 1247 | outgroup2.support = outgroup.support |
|---|
| 1248 | |
|---|
| 1249 | def unroot(self): |
|---|
| 1250 | """ |
|---|
| 1251 | Unroots current node. This function is expected to be used on |
|---|
| 1252 | the absolute tree root node, but it can be also be applied to |
|---|
| 1253 | any other internal node. It will convert a split into a |
|---|
| 1254 | multifurcation. |
|---|
| 1255 | """ |
|---|
| 1256 | # if is rooted |
|---|
| 1257 | if not self.is_root(): |
|---|
| 1258 | print >>sys.stderr, "Warning. You are unrooting an internal node.!!" |
|---|
| 1259 | if len(self.children)==2: |
|---|
| 1260 | if not self.children[0].is_leaf(): |
|---|
| 1261 | self.children[0].delete() |
|---|
| 1262 | elif not self.children[1].is_leaf(): |
|---|
| 1263 | self.children[1].delete() |
|---|
| 1264 | else: |
|---|
| 1265 | raise TreeError, "Cannot unroot a tree with only two leaves" |
|---|
| 1266 | |
|---|
| 1267 | def show(self, layout=None, tree_style=None, name="ETE"): |
|---|
| 1268 | """ |
|---|
| 1269 | Starts an interative session to visualize current node |
|---|
| 1270 | structure using provided layout and TreeStyle. |
|---|
| 1271 | |
|---|
| 1272 | """ |
|---|
| 1273 | try: |
|---|
| 1274 | from ete2.treeview import drawer |
|---|
| 1275 | except ImportError, e: |
|---|
| 1276 | print "'treeview' module could not be loaded.\n",e |
|---|
| 1277 | print "\n\n" |
|---|
| 1278 | print e |
|---|
| 1279 | else: |
|---|
| 1280 | drawer.show_tree(self, layout=layout, |
|---|
| 1281 | tree_style=tree_style, win_name=name) |
|---|
| 1282 | |
|---|
| 1283 | def render(self, file_name, layout=None, w=None, h=None, \ |
|---|
| 1284 | tree_style=None, units="px", dpi=90): |
|---|
| 1285 | """ |
|---|
| 1286 | Renders the node structure as an image. |
|---|
| 1287 | |
|---|
| 1288 | :var file_name: path to the output image file. valid |
|---|
| 1289 | extensions are .SVG, .PDF, .PNG |
|---|
| 1290 | |
|---|
| 1291 | :var layout: a layout function or a valid layout function name |
|---|
| 1292 | |
|---|
| 1293 | :var tree_style: a `TreeStyle` instance containing the image |
|---|
| 1294 | properties |
|---|
| 1295 | |
|---|
| 1296 | :var px units: "px": pixels, "mm": millimeters, "in": inches |
|---|
| 1297 | :var None h: height of the image in :attr:`units` |
|---|
| 1298 | :var None w: weight of the image in :attr:`units` |
|---|
| 1299 | :var 300 dpi: dots per inches. |
|---|
| 1300 | |
|---|
| 1301 | """ |
|---|
| 1302 | |
|---|
| 1303 | try: |
|---|
| 1304 | from ete2.treeview import drawer |
|---|
| 1305 | except ImportError, e: |
|---|
| 1306 | print "'treeview' module could not be loaded.\n",e |
|---|
| 1307 | print "\n\n" |
|---|
| 1308 | print e |
|---|
| 1309 | else: |
|---|
| 1310 | return drawer.render_tree(self, file_name, w=w, h=h, |
|---|
| 1311 | layout=layout, tree_style=tree_style, |
|---|
| 1312 | units=units, dpi=dpi) |
|---|
| 1313 | |
|---|
| 1314 | def copy(self, method="cpickle"): |
|---|
| 1315 | """.. versionadded: 2.1 |
|---|
| 1316 | |
|---|
| 1317 | Returns a copy of the current node. |
|---|
| 1318 | |
|---|
| 1319 | :var cpickle method: Protocol used to copy the node |
|---|
| 1320 | structure. The following values are accepted: |
|---|
| 1321 | |
|---|
| 1322 | - "newick": Tree topology, node names, branch lengths and |
|---|
| 1323 | branch support values will be copied by as represented in |
|---|
| 1324 | the newick string (copy by newick string serialisation). |
|---|
| 1325 | |
|---|
| 1326 | - "newick-extended": Tree topology and all node features |
|---|
| 1327 | will be copied based on the extended newick format |
|---|
| 1328 | representation. Only node features will be copied, thus |
|---|
| 1329 | excluding other node attributes. As this method is also |
|---|
| 1330 | based on newick serialisation, features will be converted |
|---|
| 1331 | into text strings when making the copy. |
|---|
| 1332 | |
|---|
| 1333 | - "cpickle": The whole node structure and its content is |
|---|
| 1334 | cloned based on cPickle object serialisation (slower, but |
|---|
| 1335 | recommended for full tree copying) |
|---|
| 1336 | |
|---|
| 1337 | - "deepcopy": The whole node structure and its content is |
|---|
| 1338 | copied based on the standard "copy" Python functionality |
|---|
| 1339 | (this is the slowest method but it allows to copy complex |
|---|
| 1340 | objects even if attributes point to lambda functions, |
|---|
| 1341 | etc.) |
|---|
| 1342 | |
|---|
| 1343 | """ |
|---|
| 1344 | if method=="newick": |
|---|
| 1345 | new_node = self.__class__(self.write(features=["name"], format_root_node=True)) |
|---|
| 1346 | elif method=="newick-extended": |
|---|
| 1347 | self.write(features=[], format_root_node=True) |
|---|
| 1348 | new_node = self.__class__(self.write(features=[])) |
|---|
| 1349 | elif method == "deepcopy": |
|---|
| 1350 | parent = self.up |
|---|
| 1351 | self.up = None |
|---|
| 1352 | new_node = copy.deepcopy(self) |
|---|
| 1353 | self.up = parent |
|---|
| 1354 | elif method == "cpickle": |
|---|
| 1355 | parent = self.up |
|---|
| 1356 | self.up = None |
|---|
| 1357 | new_node = cPickle.loads(cPickle.dumps(self, 2)) |
|---|
| 1358 | self.up = parent |
|---|
| 1359 | else: |
|---|
| 1360 | raise ValuerError("Invalid copy method") |
|---|
| 1361 | |
|---|
| 1362 | return new_node |
|---|
| 1363 | |
|---|
| 1364 | def _asciiArt(self, char1='-', show_internal=True, compact=False, attributes=None): |
|---|
| 1365 | """ |
|---|
| 1366 | Returns the ASCII representation of the tree. |
|---|
| 1367 | |
|---|
| 1368 | Code based on the PyCogent GPL project. |
|---|
| 1369 | """ |
|---|
| 1370 | if not attributes: |
|---|
| 1371 | attributes = ["name"] |
|---|
| 1372 | node_name = ', '.join(map(str, [getattr(self, v) for v in attributes if hasattr(self, v)])) |
|---|
| 1373 | |
|---|
| 1374 | LEN = max(3, len(node_name) if not self.children or show_internal else 3) |
|---|
| 1375 | PAD = ' ' * LEN |
|---|
| 1376 | PA = ' ' * (LEN-1) |
|---|
| 1377 | if not self.is_leaf(): |
|---|
| 1378 | mids = [] |
|---|
| 1379 | result = [] |
|---|
| 1380 | for c in self.children: |
|---|
| 1381 | if len(self.children) == 1: |
|---|
| 1382 | char2 = '/' |
|---|
| 1383 | elif c is self.children[0]: |
|---|
| 1384 | char2 = '/' |
|---|
| 1385 | elif c is self.children[-1]: |
|---|
| 1386 | char2 = '\\' |
|---|
| 1387 | else: |
|---|
| 1388 | char2 = '-' |
|---|
| 1389 | (clines, mid) = c._asciiArt(char2, show_internal, compact, attributes) |
|---|
| 1390 | mids.append(mid+len(result)) |
|---|
| 1391 | result.extend(clines) |
|---|
| 1392 | if not compact: |
|---|
| 1393 | result.append('') |
|---|
| 1394 | if not compact: |
|---|
| 1395 | result.pop() |
|---|
| 1396 | (lo, hi, end) = (mids[0], mids[-1], len(result)) |
|---|
| 1397 | prefixes = [PAD] * (lo+1) + [PA+'|'] * (hi-lo-1) + [PAD] * (end-hi) |
|---|
| 1398 | mid = (lo + hi) / 2 |
|---|
| 1399 | prefixes[mid] = char1 + '-'*(LEN-2) + prefixes[mid][-1] |
|---|
| 1400 | result = [p+l for (p,l) in zip(prefixes, result)] |
|---|
| 1401 | if show_internal: |
|---|
| 1402 | stem = result[mid] |
|---|
| 1403 | result[mid] = stem[0] + node_name + stem[len(node_name)+1:] |
|---|
| 1404 | return (result, mid) |
|---|
| 1405 | else: |
|---|
| 1406 | return ([char1 + '-' + node_name], 0) |
|---|
| 1407 | |
|---|
| 1408 | def get_ascii(self, show_internal=True, compact=False, attributes=None): |
|---|
| 1409 | """ |
|---|
| 1410 | Returns a string containing an ascii drawing of the tree. |
|---|
| 1411 | |
|---|
| 1412 | :argument show_internal: includes internal edge names. |
|---|
| 1413 | :argument compact: use exactly one line per tip. |
|---|
| 1414 | |
|---|
| 1415 | :param attributes: A list of node attributes to shown in the |
|---|
| 1416 | ASCII representation. |
|---|
| 1417 | |
|---|
| 1418 | """ |
|---|
| 1419 | (lines, mid) = self._asciiArt(show_internal=show_internal, |
|---|
| 1420 | compact=compact, attributes=attributes) |
|---|
| 1421 | return '\n'+'\n'.join(lines) |
|---|
| 1422 | |
|---|
| 1423 | def ladderize(self, direction=0): |
|---|
| 1424 | """ |
|---|
| 1425 | .. versionadded: 2.1 |
|---|
| 1426 | |
|---|
| 1427 | Sort the branches of a given tree (swapping children nodes) |
|---|
| 1428 | according to the size of each partition. |
|---|
| 1429 | |
|---|
| 1430 | :: |
|---|
| 1431 | |
|---|
| 1432 | t = Tree("(f,((d, ((a,b),c)),e));") |
|---|
| 1433 | |
|---|
| 1434 | print t |
|---|
| 1435 | |
|---|
| 1436 | # |
|---|
| 1437 | # /-f |
|---|
| 1438 | # | |
|---|
| 1439 | # | /-d |
|---|
| 1440 | # ----| | |
|---|
| 1441 | # | /---| /-a |
|---|
| 1442 | # | | | /---| |
|---|
| 1443 | # | | \---| \-b |
|---|
| 1444 | # \---| | |
|---|
| 1445 | # | \-c |
|---|
| 1446 | # | |
|---|
| 1447 | # \-e |
|---|
| 1448 | |
|---|
| 1449 | t.ladderize() |
|---|
| 1450 | print t |
|---|
| 1451 | |
|---|
| 1452 | # /-f |
|---|
| 1453 | # ----| |
|---|
| 1454 | # | /-e |
|---|
| 1455 | # \---| |
|---|
| 1456 | # | /-d |
|---|
| 1457 | # \---| |
|---|
| 1458 | # | /-c |
|---|
| 1459 | # \---| |
|---|
| 1460 | # | /-a |
|---|
| 1461 | # \---| |
|---|
| 1462 | # \-b |
|---|
| 1463 | |
|---|
| 1464 | """ |
|---|
| 1465 | |
|---|
| 1466 | if not self.is_leaf(): |
|---|
| 1467 | n2s = {} |
|---|
| 1468 | for n in self.get_children(): |
|---|
| 1469 | s = n.ladderize(direction=direction) |
|---|
| 1470 | n2s[n] = s |
|---|
| 1471 | |
|---|
| 1472 | self.children.sort(lambda x,y: cmp(n2s[x], n2s[y])) |
|---|
| 1473 | if direction == 1: |
|---|
| 1474 | self.children.reverse() |
|---|
| 1475 | size = sum(n2s.values()) |
|---|
| 1476 | else: |
|---|
| 1477 | size = 1 |
|---|
| 1478 | |
|---|
| 1479 | return size |
|---|
| 1480 | |
|---|
| 1481 | def sort_descendants(self): |
|---|
| 1482 | """ |
|---|
| 1483 | .. versionadded: 2.1 |
|---|
| 1484 | |
|---|
| 1485 | This function sort the branches of a given tree by |
|---|
| 1486 | considerening node names. After the tree is sorted, nodes are |
|---|
| 1487 | labeled using ascendent numbers. This can be used to ensure |
|---|
| 1488 | that nodes in a tree with the same node names are always |
|---|
| 1489 | labeled in the same way. Note that if duplicated names are |
|---|
| 1490 | present, extra criteria should be added to sort nodes. |
|---|
| 1491 | |
|---|
| 1492 | Unique id is stored as a node._nid attribute |
|---|
| 1493 | |
|---|
| 1494 | """ |
|---|
| 1495 | |
|---|
| 1496 | node2content = self.get_cached_content() |
|---|
| 1497 | def sort_by_content(x, y): |
|---|
| 1498 | return cmp(str(sorted([i.name for i in node2content[x]])), |
|---|
| 1499 | str(sorted([i.name for i in node2content[y]]))) |
|---|
| 1500 | |
|---|
| 1501 | for n in self.traverse(): |
|---|
| 1502 | if not n.is_leaf(): |
|---|
| 1503 | n.children.sort(sort_by_content) |
|---|
| 1504 | return node2content |
|---|
| 1505 | |
|---|
| 1506 | def get_cached_content(self, store_attr=None, _store=None): |
|---|
| 1507 | """ |
|---|
| 1508 | .. versionadded: 2.2 |
|---|
| 1509 | |
|---|
| 1510 | Returns a dictionary pointing to the preloaded content of each |
|---|
| 1511 | internal node under this tree. Such a dictionary is intended |
|---|
| 1512 | to work as a cache for operations that require many traversal |
|---|
| 1513 | operations. |
|---|
| 1514 | |
|---|
| 1515 | :param None store_attr: Specifies the node attribute that |
|---|
| 1516 | should be cached (i.e. name, distance, etc.). When none, the |
|---|
| 1517 | whole node instance is cached. |
|---|
| 1518 | |
|---|
| 1519 | :param _store: (internal use) |
|---|
| 1520 | |
|---|
| 1521 | """ |
|---|
| 1522 | if _store is None: |
|---|
| 1523 | _store = {} |
|---|
| 1524 | |
|---|
| 1525 | for ch in self.children: |
|---|
| 1526 | ch.get_cached_content(store_attr=store_attr, _store=_store) |
|---|
| 1527 | |
|---|
| 1528 | if self.children: |
|---|
| 1529 | val = set() |
|---|
| 1530 | for ch in self.children: |
|---|
| 1531 | val.update(_store[ch]) |
|---|
| 1532 | _store[self] = val |
|---|
| 1533 | else: |
|---|
| 1534 | if store_attr is None: |
|---|
| 1535 | val = self |
|---|
| 1536 | else: |
|---|
| 1537 | val = getattr(self, store_attr) |
|---|
| 1538 | _store[self] = set([val]) |
|---|
| 1539 | return _store |
|---|
| 1540 | |
|---|
| 1541 | def robinson_foulds(self, t2, attr_t1="name", attr_t2="name"): |
|---|
| 1542 | """ |
|---|
| 1543 | .. versionadded: 2.2 |
|---|
| 1544 | |
|---|
| 1545 | Returns the Robinson-Foulds symmetric distance between current |
|---|
| 1546 | tree and a different tree instance. |
|---|
| 1547 | |
|---|
| 1548 | :param t2: target tree |
|---|
| 1549 | |
|---|
| 1550 | :param name attr_t1: Compare trees using a custom node |
|---|
| 1551 | attribute as a node name. |
|---|
| 1552 | |
|---|
| 1553 | :param name attr_t2: Compare trees using a custom node |
|---|
| 1554 | attribute as a node name in target tree. |
|---|
| 1555 | |
|---|
| 1556 | :returns: (symmetric distance, total partitions, common node |
|---|
| 1557 | names, partitions in current tree, partitions in target tree) |
|---|
| 1558 | |
|---|
| 1559 | """ |
|---|
| 1560 | |
|---|
| 1561 | t1 = self |
|---|
| 1562 | t1content = t1.get_cached_content() |
|---|
| 1563 | t2content = t2.get_cached_content() |
|---|
| 1564 | target_names = set([getattr(_n, attr_t1) for _n in t1content[t1]]) |
|---|
| 1565 | ref_names = set([getattr(_n, attr_t2) for _n in t2content[t2]]) |
|---|
| 1566 | common_names = target_names & ref_names |
|---|
| 1567 | if len(common_names) < 2: |
|---|
| 1568 | raise ValueError("Trees share less than 2 nodes") |
|---|
| 1569 | |
|---|
| 1570 | r1 = set([",".join(sorted([getattr(_c, attr_t1) for _c in cont |
|---|
| 1571 | if getattr(_c, attr_t1) in common_names])) |
|---|
| 1572 | for cont in t1content.values() if len(cont)>1]) |
|---|
| 1573 | r2 = set([",".join(sorted([getattr(_c, attr_t2) for _c in cont |
|---|
| 1574 | if getattr(_c, attr_t2) in common_names])) |
|---|
| 1575 | for cont in t2content.values() if len(cont)>1]) |
|---|
| 1576 | r1.discard("") |
|---|
| 1577 | r2.discard("") |
|---|
| 1578 | inters = r1.intersection(r2) |
|---|
| 1579 | if len(r1) == len(r2): |
|---|
| 1580 | rf = (len(r1) - len(inters)) * 2 |
|---|
| 1581 | else : |
|---|
| 1582 | rf = (len(r1) - len(inters)) + (len(r2) - len(inters)) |
|---|
| 1583 | max_parts = len(r1) + len(r2) |
|---|
| 1584 | return rf, max_parts, common_names, r1, r2 |
|---|
| 1585 | |
|---|
| 1586 | def get_partitions(self): |
|---|
| 1587 | """ |
|---|
| 1588 | .. versionadded: 2.1 |
|---|
| 1589 | |
|---|
| 1590 | It returns the set of all possible partitions under a |
|---|
| 1591 | node. Note that current implementation is quite inefficient |
|---|
| 1592 | when used in very large trees. |
|---|
| 1593 | |
|---|
| 1594 | t = Tree("((a, b), e);") |
|---|
| 1595 | partitions = t.get_partitions() |
|---|
| 1596 | |
|---|
| 1597 | # Will return: |
|---|
| 1598 | # a,b,e |
|---|
| 1599 | # a,e |
|---|
| 1600 | # b,e |
|---|
| 1601 | # a,b |
|---|
| 1602 | # e |
|---|
| 1603 | # b |
|---|
| 1604 | # a |
|---|
| 1605 | """ |
|---|
| 1606 | all_leaves = frozenset(self.get_leaf_names()) |
|---|
| 1607 | all_partitions = set([all_leaves]) |
|---|
| 1608 | for n in self.iter_descendants(): |
|---|
| 1609 | p1 = frozenset(n.get_leaf_names()) |
|---|
| 1610 | p2 = frozenset(all_leaves - p1) |
|---|
| 1611 | all_partitions.add(p1) |
|---|
| 1612 | all_partitions.add(p2) |
|---|
| 1613 | return all_partitions |
|---|
| 1614 | |
|---|
| 1615 | def convert_to_ultrametric(self, tree_length, strategy="balanced"): |
|---|
| 1616 | """ |
|---|
| 1617 | .. versionadded: 2.1 |
|---|
| 1618 | |
|---|
| 1619 | Converts a tree to ultrametric topology (all leaves must have |
|---|
| 1620 | the same distance to root). Note that, for visual inspection |
|---|
| 1621 | of ultrametric trees, node.img_style["size"] should be set to |
|---|
| 1622 | 0. |
|---|
| 1623 | """ |
|---|
| 1624 | |
|---|
| 1625 | # pre-calculate how many splits remain under each node |
|---|
| 1626 | node2max_depth = {} |
|---|
| 1627 | for node in self.traverse("postorder"): |
|---|
| 1628 | if not node.is_leaf(): |
|---|
| 1629 | max_depth = max([node2max_depth[c] for c in node.children]) + 1 |
|---|
| 1630 | node2max_depth[node] = max_depth |
|---|
| 1631 | else: |
|---|
| 1632 | node2max_depth[node] = 1 |
|---|
| 1633 | node2dist = {self: 0.0} |
|---|
| 1634 | tree_length = float(tree_length) |
|---|
| 1635 | step = tree_length / node2max_depth[self] |
|---|
| 1636 | for node in self.iter_descendants("levelorder"): |
|---|
| 1637 | if strategy == "balanced": |
|---|
| 1638 | node.dist = (tree_length - node2dist[node.up]) / node2max_depth[node] |
|---|
| 1639 | node2dist[node] = node.dist + node2dist[node.up] |
|---|
| 1640 | elif strategy == "fixed": |
|---|
| 1641 | if not node.is_leaf(): |
|---|
| 1642 | node.dist = step |
|---|
| 1643 | else: |
|---|
| 1644 | node.dist = tree_length - ((node2dist[node.up]) * step) |
|---|
| 1645 | node2dist[node] = node2dist[node.up] + 1 |
|---|
| 1646 | node.dist = node.dist |
|---|
| 1647 | |
|---|
| 1648 | def check_monophyly(self, values, target_attr, ignore_missing=False): |
|---|
| 1649 | """ |
|---|
| 1650 | Returns True if a given target attribute is monophyletic under |
|---|
| 1651 | this node for the provided set of values. |
|---|
| 1652 | |
|---|
| 1653 | If not all values are represented in the current tree |
|---|
| 1654 | structure, a ValueError exception will be raised to warn that |
|---|
| 1655 | strict monophyly could never be reached (this behaviour can be |
|---|
| 1656 | avoided by enabling the `ignore_missing` flag. |
|---|
| 1657 | |
|---|
| 1658 | :param values: a set of values for which monophyly is |
|---|
| 1659 | expected. |
|---|
| 1660 | |
|---|
| 1661 | :param target_attr: node attribute being used to check |
|---|
| 1662 | monophyly (i.e. species for species trees, names for gene |
|---|
| 1663 | family trees, or any custom feature present in the tree). |
|---|
| 1664 | |
|---|
| 1665 | :param False ignore_missing: Avoid raising an Exception when |
|---|
| 1666 | missing attributes are found. |
|---|
| 1667 | """ |
|---|
| 1668 | if type(values) != set: |
|---|
| 1669 | values = set(values) |
|---|
| 1670 | |
|---|
| 1671 | # This is the only time I traverse the tree, then I use cached |
|---|
| 1672 | # leaf content |
|---|
| 1673 | n2leaves = self.get_cached_content() |
|---|
| 1674 | # Locate leaves matching requested attribute values |
|---|
| 1675 | targets = [leaf for leaf in n2leaves[self] |
|---|
| 1676 | if getattr(leaf, target_attr) in values] |
|---|
| 1677 | |
|---|
| 1678 | # Raise an error if requested attribute values are not even present |
|---|
| 1679 | if not ignore_missing: |
|---|
| 1680 | missing_values = values - set([getattr(n, target_attr) for n |
|---|
| 1681 | in targets]) |
|---|
| 1682 | if missing_values: |
|---|
| 1683 | raise ValueError("Expected '%s' value(s) not found: %s" %( |
|---|
| 1684 | target_attr, ','.join(missing_values))) |
|---|
| 1685 | |
|---|
| 1686 | # Check monophyly with get_common_ancestor. Note that this |
|---|
| 1687 | # step does not require traversing the tree again because |
|---|
| 1688 | # targets are node instances instead of node names, and |
|---|
| 1689 | # get_common_ancestor function is smart enough to detect it |
|---|
| 1690 | # and avoid unnecessary traversing. |
|---|
| 1691 | common = self.get_common_ancestor(targets) |
|---|
| 1692 | observed = n2leaves[common] |
|---|
| 1693 | foreign_leaves = [leaf for leaf in observed |
|---|
| 1694 | if getattr(leaf, target_attr) not in values] |
|---|
| 1695 | if not foreign_leaves: |
|---|
| 1696 | return True, "monophyletic" |
|---|
| 1697 | else: |
|---|
| 1698 | # if the requested attribute is not monophyletic in this |
|---|
| 1699 | # node, let's differentiate between poly and paraphyly. |
|---|
| 1700 | poly_common = self.get_common_ancestor(foreign_leaves) |
|---|
| 1701 | # if the common ancestor of all foreign leaves is self |
|---|
| 1702 | # contained, we have a paraphyly. Otherwise, polyphyly. |
|---|
| 1703 | polyphyletic = [leaf for leaf in poly_common if |
|---|
| 1704 | getattr(leaf, target_attr) in values] |
|---|
| 1705 | if polyphyletic: |
|---|
| 1706 | return False, "polyphyletic" |
|---|
| 1707 | else: |
|---|
| 1708 | return False, "paraphyletic" |
|---|
| 1709 | |
|---|
| 1710 | def get_monophyletic(self, values, target_attr): |
|---|
| 1711 | """ |
|---|
| 1712 | .. versionadded:: 2.2 |
|---|
| 1713 | |
|---|
| 1714 | Returns a list of nodes matching the provided monophyly |
|---|
| 1715 | criteria. For a node to be considered a match, all |
|---|
| 1716 | `target_attr` values within and node, and exclusively them, |
|---|
| 1717 | should be grouped. |
|---|
| 1718 | |
|---|
| 1719 | :param values: a set of values for which monophyly is |
|---|
| 1720 | expected. |
|---|
| 1721 | |
|---|
| 1722 | :param target_attr: node attribute being used to check |
|---|
| 1723 | monophyly (i.e. species for species trees, names for gene |
|---|
| 1724 | family trees). |
|---|
| 1725 | |
|---|
| 1726 | """ |
|---|
| 1727 | |
|---|
| 1728 | if type(values) != set: |
|---|
| 1729 | values = set(values) |
|---|
| 1730 | |
|---|
| 1731 | n2values = self.get_cached_content(store_attr=target_attr) |
|---|
| 1732 | |
|---|
| 1733 | is_monophyletic = lambda node: n2values[node] == values |
|---|
| 1734 | for match in self.iter_leaves(is_leaf_fn=is_monophyletic): |
|---|
| 1735 | if is_monophyletic(match): |
|---|
| 1736 | yield match |
|---|
| 1737 | |
|---|
| 1738 | def resolve_polytomy(self, default_dist=0.0, default_support=0.0, |
|---|
| 1739 | recursive=True): |
|---|
| 1740 | """ |
|---|
| 1741 | .. versionadded: 2.2 |
|---|
| 1742 | |
|---|
| 1743 | Resolve all polytomies under current node by creating an |
|---|
| 1744 | arbitrary dicotomic structure among the affected nodes. This |
|---|
| 1745 | function randomly modifies current tree topology and should |
|---|
| 1746 | only be used for compatibility reasons (i.e. programs |
|---|
| 1747 | rejecting multifurcated node in the newick representation). |
|---|
| 1748 | |
|---|
| 1749 | :param 0.0 default_dist: artificial branch distance of new |
|---|
| 1750 | nodes. |
|---|
| 1751 | |
|---|
| 1752 | :param 0.0 default_support: artificial branch support of new |
|---|
| 1753 | nodes. |
|---|
| 1754 | |
|---|
| 1755 | :param True recursive: Resolve any polytomy under this |
|---|
| 1756 | node. When False, only current node will be checked and fixed. |
|---|
| 1757 | """ |
|---|
| 1758 | |
|---|
| 1759 | |
|---|
| 1760 | def _resolve(node): |
|---|
| 1761 | if len(node.children) > 2: |
|---|
| 1762 | children = list(node.children) |
|---|
| 1763 | node.children = [] |
|---|
| 1764 | next_node = root = node |
|---|
| 1765 | for i in xrange(len(children)-2): |
|---|
| 1766 | next_node = next_node.add_child() |
|---|
| 1767 | next_node.dist = default_dist |
|---|
| 1768 | next_node.support = default_support |
|---|
| 1769 | |
|---|
| 1770 | next_node = root |
|---|
| 1771 | for ch in children: |
|---|
| 1772 | next_node.add_child(ch) |
|---|
| 1773 | if ch != children[-2]: |
|---|
| 1774 | next_node = next_node.children[0] |
|---|
| 1775 | target = [self] |
|---|
| 1776 | if recursive: |
|---|
| 1777 | target.extend([n for n in self.get_descendants()]) |
|---|
| 1778 | for n in target: |
|---|
| 1779 | _resolve(n) |
|---|
| 1780 | |
|---|
| 1781 | def add_face(self, face, column, position="branch-right"): |
|---|
| 1782 | """ |
|---|
| 1783 | .. versionadded: 2.1 |
|---|
| 1784 | |
|---|
| 1785 | Add a fixed face to the node. This type of faces will be |
|---|
| 1786 | always attached to nodes, independently of the layout |
|---|
| 1787 | function. |
|---|
| 1788 | |
|---|
| 1789 | :argument face: a Face or inherited instance |
|---|
| 1790 | :argument column: An integer number starting from 0 |
|---|
| 1791 | :argument "branch-right" position: Posible values are: |
|---|
| 1792 | "branch-right", "branch-top", "branch-bottom", "float", |
|---|
| 1793 | "aligned" |
|---|
| 1794 | """ |
|---|
| 1795 | |
|---|
| 1796 | if not hasattr(self, "_faces"): |
|---|
| 1797 | self._faces = _FaceAreas() |
|---|
| 1798 | |
|---|
| 1799 | if position not in FACE_POSITIONS: |
|---|
| 1800 | raise ValueError("face position not in %s" %FACE_POSITIONS) |
|---|
| 1801 | |
|---|
| 1802 | if isinstance(face, Face): |
|---|
| 1803 | getattr(self._faces, position).add_face(face, column=column) |
|---|
| 1804 | else: |
|---|
| 1805 | raise ValueError("not a Face instance") |
|---|
| 1806 | |
|---|
| 1807 | def set_style(self, node_style): |
|---|
| 1808 | """ |
|---|
| 1809 | .. versionadded: 2.1 |
|---|
| 1810 | |
|---|
| 1811 | Set 'node_style' as the fixed style for the current node. |
|---|
| 1812 | """ |
|---|
| 1813 | if TREEVIEW: |
|---|
| 1814 | if node_style is None: |
|---|
| 1815 | node_style = NodeStyle() |
|---|
| 1816 | if type(node_style) is NodeStyle: |
|---|
| 1817 | self._img_style = node_style |
|---|
| 1818 | else: |
|---|
| 1819 | raise ValueError("Treeview module is disabled") |
|---|
| 1820 | |
|---|
| 1821 | def phonehome(self): |
|---|
| 1822 | from ete2 import _ph |
|---|
| 1823 | _ph.call() |
|---|
| 1824 | |
|---|
| 1825 | def _translate_nodes(root, *nodes): |
|---|
| 1826 | name2node = dict([ [n, None] for n in nodes if type(n) is str]) |
|---|
| 1827 | for n in root.traverse(): |
|---|
| 1828 | if n.name in name2node: |
|---|
| 1829 | if name2node[n.name] is not None: |
|---|
| 1830 | raise ValueError, "Ambiguous node name: "+str(n.name) |
|---|
| 1831 | else: |
|---|
| 1832 | name2node[n.name] = n |
|---|
| 1833 | |
|---|
| 1834 | if None in name2node.values(): |
|---|
| 1835 | notfound = [key for key, value in name2node.iteritems() if value is None] |
|---|
| 1836 | raise ValueError("Node names not found: "+str(notfound)) |
|---|
| 1837 | |
|---|
| 1838 | valid_nodes = [] |
|---|
| 1839 | for n in nodes: |
|---|
| 1840 | if type(n) is not str: |
|---|
| 1841 | if type(n) is not root.__class__ : |
|---|
| 1842 | raise ValueError, "Invalid target node: "+str(n) |
|---|
| 1843 | else: |
|---|
| 1844 | valid_nodes.append(n) |
|---|
| 1845 | |
|---|
| 1846 | valid_nodes.extend(name2node.values()) |
|---|
| 1847 | if len(valid_nodes) == 1: |
|---|
| 1848 | return valid_nodes[0] |
|---|
| 1849 | else: |
|---|
| 1850 | return valid_nodes |
|---|
| 1851 | |
|---|
| 1852 | def OLD_translate_nodes(root, *nodes): |
|---|
| 1853 | target_nodes = [] |
|---|
| 1854 | for n in nodes: |
|---|
| 1855 | if type(n) is str: |
|---|
| 1856 | mnodes = root.search_nodes(name=n) |
|---|
| 1857 | if len(mnodes) == 0: |
|---|
| 1858 | raise ValueError, "Node name not found: "+str(n) |
|---|
| 1859 | elif len(mnodes)>1: |
|---|
| 1860 | raise ValueError, "Ambiguous node name: "+str(n) |
|---|
| 1861 | else: |
|---|
| 1862 | target_nodes.append(mnodes[0]) |
|---|
| 1863 | elif type(n) != root.__class__: |
|---|
| 1864 | raise ValueError, "Invalid target node: "+str(n) |
|---|
| 1865 | else: |
|---|
| 1866 | target_nodes.append(n) |
|---|
| 1867 | |
|---|
| 1868 | if len(target_nodes) == 1: |
|---|
| 1869 | return target_nodes[0] |
|---|
| 1870 | else: |
|---|
| 1871 | return target_nodes |
|---|
| 1872 | |
|---|
| 1873 | |
|---|
| 1874 | # Alias |
|---|
| 1875 | #: .. currentmodule:: ete2 |
|---|
| 1876 | Tree = TreeNode |
|---|
| 1877 | |
|---|