source: branches/items/GDE/SATIVA/sativa/epac/ete2/tree.py

Last change on this file was 12906, checked in by akozlov, 10 years ago

add sativa files and scripts for ARB integration

File size: 63.9 KB
Line 
1# -*- coding: utf-8 -*-
2# #START_LICENSE###########################################################
3#
4#
5# This file is part of the Environment for Tree Exploration program
6# (ETE).  http://ete.cgenomics.org
7
8# ETE is free software: you can redistribute it and/or modify it
9# under the terms of the GNU General Public License as published by
10# the Free Software Foundation, either version 3 of the License, or
11# (at your option) any later version.
12
13# ETE is distributed in the hope that it will be useful, but WITHOUT
14# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
16# License for more details.
17
18# You should have received a copy of the GNU General Public License
19# along with ETE.  If not, see <http://www.gnu.org/licenses/>.
20#
21#
22#                     ABOUT THE ETE PACKAGE
23#                     =====================
24#
25# ETE is distributed under the GPL copyleft license (2008-2011). 
26#
27# If you make use of ETE in published work, please cite:
28#
29# Jaime Huerta-Cepas, Joaquin Dopazo and Toni Gabaldon.
30# ETE: a python Environment for Tree Exploration. Jaime BMC
31# Bioinformatics 2010,:24doi:10.1186/1471-2105-11-24
32#
33# Note that extra references to the specific methods implemented in
34# the toolkit are available in the documentation.
35#
36# More info at http://ete.cgenomics.org
37#
38#
39# #END_LICENSE#############################################################
40__VERSION__="ete2-2.2rev1026" 
41#START_LICENSE###########################################################
42#
43# Copyright (C) 2009 by Jaime Huerta Cepas. All rights reserved.
44# email: jhcepas@gmail.com
45#
46# This file is part of the Environment for Tree Exploration program (ETE).
47# http://ete.cgenomics.org
48#
49# ETE is free software: you can redistribute it and/or modify it
50# under the terms of the GNU General Public License as published by
51# the Free Software Foundation, either version 3 of the License, or
52# (at your option) any later version.
53#
54# ETE is distributed in the hope that it will be useful,
55# but WITHOUT ANY WARRANTY; without even the implied warranty of
56# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
57# GNU General Public License for more details.
58#
59# You should have received a copy of the GNU General Public License
60# along with ETE.  If not, see <http://www.gnu.org/licenses/>.
61#
62# #END_LICENSE#############################################################
63import os
64import cPickle
65import random
66import copy
67from collections import deque
68import itertools
69from newick import read_newick, write_newick
70
71# the following imports are necessary to set fixed styles and faces
72#try:
73#    from ete2.treeview.main import NodeStyle, _FaceAreas, FaceContainer, FACE_POSITIONS
74#    from ete2.treeview.faces import Face
75#except ImportError:
76#    TREEVIEW = False
77#else:
78#    TREEVIEW = True
79
80__all__ = ["Tree", "TreeNode"]
81
82TREEVIEW = False
83DEFAULT_COMPACT = False
84DEFAULT_SHOWINTERNAL = False
85DEFAULT_DIST = 1.0
86DEFAULT_SUPPORT = 1.0
87DEFAULT_NAME = "NoName"
88
89class TreeError(Exception):
90    """
91    A problem occurred during a TreeNode operation
92    """
93    def __init__(self, value=''):
94        self.value = value
95    def __str__(self):
96        return repr(self.value)
97
98class TreeNode(object):
99    """
100    TreeNode (Tree) class is used to store a tree structure. A tree
101    consists of a collection of TreeNode instances connected in a
102    hierarchical way. Trees can be loaded from the New Hampshire Newick
103    format (newick).
104
105    :argument newick: Path to the file containing the tree or, alternatively,
106       the text string containing the same information.
107
108    :argument 0 format: subnewick format
109
110      .. table::                                               
111
112          ======  ==============================================
113          FORMAT  DESCRIPTION                                   
114          ======  ==============================================
115          0        flexible with support values                 
116          1        flexible with internal node names             
117          2        all branches + leaf names + internal supports
118          3        all branches + all names                     
119          4        leaf branches + leaf names                   
120          5        internal and leaf branches + leaf names       
121          6        internal branches + leaf names               
122          7        leaf branches + all names                     
123          8        all names                                     
124          9        leaf names                                   
125          100      topology only                                 
126          ======  ==============================================
127
128    :returns: a tree node object which represents the base of the tree.
129
130    ** Examples: **
131
132    ::
133
134        t1 = Tree() # creates an empty tree
135        t2 = Tree('(A:1,(B:1,(C:1,D:1):0.5):0.5);')
136        t3 = Tree('/home/user/myNewickFile.txt')
137    """
138
139    def _get_dist(self):
140        return self._dist
141    def _set_dist(self, value):
142        try:
143            self._dist = float(value)
144        except ValueError:
145            raise
146
147    def _get_support(self):
148        return self._support
149    def _set_support(self, value):
150        try:
151            self._support = float(value)
152        except ValueError:
153            raise
154
155    def _get_up(self):
156        return self._up
157    def _set_up(self, value):
158        if type(value) == type(self) or value is None:
159            self._up = value
160        else:
161            raise ValueError("bad node_up type")
162
163    def _get_children(self):
164        return self._children
165    def _set_children(self, value):
166        if type(value) == list and \
167           len(set([type(n)==type(self) for n in value]))<2:
168            self._children = value
169        else:
170            raise ValueError("bad children type")
171
172    def _get_style(self):
173        if self._img_style is None:
174            self._set_style(None)
175           
176        return self._img_style
177
178    def _set_style(self, value):
179        self.set_style(value)
180
181    #: Branch length distance to parent node. Default = 0.0
182    img_style = property(fget=_get_style, fset=_set_style)
183             
184    #: Branch length distance to parent node. Default = 0.0
185    dist = property(fget=_get_dist, fset=_set_dist)
186    #: Branch support for current node
187    support = property(fget=_get_support, fset=_set_support)
188    #: Pointer to parent node
189    up = property(fget=_get_up, fset=_set_up)
190    #: A list of children nodes
191    children = property(fget=_get_children, fset=_set_children)
192
193    def _set_face_areas(self, value):
194        if isinstance(value, _FaceAreas):
195            self._faces = value
196        else:
197            raise ValueError("[%s] is not a valid FaceAreas instance" %type(value))
198       
199    def _get_face_areas(self):
200        if not hasattr(self, "_faces"):
201            self._faces = _FaceAreas()
202        return self._faces
203
204    faces = property(fget=_get_face_areas, \
205                         fset=_set_face_areas)
206
207    def __init__(self, newick=None, format=0, dist=None, support=None,
208                 name=None):
209        self._children = []
210        self._up = None
211        self._dist = DEFAULT_DIST
212        self._support = DEFAULT_SUPPORT
213        self._img_style = None
214        self.features = set([])
215        # Add basic features
216        self.features.update(["dist", "support", "name"])
217        if dist is not None:
218            self.dist = dist
219        if support is not None:
220            self.support = support
221
222        self.name = name if name is not None else DEFAULT_NAME
223       
224        # Initialize tree
225        if newick is not None:
226            read_newick(newick, root_node = self, format=format)
227
228    def __nonzero__(self):
229        return True
230
231    def __repr__(self):
232        return "Tree node '%s' (%s)" %(self.name, hex(self.__hash__()))
233
234    def __and__(self, value):
235        """ This allows to execute tree&'A' to obtain the descendant node
236        whose name is A"""
237        value=str(value)
238        try:
239            first_match = self.iter_search_nodes(name=value).next()
240            return first_match
241        except StopIteration:
242            raise ValueError, "Node not found"
243
244    def __add__(self, value):
245        """ This allows to sum two trees."""
246        # Should a make the sum with two copies of the original trees?
247        if type(value) == self.__class__:
248            new_root = self.__class__()
249            new_root.add_child(self)
250            new_root.add_child(value)
251            return new_root
252        else:
253            raise ValueError, "Invalid node type"
254
255    def __str__(self):
256        """ Print tree in newick format. """
257        return self.get_ascii(compact=DEFAULT_COMPACT, \
258                                show_internal=DEFAULT_SHOWINTERNAL)
259
260    def __contains__(self, item):
261        """ Check if item belongs to this node. The 'item' argument must
262        be a node instance or its associated name."""
263        if isinstance(item, self.__class__):
264            return item in set(self.get_descendants())
265        elif type(item)==str:
266            return item in set([n.name for n in self.traverse()])
267
268    def __len__(self):
269        """Node len returns number of children."""
270        return len(self.get_leaves())
271
272    def __iter__(self):
273        """ Iterator over leaf nodes"""
274        return self.iter_leaves()
275
276    def add_feature(self, pr_name, pr_value):
277        """
278        Add or update a node's feature.
279        """
280        setattr(self, pr_name, pr_value)
281        self.features.add(pr_name)
282
283    def add_features(self, **features):
284        """
285        Add or update several features. """
286        for fname, fvalue in features.iteritems():
287            setattr(self, fname, fvalue)
288            self.features.add(fname)
289
290    def del_feature(self, pr_name):
291        """
292        Permanently deletes a node's feature.
293        """
294        if hasattr(self, pr_name):
295            delattr(self, pr_name)
296            self.features.remove(pr_name)
297
298    # Topology management
299    def add_child(self, child=None, name=None, dist=None, support=None):
300        """
301        Adds a new child to this node. If child node is not suplied
302        as an argument, a new node instance will be created.
303
304        :argument None child: the node instance to be added as a child.
305        :argument None name: the name that will be given to the child.
306        :argument None dist: the distance from the node to the child.
307        :argument None support': the support value of child partition.
308
309        :returns: The child node instance
310
311        """
312        if child is None:
313            child = self.__class__()
314       
315        if name is not None:
316            child.name = name
317        if dist is not None:
318            child.dist = dist
319        if support is not None:
320            child.support = support
321           
322        self.children.append(child)
323        child.up = self
324        return child
325
326    def remove_child(self, child):
327        """
328        Removes a child from this node (parent and child
329        nodes still exit but are no longer connected).
330        """
331        try:
332            self.children.remove(child)
333        except ValueError, e:
334            raise TreeError, e
335        else:
336            child.up = None
337            return child
338
339    def add_sister(self, sister=None, name=None, dist=None):
340        """
341        Adds a sister to this node. If sister node is not supplied
342        as an argument, a new TreeNode instance will be created and
343        returned.
344        """
345        if self.up == None:
346            raise TreeError("A parent node is required to add a sister")
347        else:
348            return self.up.add_child(child=sister, name=name, dist=dist)
349
350    def remove_sister(self, sister=None):
351        """
352        Removes a node's sister node. It has the same effect as
353        **`TreeNode.up.remove_child(sister)`**
354
355        If a sister node is not supplied, the first sister will be deleted
356        and returned.
357
358        :argument sister: A node instance
359
360        :return: The node removed
361        """
362        sisters = self.get_sisters()
363        if len(sisters)>0:
364            if sister==None:
365                sister = sisters.pop(0)
366            return self.up.remove_child(sister)
367
368    def delete(self, prevent_nondicotomic=True, preserve_branch_length=False):
369        """
370        Deletes node from the tree structure. Notice that this method
371        makes 'disappear' the node from the tree structure. This means
372        that children from the deleted node are transferred to the
373        next available parent.
374
375        :param True prevent_nondicotomic: When True (default), delete
376        function will be execute recursively to prevent single-child
377        nodes.
378
379        :param False preserve_branch_length: If True, branch lengths
380        of the deleted nodes are transferred (summed up) to its
381        parent's branch, thus keeping original distances among nodes.
382               
383        **Example:**
384
385        ::
386
387                / C
388          root-|
389               |        / B
390                \--- H |
391                        \ A
392
393          > root.delete(H) will produce this structure:
394
395                / C
396               |
397          root-|--B
398               |
399                \ A
400
401        """
402        parent = self.up
403        if parent:
404            for ch in self.children:
405                if preserve_branch_length:
406                    ch.dist += self.dist
407                parent.add_child(ch)
408
409            parent.remove_child(self)
410
411        # Avoids parents with only one child
412        if prevent_nondicotomic and parent and\
413              len(parent.children)<2:
414            parent.delete(prevent_nondicotomic=False,
415                          preserve_branch_length=preserve_branch_length)
416
417    def detach(self):
418        """
419        Detachs this node (and all its descendants) from its parent
420        and returns the referent to itself.
421
422        Detached node conserves all its structure of descendants, and can
423        be attached to another node through the 'add_child' function. This
424        mechanism can be seen as a cut and paste.
425        """
426
427        if self.up:
428            self.up.children.remove(self)
429            self.up = None
430        return self
431
432    def prune(self, nodes, preserve_branch_length=False):
433        """Prunes the topology of a node in order to conserve only a
434        selected list of leaf or internal nodes. The minimum number of
435        internal nodes (the deepest as possible) are kept to conserve
436        the topological relationship among the provided list of nodes.
437
438        :var nodes: a list of node names or node objects that must be kept
439       
440        :param False preserve_branch_length: If True, branch lengths
441        of the deleted nodes are transferred (summed up) to its
442        parent's branch, thus keeping original distances among nodes.
443       
444        **Examples:**
445
446        ::
447
448          t = Tree("(((A:0.1, B:0.01):0.001, C:0.0001):1.0[&&NHX:name=I], (D:0.00001):0.000001[&&NHX:name=J]):2.0[&&NHX:name=root];")
449          node_C = t.search_nodes(name="C")[0]
450          t.prune(["A","D", node_C])
451          print t
452
453        """
454        def cmp_nodes(x, y):
455            # if several nodes are in the same path of two kept nodes,
456            # only one should be maintained. This prioritize internal
457            # nodes that are already in the to_keep list and then
458            # deeper nodes (closer to the leaves).
459            if x in to_keep:
460                if y not in to_keep:
461                    return -1
462                elif y in to_keep:
463                    return 0
464            elif n2depth[x] > n2depth[y]:
465                return -1
466            elif n2depth[x] < n2depth[y]:
467                return 1
468            else:
469                return 0
470
471        to_keep = set(_translate_nodes(self, *nodes))
472        start, node2path = self.get_common_ancestor(to_keep, get_path=True)
473
474        # Calculate which kept nodes are visiting the same nodes in
475        # their path to the common ancestor.
476        n2count = {}
477        n2depth = {}
478        for seed, path in node2path.iteritems():
479            for visited_node in path: 
480                if visited_node not in n2depth:
481                    depth = visited_node.get_distance(start, topology_only=True)
482                    n2depth[visited_node] = depth
483                if visited_node is not seed:
484                    n2count.setdefault(visited_node, set()).add(seed)
485
486        # if several internal nodes are in the path of exactly the
487        # same kept nodes, only one should be maintain.
488        visitors2nodes = {}
489        for node, visitors in n2count.iteritems():
490            # keep nodes connection at least two other nodes
491            if len(visitors)>1: 
492                visitor_key = frozenset(visitors)
493                visitors2nodes.setdefault(visitor_key, set()).add(node)
494        for visitors, nodes in visitors2nodes.iteritems():
495            s = sorted(nodes, cmp_nodes)
496            to_keep.add(s[0])
497
498        # Detach unvisited branches
499        if start is not self:
500            start.detach()
501            for n in self.get_children(): 
502                n.detach()
503            for n in start.get_children():
504                self.add_child(child=n)
505
506        for n in [self]+self.get_descendants():
507            if n not in to_keep: 
508                n.delete(prevent_nondicotomic=False,
509                         preserve_branch_length=preserve_branch_length)
510
511    def swap_children(self):
512        """
513        Swaps current children order.
514        """
515        if len(self.children)>1:
516            self.children.reverse()
517
518    def get_children(self):
519        """
520        Returns an independent list of node's children.
521        """
522        return [ch for ch in self.children]
523
524    def get_sisters(self):
525        """
526        Returns an indepent list of sister nodes.
527        """
528        if self.up!=None:
529            return [ch for ch in self.up.children if ch!=self]
530        else:
531            return []
532
533    def iter_leaves(self, is_leaf_fn=None):
534        """
535        Returns an iterator over the leaves under this node.
536
537        :argument None is_leaf_fn: See :func:`TreeNode.traverse` for
538          documentation.
539        """
540        for n in self.traverse(strategy="preorder", is_leaf_fn=is_leaf_fn):
541            if not is_leaf_fn:
542                if n.is_leaf():
543                    yield n
544            else:
545                if is_leaf_fn(n):
546                    yield n
547                   
548    def get_leaves(self, is_leaf_fn=None):
549        """
550        Returns the list of terminal nodes (leaves) under this node.
551
552        :argument None is_leaf_fn: See :func:`TreeNode.traverse` for
553          documentation.
554        """
555        return [n for n in self.iter_leaves(is_leaf_fn=is_leaf_fn)]
556
557    def iter_leaf_names(self, is_leaf_fn=None):
558        """
559        Returns an iterator over the leaf names under this node.
560
561        :argument None is_leaf_fn: See :func:`TreeNode.traverse` for
562          documentation.
563        """
564        for n in self.iter_leaves(is_leaf_fn=is_leaf_fn):
565            yield n.name
566
567    def get_leaf_names(self, is_leaf_fn=None):
568        """
569        Returns the list of terminal node names under the current
570        node.
571
572        :argument None is_leaf_fn: See :func:`TreeNode.traverse` for
573          documentation.
574        """
575        return [name for name in self.iter_leaf_names(is_leaf_fn=is_leaf_fn)]
576
577    def iter_descendants(self, strategy="levelorder", is_leaf_fn=None):
578        """
579        Returns an iterator over all descendant nodes.
580
581        :argument None is_leaf_fn: See :func:`TreeNode.traverse` for
582          documentation.
583        """
584        for n in self.traverse(strategy=strategy, is_leaf_fn=is_leaf_fn):
585            if n is not self:
586                yield n
587
588    def get_descendants(self, strategy="levelorder", is_leaf_fn=None):
589        """
590        Returns a list of all (leaves and internal) descendant nodes.
591
592        :argument None is_leaf_fn: See :func:`TreeNode.traverse` for
593          documentation.
594        """
595        return [n for n in self.iter_descendants(strategy=strategy, \
596                                                 is_leaf_fn=is_leaf_fn)]
597
598    def traverse(self, strategy="levelorder", is_leaf_fn=None):
599        """
600        Returns an iterator to traverse the tree structure under this
601        node.
602         
603        :argument "levelorder" strategy: set the way in which tree
604           will be traversed. Possible values are: "preorder" (first
605           parent and then children) 'postorder' (first children and
606           the parent) and "levelorder" (nodes are visited in order
607           from root to leaves)
608
609        :argument None is_leaf_fn: If supplied, ``is_leaf_fn``
610           function will be used to interrogate nodes about if they
611           are terminal or internal. ``is_leaf_fn`` function should
612           receive a node instance as first argument and return True
613           or False. Use this argument to traverse a tree by
614           dynamically collapsing internal nodes matching
615           ``is_leaf_fn``.
616        """
617        if strategy=="preorder":
618            return self._iter_descendants_preorder(is_leaf_fn=is_leaf_fn)
619        elif strategy=="levelorder":
620            return self._iter_descendants_levelorder(is_leaf_fn=is_leaf_fn)
621        elif strategy=="postorder":
622            return self._iter_descendants_postorder(is_leaf_fn=is_leaf_fn)
623           
624    def _iter_descendants_postorder_recursive(self, is_leaf_fn=None):
625        """
626        Iterate over all desdecendant nodes.
627        """
628        if not is_leaf_fn or not is_leaf_fn(self):
629            for ch in self.children:
630                for node in ch._iter_descendants_postorder(is_leaf_fn=is_leaf_fn):
631                    yield node
632        yield self
633   
634    def iter_prepostorder(self, is_leaf_fn=None):
635        """
636        Iterate over all nodes in a tree yielding every node in both
637        pre and post order. Each iteration returns a postorder flag
638        (True if node is being visited in postorder) and a node
639        instance.
640        """
641        to_visit = [self]
642        if is_leaf_fn is not None:
643            _leaf = is_leaf_fn
644        else:
645            _leaf = self.__class__.is_leaf
646
647        while to_visit:
648            node = to_visit.pop(-1)
649            try:
650                node = node[1]
651            except TypeError:
652                # PREORDER ACTIONS
653                yield (False, node)
654                if not _leaf(node):
655                    # ADD CHILDREN
656                    to_visit.extend(reversed(node.children + [[1, node]]))
657            else:
658                #POSTORDER ACTIONS
659                yield (True, node)
660
661    def _iter_descendants_postorder(self, is_leaf_fn=None):
662        to_visit = [self]
663        if is_leaf_fn is not None:
664            _leaf = is_leaf_fn
665        else:
666            _leaf = self.__class__.is_leaf
667
668        while to_visit:
669            node = to_visit.pop(-1)
670            try:
671                node = node[1]
672            except TypeError:
673                # PREORDER ACTIONS
674                if not _leaf(node):
675                    # ADD CHILDREN
676                    to_visit.extend(reversed(node.children + [[1, node]]))
677                else:
678                    yield node
679            else:
680                #POSTORDER ACTIONS
681                yield node
682       
683    def _iter_descendants_levelorder(self, is_leaf_fn=None):
684        """
685        Iterate over all desdecendant nodes.
686        """
687        tovisit = deque([self])
688        while len(tovisit)>0:
689            node = tovisit.popleft()
690            yield node
691            if not is_leaf_fn or not is_leaf_fn(node):
692                tovisit.extend(node.children)
693
694    def _iter_descendants_preorder(self, is_leaf_fn=None):
695        """
696        Iterator over all descendant nodes.
697        """
698        to_visit = deque()
699        node = self
700        while node is not None:
701            yield node
702            if not is_leaf_fn or not is_leaf_fn(node):
703                to_visit.extendleft(reversed(node.children))
704            try:
705                node = to_visit.popleft()
706            except:
707                node = None
708
709    def iter_ancestors(self):
710        '''versionadded: 2.2
711       
712        Iterates over the list of all ancestor nodes from current node
713        to the current tree root.
714
715        '''
716        node = self
717        while node.up is not None:
718            yield node.up
719            node = node.up
720
721    def get_ancestors(self):
722        '''versionadded: 2.2
723
724        Returns the list of all ancestor nodes from current node to
725        the current tree root.
726
727        '''
728        return [n for n in self.iter_ancestors()]
729           
730    def describe(self):
731        """
732        Prints general information about this node and its
733        connections.
734        """
735        if len(self.get_tree_root().children)==2:
736            rooting = "Yes"
737        elif len(self.get_tree_root().children)>2:
738            rooting = "No"
739        else:
740            rooting = "Unknown"
741        max_node, max_dist = self.get_farthest_leaf()
742        cached_content = self.get_cached_content()
743        print "Number of leaf nodes:\t%d" % len(cached_content[self])
744        print "Number of internal nodes:\t%d" % len(cached_content)
745        print "Rooted:\t%s" %rooting
746        print "Most distant node:\t%s" %max_node.name
747        print "Max. distance:\t%f" %max_dist
748       
749    def write(self, features=None, outfile=None, format=0, is_leaf_fn=None,
750              format_root_node=False):
751        """
752        Returns the newick representation of current node. Several
753        arguments control the way in which extra data is shown for
754        every node:
755
756        :argument features: a list of feature names to be exported
757          using the Extended Newick Format (i.e. features=["name",
758          "dist"]). Use an empty list to export all available features
759          in each node (features=[])
760
761        :argument outfile: writes the output to a given file
762
763        :argument format: defines the newick standard used to encode the
764          tree. See tutorial for details.
765
766        :argument False format_root_node: If True, it allows features
767          and branch information from root node to be exported as a
768          part of the newick text string. For newick compatibility
769          reasons, this is False by default.
770
771        :argument is_leaf_fn: See :func:`TreeNode.traverse` for
772          documentation.
773         
774        **Example:**
775
776        ::
777
778             t.get_newick(features=["species","name"], format=1)
779
780        """
781
782        nw = write_newick(self, features = features, format=format,
783                          is_leaf_fn=is_leaf_fn,
784                          format_root_node=format_root_node)
785        if outfile is not None:
786            open(outfile, "w").write(nw)
787        else:
788            return nw
789
790    def get_tree_root(self):
791        """
792        Returns the absolute root node of current tree structure.
793        """
794        root = self
795        while root.up is not None:
796            root = root.up
797        return root
798
799    def get_common_ancestor(self, *target_nodes, **kargs):
800        """
801        Returns the first common ancestor between this node and a given
802        list of 'target_nodes'.
803
804        **Examples:**
805
806        ::
807
808          t = tree.Tree("(((A:0.1, B:0.01):0.001, C:0.0001):1.0[&&NHX:name=common], (D:0.00001):0.000001):2.0[&&NHX:name=root];")
809          A = t.get_descendants_by_name("A")[0]
810          C = t.get_descendants_by_name("C")[0]
811          common =  A.get_common_ancestor(C)
812          print common.name
813
814        """
815       
816        get_path = kargs.get("get_path", False)
817
818        if len(target_nodes) == 1 and type(target_nodes[0]) \
819                in set([set, tuple, list, frozenset]):
820            target_nodes = target_nodes[0]
821
822        # Convert node names into node instances
823        target_nodes = _translate_nodes(self, *target_nodes)
824
825        # If only one node is provided, use self as the second target
826        if type(target_nodes) != list:
827            target_nodes = [target_nodes, self]
828        elif len(target_nodes)==1:
829            target_nodes = tree_nodes.append(self)
830
831        n2path = {}
832        reference = []
833        ref_node = None
834        for n in target_nodes:
835            current = n
836            while current: 
837                n2path.setdefault(n, set()).add(current)
838                if not ref_node:
839                    reference.append(current)
840                current = current.up
841            if not ref_node:
842                ref_node = n
843
844        common = None
845        for n in reference:
846            broken = False
847            for node, path in n2path.iteritems():
848                if node is not ref_node and n not in path:
849                    broken = True
850                    break
851
852            if not broken: 
853                common = n
854                break
855        if not common: 
856            raise ValueError("Nodes are not connected!")
857
858        if get_path:
859            return common, n2path
860        else:
861            return common
862
863    def iter_search_nodes(self, **conditions):
864        """
865        Search nodes in an interative way. Matches are being yield as
866        they are being found. This avoids to scan the full tree
867        topology before returning the first matches. Useful when
868        dealing with huge trees.
869        """
870       
871        for n in self.traverse():
872            conditions_passed = 0
873            for key, value in conditions.iteritems():
874                if hasattr(n, key) and getattr(n, key) == value:
875                    conditions_passed +=1
876            if conditions_passed == len(conditions):
877                yield n
878
879    def search_nodes(self, **conditions):
880        """
881        Returns the list of nodes matching a given set of conditions.
882
883        **Example:**
884
885        ::
886
887          tree.search_nodes(dist=0.0, name="human")
888
889        """
890        matching_nodes = []
891        for n in self.iter_search_nodes(**conditions):
892            matching_nodes.append(n)
893        return matching_nodes
894
895    def get_leaves_by_name(self,name):
896        """
897        Returns a list of leaf nodes matching a given name.
898        """
899        return self.search_nodes(name=name, children=[])
900
901    def is_leaf(self):
902        """
903        Return True if current node is a leaf.
904        """
905        return len(self.children) == 0
906
907    def is_root(self):
908        """
909        Returns True if current node has no parent
910        """
911        if self.up is None:
912            return True
913        else:
914            return False
915
916    # ###########################
917    # Distance related functions
918    # ###########################
919    def get_distance(self, target, target2=None, topology_only=False):
920        """
921        Returns the distance between two nodes. If only one target is
922        specified, it returns the distance bewtween the target and the
923        current node.
924       
925        :argument target: a node within the same tree structure.
926
927        :argument target2: a node within the same tree structure. If
928          not specified, current node is used as target2.
929
930        :argument False topology_only: If set to True, distance will
931          refer to the number of nodes between target and target2.
932
933        :returns: branch length distance between target and
934          target2. If topology_only flag is True, returns the number
935          of nodes between target and target2.
936 
937        """
938
939        if target2 is None:
940            target2 = self
941            root = self.get_tree_root()
942        else:
943            # is target node under current node?
944            root = self
945
946        target, target2 = _translate_nodes(root, target, target2)
947        ancestor = root.get_common_ancestor(target, target2)
948        if ancestor is None:
949            raise TreeError, "Nodes are not connected"
950
951        dist = 0.0
952        for n in [target2, target]:
953            current = n
954            while current != ancestor:
955                if topology_only:
956                    if  current!=target:
957                        dist += 1
958                else:
959                    dist += current.dist
960                current = current.up
961        return dist
962
963    def get_farthest_node(self, topology_only=False):
964        """
965        Returns the node's farthest descendant or ancestor node, and the
966        distance to it.
967
968        :argument False topology_only: If set to True, distance
969          between nodes will be referred to the number of nodes
970          between them. In other words, topological distance will be
971          used instead of branch length distances.
972
973        :return: A tuple containing the farthest node referred to the
974          current node and the distance to it.
975
976        """
977        # Init fasthest node to current farthest leaf
978        farthest_node,farthest_dist = self.get_farthest_leaf(topology_only=topology_only)
979        prev    = self
980        if topology_only:
981            cdist = 0
982        else:
983            cdist = prev.dist
984        current = prev.up
985        while current is not None:
986            for ch in current.children:
987                if ch != prev:
988                    if not ch.is_leaf():
989                        fnode, fdist = ch.get_farthest_leaf(topology_only=topology_only)
990                    else:
991                        fnode = ch
992                        fdist = 0
993                    if topology_only:
994                        fdist += 1.0
995                    else:
996                        fdist += ch.dist
997                    if cdist+fdist > farthest_dist:
998                        farthest_dist = cdist + fdist
999                        farthest_node = fnode
1000            prev = current
1001            if topology_only:
1002                cdist += 1
1003            else:
1004                cdist  += prev.dist
1005            current = prev.up
1006        return farthest_node, farthest_dist
1007
1008    def get_farthest_leaf(self, topology_only=False):
1009        """
1010        Returns node's farthest descendant node (which is always a leaf), and the
1011        distance to it.
1012
1013        :argument False topology_only: If set to True, distance
1014          between nodes will be referred to the number of nodes
1015          between them. In other words, topological distance will be
1016          used instead of branch length distances.
1017
1018        :return: A tuple containing the farthest leaf referred to the
1019          current node and the distance to it.
1020        """
1021        max_dist = 0.0
1022        max_node = None
1023        if self.is_leaf():
1024            return self, 0.0
1025        else:
1026            for ch in self.children:
1027                node, d = ch.get_farthest_leaf(topology_only=topology_only)
1028                if topology_only:
1029                    d += 1.0
1030                else:
1031                    d += ch.dist
1032                if d>=max_dist:
1033                    max_dist = d
1034                    max_node = node
1035            return max_node, max_dist
1036
1037    def get_closest_leaf(self, topology_only=False):
1038        """Returns node's closest descendant leaf and the distance to
1039        it.
1040
1041        :argument False topology_only: If set to True, distance
1042          between nodes will be referred to the number of nodes
1043          between them. In other words, topological distance will be
1044          used instead of branch length distances.
1045
1046        :return: A tuple containing the closest leaf referred to the
1047          current node and the distance to it.
1048
1049        """
1050        min_dist = None
1051        min_node = None
1052        if self.is_leaf():
1053            return self, 0.0
1054        else:
1055            for ch in self.children:
1056                node, d = ch.get_closest_leaf(topology_only=topology_only)
1057                if topology_only:
1058                    d += 1.0
1059                else:
1060                    d += ch.dist
1061                if min_dist is None or d<min_dist:
1062                    min_dist = d
1063                    min_node = node
1064            return min_node, min_dist
1065
1066
1067
1068    def get_midpoint_outgroup(self):
1069        """
1070        Returns the node that divides the current tree into two distance-balanced
1071        partitions.
1072        """
1073        # Gets the farthest node to the current root
1074        root = self.get_tree_root()
1075        nA , r2A_dist = root.get_farthest_leaf()
1076        nB , A2B_dist = nA.get_farthest_node()
1077
1078        outgroup = nA
1079        middist  = A2B_dist / 2.0
1080        cdist = 0
1081        current = nA
1082        while current is not None:
1083            cdist += current.dist
1084            if cdist > (middist): # Deja de subir cuando se pasa del maximo
1085                break
1086            else:
1087                current = current.up
1088        return current
1089
1090    def populate(self, size, names_library=None, reuse_names=False,
1091                 random_branches=False, branch_range=(0,1),
1092                 support_range=(0,1)): 
1093        """
1094        Generates a random topology by populating current node.
1095
1096        :argument None names_library: If provided, names library
1097          (list, set, dict, etc.) will be used to name nodes.
1098
1099        :argument False reuse_names: If True, node names will not be
1100          necessarily unique, which makes the process a bit more
1101          efficient.
1102
1103        :argument False random_branches: If True, branch distances and support
1104          values will be randomized.
1105       
1106        :argument (0,1) branch_range: If random_branches is True, this
1107        range of values will be used to generate random distances.
1108
1109        :argument (0,1) support_range: If random_branches is True,
1110        this range of values will be used to generate random branch
1111        support values.
1112
1113        """
1114        NewNode = self.__class__
1115
1116        if len(self.children) > 1: 
1117            connector = NewNode()
1118            for ch in self.get_children():
1119                ch.detach()
1120                connector.add_child(child = ch)
1121            root = NewNode()
1122            self.add_child(child = connector)
1123            self.add_child(child = root)
1124        else:
1125            root = self
1126
1127        next = deque([root])
1128        for i in xrange(size-1):
1129            if random.randint(0, 1):
1130                p = next.pop()
1131            else:
1132                p = next.popleft()
1133
1134            c1 = p.add_child()
1135            c2 = p.add_child()
1136            next.extend([c1, c2])
1137            if random_branches:
1138                c1.dist = random.uniform(*branch_range)
1139                c2.dist = random.uniform(*branch_range)
1140                c1.support = random.uniform(*branch_range)
1141                c2.support = random.uniform(*branch_range)
1142            else:
1143                c1.dist = 1.0
1144                c2.dist = 1.0
1145                c1.support = 1.0
1146                c2.support = 1.0
1147
1148        # next contains leaf nodes
1149        charset =  "abcdefghijklmnopqrstuvwxyz"
1150        if names_library:
1151            names_library = deque(names_library)
1152        else:
1153            avail_names = itertools.combinations_with_replacement(charset, 10)
1154        for n in next:
1155            if names_library:
1156                if reuse_names: 
1157                    tname = random.sample(names_library, 1)[0]
1158                else:
1159                    tname = names_library.pop()
1160            else:
1161                tname = ''.join(avail_names.next())
1162            n.name = tname
1163
1164    def set_outgroup(self, outgroup):
1165        """
1166        Sets a descendant node as the outgroup of a tree.  This function
1167        can be used to root a tree or even an internal node.
1168
1169        :argument outgroup: a node instance within the same tree
1170          structure that will be used as a basal node.
1171
1172        """
1173
1174        outgroup = _translate_nodes(self, outgroup)
1175
1176        if self == outgroup:
1177            raise ValueError, "Cannot set myself as outgroup"
1178
1179        parent_outgroup = outgroup.up
1180
1181        # Detects (sub)tree root
1182        n = outgroup
1183        while n.up is not self:
1184            n = n.up
1185
1186        # If outgroup is a child from root, but with more than one
1187        # sister nodes, creates a new node to group them
1188
1189        self.children.remove(n)
1190        if len(self.children) != 1:
1191            down_branch_connector = self.__class__()
1192            down_branch_connector.dist = 0.0
1193            down_branch_connector.support = n.support
1194            for ch in self.get_children():
1195                down_branch_connector.children.append(ch)
1196                ch.up = down_branch_connector
1197                self.children.remove(ch)
1198        else:
1199            down_branch_connector = self.children[0]
1200
1201        # Connects down branch to myself or to outgroup
1202        quien_va_ser_padre = parent_outgroup
1203        if quien_va_ser_padre is not self:
1204            # Parent-child swapping
1205            quien_va_ser_hijo = quien_va_ser_padre.up
1206            quien_fue_padre = None
1207            buffered_dist = quien_va_ser_padre.dist
1208            buffered_support = quien_va_ser_padre.support
1209           
1210            while quien_va_ser_hijo is not self:
1211                quien_va_ser_padre.children.append(quien_va_ser_hijo)
1212                quien_va_ser_hijo.children.remove(quien_va_ser_padre)
1213
1214                buffered_dist2 = quien_va_ser_hijo.dist
1215                buffered_support2 = quien_va_ser_hijo.support
1216                quien_va_ser_hijo.dist = buffered_dist
1217                quien_va_ser_hijo.support = buffered_support
1218                buffered_dist = buffered_dist2
1219                buffered_support = buffered_support2
1220
1221                quien_va_ser_padre.up = quien_fue_padre
1222                quien_fue_padre = quien_va_ser_padre
1223
1224                quien_va_ser_padre = quien_va_ser_hijo
1225                quien_va_ser_hijo = quien_va_ser_padre.up
1226
1227            quien_va_ser_padre.children.append(down_branch_connector)
1228            down_branch_connector.up = quien_va_ser_padre
1229            quien_va_ser_padre.up = quien_fue_padre
1230
1231            down_branch_connector.dist += buffered_dist
1232            outgroup2 = parent_outgroup
1233            parent_outgroup.children.remove(outgroup)
1234            outgroup2.dist = 0
1235           
1236        else:
1237            outgroup2 = down_branch_connector
1238
1239        outgroup.up = self
1240        outgroup2.up = self
1241        # outgroup is always the first children. Some function my
1242        # trust on this fact, so do no change this.
1243        self.children = [outgroup,outgroup2]
1244        middist = (outgroup2.dist + outgroup.dist)/2
1245        outgroup.dist = middist
1246        outgroup2.dist = middist
1247        outgroup2.support = outgroup.support
1248
1249    def unroot(self):
1250        """
1251        Unroots current node. This function is expected to be used on
1252        the absolute tree root node, but it can be also be applied to
1253        any other internal node. It will convert a split into a
1254        multifurcation.
1255        """
1256        # if is rooted
1257        if not self.is_root():
1258            print >>sys.stderr, "Warning. You are unrooting an internal node.!!"
1259        if len(self.children)==2:
1260            if not self.children[0].is_leaf():
1261                self.children[0].delete()
1262            elif not self.children[1].is_leaf():
1263                self.children[1].delete()
1264            else:
1265                raise TreeError, "Cannot unroot a tree with only two leaves"
1266
1267    def show(self, layout=None, tree_style=None, name="ETE"):
1268        """
1269        Starts an interative session to visualize current node
1270        structure using provided layout and TreeStyle.
1271
1272        """
1273        try:
1274            from ete2.treeview import drawer
1275        except ImportError, e:
1276            print "'treeview' module could not be loaded.\n",e
1277            print "\n\n"
1278            print e
1279        else:
1280            drawer.show_tree(self, layout=layout,
1281                             tree_style=tree_style, win_name=name)
1282
1283    def render(self, file_name, layout=None, w=None, h=None, \
1284                       tree_style=None, units="px", dpi=90):
1285        """
1286        Renders the node structure as an image.
1287
1288        :var file_name: path to the output image file. valid
1289          extensions are .SVG, .PDF, .PNG
1290 
1291        :var layout: a layout function or a valid layout function name
1292
1293        :var tree_style: a `TreeStyle` instance containing the image
1294          properties
1295
1296        :var px units: "px": pixels, "mm": millimeters, "in": inches
1297        :var None h: height of the image in :attr:`units`       
1298        :var None w: weight of the image in :attr:`units`       
1299        :var 300 dpi: dots per inches.
1300
1301        """
1302
1303        try:
1304            from ete2.treeview import drawer
1305        except ImportError, e:
1306            print "'treeview' module could not be loaded.\n",e
1307            print "\n\n"
1308            print e
1309        else:
1310            return drawer.render_tree(self, file_name, w=w, h=h, 
1311                                      layout=layout, tree_style=tree_style, 
1312                                      units=units, dpi=dpi)
1313
1314    def copy(self, method="cpickle"):
1315        """.. versionadded: 2.1
1316
1317        Returns a copy of the current node.
1318
1319        :var cpickle method: Protocol used to copy the node
1320        structure. The following values are accepted:
1321
1322           - "newick": Tree topology, node names, branch lengths and
1323             branch support values will be copied by as represented in
1324             the newick string (copy by newick string serialisation).
1325       
1326           - "newick-extended": Tree topology and all node features
1327             will be copied based on the extended newick format
1328             representation. Only node features will be copied, thus
1329             excluding other node attributes. As this method is also
1330             based on newick serialisation, features will be converted
1331             into text strings when making the copy.
1332
1333           - "cpickle": The whole node structure and its content is
1334             cloned based on cPickle object serialisation (slower, but
1335             recommended for full tree copying)
1336       
1337           - "deepcopy": The whole node structure and its content is
1338             copied based on the standard "copy" Python functionality
1339             (this is the slowest method but it allows to copy complex
1340             objects even if attributes point to lambda functions,
1341             etc.)
1342
1343        """
1344        if method=="newick":
1345            new_node = self.__class__(self.write(features=["name"], format_root_node=True))
1346        elif method=="newick-extended":
1347            self.write(features=[], format_root_node=True)
1348            new_node = self.__class__(self.write(features=[]))
1349        elif method == "deepcopy":
1350            parent = self.up
1351            self.up = None
1352            new_node = copy.deepcopy(self)
1353            self.up = parent
1354        elif method == "cpickle":
1355            parent = self.up
1356            self.up = None
1357            new_node = cPickle.loads(cPickle.dumps(self, 2))
1358            self.up = parent
1359        else:
1360            raise ValuerError("Invalid copy method")
1361           
1362        return new_node
1363
1364    def _asciiArt(self, char1='-', show_internal=True, compact=False, attributes=None):
1365        """
1366        Returns the ASCII representation of the tree.
1367
1368        Code based on the PyCogent GPL project.
1369        """
1370        if not attributes:
1371            attributes = ["name"]
1372        node_name = ', '.join(map(str, [getattr(self, v) for v in attributes if hasattr(self, v)]))
1373       
1374        LEN = max(3, len(node_name) if not self.children or show_internal else 3)
1375        PAD = ' ' * LEN
1376        PA = ' ' * (LEN-1)
1377        if not self.is_leaf():
1378            mids = []
1379            result = []
1380            for c in self.children:
1381                if len(self.children) == 1:
1382                    char2 = '/'
1383                elif c is self.children[0]:
1384                    char2 = '/'
1385                elif c is self.children[-1]:
1386                    char2 = '\\'
1387                else:
1388                    char2 = '-'
1389                (clines, mid) = c._asciiArt(char2, show_internal, compact, attributes)
1390                mids.append(mid+len(result))
1391                result.extend(clines)
1392                if not compact:
1393                    result.append('')
1394            if not compact:
1395                result.pop()
1396            (lo, hi, end) = (mids[0], mids[-1], len(result))
1397            prefixes = [PAD] * (lo+1) + [PA+'|'] * (hi-lo-1) + [PAD] * (end-hi)
1398            mid = (lo + hi) / 2
1399            prefixes[mid] = char1 + '-'*(LEN-2) + prefixes[mid][-1]
1400            result = [p+l for (p,l) in zip(prefixes, result)]
1401            if show_internal:
1402                stem = result[mid]
1403                result[mid] = stem[0] + node_name + stem[len(node_name)+1:]
1404            return (result, mid)
1405        else:
1406            return ([char1 + '-' + node_name], 0)
1407
1408    def get_ascii(self, show_internal=True, compact=False, attributes=None):
1409        """
1410        Returns a string containing an ascii drawing of the tree.
1411
1412        :argument show_internal: includes internal edge names.
1413        :argument compact: use exactly one line per tip.
1414
1415        :param attributes: A list of node attributes to shown in the
1416            ASCII representation.
1417       
1418        """
1419        (lines, mid) = self._asciiArt(show_internal=show_internal,
1420                                      compact=compact, attributes=attributes)
1421        return '\n'+'\n'.join(lines)
1422
1423    def ladderize(self, direction=0):
1424        """
1425        .. versionadded: 2.1
1426
1427        Sort the branches of a given tree (swapping children nodes)
1428        according to the size of each partition.
1429
1430        ::
1431
1432           t =  Tree("(f,((d, ((a,b),c)),e));")
1433
1434           print t
1435
1436           #           
1437           #      /-f
1438           #     |
1439           #     |          /-d
1440           # ----|         |
1441           #     |     /---|          /-a
1442           #     |    |    |     /---|
1443           #     |    |     \---|     \-b
1444           #      \---|         |
1445           #          |          \-c
1446           #          |
1447           #           \-e
1448
1449           t.ladderize()
1450           print t
1451
1452           #      /-f
1453           # ----|
1454           #     |     /-e
1455           #      \---|
1456           #          |     /-d
1457           #           \---|
1458           #               |     /-c
1459           #                \---|
1460           #                    |     /-a
1461           #                     \---|
1462           #                          \-b
1463
1464        """
1465       
1466        if not self.is_leaf():
1467            n2s = {}
1468            for n in self.get_children():
1469                s = n.ladderize(direction=direction)
1470                n2s[n] = s
1471
1472            self.children.sort(lambda x,y: cmp(n2s[x], n2s[y]))
1473            if direction == 1:
1474                self.children.reverse()
1475            size = sum(n2s.values())
1476        else:
1477            size = 1
1478
1479        return size
1480
1481    def sort_descendants(self):
1482        """
1483        .. versionadded: 2.1
1484
1485        This function sort the branches of a given tree by
1486        considerening node names. After the tree is sorted, nodes are
1487        labeled using ascendent numbers.  This can be used to ensure
1488        that nodes in a tree with the same node names are always
1489        labeled in the same way. Note that if duplicated names are
1490        present, extra criteria should be added to sort nodes.
1491
1492        Unique id is stored as a node._nid attribute
1493       
1494        """
1495
1496        node2content = self.get_cached_content()
1497        def sort_by_content(x, y):
1498            return cmp(str(sorted([i.name for i in node2content[x]])),
1499                       str(sorted([i.name for i in node2content[y]])))
1500
1501        for n in self.traverse():
1502            if not n.is_leaf():
1503                n.children.sort(sort_by_content)
1504        return node2content
1505
1506    def get_cached_content(self, store_attr=None,  _store=None):
1507        """
1508        .. versionadded: 2.2
1509       
1510        Returns a dictionary pointing to the preloaded content of each
1511        internal node under this tree. Such a dictionary is intended
1512        to work as a cache for operations that require many traversal
1513        operations.
1514
1515        :param None store_attr: Specifies the node attribute that
1516        should be cached (i.e. name, distance, etc.). When none, the
1517        whole node instance is cached.
1518
1519        :param _store: (internal use)
1520
1521        """
1522        if _store is None:
1523            _store = {}
1524           
1525        for ch in self.children:
1526            ch.get_cached_content(store_attr=store_attr, _store=_store)
1527
1528        if self.children:
1529            val = set()
1530            for ch in self.children:
1531                val.update(_store[ch])
1532            _store[self] = val
1533        else:
1534            if store_attr is None:
1535                val = self
1536            else:
1537                val = getattr(self, store_attr)
1538            _store[self] = set([val])
1539        return _store
1540
1541    def robinson_foulds(self, t2, attr_t1="name", attr_t2="name"):
1542        """
1543        .. versionadded: 2.2
1544       
1545        Returns the Robinson-Foulds symmetric distance between current
1546        tree and a different tree instance.
1547     
1548        :param t2: target tree
1549       
1550        :param name attr_t1: Compare trees using a custom node
1551                              attribute as a node name.
1552       
1553        :param name attr_t2: Compare trees using a custom node
1554                              attribute as a node name in target tree.
1555
1556        :returns: (symmetric distance, total partitions, common node
1557         names, partitions in current tree, partitions in target tree)
1558           
1559        """
1560       
1561        t1 = self
1562        t1content = t1.get_cached_content()
1563        t2content = t2.get_cached_content()
1564        target_names = set([getattr(_n, attr_t1) for _n in t1content[t1]])
1565        ref_names = set([getattr(_n, attr_t2) for _n in t2content[t2]])
1566        common_names = target_names & ref_names
1567        if len(common_names) < 2:
1568            raise ValueError("Trees share less than 2 nodes")
1569
1570        r1 = set([",".join(sorted([getattr(_c, attr_t1) for _c in cont
1571                                   if getattr(_c, attr_t1) in common_names]))
1572                  for cont in t1content.values() if len(cont)>1])
1573        r2 = set([",".join(sorted([getattr(_c, attr_t2) for _c in cont
1574                                   if getattr(_c, attr_t2) in common_names]))
1575                  for cont in t2content.values() if len(cont)>1])
1576        r1.discard("")
1577        r2.discard("")             
1578        inters = r1.intersection(r2)
1579        if len(r1) == len(r2):
1580                rf = (len(r1) - len(inters)) * 2
1581        else :
1582                rf = (len(r1) - len(inters)) + (len(r2) - len(inters))
1583        max_parts = len(r1) + len(r2)
1584        return rf, max_parts, common_names, r1, r2
1585
1586    def get_partitions(self):
1587        """
1588        .. versionadded: 2.1
1589       
1590        It returns the set of all possible partitions under a
1591        node. Note that current implementation is quite inefficient
1592        when used in very large trees.
1593
1594        t = Tree("((a, b), e);")
1595        partitions = t.get_partitions()
1596
1597        # Will return:
1598        # a,b,e
1599        # a,e
1600        # b,e
1601        # a,b
1602        # e
1603        # b
1604        # a
1605        """
1606        all_leaves = frozenset(self.get_leaf_names())
1607        all_partitions = set([all_leaves])
1608        for n in self.iter_descendants():
1609            p1 = frozenset(n.get_leaf_names())
1610            p2 = frozenset(all_leaves - p1)
1611            all_partitions.add(p1)
1612            all_partitions.add(p2)
1613        return all_partitions
1614
1615    def convert_to_ultrametric(self, tree_length, strategy="balanced"):
1616        """
1617        .. versionadded: 2.1
1618
1619        Converts a tree to ultrametric topology (all leaves must have
1620        the same distance to root). Note that, for visual inspection
1621        of ultrametric trees, node.img_style["size"] should be set to
1622        0.
1623        """
1624
1625        # pre-calculate how many splits remain under each node
1626        node2max_depth = {}
1627        for node in self.traverse("postorder"):
1628            if not node.is_leaf():
1629                max_depth = max([node2max_depth[c] for c in node.children]) + 1
1630                node2max_depth[node] = max_depth
1631            else:
1632                node2max_depth[node] = 1
1633        node2dist = {self: 0.0}
1634        tree_length = float(tree_length)
1635        step = tree_length / node2max_depth[self]
1636        for node in self.iter_descendants("levelorder"):
1637            if strategy == "balanced":
1638                node.dist = (tree_length - node2dist[node.up]) / node2max_depth[node]
1639                node2dist[node] =  node.dist + node2dist[node.up]
1640            elif strategy == "fixed":
1641                if not node.is_leaf():
1642                    node.dist = step
1643                else:
1644                    node.dist = tree_length - ((node2dist[node.up]) * step)
1645                node2dist[node] = node2dist[node.up] + 1
1646            node.dist = node.dist
1647
1648    def check_monophyly(self, values, target_attr, ignore_missing=False):
1649        """
1650        Returns True if a given target attribute is monophyletic under
1651        this node for the provided set of values.
1652
1653        If not all values are represented in the current tree
1654        structure, a ValueError exception will be raised to warn that
1655        strict monophyly could never be reached (this behaviour can be
1656        avoided by enabling the `ignore_missing` flag.
1657
1658        :param values: a set of values for which monophyly is
1659            expected.
1660           
1661        :param target_attr: node attribute being used to check
1662            monophyly (i.e. species for species trees, names for gene
1663            family trees, or any custom feature present in the tree).
1664
1665        :param False ignore_missing: Avoid raising an Exception when
1666            missing attributes are found.
1667        """
1668        if type(values) != set:
1669            values = set(values)
1670
1671        # This is the only time I traverse the tree, then I use cached
1672        # leaf content
1673        n2leaves = self.get_cached_content()
1674        # Locate leaves matching requested attribute values
1675        targets = [leaf for leaf in n2leaves[self]
1676                   if getattr(leaf, target_attr) in values]
1677       
1678        # Raise an error if requested attribute values are not even present
1679        if not ignore_missing:
1680            missing_values = values - set([getattr(n, target_attr) for n
1681                                           in targets])
1682            if missing_values: 
1683                raise ValueError("Expected '%s' value(s) not found: %s" %(
1684                    target_attr, ','.join(missing_values)))
1685               
1686        # Check monophyly with get_common_ancestor. Note that this
1687        # step does not require traversing the tree again because
1688        # targets are node instances instead of node names, and
1689        # get_common_ancestor function is smart enough to detect it
1690        # and avoid unnecessary traversing.
1691        common = self.get_common_ancestor(targets)
1692        observed = n2leaves[common]
1693        foreign_leaves = [leaf for leaf in observed
1694                          if getattr(leaf, target_attr) not in values]
1695        if not foreign_leaves:
1696            return True, "monophyletic"
1697        else:
1698            # if the requested attribute is not monophyletic in this
1699            # node, let's differentiate between poly and paraphyly.
1700            poly_common = self.get_common_ancestor(foreign_leaves)
1701            # if the common ancestor of all foreign leaves is self
1702            # contained, we have a paraphyly. Otherwise, polyphyly.
1703            polyphyletic = [leaf for leaf in poly_common if
1704                            getattr(leaf, target_attr) in values]
1705            if polyphyletic:
1706                return False, "polyphyletic"
1707            else:
1708                return False, "paraphyletic"
1709
1710    def get_monophyletic(self, values, target_attr):
1711        """
1712        .. versionadded:: 2.2
1713
1714        Returns a list of nodes matching the provided monophyly
1715        criteria. For a node to be considered a match, all
1716        `target_attr` values within and node, and exclusively them,
1717        should be grouped.
1718       
1719        :param values: a set of values for which monophyly is
1720            expected.
1721           
1722        :param target_attr: node attribute being used to check
1723            monophyly (i.e. species for species trees, names for gene
1724            family trees).
1725           
1726        """
1727
1728        if type(values) != set:
1729            values = set(values)
1730
1731        n2values = self.get_cached_content(store_attr=target_attr)
1732     
1733        is_monophyletic = lambda node: n2values[node] == values
1734        for match in self.iter_leaves(is_leaf_fn=is_monophyletic):
1735            if is_monophyletic(match):
1736                yield match
1737
1738    def resolve_polytomy(self, default_dist=0.0, default_support=0.0,
1739                         recursive=True):
1740        """
1741        .. versionadded: 2.2
1742       
1743        Resolve all polytomies under current node by creating an
1744        arbitrary dicotomic structure among the affected nodes. This
1745        function randomly modifies current tree topology and should
1746        only be used for compatibility reasons (i.e. programs
1747        rejecting multifurcated node in the newick representation).
1748
1749        :param 0.0 default_dist: artificial branch distance of new
1750            nodes.
1751                                 
1752        :param 0.0 default_support: artificial branch support of new
1753            nodes.
1754                                   
1755        :param True recursive: Resolve any polytomy under this
1756             node. When False, only current node will be checked and fixed.
1757        """
1758       
1759       
1760        def _resolve(node):
1761            if len(node.children) > 2:
1762                children = list(node.children)
1763                node.children = []
1764                next_node = root = node
1765                for i in xrange(len(children)-2):
1766                    next_node = next_node.add_child()
1767                    next_node.dist = default_dist
1768                    next_node.support = default_support
1769
1770                next_node = root
1771                for ch in children:
1772                    next_node.add_child(ch)
1773                    if ch != children[-2]:
1774                        next_node = next_node.children[0]
1775        target = [self]
1776        if recursive: 
1777            target.extend([n for n in self.get_descendants()])
1778        for n in target:
1779            _resolve(n)
1780
1781    def add_face(self, face, column, position="branch-right"):
1782        """
1783        .. versionadded: 2.1
1784
1785        Add a fixed face to the node.  This type of faces will be
1786        always attached to nodes, independently of the layout
1787        function.
1788
1789        :argument face: a Face or inherited instance
1790        :argument column: An integer number starting from 0
1791        :argument "branch-right" position: Posible values are:
1792          "branch-right", "branch-top", "branch-bottom", "float",
1793          "aligned"
1794        """ 
1795
1796        if not hasattr(self, "_faces"):
1797            self._faces = _FaceAreas()
1798       
1799        if position not in FACE_POSITIONS:
1800            raise ValueError("face position not in %s" %FACE_POSITIONS)
1801       
1802        if isinstance(face, Face):
1803            getattr(self._faces, position).add_face(face, column=column)
1804        else:
1805            raise ValueError("not a Face instance")
1806
1807    def set_style(self, node_style):
1808        """
1809        .. versionadded: 2.1
1810
1811        Set 'node_style' as the fixed style for the current node.
1812        """
1813        if TREEVIEW:
1814            if node_style is None:
1815                node_style = NodeStyle()
1816            if type(node_style) is NodeStyle:
1817                self._img_style = node_style
1818        else:
1819            raise ValueError("Treeview module is disabled")
1820
1821    def phonehome(self):
1822        from ete2 import _ph
1823        _ph.call()
1824
1825def _translate_nodes(root, *nodes):
1826    name2node = dict([ [n, None] for n in nodes if type(n) is str])
1827    for n in root.traverse():
1828        if n.name in name2node:
1829            if name2node[n.name] is not None:
1830                raise ValueError, "Ambiguous node name: "+str(n.name)
1831            else:
1832                name2node[n.name] = n
1833
1834    if None in name2node.values():
1835        notfound = [key for key, value in name2node.iteritems() if value is None]
1836        raise ValueError("Node names not found: "+str(notfound))
1837
1838    valid_nodes = []
1839    for n in nodes: 
1840        if type(n) is not str:
1841            if type(n) is not root.__class__ :
1842                raise ValueError, "Invalid target node: "+str(n)
1843            else:
1844                valid_nodes.append(n)
1845           
1846    valid_nodes.extend(name2node.values())
1847    if len(valid_nodes) == 1:
1848        return valid_nodes[0]
1849    else:
1850        return valid_nodes
1851
1852def OLD_translate_nodes(root, *nodes):
1853    target_nodes = []
1854    for n in nodes:
1855        if type(n) is str:
1856            mnodes = root.search_nodes(name=n)
1857            if len(mnodes) == 0:
1858                raise ValueError, "Node name not found: "+str(n)
1859            elif len(mnodes)>1:
1860                raise ValueError, "Ambiguous node name: "+str(n)
1861            else:
1862                target_nodes.append(mnodes[0])
1863        elif type(n) != root.__class__:
1864            raise ValueError, "Invalid target node: "+str(n)
1865        else:
1866            target_nodes.append(n)
1867     
1868    if len(target_nodes) == 1:
1869        return target_nodes[0]
1870    else:
1871        return target_nodes
1872
1873
1874# Alias
1875#: .. currentmodule:: ete2
1876Tree = TreeNode
1877
Note: See TracBrowser for help on using the repository browser.