| 1 | __VERSION__="ete2-2.2rev1026" | 
|---|
| 2 | # -*- coding: utf-8 -*- | 
|---|
| 3 | # #START_LICENSE########################################################### | 
|---|
| 4 | # | 
|---|
| 5 | # | 
|---|
| 6 | # This file is part of the Environment for Tree Exploration program | 
|---|
| 7 | # (ETE).  http://ete.cgenomics.org | 
|---|
| 8 | # | 
|---|
| 9 | # ETE is free software: you can redistribute it and/or modify it | 
|---|
| 10 | # under the terms of the GNU General Public License as published by | 
|---|
| 11 | # the Free Software Foundation, either version 3 of the License, or | 
|---|
| 12 | # (at your option) any later version. | 
|---|
| 13 | # | 
|---|
| 14 | # ETE is distributed in the hope that it will be useful, but WITHOUT | 
|---|
| 15 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY | 
|---|
| 16 | # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public | 
|---|
| 17 | # License for more details. | 
|---|
| 18 | # | 
|---|
| 19 | # You should have received a copy of the GNU General Public License | 
|---|
| 20 | # along with ETE.  If not, see <http://www.gnu.org/licenses/>. | 
|---|
| 21 | # | 
|---|
| 22 | # | 
|---|
| 23 | #                     ABOUT THE ETE PACKAGE | 
|---|
| 24 | #                     ===================== | 
|---|
| 25 | # | 
|---|
| 26 | # ETE is distributed under the GPL copyleft license (2008-2011). | 
|---|
| 27 | # | 
|---|
| 28 | # If you make use of ETE in published work, please cite: | 
|---|
| 29 | # | 
|---|
| 30 | # Jaime Huerta-Cepas, Joaquin Dopazo and Toni Gabaldon. | 
|---|
| 31 | # ETE: a python Environment for Tree Exploration. Jaime BMC | 
|---|
| 32 | # Bioinformatics 2010,:24doi:10.1186/1471-2105-11-24 | 
|---|
| 33 | # | 
|---|
| 34 | # Note that extra references to the specific methods implemented in | 
|---|
| 35 | # the toolkit are available in the documentation. | 
|---|
| 36 | # | 
|---|
| 37 | # More info at http://ete.cgenomics.org | 
|---|
| 38 | # | 
|---|
| 39 | # | 
|---|
| 40 | # #END_LICENSE############################################################# | 
|---|
| 41 |  | 
|---|
| 42 | import re | 
|---|
| 43 | import os | 
|---|
| 44 | import base64 | 
|---|
| 45 |  | 
|---|
| 46 | __all__ = ["read_newick", "write_newick", "print_supported_formats"] | 
|---|
| 47 |  | 
|---|
| 48 | # Regular expressions used for reading newick format | 
|---|
| 49 | _ILEGAL_NEWICK_CHARS = ":;(),\[\]\t\n\r=" | 
|---|
| 50 | _NON_PRINTABLE_CHARS_RE = "[\x00-\x1f]+" | 
|---|
| 51 |  | 
|---|
| 52 | _NHX_RE = "\[&&NHX:[^\]]*\]" | 
|---|
| 53 | _FLOAT_RE = "[+-]?\d+\.?\d*(?:[eE][-+]\d+)?" | 
|---|
| 54 | #_FLOAT_RE = "[+-]?\d+\.?\d*" | 
|---|
| 55 | _NAME_RE = "[^():,;\[\]]+" | 
|---|
| 56 |  | 
|---|
| 57 | DEFAULT_DIST = 1.0 | 
|---|
| 58 | DEFAULT_NAME = '' | 
|---|
| 59 | DEFAULT_SUPPORT = 1.0 | 
|---|
| 60 |  | 
|---|
| 61 |  | 
|---|
| 62 | # Allowed formats. This table is used to read and write newick using | 
|---|
| 63 | # different convenctions. You can also add your own formats in an easy way. | 
|---|
| 64 | # | 
|---|
| 65 | # | 
|---|
| 66 | # FORMAT: [[LeafAttr1, LeafAttr1Type, Strict?], [LeafAttr2, LeafAttr2Type, Strict?],\ | 
|---|
| 67 | #    [InternalAttr1, InternalAttr1Type, Strict?], [InternalAttr2, InternalAttr2Type, Strict?]] | 
|---|
| 68 | # | 
|---|
| 69 | # Attributes are placed in the newick as follows: | 
|---|
| 70 | # | 
|---|
| 71 | # .... ,LeafAttr1:LeafAttr2)InternalAttr1:InternalAttr2 ... | 
|---|
| 72 | # | 
|---|
| 73 | # | 
|---|
| 74 | #           /-A | 
|---|
| 75 | # -NoName--| | 
|---|
| 76 | #          |          /-B | 
|---|
| 77 | #           \C-------| | 
|---|
| 78 | #                    |          /-D | 
|---|
| 79 | #                     \E-------| | 
|---|
| 80 | #                               \-G | 
|---|
| 81 | # | 
|---|
| 82 | # Format 0 = (A:0.350596,(B:0.728431,(D:0.609498,G:0.125729)1.000000:0.642905)1.000000:0.567737); | 
|---|
| 83 | # Format 1 = (A:0.350596,(B:0.728431,(D:0.609498,G:0.125729)E:0.642905)C:0.567737); | 
|---|
| 84 | # Format 2 = (A:0.350596,(B:0.728431,(D:0.609498,G:0.125729)1.000000:0.642905)1.000000:0.567737); | 
|---|
| 85 | # Format 3 = (A:0.350596,(B:0.728431,(D:0.609498,G:0.125729)E:0.642905)C:0.567737); | 
|---|
| 86 | # Format 4 = (A:0.350596,(B:0.728431,(D:0.609498,G:0.125729))); | 
|---|
| 87 | # Format 5 = (A:0.350596,(B:0.728431,(D:0.609498,G:0.125729):0.642905):0.567737); | 
|---|
| 88 | # Format 6 = (A:0.350596,(B:0.728431,(D:0.609498,G:0.125729)E)C); | 
|---|
| 89 | # Format 7 = (A,(B,(D,G)E)C); | 
|---|
| 90 | # Format 8 = (A,(B,(D,G))); | 
|---|
| 91 | # Format 9 = (,(,(,))); | 
|---|
| 92 |  | 
|---|
| 93 | NW_FORMAT = { | 
|---|
| 94 | 0: [['name', str, True],  ["dist", float, True],    ['support', float, True],   ["dist", float, True]], # Flexible with support | 
|---|
| 95 | 1: [['name', str, True],  ["dist", float, True],    ['name', str, True],      ["dist", float, True]], # Flexible with internal node names | 
|---|
| 96 | 2: [['name', str, False], ["dist", float, False],   ['support', float, False],  ["dist", float, False]],# Strict with support values | 
|---|
| 97 | 3: [['name', str, False], ["dist", float, False],   ['name', str, False],     ["dist", float, False]], # Strict with internal node names | 
|---|
| 98 | 4: [['name', str, False], ["dist", float, False],   [None, None, False],        [None, None, False]], | 
|---|
| 99 | 5: [['name', str, False], ["dist", float, False],   [None, None, False],        ["dist", float, False]], | 
|---|
| 100 | 6: [['name', str, False], [None, None, False],      [None, None, False],        ["dist", float, False]], | 
|---|
| 101 | 7: [['name', str, False], ["dist", float, False],   ["name", str, False],       [None, None, False]], | 
|---|
| 102 | 8: [['name', str, False], [None, None, False],      ["name", str, False],       [None, None, False]], | 
|---|
| 103 | 9: [['name', str, False], [None, None, False],      [None, None, False],        [None, None, False]], # Only topology with node names | 
|---|
| 104 | 100: [[None, None, False],  [None, None, False],      [None, None, False],        [None, None, False]] # Only Topology | 
|---|
| 105 | } | 
|---|
| 106 |  | 
|---|
| 107 |  | 
|---|
| 108 | def format_node(node, node_type, format): | 
|---|
| 109 | if node_type == "leaf": | 
|---|
| 110 | container1 = NW_FORMAT[format][0][0] | 
|---|
| 111 | container2 = NW_FORMAT[format][1][0] | 
|---|
| 112 | converterFn1 = NW_FORMAT[format][0][1] | 
|---|
| 113 | converterFn2 = NW_FORMAT[format][1][1] | 
|---|
| 114 | else: | 
|---|
| 115 | container1 = NW_FORMAT[format][2][0] | 
|---|
| 116 | container2 = NW_FORMAT[format][3][0] | 
|---|
| 117 | converterFn1 = NW_FORMAT[format][2][1] | 
|---|
| 118 | converterFn2 = NW_FORMAT[format][3][1] | 
|---|
| 119 |  | 
|---|
| 120 | if converterFn1 == str: | 
|---|
| 121 | try: | 
|---|
| 122 | FIRST_PART = re.sub("["+_ILEGAL_NEWICK_CHARS+"]", "_", \ | 
|---|
| 123 | str(getattr(node, container1))) | 
|---|
| 124 | except (AttributeError, TypeError): | 
|---|
| 125 | FIRST_PART = "?" | 
|---|
| 126 |  | 
|---|
| 127 | elif converterFn1 is None: | 
|---|
| 128 | FIRST_PART = "" | 
|---|
| 129 | else: | 
|---|
| 130 | try: | 
|---|
| 131 | #FIRST_PART =  "%0.6f" %(converterFn2(getattr(node, container1))) | 
|---|
| 132 | FIRST_PART =  "%g" %(converterFn2(getattr(node, container1))) | 
|---|
| 133 | except (ValueError, TypeError): | 
|---|
| 134 | FIRST_PART = "?" | 
|---|
| 135 |  | 
|---|
| 136 |  | 
|---|
| 137 | if converterFn2 == str: | 
|---|
| 138 | try: | 
|---|
| 139 | SECOND_PART = ":"+re.sub("["+_ILEGAL_NEWICK_CHARS+"]", "_", \ | 
|---|
| 140 | str(getattr(node, container2))) | 
|---|
| 141 | except (ValueError, TypeError): | 
|---|
| 142 | SECOND_PART = ":?" | 
|---|
| 143 | elif converterFn2 is None: | 
|---|
| 144 | SECOND_PART = "" | 
|---|
| 145 | else: | 
|---|
| 146 | try: | 
|---|
| 147 | #SECOND_PART = ":%0.6f" %(converterFn2(getattr(node, container2))) | 
|---|
| 148 | SECOND_PART = ":%g" %(converterFn2(getattr(node, container2))) | 
|---|
| 149 | except (ValueError, TypeError): | 
|---|
| 150 | SECOND_PART = ":?" | 
|---|
| 151 |  | 
|---|
| 152 | return "%s%s" %(FIRST_PART, SECOND_PART) | 
|---|
| 153 |  | 
|---|
| 154 | # Used to write into specific formats | 
|---|
| 155 | def node2leafformat(node, format): | 
|---|
| 156 | safe_name = re.sub("["+_ILEGAL_NEWICK_CHARS+"]", "_", \ | 
|---|
| 157 | str(getattr(node, "name"))) | 
|---|
| 158 |  | 
|---|
| 159 | if format == 0 or format == 1 or format == 2 or format ==3: | 
|---|
| 160 | return "%s:%0.6f" %(safe_name, node.dist) | 
|---|
| 161 | elif format == 4 or format == 7: | 
|---|
| 162 | return ":%0.6f" %(node.dist) | 
|---|
| 163 | elif format == 5 or format == 6: | 
|---|
| 164 | return "%s" %(safe_name) | 
|---|
| 165 |  | 
|---|
| 166 | def node2internalformat(node, format): | 
|---|
| 167 | safe_name = re.sub("["+_ILEGAL_NEWICK_CHARS+"]", "_", \ | 
|---|
| 168 | str(getattr(node, "name"))) | 
|---|
| 169 | if format == 0 or format == 1: | 
|---|
| 170 | return "%0.6f:%0.6f" %(node.support, node.dist) | 
|---|
| 171 | elif format == 2: | 
|---|
| 172 | return "%s:%0.6f" %(safe_name, node.dist) | 
|---|
| 173 | elif format == 3 or format == 4: | 
|---|
| 174 | return ":%0.6f" %(node.dist) | 
|---|
| 175 | elif format == 5: | 
|---|
| 176 | return "%s" %(safe_name) | 
|---|
| 177 | elif format == 6 or format == 7: | 
|---|
| 178 | return "" | 
|---|
| 179 |  | 
|---|
| 180 | def print_supported_formats(): | 
|---|
| 181 | from ete2.coretype.tree import TreeNode | 
|---|
| 182 | t = TreeNode() | 
|---|
| 183 | t.populate(4, "ABCDEFGHI") | 
|---|
| 184 | print t | 
|---|
| 185 | for f in NW_FORMAT: | 
|---|
| 186 | print "Format", f,"=", write_newick(t, features=None, format=f) | 
|---|
| 187 |  | 
|---|
| 188 | class NewickError(Exception): | 
|---|
| 189 | """Exception class designed for NewickIO errors.""" | 
|---|
| 190 | pass | 
|---|
| 191 |  | 
|---|
| 192 | def read_newick(newick, root_node=None, format=0): | 
|---|
| 193 | """ Reads a newick tree from either a string or a file, and returns | 
|---|
| 194 | an ETE tree structure. | 
|---|
| 195 |  | 
|---|
| 196 | A previously existent node object can be passed as the root of the | 
|---|
| 197 | tree, which means that all its new children will belong to the same | 
|---|
| 198 | class as the root(This allows to work with custom TreeNode | 
|---|
| 199 | objects). | 
|---|
| 200 |  | 
|---|
| 201 | You can also take advantage from this behaviour to concatenate | 
|---|
| 202 | several tree structures. | 
|---|
| 203 | """ | 
|---|
| 204 |  | 
|---|
| 205 | if root_node is None: | 
|---|
| 206 | from ete2.coretype.tree import TreeNode | 
|---|
| 207 | root_node = TreeNode() | 
|---|
| 208 |  | 
|---|
| 209 | if isinstance(newick, basestring): | 
|---|
| 210 | if os.path.exists(newick): | 
|---|
| 211 | nw = open(newick, 'rU').read() | 
|---|
| 212 | else: | 
|---|
| 213 | nw = newick | 
|---|
| 214 | nw = nw.strip() | 
|---|
| 215 | if not nw.startswith('(') and nw.endswith(';'): | 
|---|
| 216 | return _read_node_data(nw, root_node, "single", format) | 
|---|
| 217 |  | 
|---|
| 218 | elif not nw.startswith('(') or not nw.endswith(';'): | 
|---|
| 219 | raise NewickError, \ | 
|---|
| 220 | 'Unexisting tree file or Malformed newick tree structure.' | 
|---|
| 221 | else: | 
|---|
| 222 | return _read_newick_from_string(nw, root_node, format) | 
|---|
| 223 |  | 
|---|
| 224 | else: | 
|---|
| 225 | raise NewickError, \ | 
|---|
| 226 | "'newick' argument must be either a filename or a newick string." | 
|---|
| 227 |  | 
|---|
| 228 | def _read_newick_from_string(nw, root_node, format): | 
|---|
| 229 | """ Reads a newick string in the New Hampshire format. """ | 
|---|
| 230 |  | 
|---|
| 231 | if nw.count('(') != nw.count(')'): | 
|---|
| 232 | raise NewickError, 'Parentheses do not match. Broken tree structure' | 
|---|
| 233 |  | 
|---|
| 234 | # white spaces and separators are removed | 
|---|
| 235 | nw = re.sub("[\n\r\t]+", "", nw) | 
|---|
| 236 |  | 
|---|
| 237 | current_parent = None | 
|---|
| 238 |  | 
|---|
| 239 | # Ok, this is my own way of reading newick structures. I find it | 
|---|
| 240 | # more flexible and elegant than other docummented methods. Don't | 
|---|
| 241 | # know if I'm loosing much efficiency. It Starts by splitting the | 
|---|
| 242 | # structure using open parentheses. Each of the resulting chunks | 
|---|
| 243 | # represent an internal node. So for each chunk I create a new node | 
|---|
| 244 | # that hungs from the current parent node.  Each internal node chunk | 
|---|
| 245 | # may contain information about terminal nodes hanging from the | 
|---|
| 246 | # internal and clossing parenthessis (closing previously opened | 
|---|
| 247 | # internal nodes). | 
|---|
| 248 | # | 
|---|
| 249 | # Enjoy. | 
|---|
| 250 | # by JHC ;) | 
|---|
| 251 |  | 
|---|
| 252 | # Skip the first chunk. It is always == '' | 
|---|
| 253 | for internal_node in nw.split("(")[1:]: | 
|---|
| 254 | # If this is the root of tree, use the root_node instead of | 
|---|
| 255 | # creating it, otherwise make a new one. | 
|---|
| 256 | if current_parent is None: | 
|---|
| 257 | current_parent = root_node | 
|---|
| 258 | else: | 
|---|
| 259 | current_parent = current_parent.add_child() | 
|---|
| 260 | # We can only find leaf nodes within this chunk, since rest of | 
|---|
| 261 | # internal nodes will be in the next newick chunks | 
|---|
| 262 | possible_leaves = internal_node.split(",") | 
|---|
| 263 | for i, leaf in enumerate(possible_leaves): | 
|---|
| 264 | # Any resulting sub-chunk resulting from splitting by commas can | 
|---|
| 265 | # be considered (tpologically) as a child to the current parent | 
|---|
| 266 | # node. We only discard chunks if they are empty and in the last | 
|---|
| 267 | # possition, meaining that the next brother is not terminal bu | 
|---|
| 268 | # internal node (will be visited in the next newick chunk) | 
|---|
| 269 | if leaf.strip() == '' and i == len(possible_leaves)-1: | 
|---|
| 270 | continue | 
|---|
| 271 | # Leaf text strings may end with a variable number of clossing | 
|---|
| 272 | # parenthesis. For each ')' we read the information of the | 
|---|
| 273 | # current node, close it and go up one more node. | 
|---|
| 274 | clossing_nodes = leaf.split(")") | 
|---|
| 275 | # first par contain leaf info | 
|---|
| 276 | _read_node_data(clossing_nodes[0], current_parent, "leaf", format) | 
|---|
| 277 | # The next parts containg clossing nodes and info about the | 
|---|
| 278 | # internal nodes. | 
|---|
| 279 | if len(clossing_nodes)>1: | 
|---|
| 280 | for closing_internal in clossing_nodes[1:]: | 
|---|
| 281 | if closing_internal.strip() ==";": continue | 
|---|
| 282 | _read_node_data(closing_internal, current_parent, "internal", format) | 
|---|
| 283 | current_parent = current_parent.up | 
|---|
| 284 | return root_node | 
|---|
| 285 |  | 
|---|
| 286 | def _parse_extra_features(node, NHX_string): | 
|---|
| 287 | """ Reads node's extra data form its NHX string. NHX uses this | 
|---|
| 288 | format:  [&&NHX:prop1=value1:prop2=value2] """ | 
|---|
| 289 | NHX_string = NHX_string.replace("[&&NHX:", "") | 
|---|
| 290 | NHX_string = NHX_string.replace("]", "") | 
|---|
| 291 | for field in NHX_string.split(":"): | 
|---|
| 292 | try: | 
|---|
| 293 | pname, pvalue = field.split("=") | 
|---|
| 294 | except ValueError, e: | 
|---|
| 295 | print NHX_string, field.split("=") | 
|---|
| 296 | raise ValueError, e | 
|---|
| 297 | node.add_feature(pname, pvalue) | 
|---|
| 298 |  | 
|---|
| 299 | def _read_node_data(subnw, current_node, node_type, format): | 
|---|
| 300 | """ Reads a leaf node from a subpart of the original newick | 
|---|
| 301 | tree """ | 
|---|
| 302 |  | 
|---|
| 303 | if node_type == "leaf" or node_type == "single": | 
|---|
| 304 | if node_type == "leaf": | 
|---|
| 305 | node = current_node.add_child() | 
|---|
| 306 | else: | 
|---|
| 307 | node = current_node | 
|---|
| 308 | container1 = NW_FORMAT[format][0][0] | 
|---|
| 309 | container2 = NW_FORMAT[format][1][0] | 
|---|
| 310 | converterFn1 = NW_FORMAT[format][0][1] | 
|---|
| 311 | converterFn2 = NW_FORMAT[format][1][1] | 
|---|
| 312 | flexible1 = NW_FORMAT[format][0][2] | 
|---|
| 313 | flexible2 = NW_FORMAT[format][1][2] | 
|---|
| 314 | else: | 
|---|
| 315 | node = current_node | 
|---|
| 316 | container1 = NW_FORMAT[format][2][0] | 
|---|
| 317 | container2 = NW_FORMAT[format][3][0] | 
|---|
| 318 | converterFn1 = NW_FORMAT[format][2][1] | 
|---|
| 319 | converterFn2 = NW_FORMAT[format][3][1] | 
|---|
| 320 | flexible1 = NW_FORMAT[format][2][2] | 
|---|
| 321 | flexible2 = NW_FORMAT[format][3][2] | 
|---|
| 322 |  | 
|---|
| 323 | if converterFn1 == str: | 
|---|
| 324 | FIRST_MATCH = "("+_NAME_RE+")" | 
|---|
| 325 | elif converterFn1 == float: | 
|---|
| 326 | FIRST_MATCH = "("+_FLOAT_RE+")" | 
|---|
| 327 | elif converterFn1 is None: | 
|---|
| 328 | FIRST_MATCH = '()' | 
|---|
| 329 |  | 
|---|
| 330 | if converterFn2 == str: | 
|---|
| 331 | SECOND_MATCH = "(:"+_NAME_RE+")" | 
|---|
| 332 | elif converterFn2 == float: | 
|---|
| 333 | SECOND_MATCH = "(:"+_FLOAT_RE+")" | 
|---|
| 334 | elif converterFn2 is None: | 
|---|
| 335 | SECOND_MATCH = '()' | 
|---|
| 336 |  | 
|---|
| 337 | if flexible1: | 
|---|
| 338 | FIRST_MATCH += "?" | 
|---|
| 339 | if flexible2: | 
|---|
| 340 | SECOND_MATCH += "?" | 
|---|
| 341 |  | 
|---|
| 342 | MATCH = '%s\s*%s\s*(%s)?' % (FIRST_MATCH, SECOND_MATCH, _NHX_RE) | 
|---|
| 343 | data = re.match(MATCH, subnw) | 
|---|
| 344 | if data: | 
|---|
| 345 | data = data.groups() | 
|---|
| 346 | if data[0] is not None and data[0] != '': | 
|---|
| 347 | node.add_feature(container1, converterFn1(data[0].strip())) | 
|---|
| 348 |  | 
|---|
| 349 | if data[1] is not None and data[1] != '': | 
|---|
| 350 | node.add_feature(container2, converterFn2(data[1][1:].strip())) | 
|---|
| 351 |  | 
|---|
| 352 | if data[2] is not None \ | 
|---|
| 353 | and data[2].startswith("[&&NHX"): | 
|---|
| 354 | _parse_extra_features(node, data[2]) | 
|---|
| 355 | else: | 
|---|
| 356 | raise NewickError, "Unexpected leaf node format:\n\t"+ subnw[0:50] + "[%s]" %format | 
|---|
| 357 | return | 
|---|
| 358 |  | 
|---|
| 359 | # def write_newick_recursive(node, features=None, format=1, _is_root=True): | 
|---|
| 360 | #     """ Recursively reads a tree structure and returns its NHX | 
|---|
| 361 | #     representation. """ | 
|---|
| 362 | #     newick = "" | 
|---|
| 363 | #     if not node.children: | 
|---|
| 364 | #         safe_name = re.sub("["+_ILEGAL_NEWICK_CHARS+"]", "_", \ | 
|---|
| 365 | #                                str(getattr(node, "name"))) | 
|---|
| 366 |  | 
|---|
| 367 | #         newick += format_node(node, "leaf", format) | 
|---|
| 368 | #         newick += _get_features_string(node, features) | 
|---|
| 369 | #         #return newick | 
|---|
| 370 |  | 
|---|
| 371 | #     else: | 
|---|
| 372 | #         if node.children: | 
|---|
| 373 | #             newick+= "(" | 
|---|
| 374 | #         for cnode in node.children: | 
|---|
| 375 | #             newick += write_newick(cnode, features, format=format,\ | 
|---|
| 376 | #                                      _is_root = False) | 
|---|
| 377 | #             # After last child is processed, add closing string | 
|---|
| 378 | #             if cnode == node.children[-1]: | 
|---|
| 379 | #                 newick += ")" | 
|---|
| 380 | #                 if node.up is not None: | 
|---|
| 381 | #                     newick += format_node(node, "internal", format) | 
|---|
| 382 | #                 newick += _get_features_string(node, features) | 
|---|
| 383 | #             else: | 
|---|
| 384 | #                 newick += ',' | 
|---|
| 385 |  | 
|---|
| 386 | #     if _is_root: | 
|---|
| 387 | #         newick += ";" | 
|---|
| 388 | #     return newick | 
|---|
| 389 |  | 
|---|
| 390 | def write_newick(rootnode, features=None, format=1, format_root_node=True, | 
|---|
| 391 | is_leaf_fn=None): | 
|---|
| 392 | """ Iteratively export a tree structure and returns its NHX | 
|---|
| 393 | representation. """ | 
|---|
| 394 | newick = [] | 
|---|
| 395 | leaf = is_leaf_fn if is_leaf_fn else lambda n: not bool(n.children) | 
|---|
| 396 | for postorder, node in rootnode.iter_prepostorder(is_leaf_fn=is_leaf_fn): | 
|---|
| 397 | if postorder: | 
|---|
| 398 | newick.append(")") | 
|---|
| 399 | if node.up is not None or format_root_node: | 
|---|
| 400 | newick.append(format_node(node, "internal", format)) | 
|---|
| 401 | newick.append(_get_features_string(node, features)) | 
|---|
| 402 | else: | 
|---|
| 403 | if node is not rootnode and node != node.up.children[0]: | 
|---|
| 404 | newick.append(",") | 
|---|
| 405 |  | 
|---|
| 406 | if leaf(node): | 
|---|
| 407 | safe_name = re.sub("["+_ILEGAL_NEWICK_CHARS+"]", "_", \ | 
|---|
| 408 | str(getattr(node, "name"))) | 
|---|
| 409 | newick.append(format_node(node, "leaf", format)) | 
|---|
| 410 | newick.append(_get_features_string(node, features)) | 
|---|
| 411 | else: | 
|---|
| 412 | newick.append("(") | 
|---|
| 413 |  | 
|---|
| 414 | newick.append(";") | 
|---|
| 415 | return ''.join(newick) | 
|---|
| 416 |  | 
|---|
| 417 | def _get_features_string(self, features=None): | 
|---|
| 418 | """ Generates the extended newick string NHX with extra data about | 
|---|
| 419 | a node. """ | 
|---|
| 420 | string = "" | 
|---|
| 421 | if features is None: | 
|---|
| 422 | features = [] | 
|---|
| 423 | elif features == []: | 
|---|
| 424 | features = self.features | 
|---|
| 425 |  | 
|---|
| 426 | for pr in features: | 
|---|
| 427 | if hasattr(self, pr): | 
|---|
| 428 | value = re.sub("["+_ILEGAL_NEWICK_CHARS+"]", "_", \ | 
|---|
| 429 | str(getattr(self, pr))) | 
|---|
| 430 | if string != "": | 
|---|
| 431 | string +=":" | 
|---|
| 432 | string +="%s=%s"  %(pr, str(value)) | 
|---|
| 433 | if string != "": | 
|---|
| 434 | string = "[&&NHX:"+string+"]" | 
|---|
| 435 |  | 
|---|
| 436 | return string | 
|---|