| 1 | __VERSION__="ete2-2.2rev1026" |
|---|
| 2 | # -*- coding: utf-8 -*- |
|---|
| 3 | # #START_LICENSE########################################################### |
|---|
| 4 | # |
|---|
| 5 | # |
|---|
| 6 | # This file is part of the Environment for Tree Exploration program |
|---|
| 7 | # (ETE). http://ete.cgenomics.org |
|---|
| 8 | # |
|---|
| 9 | # ETE is free software: you can redistribute it and/or modify it |
|---|
| 10 | # under the terms of the GNU General Public License as published by |
|---|
| 11 | # the Free Software Foundation, either version 3 of the License, or |
|---|
| 12 | # (at your option) any later version. |
|---|
| 13 | # |
|---|
| 14 | # ETE is distributed in the hope that it will be useful, but WITHOUT |
|---|
| 15 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
|---|
| 16 | # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public |
|---|
| 17 | # License for more details. |
|---|
| 18 | # |
|---|
| 19 | # You should have received a copy of the GNU General Public License |
|---|
| 20 | # along with ETE. If not, see <http://www.gnu.org/licenses/>. |
|---|
| 21 | # |
|---|
| 22 | # |
|---|
| 23 | # ABOUT THE ETE PACKAGE |
|---|
| 24 | # ===================== |
|---|
| 25 | # |
|---|
| 26 | # ETE is distributed under the GPL copyleft license (2008-2011). |
|---|
| 27 | # |
|---|
| 28 | # If you make use of ETE in published work, please cite: |
|---|
| 29 | # |
|---|
| 30 | # Jaime Huerta-Cepas, Joaquin Dopazo and Toni Gabaldon. |
|---|
| 31 | # ETE: a python Environment for Tree Exploration. Jaime BMC |
|---|
| 32 | # Bioinformatics 2010,:24doi:10.1186/1471-2105-11-24 |
|---|
| 33 | # |
|---|
| 34 | # Note that extra references to the specific methods implemented in |
|---|
| 35 | # the toolkit are available in the documentation. |
|---|
| 36 | # |
|---|
| 37 | # More info at http://ete.cgenomics.org |
|---|
| 38 | # |
|---|
| 39 | # |
|---|
| 40 | # #END_LICENSE############################################################# |
|---|
| 41 | |
|---|
| 42 | import re |
|---|
| 43 | import os |
|---|
| 44 | import base64 |
|---|
| 45 | |
|---|
# Names exported by a star import of this module.
__all__ = [
    "read_newick",
    "write_newick",
    "print_supported_formats",
]
|---|
| 47 | |
|---|
# Regular expression fragments used for reading and writing newick text.

# Characters that may not appear in a written name or NHX value; the
# writers replace each one with "_".  The string is meant to be embedded in
# a regex character class: "[" + _ILEGAL_NEWICK_CHARS + "]".
_ILEGAL_NEWICK_CHARS = ":;(),\\[\\]\t\n\r="
_NON_PRINTABLE_CHARS_RE = "[\x00-\x1f]+"

# NHX comment block, e.g. [&&NHX:prop1=value1:prop2=value2]
_NHX_RE = r"\[&&NHX:[^\]]*\]"
# Decimal number with an optional exponent.  The exponent sign is optional:
# with a mandatory sign "1e5" only matched "1", and because the reader does
# not anchor the end of its pattern the value was silently read as 1.
_FLOAT_RE = r"[+-]?\d+\.?\d*(?:[eE][-+]?\d+)?"
# Node name: any run of characters that has no structural meaning in newick.
_NAME_RE = r"[^():,;\[\]]+"

# Default node attribute values.  They are not referenced in this part of
# the file; presumably the tree classes import them -- TODO confirm.
DEFAULT_DIST = 1.0
DEFAULT_NAME = ''
DEFAULT_SUPPORT = 1.0
|---|
| 60 | |
|---|
| 61 | |
|---|
| 62 | # Allowed formats. This table is used to read and write newick using |
|---|
| 63 | # different conventions. You can also add your own formats in an easy way. |
|---|
| 64 | # |
|---|
| 65 | # |
|---|
| 66 | # FORMAT: [[LeafAttr1, LeafAttr1Type, Flexible?], [LeafAttr2, LeafAttr2Type, Flexible?],\ |
|---|
| 67 | # [InternalAttr1, InternalAttr1Type, Flexible?], [InternalAttr2, InternalAttr2Type, Flexible?]] |
|---|
| 68 | # |
|---|
| 69 | # Attributes are placed in the newick as follows: |
|---|
| 70 | # |
|---|
| 71 | # .... ,LeafAttr1:LeafAttr2)InternalAttr1:InternalAttr2 ... |
|---|
| 72 | # |
|---|
| 73 | # |
|---|
| 74 | # /-A |
|---|
| 75 | # -NoName--| |
|---|
| 76 | # | /-B |
|---|
| 77 | # \C-------| |
|---|
| 78 | # | /-D |
|---|
| 79 | # \E-------| |
|---|
| 80 | # \-G |
|---|
| 81 | # |
|---|
| 82 | # Format 0 = (A:0.350596,(B:0.728431,(D:0.609498,G:0.125729)1.000000:0.642905)1.000000:0.567737); |
|---|
| 83 | # Format 1 = (A:0.350596,(B:0.728431,(D:0.609498,G:0.125729)E:0.642905)C:0.567737); |
|---|
| 84 | # Format 2 = (A:0.350596,(B:0.728431,(D:0.609498,G:0.125729)1.000000:0.642905)1.000000:0.567737); |
|---|
| 85 | # Format 3 = (A:0.350596,(B:0.728431,(D:0.609498,G:0.125729)E:0.642905)C:0.567737); |
|---|
| 86 | # Format 4 = (A:0.350596,(B:0.728431,(D:0.609498,G:0.125729))); |
|---|
| 87 | # Format 5 = (A:0.350596,(B:0.728431,(D:0.609498,G:0.125729):0.642905):0.567737); |
|---|
| 88 | # Format 6 = (A,(B,(D,G):0.642905):0.567737); |
|---|
| 89 | # Format 7 = (A:0.350596,(B:0.728431,(D:0.609498,G:0.125729)E)C); |
|---|
| 90 | # Format 8 = (A,(B,(D,G)E)C); |
|---|
| 91 | # Format 9 = (A,(B,(D,G))); Format 100 = (,(,(,))); |
|---|
| 92 | |
|---|
# Newick dialect table.  Every entry lists four [attribute, converter,
# flexible] fields in this order: first leaf field, second leaf field,
# first internal-node field, second internal-node field.  A True flag makes
# the field optional when reading; a None attribute/converter means the
# field is neither read nor written.
NW_FORMAT = {
    # Flexible with support
    0: [
        ["name", str, True],
        ["dist", float, True],
        ["support", float, True],
        ["dist", float, True],
    ],
    # Flexible with internal node names
    1: [
        ["name", str, True],
        ["dist", float, True],
        ["name", str, True],
        ["dist", float, True],
    ],
    # Strict with support values
    2: [
        ["name", str, False],
        ["dist", float, False],
        ["support", float, False],
        ["dist", float, False],
    ],
    # Strict with internal node names
    3: [
        ["name", str, False],
        ["dist", float, False],
        ["name", str, False],
        ["dist", float, False],
    ],
    4: [
        ["name", str, False],
        ["dist", float, False],
        [None, None, False],
        [None, None, False],
    ],
    5: [
        ["name", str, False],
        ["dist", float, False],
        [None, None, False],
        ["dist", float, False],
    ],
    6: [
        ["name", str, False],
        [None, None, False],
        [None, None, False],
        ["dist", float, False],
    ],
    7: [
        ["name", str, False],
        ["dist", float, False],
        ["name", str, False],
        [None, None, False],
    ],
    8: [
        ["name", str, False],
        [None, None, False],
        ["name", str, False],
        [None, None, False],
    ],
    # Only topology with node names
    9: [
        ["name", str, False],
        [None, None, False],
        [None, None, False],
        [None, None, False],
    ],
    # Only Topology
    100: [
        [None, None, False],
        [None, None, False],
        [None, None, False],
        [None, None, False],
    ],
}
|---|
| 106 | |
|---|
| 107 | |
|---|
def format_node(node, node_type, format):
    """Return the newick text of a single node for the given format.

    node      -- object whose attributes (name, dist, support, ...) are read
                 with getattr().
    node_type -- "leaf" selects the two leaf fields of NW_FORMAT[format];
                 any other value selects the two internal-node fields.
    format    -- key of NW_FORMAT.

    The result is the first field followed by ":" plus the second field.
    A field whose converter is None is left out entirely.  A field that
    cannot be read or converted is written as "?" (first field) or ":?"
    (second field).
    """
    if node_type == "leaf":
        first_spec = NW_FORMAT[format][0]
        second_spec = NW_FORMAT[format][1]
    else:
        first_spec = NW_FORMAT[format][2]
        second_spec = NW_FORMAT[format][3]
    container1, converterFn1 = first_spec[0], first_spec[1]
    container2, converterFn2 = second_spec[0], second_spec[1]

    if converterFn1 == str:
        try:
            # Characters with a structural meaning in newick become "_".
            FIRST_PART = re.sub("[" + _ILEGAL_NEWICK_CHARS + "]", "_",
                                str(getattr(node, container1)))
        except (AttributeError, TypeError):
            FIRST_PART = "?"
    elif converterFn1 is None:
        FIRST_PART = ""
    else:
        try:
            # Must be the first field's own converter: the second field's
            # converter can be None (unused field), which would turn every
            # value into "?".
            FIRST_PART = "%g" % converterFn1(getattr(node, container1))
        except (ValueError, TypeError):
            FIRST_PART = "?"

    if converterFn2 == str:
        try:
            SECOND_PART = ":" + re.sub("[" + _ILEGAL_NEWICK_CHARS + "]", "_",
                                       str(getattr(node, container2)))
        except (ValueError, TypeError):
            SECOND_PART = ":?"
    elif converterFn2 is None:
        SECOND_PART = ""
    else:
        try:
            SECOND_PART = ":%g" % converterFn2(getattr(node, container2))
        except (ValueError, TypeError):
            SECOND_PART = ":?"

    return "%s%s" % (FIRST_PART, SECOND_PART)
|---|
| 153 | |
|---|
# Legacy helper for writing a leaf in a specific format.
# NOTE(review): the format numbers handled here do not line up with the
# NW_FORMAT table (e.g. 4 and 7 drop the name here, while NW_FORMAT writes
# it), and nothing in this part of the file calls this function.
def node2leafformat(node, format):
    """Return the leaf text of *node* for format 0-7, or None otherwise.

    Formats 0-3 give "name:dist", 4 and 7 give ":dist", 5 and 6 give the
    name alone.  Distances use six decimals and illegal newick characters
    in the name are replaced by "_".
    """
    clean_name = re.sub("[" + _ILEGAL_NEWICK_CHARS + "]", "_", str(node.name))

    if format in (0, 1, 2, 3):
        return "%s:%0.6f" % (clean_name, node.dist)
    if format in (4, 7):
        return ":%0.6f" % node.dist
    if format in (5, 6):
        return "%s" % clean_name
    return None
|---|
| 165 | |
|---|
def node2internalformat(node, format):
    """Return the internal-node text of *node* for format 0-7, else None.

    Formats 0 and 1 give "support:dist", 2 gives "name:dist", 3 and 4 give
    ":dist", 5 gives the name alone, 6 and 7 give an empty string.  Numbers
    use six decimals and illegal newick characters in the name are replaced
    by "_".
    """
    # The name is sanitised up front, even for formats that do not use it.
    clean_name = re.sub("[" + _ILEGAL_NEWICK_CHARS + "]", "_", str(node.name))

    if format in (0, 1):
        return "%0.6f:%0.6f" % (node.support, node.dist)
    if format == 2:
        return "%s:%0.6f" % (clean_name, node.dist)
    if format in (3, 4):
        return ":%0.6f" % node.dist
    if format == 5:
        return "%s" % clean_name
    if format in (6, 7):
        return ""
    return None
|---|
| 179 | |
|---|
def print_supported_formats():
    """Print an example tree and its newick text in every NW_FORMAT entry.

    Formats are listed in ascending numeric order so the output does not
    depend on dictionary ordering.  Nothing is returned.
    """
    # Imported inside the function, as in read_newick(); presumably this
    # avoids a circular import with the tree module -- TODO confirm.
    from ete2.coretype.tree import TreeNode
    t = TreeNode()
    t.populate(4, "ABCDEFGHI")
    # Single-argument print(...) produces the same output whether print is
    # a statement or a function.
    print(t)
    for f in sorted(NW_FORMAT):
        print("Format %s = %s" % (f, write_newick(t, features=None, format=f)))
|---|
| 187 | |
|---|
class NewickError(Exception):
    """Error raised by the newick reader for input it cannot interpret."""
|---|
| 191 | |
|---|
def read_newick(newick, root_node=None, format=0):
    """Read a newick tree from a string or a file and return its root node.

    newick    -- newick text, or the path of an existing file holding it.
    root_node -- optional existing node to use as the root of the tree that
                 is read.  New nodes are created by calling add_child() on
                 it and on its descendants, which allows custom node
                 objects and concatenating several trees.  When None, a new
                 TreeNode is created.
    format    -- key of NW_FORMAT naming the newick dialect.

    Returns root_node (the one given or the one created), also when the
    input describes a single node such as "A;".

    Raises NewickError if 'newick' is not a string, if the text does not
    end with ";", or if it cannot be parsed with the given format.
    """
    if root_node is None:
        from ete2.coretype.tree import TreeNode
        root_node = TreeNode()

    if not isinstance(newick, basestring):
        raise NewickError(
            "'newick' argument must be either a filename or a newick string.")

    if os.path.exists(newick):
        # Close the handle explicitly instead of leaving it to the garbage
        # collector.  'rU' is universal-newline text mode.
        handle = open(newick, 'rU')
        try:
            nw = handle.read()
        finally:
            handle.close()
    else:
        nw = newick
    nw = nw.strip()

    if not nw.endswith(';'):
        raise NewickError(
            'Unexisting tree file or Malformed newick tree structure.')

    if nw.startswith('('):
        return _read_newick_from_string(nw, root_node, format)

    # No parentheses: the whole text describes the root node itself.
    # _read_node_data() returns nothing, so the node is returned here.
    _read_node_data(nw, root_node, "single", format)
    return root_node
|---|
| 227 | |
|---|
def _read_newick_from_string(nw, root_node, format):
    """Build the tree described by a parenthesised newick string.

    nw is expected to start with "(" (the text before the first "(" is
    discarded).  root_node becomes the root of the tree and is returned.
    Raises NewickError when the numbers of "(" and ")" differ.
    """
    if nw.count('(') != nw.count(')'):
        raise NewickError('Parentheses do not match. Broken tree structure')

    # Line breaks and tabs are dropped before the text is split.
    nw = re.sub("[\n\r\t]+", "", nw)

    # Strategy: every "(" opens an internal node, so splitting on "(" gives
    # one chunk per internal node.  A chunk holds the leaves hanging from
    # that node plus any ")" that close this node or its ancestors.
    parent = None
    for chunk in nw.split("(")[1:]:
        # The first chunk belongs to the given root; later ones open a new
        # internal node under the node currently being filled.
        parent = root_node if parent is None else parent.add_child()

        pieces = chunk.split(",")
        last_index = len(pieces) - 1
        for index, piece in enumerate(pieces):
            # An empty last piece means the next sibling is an internal
            # node, which arrives with the following chunk.
            if index == last_index and piece.strip() == '':
                continue

            # The text before the first ")" is the leaf itself; each
            # further part closes one internal node and carries its data.
            closing_parts = piece.split(")")
            _read_node_data(closing_parts[0], parent, "leaf", format)
            for internal_text in closing_parts[1:]:
                if internal_text.strip() == ";":
                    continue
                _read_node_data(internal_text, parent, "internal", format)
                parent = parent.up
    return root_node
|---|
| 285 | |
|---|
def _parse_extra_features(node, NHX_string):
    """Store the properties of an NHX block on *node*.

    NHX uses this format: [&&NHX:prop1=value1:prop2=value2]

    Each "name=value" field is passed to node.add_feature(name, value);
    values are kept as strings.  Only the first "=" separates name from
    value, so a value may itself contain "=".  Empty fields (as in
    "[&&NHX:]" or after a stray ":") are skipped.

    Raises ValueError for a non-empty field without "=".
    """
    NHX_string = NHX_string.replace("[&&NHX:", "")
    NHX_string = NHX_string.replace("]", "")
    for field in NHX_string.split(":"):
        if not field:
            continue
        pname, separator, pvalue = field.partition("=")
        if not separator:
            raise ValueError(
                "Malformed NHX field %r in %r: expected name=value"
                % (field, NHX_string))
        node.add_feature(pname, pvalue)
|---|
| 298 | |
|---|
def _field_pattern(converter, prefix):
    """Return the capturing regex for one newick field of the given type."""
    if converter == str:
        return "(" + prefix + _NAME_RE + ")"
    if converter == float:
        return "(" + prefix + _FLOAT_RE + ")"
    if converter is None:
        # Unused field: an empty group keeps the group numbering stable.
        return "()"
    raise NewickError(
        "Unsupported converter in newick format table: %r" % (converter,))


def _read_node_data(subnw, current_node, node_type, format):
    """Parse the newick text of one node and store its fields on a node.

    subnw        -- text of a single node, e.g. "A:0.5[&&NHX:x=y]".  Only
                    the start of the text has to match; anything after the
                    recognised fields is ignored.
    current_node -- for node_type "leaf", the parent under which a new
                    child is created with add_child(); otherwise the node
                    that receives the data.
    node_type    -- "leaf" or "single" use the leaf fields of
                    NW_FORMAT[format]; any other value uses the
                    internal-node fields.
    format       -- key of NW_FORMAT.

    Values are stored with node.add_feature().  Returns None.  Raises
    NewickError when the text does not match the format or when the format
    uses a converter other than str, float or None.
    """
    if node_type == "leaf" or node_type == "single":
        if node_type == "leaf":
            node = current_node.add_child()
        else:
            node = current_node
        first_spec = NW_FORMAT[format][0]
        second_spec = NW_FORMAT[format][1]
    else:
        node = current_node
        first_spec = NW_FORMAT[format][2]
        second_spec = NW_FORMAT[format][3]

    container1, converterFn1, flexible1 = (
        first_spec[0], first_spec[1], first_spec[2])
    container2, converterFn2, flexible2 = (
        second_spec[0], second_spec[1], second_spec[2])

    FIRST_MATCH = _field_pattern(converterFn1, "")
    SECOND_MATCH = _field_pattern(converterFn2, ":")

    # A flexible field may be absent from the text.
    if flexible1:
        FIRST_MATCH += "?"
    if flexible2:
        SECOND_MATCH += "?"

    MATCH = r'%s\s*%s\s*(%s)?' % (FIRST_MATCH, SECOND_MATCH, _NHX_RE)
    data = re.match(MATCH, subnw)
    if not data:
        raise NewickError(
            "Unexpected leaf node format:\n\t" + subnw[0:50] + "[%s]" % format)

    first_text, second_text, nhx_text = data.groups()
    if first_text:
        node.add_feature(container1, converterFn1(first_text.strip()))
    if second_text:
        # Drop the leading ":" captured together with the value.
        node.add_feature(container2, converterFn2(second_text[1:].strip()))
    if nhx_text is not None and nhx_text.startswith("[&&NHX"):
        _parse_extra_features(node, nhx_text)
|---|
| 358 | |
|---|
| 359 | # def write_newick_recursive(node, features=None, format=1, _is_root=True): |
|---|
| 360 | # """ Recursively reads a tree structure and returns its NHX |
|---|
| 361 | # representation. """ |
|---|
| 362 | # newick = "" |
|---|
| 363 | # if not node.children: |
|---|
| 364 | # safe_name = re.sub("["+_ILEGAL_NEWICK_CHARS+"]", "_", \ |
|---|
| 365 | # str(getattr(node, "name"))) |
|---|
| 366 | |
|---|
| 367 | # newick += format_node(node, "leaf", format) |
|---|
| 368 | # newick += _get_features_string(node, features) |
|---|
| 369 | # #return newick |
|---|
| 370 | |
|---|
| 371 | # else: |
|---|
| 372 | # if node.children: |
|---|
| 373 | # newick+= "(" |
|---|
| 374 | # for cnode in node.children: |
|---|
| 375 | # newick += write_newick(cnode, features, format=format,\ |
|---|
| 376 | # _is_root = False) |
|---|
| 377 | # # After last child is processed, add closing string |
|---|
| 378 | # if cnode == node.children[-1]: |
|---|
| 379 | # newick += ")" |
|---|
| 380 | # if node.up is not None: |
|---|
| 381 | # newick += format_node(node, "internal", format) |
|---|
| 382 | # newick += _get_features_string(node, features) |
|---|
| 383 | # else: |
|---|
| 384 | # newick += ',' |
|---|
| 385 | |
|---|
| 386 | # if _is_root: |
|---|
| 387 | # newick += ";" |
|---|
| 388 | # return newick |
|---|
| 389 | |
|---|
def write_newick(rootnode, features=None, format=1, format_root_node=True,
                 is_leaf_fn=None):
    """Return the newick text (with optional NHX blocks) of a tree.

    rootnode         -- node where the export starts; it must provide
                        iter_prepostorder(), and its nodes 'up' and
                        'children'.
    features         -- passed to _get_features_string(): None writes no
                        NHX block, [] writes every feature listed in
                        node.features, any other list writes the named
                        attributes a node has.
    format           -- key of NW_FORMAT.
    format_root_node -- when False, an internal node without parent gets
                        neither its own fields nor an NHX block.
    is_leaf_fn       -- optional predicate; nodes for which it is true are
                        written as leaves.  It is also handed to
                        iter_prepostorder().  By default a node without
                        children is a leaf.

    The tree is walked iteratively and the pieces are joined at the end.
    """
    newick = []
    leaf = is_leaf_fn if is_leaf_fn else lambda n: not bool(n.children)
    # The boolean is taken to be true when an internal node is visited the
    # second time, after its children; inferred from how it is used below,
    # since iter_prepostorder() is defined elsewhere -- TODO confirm.
    for postorder, node in rootnode.iter_prepostorder(is_leaf_fn=is_leaf_fn):
        if postorder:
            newick.append(")")
            if node.up is not None or format_root_node:
                newick.append(format_node(node, "internal", format))
                newick.append(_get_features_string(node, features))
        else:
            # Every node except the start node and first children is
            # preceded by a separator.
            if node is not rootnode and node != node.up.children[0]:
                newick.append(",")

            if leaf(node):
                # format_node() sanitises the name itself and writes "?"
                # when it is missing, so no separate name handling here.
                newick.append(format_node(node, "leaf", format))
                newick.append(_get_features_string(node, features))
            else:
                newick.append("(")

    newick.append(";")
    return ''.join(newick)
|---|
| 416 | |
|---|
def _get_features_string(self, features=None):
    """Return the "[&&NHX:...]" block for a node, or "" if it is empty.

    self     -- the node (this is a plain function, not a method).
    features -- None selects nothing, [] selects every name in
                self.features, any other iterable lists attribute names.
                Names the node does not have are skipped.

    Values are converted with str() and illegal newick characters in them
    are replaced by "_".
    """
    if features is None:
        features = []
    elif features == []:
        features = self.features

    pairs = []
    for pr in features:
        if not hasattr(self, pr):
            continue
        value = re.sub("[" + _ILEGAL_NEWICK_CHARS + "]", "_",
                       str(getattr(self, pr)))
        pairs.append("%s=%s" % (pr, value))

    if not pairs:
        return ""
    return "[&&NHX:" + ":".join(pairs) + "]"
|---|