Context Navigation

← Previous Revision
Next Revision →
Blame
Revision Log

source: trunk/GDE/SATIVA/sativa/epac/ete2/text_arraytable.py

Visit:

Last change on this file was 12906, checked in by akozlov, 10 years ago
add sativa files and scripts for ARB integration
File size: 5.0 KB

Line
1	__VERSION__="ete2-2.2rev1026"
2	# -*- coding: utf-8 -*-
3	# #START_LICENSE###########################################################
4	#
5	#
6	# This file is part of the Environment for Tree Exploration program
7	# (ETE). http://ete.cgenomics.org
8	#
9	# ETE is free software: you can redistribute it and/or modify it
10	# under the terms of the GNU General Public License as published by
11	# the Free Software Foundation, either version 3 of the License, or
12	# (at your option) any later version.
13	#
14	# ETE is distributed in the hope that it will be useful, but WITHOUT
15	# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16	# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17	# License for more details.
18	#
19	# You should have received a copy of the GNU General Public License
20	# along with ETE. If not, see <http://www.gnu.org/licenses/>.
21	#
22	#
23	# ABOUT THE ETE PACKAGE
24	# =====================
25	#
26	# ETE is distributed under the GPL copyleft license (2008-2011).
27	#
28	# If you make use of ETE in published work, please cite:
29	#
30	# Jaime Huerta-Cepas, Joaquin Dopazo and Toni Gabaldon.
31	# ETE: a python Environment for Tree Exploration. BMC
32	# Bioinformatics 2010, 11:24. doi:10.1186/1471-2105-11-24
33	#
34	# Note that extra references to the specific methods implemented in
35	# the toolkit are available in the documentation.
36	#
37	# More info at http://ete.cgenomics.org
38	#
39	#
40	# #END_LICENSE#############################################################
41
42	#! /usr/bin/env python
43
44	import re
45	from sys import stderr
46	import numpy
47
48
49	__all__ = ['read_arraytable', 'write_arraytable']
50
def _uniquify_name(name, counter, taken):
    """Return ``(unique_name, was_renamed)`` for *name* given the names in *taken*.

    *counter* maps each raw name to the number of times it has been seen so
    far and is updated in place. A name that is already in *taken* gets a
    ``_<n>`` suffix, where ``n`` is its occurrence count; the count is bumped
    further if that suffixed name is itself already taken.
    """
    counter[name] = counter.get(name, 0) + 1
    if name not in taken:
        return name, False
    candidate = "%s_%d" % (name, counter[name])
    while candidate in taken:
        counter[name] += 1
        candidate = "%s_%d" % (name, counter[name])
    return candidate, True


def read_arraytable(matrix_file, mtype="float", arraytable_object=None):
    """Read a tab-delimited text matrix into an array table object.

    Expected layout: a header line whose first field is ``#NAMES``
    (case-insensitive) followed by the column names, then one line per row
    made of the row name followed by one value per column. Empty lines and
    other lines starting with ``#`` are skipped. Empty value fields are
    stored as NaN. Duplicated row or column names are renamed by appending
    a numeric suffix, and a warning is written to stderr.

    :param matrix_file: either a path to the matrix file or, when the string
        contains at least one newline, the matrix text itself.
    :param mtype: dtype passed to ``numpy.ndarray.astype`` for the values.
    :param arraytable_object: object to fill; when None a new
        ``ete2.coretype.arraytable.ArrayTable`` is created.
    :returns: the filled array table object.
    :raises ValueError: if a data line appears before the ``#NAMES`` header,
        or if a data line does not have exactly one value per column.
    """
    if arraytable_object is None:
        from ete2.coretype import arraytable
        A = arraytable.ArrayTable()
    else:
        A = arraytable_object

    A.mtype = mtype
    temp_matrix = []
    rowname_counter = {}
    colname_counter = {}
    row_dup_flag = False
    col_dup_flag = False

    # A string holding several lines is taken as the matrix itself;
    # anything else is treated as a path to open.
    opened_file = None
    if "\n" in matrix_file:
        matrix_data = matrix_file.split("\n")
    else:
        opened_file = open(matrix_file)
        matrix_data = opened_file

    try:
        for line in matrix_data:
            line = line.strip("\n")
            # Skip empty lines
            if not line:
                continue
            fields = line.split("\t")

            # Read column names
            if line[0] == '#' and re.match("#NAMES", fields[0], re.IGNORECASE):
                for colname in fields[1:]:
                    colname, renamed = _uniquify_name(
                        colname.strip(), colname_counter, A.colValues)
                    col_dup_flag = col_dup_flag or renamed
                    A.colValues[colname] = None
                    A.colNames.append(colname)
                if col_dup_flag:
                    stderr.write("Duplicated column names were renamed.\n")

            # Skip comments
            elif line[0] == '#':
                continue

            # Read values (only when column names are loaded)
            elif A.colNames:
                if len(fields) - 1 != len(A.colNames):
                    raise ValueError(
                        "Invalid number of columns. Expecting:%d"
                        % len(A.colNames))

                # First field is the row name; the rest are the values.
                rowname, renamed = _uniquify_name(
                    fields.pop(0).strip(), rowname_counter, A.rowValues)
                row_dup_flag = row_dup_flag or renamed
                A.rowValues[rowname] = None
                A.rowNames.append(rowname)

                # Blank cells become NaN so the final dtype cast accepts them.
                temp_matrix.append(
                    [numpy.nan if f.strip() == "" else f for f in fields])
            else:
                raise ValueError("Column names are required.")
    finally:
        if opened_file is not None:
            opened_file.close()

    if row_dup_flag:
        stderr.write("Duplicated row names were renamed.\n")

    # Convert all read lines into a numpy matrix
    vmatrix = numpy.array(temp_matrix).astype(A.mtype)

    # Updates indexes to link names and vectors in matrix
    A._link_names2matrix(vmatrix)
    return A
142
def write_arraytable(A, fname, colnames=None):
    """Write an array table to *fname* as a tab-delimited text matrix.

    The output starts with a ``#NAMES`` header line listing the column
    names, followed by one line per entry of ``A.rowNames`` holding the row
    name and its values for the selected columns.

    :param A: array table object providing ``colNames``, ``rowNames`` and
        ``get_several_column_vectors``.
    :param fname: path of the file to (over)write.
    :param colnames: names of the columns to write, in output order. When
        None or empty, all columns in ``A.colNames`` are written.
    """
    # None and an empty selection both mean "every column".
    if colnames is None or len(colnames) == 0:
        colnames = A.colNames
    colnames = list(colnames)

    # Column vectors come back one per column; swap axes to iterate by row.
    matrix = A.get_several_column_vectors(colnames)
    matrix = matrix.swapaxes(0, 1)

    # The context manager closes the file even if a write fails.
    with open(fname, "w") as out:
        out.write('\t'.join(["#NAMES"] + colnames) + "\n")
        for index, rname in enumerate(A.rowNames):
            cells = [rname] + matrix[index].tolist()
            out.write('\t'.join(map(str, cells)) + "\n")

Note: See TracBrowser for help on using the repository browser.

Download in other formats: