1 | // =============================================================== // |
---|
2 | // // |
---|
3 | // File : ali_misc.hxx // |
---|
4 | // Purpose : // |
---|
5 | // // |
---|
6 | // Institute of Microbiology (Technical University Munich) // |
---|
7 | // http://www.arb-home.de/ // |
---|
8 | // // |
---|
9 | // =============================================================== // |
---|
10 | |
---|
11 | #ifndef ALI_MISC_HXX |
---|
12 | #define ALI_MISC_HXX |
---|
13 | |
---|
14 | #ifndef _GLIBCXX_CSTDIO |
---|
15 | #include <cstdio> |
---|
16 | #endif |
---|
17 | #ifndef _GLIBCXX_CSTDLIB |
---|
18 | #include <cstdlib> |
---|
19 | #endif |
---|
20 | #ifndef _UNISTD_H |
---|
21 | #include <unistd.h> |
---|
22 | #endif |
---|
23 | #ifndef _MEMORY_H |
---|
24 | #include <memory.h> |
---|
25 | #endif |
---|
26 | |
---|
27 | #ifndef ATTRIBUTES_H |
---|
28 | #include <attributes.h> |
---|
29 | #endif |
---|
30 | #ifndef ARBTOOLS_H |
---|
31 | #include <arbtools.h> |
---|
32 | #endif |
---|
33 | |
---|
// Numeric symbol codes of the internal sequence alphabet.
// The values 0..6 are the same numbers that ali_base_to_number() returns and
// ali_number_to_base() accepts ('a','c','g','u'/'t','-','n','.').
#define ALI_A_CODE 0
#define ALI_C_CODE 1
#define ALI_G_CODE 2
#define ALI_U_CODE 3
#define ALI_GAP_CODE 4
#define ALI_N_CODE 5
#define ALI_DOT_CODE 6
// NOTE(review): not produced by any converter in this header; presumably a
// marker for "no valid symbol" - confirm against the users of this macro.
#define ALI_UNDEF_CODE 200
---|
42 | |
---|
43 | // ----------------------------- |
---|
44 | // Some error functions |
---|
45 | |
---|
// Write one informational line of the form "<func> <message>" to stdout.
//   message : text to print
//   func    : prefix printed before the message (defaults to the empty
//             string, in which case the line starts with a single blank)
inline void ali_message(const char *message, const char *func = "")
{
    printf("%s %s\n", func, message);
}
---|
50 | |
---|
// Write one warning line of the form "WARNING <func>: <message>" to stderr.
//   message : text of the warning
//   func    : name printed after "WARNING" (defaults to the empty string)
inline void ali_warning(const char *message, const char *func = "")
{
    FILE *const out = stderr;

    fprintf(out, "WARNING %s: %s\n", func, message);
}
---|
55 | |
---|
// Error reporting entry points; only declared here, the definitions are not
// part of this header.
//   message : text describing the problem
//   func    : optional name of the reporting function (defaults to "")
// Both are tagged __ATTR__NORETURN, i.e. callers may rely on them not
// returning (the macro presumably comes from <attributes.h> - confirm).
void ali_error(const char *message, const char *func = "") __ATTR__NORETURN;
void ali_fatal_error(const char *message, const char *func = "") __ATTR__NORETURN;
---|
58 | |
---|
59 | inline void *CALLOC(long i, long j) { |
---|
60 | char *v = (char *)malloc(i*j); |
---|
61 | if (!v) { |
---|
62 | ali_fatal_error("Out of Memory"); |
---|
63 | } |
---|
64 | memset(v, 0, i*j); |
---|
65 | return v; |
---|
66 | } |
---|
67 | |
---|
68 | // ------------------- |
---|
69 | // Converters |
---|
70 | |
---|
// Character variant: tell whether c is a base letter.
// Accepts a, c, g, u, t and n in either case.
//   returns : 1 if c is one of those letters, 0 otherwise
inline int ali_is_base(char c)
{
    switch (c) {
        case 'a': case 'A':
        case 'c': case 'C':
        case 'g': case 'G':
        case 'u': case 'U':
        case 't': case 'T':
        case 'n': case 'N':
            return 1;
        default:
            return 0;
    }
}
---|
77 | |
---|
// Numeric-code variant: tell whether c is a base code.
// Codes 0..3 (the values ali_base_to_number() returns for a, c, g, u/t) and
// code 5 (returned for 'n') count as bases.
//   returns : 1 for a base code, 0 otherwise
inline int ali_is_base(unsigned char c)
{
    if (c == 5) {
        return 1;
    }
    return (c < 4) ? 1 : 0;
}
---|
82 | |
---|
// Character variant: tell whether c is an unambiguous base letter.
// Accepts a, c, g, u and t in either case; 'n' is not accepted.
//   returns : 1 if c is one of those letters, 0 otherwise
inline int ali_is_real_base(char c)
{
    const char real_bases[] = { 'a', 'A', 'c', 'C', 'g', 'G', 'u', 'U', 't', 'T' };

    for (unsigned k = 0; k < sizeof(real_bases); ++k) {
        if (real_bases[k] == c) {
            return 1;
        }
    }
    return 0;
}
---|
89 | |
---|
// Numeric-code variant: tell whether c is an unambiguous base code,
// i.e. one of the codes 0..3.
//   returns : 1 for codes 0..3, 0 otherwise
inline int ali_is_real_base(unsigned char c)
{
    return (c < 4) ? 1 : 0;
}
---|
94 | |
---|
// Character variant: tell whether c is an unambiguous base letter
// (a, c, g, u, t in either case) or the gap character '-'.
//   returns : 1 if so, 0 otherwise
inline int ali_is_real_base_or_gap(char c)
{
    if (c == '-') {
        return 1;
    }

    const int is_a  = (c == 'a' || c == 'A');
    const int is_c  = (c == 'c' || c == 'C');
    const int is_g  = (c == 'g' || c == 'G');
    const int is_ut = (c == 'u' || c == 'U' || c == 't' || c == 'T');

    return (is_a || is_c || is_g || is_ut) ? 1 : 0;
}
---|
101 | |
---|
// Numeric-code variant: tell whether c is an unambiguous base code (0..3)
// or the gap code (4).
//   returns : 1 for codes 0..4, 0 otherwise
inline int ali_is_real_base_or_gap(unsigned char c)
{
    return (c > 4) ? 0 : 1;
}
---|
106 | |
---|
// Character variant: tell whether c is the dot character '.'.
//   returns : 1 if c is '.', 0 otherwise
inline int ali_is_dot(char c)
{
    return (c == '.') ? 1 : 0;
}
---|
111 | |
---|
// Numeric-code variant: tell whether c is the dot code (6, the value
// ali_base_to_number() returns for '.').
//   returns : 1 if c is 6, 0 otherwise
inline int ali_is_dot(unsigned char c)
{
    if (c != 6) {
        return 0;
    }
    return 1;
}
---|
116 | |
---|
// Character variant: tell whether c is the "any base" letter.
// Both 'n' and 'N' are accepted, matching the other character predicates in
// this header and ali_base_to_number(), which all ignore letter case.
//   returns : 1 if c is 'n' or 'N', 0 otherwise
inline int ali_is_nbase(char c)
{
    return (c == 'n' || c == 'N') ? 1 : 0;
}
---|
121 | |
---|
// Numeric-code variant: tell whether c is the code of the "any base" symbol
// (5, the value ali_base_to_number() returns for 'n').
//   returns : 1 if c is 5, 0 otherwise
inline int ali_is_nbase(unsigned char c)
{
    const unsigned char n_code = 5;

    return (c == n_code) ? 1 : 0;
}
---|
126 | |
---|
// Character variant: tell whether c is the gap character '-'.
//   returns : 1 if c is '-', 0 otherwise
inline int ali_is_gap(char c)
{
    if (c == '-') {
        return 1;
    }
    return 0;
}
---|
131 | |
---|
// Numeric-code variant: tell whether c is the gap code (4, the value
// ali_base_to_number() returns for '-' by default).
//   returns : 1 if c is 4, 0 otherwise
inline int ali_is_gap(unsigned char c)
{
    const unsigned char gap_code = 4;

    return (c == gap_code) ? 1 : 0;
}
---|
136 | |
---|
137 | inline unsigned char ali_base_to_number(char c, int no_gap_flag = 0) |
---|
138 | { |
---|
139 | switch (c) { |
---|
140 | case 'a': case 'A': return (0); |
---|
141 | case 'c': case 'C': return (1); |
---|
142 | case 'g': case 'G': return (2); |
---|
143 | case 'u': case 'U': case 't': case 'T': return (3); |
---|
144 | case '-': if (no_gap_flag == 0) |
---|
145 | return (4); |
---|
146 | else |
---|
147 | return (6); |
---|
148 | case 'n': case 'N': return (5); |
---|
149 | case '.': return (6); |
---|
150 | default: |
---|
151 | ali_warning("Replace unknowen Base by 'n'"); |
---|
152 | return (5); |
---|
153 | } |
---|
154 | } |
---|
155 | |
---|
156 | inline char ali_number_to_base(unsigned char n) |
---|
157 | { |
---|
158 | switch (n) { |
---|
159 | case 0: return 'a'; |
---|
160 | case 1: return 'c'; |
---|
161 | case 2: return 'g'; |
---|
162 | case 3: return 'u'; |
---|
163 | case 4: return '-'; |
---|
164 | case 5: return 'n'; |
---|
165 | case 6: return '.'; |
---|
166 | default: |
---|
167 | ali_warning("Replace unknowen Number by '.'"); |
---|
168 | printf("received %d\n", n); |
---|
169 | ali_fatal_error("STOP"); |
---|
170 | return '.'; |
---|
171 | } |
---|
172 | } |
---|
173 | |
---|
174 | inline void ali_string_to_sequence(char *sequence) |
---|
175 | { |
---|
176 | for (; *sequence != '\0' && !ali_is_base(*sequence); sequence++) |
---|
177 | *sequence = (char) ali_base_to_number(*sequence, 1); |
---|
178 | |
---|
179 | for (; *sequence != '\0'; sequence++) |
---|
180 | *sequence = (char) ali_base_to_number(*sequence); |
---|
181 | } |
---|
182 | |
---|
183 | inline void ali_sequence_to_string(unsigned char *sequence, unsigned long length) |
---|
184 | { |
---|
185 | for (; length-- > 0; sequence++) |
---|
186 | *sequence = (unsigned char) ali_number_to_base(*sequence); |
---|
187 | } |
---|
188 | |
---|
189 | inline void ali_sequence_to_postree_sequence(unsigned char *sequence, unsigned long length) |
---|
190 | { |
---|
191 | for (; length-- > 0; sequence++) |
---|
192 | if (ali_is_base(*sequence)) { |
---|
193 | if (ali_is_nbase(*sequence)) |
---|
194 | *sequence = 4; |
---|
195 | } |
---|
196 | else { |
---|
197 | ali_warning("Unknowen symbol replaced by 'n'"); |
---|
198 | *sequence = 4; |
---|
199 | } |
---|
200 | } |
---|
201 | |
---|
// Print the elements of a code buffer to stdout as decimal numbers, each one
// followed by a single blank. No newline is written.
//   sequence : buffer holding 'length' elements
//   length   : number of elements to print
inline void ali_print_sequence(unsigned char *sequence, unsigned long length)
{
    for (unsigned long pos = 0; pos < length; ++pos) {
        printf("%d ", sequence[pos]);
    }
}
---|
207 | |
---|
208 | #else |
---|
209 | #error ali_misc.hxx included twice |
---|
210 | #endif // ALI_MISC_HXX |
---|