1 | // =============================================================== // |
---|
2 | // // |
---|
3 | // File : ali_misc.hxx // |
---|
4 | // Purpose : // |
---|
5 | // // |
---|
6 | // Institute of Microbiology (Technical University Munich) // |
---|
7 | // http://www.arb-home.de/ // |
---|
8 | // // |
---|
9 | // =============================================================== // |
---|
10 | |
---|
11 | #ifndef ALI_MISC_HXX |
---|
12 | #define ALI_MISC_HXX |
---|
13 | |
---|
14 | #ifndef _GLIBCXX_CSTDIO |
---|
15 | #include <cstdio> |
---|
16 | #endif |
---|
17 | #ifndef _GLIBCXX_CSTDLIB |
---|
18 | #include <cstdlib> |
---|
19 | #endif |
---|
20 | #ifndef _UNISTD_H |
---|
21 | #include <unistd.h> |
---|
22 | #endif |
---|
23 | #ifndef _MEMORY_H |
---|
24 | #include <memory.h> |
---|
25 | #endif |
---|
26 | |
---|
27 | #ifndef ATTRIBUTES_H |
---|
28 | #include <attributes.h> |
---|
29 | #endif |
---|
30 | #ifndef ARBTOOLS_H |
---|
31 | #include <arbtools.h> |
---|
32 | #endif |
---|
33 | |
---|
// Numeric codes of the sequence symbols.
// The values correspond to what ali_base_to_number() produces and
// ali_number_to_base() accepts.
#define ALI_A_CODE     0   // 'a' / 'A'
#define ALI_C_CODE     1   // 'c' / 'C'
#define ALI_G_CODE     2   // 'g' / 'G'
#define ALI_U_CODE     3   // 'u' / 'U' (also 't' / 'T')
#define ALI_GAP_CODE   4   // '-'
#define ALI_N_CODE     5   // 'n' / 'N'
#define ALI_DOT_CODE   6   // '.'
#define ALI_UNDEF_CODE 200 // NOTE(review): not used inside this header; presumably marks an undefined symbol - confirm
---|
42 | |
---|
43 | // ----------------------------- |
---|
44 | // Some error functions |
---|
45 | |
---|
// Write 'message' followed by a newline to stdout.
inline void ali_message(const char *message) {
    printf("%s\n", message);
}
---|
// Write 'message', prefixed with "WARNING: " and followed by a newline, to stderr.
inline void ali_warning(const char *message) {
    fprintf(stderr,
            "WARNING: %s\n",
            message);
}
---|
48 | |
---|
49 | void ali_error(const char *message, const char *func = "") __ATTR__NORETURN; |
---|
50 | void ali_fatal_error(const char *message, const char *func = "") __ATTR__NORETURN; |
---|
51 | |
---|
52 | inline void ali_out_of_memory_if(bool cond) { |
---|
53 | if (cond) ali_fatal_error("out of memory"); |
---|
54 | } |
---|
55 | |
---|
56 | inline void *CALLOC(long i, long j) { |
---|
57 | char *v = (char *)malloc(i*j); |
---|
58 | ali_out_of_memory_if(!v); |
---|
59 | memset(v, 0, i*j); |
---|
60 | return v; |
---|
61 | } |
---|
62 | |
---|
63 | // ------------------- |
---|
64 | // Converters |
---|
65 | |
---|
// Character variant: returns 1 if 'c' is one of a, c, g, u, t or n
// (either case), 0 otherwise.
inline int ali_is_base(char c) {
    switch (c) {
        case 'a': case 'A':
        case 'c': case 'C':
        case 'g': case 'G':
        case 'u': case 'U':
        case 't': case 'T':
        case 'n': case 'N':
            return 1;
        default:
            return 0;
    }
}
---|
71 | |
---|
// Numeric-code variant: returns 1 for the codes 0..3 (ALI_A_CODE..ALI_U_CODE)
// and 5 (ALI_N_CODE), 0 for every other value.
inline int ali_is_base(unsigned char c) {
    switch (c) {
        case 0:
        case 1:
        case 2:
        case 3:
        case 5:
            return 1;
        default:
            return 0;
    }
}
---|
75 | |
---|
// Character variant: returns 1 if 'c' is one of a, c, g, u or t (either case),
// 0 otherwise. In contrast to ali_is_base(), 'n'/'N' is not accepted.
inline int ali_is_real_base(char c) {
    switch (c) {
        case 'a': case 'A':
        case 'c': case 'C':
        case 'g': case 'G':
        case 'u': case 'U':
        case 't': case 'T':
            return 1;
        default:
            return 0;
    }
}
---|
81 | |
---|
// Numeric-code variant: returns 1 for the codes 0..3
// (ALI_A_CODE..ALI_U_CODE), 0 for every other value.
inline int ali_is_real_base(unsigned char c) {
    return (c < 4) ? 1 : 0;
}
---|
85 | |
---|
// Character variant: returns 1 if 'c' is one of a, c, g, u, t (either case)
// or the gap character '-', 0 otherwise.
inline int ali_is_real_base_or_gap(char c) {
    switch (c) {
        case 'a': case 'A':
        case 'c': case 'C':
        case 'g': case 'G':
        case 'u': case 'U':
        case 't': case 'T':
        case '-':
            return 1;
        default:
            return 0;
    }
}
---|
91 | |
---|
// Numeric-code variant: returns 1 for the codes 0..4
// (ALI_A_CODE..ALI_U_CODE and ALI_GAP_CODE), 0 for every other value.
inline int ali_is_real_base_or_gap(unsigned char c) {
    return (c < 5) ? 1 : 0;
}
---|
95 | |
---|
// Character variant: returns 1 if 'c' is the dot character '.', 0 otherwise.
inline int ali_is_dot(char c) {
    if (c == '.') {
        return 1;
    }
    return 0;
}
---|
99 | |
---|
// Numeric-code variant: returns 1 if 'c' is the code 6 (ALI_DOT_CODE), 0 otherwise.
inline int ali_is_dot(unsigned char c) {
    if (c == 6) {
        return 1;
    }
    return 0;
}
---|
103 | |
---|
// Character variant: returns 1 if 'c' is the unspecified base 'n' or 'N',
// 0 otherwise.
//
// Both cases are accepted so that this predicate agrees with
// ali_is_base(char) and ali_base_to_number(), which treat 'N' exactly like 'n'.
inline int ali_is_nbase(char c) {
    return c == 'n' || c == 'N';
}
---|
107 | |
---|
// Numeric-code variant: returns 1 if 'c' is the code 5 (ALI_N_CODE), 0 otherwise.
inline int ali_is_nbase(unsigned char c) {
    if (c == 5) {
        return 1;
    }
    return 0;
}
---|
111 | |
---|
// Character variant: returns 1 if 'c' is the gap character '-', 0 otherwise.
inline int ali_is_gap(char c) {
    if (c == '-') {
        return 1;
    }
    return 0;
}
---|
115 | |
---|
// Numeric-code variant: returns 1 if 'c' is the code 4 (ALI_GAP_CODE), 0 otherwise.
inline int ali_is_gap(unsigned char c) {
    if (c == 4) {
        return 1;
    }
    return 0;
}
---|
119 | |
---|
120 | inline unsigned char ali_base_to_number(char c, int no_gap_flag = 0) { |
---|
121 | switch (c) { |
---|
122 | case 'a': case 'A': return 0; |
---|
123 | case 'c': case 'C': return 1; |
---|
124 | case 'g': case 'G': return 2; |
---|
125 | case 'u': case 'U': |
---|
126 | case 't': case 'T': return 3; |
---|
127 | case '-': return no_gap_flag ? 6 : 4; |
---|
128 | case '.': return 6; |
---|
129 | default: ali_warning("Replaced unknown base by 'n'"); FALLTHROUGH; |
---|
130 | case 'n': case 'N': return 5; |
---|
131 | } |
---|
132 | } |
---|
133 | |
---|
134 | inline char ali_number_to_base(unsigned char n) { |
---|
135 | switch (n) { |
---|
136 | case 0: return 'a'; |
---|
137 | case 1: return 'c'; |
---|
138 | case 2: return 'g'; |
---|
139 | case 3: return 'u'; |
---|
140 | case 4: return '-'; |
---|
141 | case 5: return 'n'; |
---|
142 | default: |
---|
143 | ali_warning("Replaced unknown number by '.'"); |
---|
144 | printf("received %d\n", n); |
---|
145 | ali_fatal_error("STOP"); |
---|
146 | case 6: return '.'; |
---|
147 | } |
---|
148 | } |
---|
149 | |
---|
150 | inline void ali_string_to_sequence(char *sequence) { |
---|
151 | for (; *sequence != '\0' && !ali_is_base(*sequence); sequence++) |
---|
152 | *sequence = (char) ali_base_to_number(*sequence, 1); |
---|
153 | |
---|
154 | for (; *sequence != '\0'; sequence++) |
---|
155 | *sequence = (char) ali_base_to_number(*sequence); |
---|
156 | } |
---|
157 | |
---|
158 | inline void ali_sequence_to_string(unsigned char *sequence, unsigned long length) { |
---|
159 | for (; length-- > 0; sequence++) |
---|
160 | *sequence = (unsigned char) ali_number_to_base(*sequence); |
---|
161 | } |
---|
162 | |
---|
163 | inline void ali_sequence_to_postree_sequence(unsigned char *sequence, unsigned long length) { |
---|
164 | for (; length-- > 0; sequence++) |
---|
165 | if (ali_is_base(*sequence)) { |
---|
166 | if (ali_is_nbase(*sequence)) |
---|
167 | *sequence = 4; |
---|
168 | } |
---|
169 | else { |
---|
170 | ali_warning("Unknowen symbol replaced by 'n'"); |
---|
171 | *sequence = 4; |
---|
172 | } |
---|
173 | } |
---|
174 | |
---|
// Print the first 'length' elements of 'sequence' to stdout as decimal
// numbers, each followed by a single space (no trailing newline).
inline void ali_print_sequence(unsigned char *sequence, unsigned long length) {
    for (unsigned long i = 0; i < length; ++i) {
        printf("%d ", sequence[i]);
    }
}
---|
179 | |
---|
180 | #else |
---|
181 | #error ali_misc.hxx included twice |
---|
182 | #endif // ALI_MISC_HXX |
---|