1 | // ================================================================= // |
---|
2 | // // |
---|
3 | // File : macke.h // |
---|
4 | // Purpose : // |
---|
5 | // // |
---|
6 | // ================================================================= // |
---|
7 | |
---|
8 | #ifndef MACKE_H |
---|
9 | #define MACKE_H |
---|
10 | |
---|
11 | #ifndef READER_H |
---|
12 | #include "reader.h" |
---|
13 | #endif |
---|
14 | #ifndef ARB_STRING_H |
---|
15 | #include <arb_string.h> |
---|
16 | #endif |
---|
17 | |
---|
18 | class Macke FINAL_TYPE : public InputFormat { // derived from a Noncopyable |
---|
19 | int numofrem; // num. of remarks |
---|
20 | char **remarks; // remarks |
---|
21 | int allocated; |
---|
22 | |
---|
23 | char *create_id() const OVERRIDE { return ARB_strdup(seqabbr); } |
---|
24 | |
---|
25 | void add_remark_nocopy(char *rem) { |
---|
26 | if (numofrem >= allocated) { |
---|
27 | allocated = allocated*1.5+10; |
---|
28 | ARB_realloc(remarks, allocated); |
---|
29 | } |
---|
30 | ca_assert(allocated>numofrem); |
---|
31 | remarks[numofrem++] = rem; |
---|
32 | } |
---|
33 | |
---|
34 | void add_remark_if_content(const char *key, const char *Str) { |
---|
35 | if (has_content(Str)) add_remark(key, Str); |
---|
36 | } |
---|
37 | void add_remarks_from(const GenbankRef& ref); |
---|
38 | void add_remarks_from(const RDP_comments& comments); |
---|
39 | void add_remarks_from(const OrgInfo& orginf); |
---|
40 | void add_remarks_from(const SeqInfo& seqinf); |
---|
41 | void add_35end_remark(char end35, char yn); |
---|
42 | |
---|
43 | static bool macke_is_continued_remark(const char *str) { |
---|
44 | /* If there is 3 blanks at the beginning of the line, it is continued line. |
---|
45 | * |
---|
46 | * The comment above is lying: |
---|
47 | * The function always only tested for 2 spaces |
---|
48 | * and the converter only produced 2 spaces. |
---|
49 | */ |
---|
50 | return strncmp(str, ": ", 3) == 0; |
---|
51 | } |
---|
52 | |
---|
53 | public: |
---|
54 | |
---|
55 | char *seqabbr; // sequence abbrev. |
---|
56 | char *name; // sequence full name |
---|
57 | int rna_or_dna; // rna or dna |
---|
58 | char *atcc; // CC# of sequence |
---|
59 | char *rna; // Sequence methods, old version entry |
---|
60 | char *date; // date of modification |
---|
61 | char *nbk; // GenBank information -old version entry |
---|
62 | char *acs; // accession number |
---|
63 | char *author; // author of the first reference |
---|
64 | char *journal; // journal of the first reference |
---|
65 | char *title; // title of the first reference |
---|
66 | char *who; // who key in the data |
---|
67 | char *strain; // strain |
---|
68 | char *subspecies; // subspecies |
---|
69 | |
---|
70 | Macke() { |
---|
71 | seqabbr = ARB_strdup(""); |
---|
72 | name = no_content(); |
---|
73 | atcc = no_content(); |
---|
74 | rna = no_content(); |
---|
75 | date = no_content(); |
---|
76 | nbk = no_content(); |
---|
77 | acs = no_content(); |
---|
78 | who = no_content(); |
---|
79 | rna_or_dna = 'd'; // @@@ why? (never is changed anywhere) |
---|
80 | journal = no_content(); |
---|
81 | title = no_content(); |
---|
82 | author = no_content(); |
---|
83 | strain = no_content(); |
---|
84 | subspecies = no_content(); |
---|
85 | |
---|
86 | numofrem = 0; |
---|
87 | remarks = NULL; |
---|
88 | allocated = 0; |
---|
89 | } |
---|
90 | ~Macke() OVERRIDE { |
---|
91 | freenull(seqabbr); |
---|
92 | freenull(name); |
---|
93 | freenull(atcc); |
---|
94 | freenull(rna); |
---|
95 | freenull(date); |
---|
96 | freenull(nbk); |
---|
97 | freenull(acs); |
---|
98 | freenull(who); |
---|
99 | for (int indi = 0; indi < numofrem; indi++) { |
---|
100 | freenull(remarks[indi]); |
---|
101 | } |
---|
102 | freenull(remarks); |
---|
103 | freenull(journal); |
---|
104 | freenull(title); |
---|
105 | freenull(author); |
---|
106 | freenull(strain); |
---|
107 | freenull(subspecies); |
---|
108 | } |
---|
109 | |
---|
110 | void add_remark(const char *rem) { add_remark_nocopy(nulldup(rem)); } |
---|
111 | void add_remark(const char *key, const char *Str) { |
---|
112 | char *rem = nulldup(key); |
---|
113 | Append(rem, Str); |
---|
114 | add_remark_nocopy(rem); |
---|
115 | } |
---|
116 | |
---|
117 | int get_rem_count() const { return numofrem; } |
---|
118 | const char *get_rem(int idx) const { |
---|
119 | ca_assert(idx<numofrem); |
---|
120 | return remarks[idx]; |
---|
121 | } |
---|
122 | char *copy_multi_rem(int& idx, int offset) const { |
---|
123 | // create a heapcopy of a multiline-remark. |
---|
124 | // increments 'idx' to the last line. |
---|
125 | char *rem = nulldup(remarks[idx]+offset); |
---|
126 | while (++idx<numofrem && macke_is_continued_remark(remarks[idx])) { |
---|
127 | skip_eolnl_and_append_spaced(rem, remarks[idx]+3); |
---|
128 | } |
---|
129 | --idx; |
---|
130 | return rem; |
---|
131 | } |
---|
132 | void add_remarks_from(const GenBank& gbk); |
---|
133 | |
---|
134 | // InputFormat interface |
---|
135 | void reinit() OVERRIDE { INPLACE_RECONSTRUCT(Macke, this); } |
---|
136 | const char *get_id() const { return seqabbr; } |
---|
137 | Format format() const OVERRIDE { return MACKE; } |
---|
138 | }; |
---|
139 | |
---|
140 | // -------------------- |
---|
141 | // MackeReader |
---|
142 | |
---|
143 | class MackeReader : public FormatReader, virtual Noncopyable { |
---|
144 | Macke data; |
---|
145 | |
---|
146 | char *inName; |
---|
147 | char*& seqabbr; // = Macke.seqabbr |
---|
148 | char *dummy; |
---|
149 | |
---|
150 | Reader *r1, *r2, *r3; |
---|
151 | Reader **using_reader; // r1, r2 or r3 |
---|
152 | |
---|
153 | void usingReader(Reader*& r) { |
---|
154 | using_reader = &r; |
---|
155 | } |
---|
156 | |
---|
157 | bool macke_in(Macke& macke); |
---|
158 | |
---|
159 | void abort() { |
---|
160 | r1->abort(); |
---|
161 | r2->abort(); |
---|
162 | r3->abort(); |
---|
163 | } |
---|
164 | bool ok() { |
---|
165 | return r1->ok() && r2->ok() && r3->ok(); |
---|
166 | } |
---|
167 | |
---|
168 | bool read_seq_data(Seq& seq) { |
---|
169 | ca_assert(seqabbr); |
---|
170 | usingReader(r2); |
---|
171 | macke_origin(seq, seqabbr, *r2); |
---|
172 | if (seq.is_empty()) abort(); |
---|
173 | return r2->ok(); |
---|
174 | } |
---|
175 | |
---|
176 | void read_to_start(); |
---|
177 | |
---|
178 | public: |
---|
179 | |
---|
180 | MackeReader(const char *inName_); |
---|
181 | ~MackeReader() OVERRIDE; |
---|
182 | |
---|
183 | bool read_one_entry(Seq& seq) OVERRIDE __ATTR__USERESULT; |
---|
184 | bool failed() const OVERRIDE { return r1->failed() || r2->failed() || r3->failed(); } |
---|
185 | void ignore_rest_of_file() OVERRIDE { r1->ignore_rest_of_file(); r2->ignore_rest_of_file(); r3->ignore_rest_of_file(); } |
---|
186 | InputFormat& get_data() OVERRIDE { return data; } |
---|
187 | void rewind() OVERRIDE { |
---|
188 | r1->rewind(); |
---|
189 | r2->rewind(); |
---|
190 | r3->rewind(); |
---|
191 | read_to_start(); |
---|
192 | } |
---|
193 | }; |
---|
194 | |
---|
195 | CONSTEXPR_INLINE bool isMackeHeader(const char *line) { return line[0] == '#'; } |
---|
196 | CONSTEXPR_INLINE bool isMackeSeqHeader(const char *line) { return line[0] == '#' && line[1] == '='; } |
---|
197 | CONSTEXPR_INLINE bool isMackeSeqInfo(const char *line) { return line[0] == '#' && line[1] == ':'; } |
---|
198 | |
---|
199 | CONSTEXPR_INLINE bool isMackeNonSeq(const char *line) { return line[0] == '#' || line[0] == '\n' || line[0] == ' '; } |
---|
200 | |
---|
// Function object that negates a line predicate:
// Not(pred)(line) yields !pred(line).
class Not {
    typedef bool (*LinePredicate)(const char *line);

    LinePredicate wrapped; // predicate whose result gets inverted

public:
    Not(LinePredicate p_)
        : wrapped(p_)
    {}

    bool operator()(const char *line) const {
        const bool matched = wrapped(line);
        return !matched;
    }
};
---|
208 | |
---|
209 | #else |
---|
210 | #error macke.h included twice |
---|
211 | #endif // MACKE_H |
---|