| 1 | // ================================================================= // |
|---|
| 2 | // // |
|---|
| 3 | // File : macke.h // |
|---|
| 4 | // Purpose : // |
|---|
| 5 | // // |
|---|
| 6 | // ================================================================= // |
|---|
| 7 | |
|---|
| 8 | #ifndef MACKE_H |
|---|
| 9 | #define MACKE_H |
|---|
| 10 | |
|---|
| 11 | #ifndef READER_H |
|---|
| 12 | #include "reader.h" |
|---|
| 13 | #endif |
|---|
| 14 | |
|---|
| 15 | class Macke : public InputFormat { // derived from a Noncopyable |
|---|
| 16 | int numofrem; // num. of remarks |
|---|
| 17 | char **remarks; // remarks |
|---|
| 18 | int allocated; |
|---|
| 19 | |
|---|
| 20 | char *create_id() const OVERRIDE { return strdup(seqabbr); } |
|---|
| 21 | |
|---|
| 22 | void add_remark_nocopy(char *rem) { |
|---|
| 23 | if (numofrem >= allocated) { |
|---|
| 24 | allocated = allocated*1.5+10; |
|---|
| 25 | remarks = (char**)Reallocspace(remarks, sizeof(*remarks)*allocated); |
|---|
| 26 | } |
|---|
| 27 | ca_assert(allocated>numofrem); |
|---|
| 28 | remarks[numofrem++] = rem; |
|---|
| 29 | } |
|---|
| 30 | |
|---|
| 31 | void add_remark_if_content(const char *key, const char *Str) { |
|---|
| 32 | if (has_content(Str)) add_remark(key, Str); |
|---|
| 33 | } |
|---|
| 34 | void add_remarks_from(const GenbankRef& ref); |
|---|
| 35 | void add_remarks_from(const RDP_comments& comments); |
|---|
| 36 | void add_remarks_from(const OrgInfo& orginf); |
|---|
| 37 | void add_remarks_from(const SeqInfo& seqinf); |
|---|
| 38 | void add_35end_remark(char end35, char yn); |
|---|
| 39 | |
|---|
| 40 | static bool macke_is_continued_remark(const char *str) { |
|---|
| 41 | /* If there is 3 blanks at the beginning of the line, it is continued line. |
|---|
| 42 | * |
|---|
| 43 | * The comment above is lying: |
|---|
| 44 | * The function always only tested for 2 spaces |
|---|
| 45 | * and the converter only produced 2 spaces. |
|---|
| 46 | */ |
|---|
| 47 | return strncmp(str, ": ", 3) == 0; |
|---|
| 48 | } |
|---|
| 49 | |
|---|
| 50 | public: |
|---|
| 51 | |
|---|
| 52 | char *seqabbr; // sequence abbrev. |
|---|
| 53 | char *name; // sequence full name |
|---|
| 54 | int rna_or_dna; // rna or dna |
|---|
| 55 | char *atcc; // CC# of sequence |
|---|
| 56 | char *rna; // Sequence methods, old version entry |
|---|
| 57 | char *date; // date of modification |
|---|
| 58 | char *nbk; // GenBank information -old version entry |
|---|
| 59 | char *acs; // accession number |
|---|
| 60 | char *author; // author of the first reference |
|---|
| 61 | char *journal; // journal of the first reference |
|---|
| 62 | char *title; // title of the first reference |
|---|
| 63 | char *who; // who key in the data |
|---|
| 64 | char *strain; // strain |
|---|
| 65 | char *subspecies; // subspecies |
|---|
| 66 | |
|---|
| 67 | Macke() { |
|---|
| 68 | seqabbr = strdup(""); |
|---|
| 69 | name = no_content(); |
|---|
| 70 | atcc = no_content(); |
|---|
| 71 | rna = no_content(); |
|---|
| 72 | date = no_content(); |
|---|
| 73 | nbk = no_content(); |
|---|
| 74 | acs = no_content(); |
|---|
| 75 | who = no_content(); |
|---|
| 76 | rna_or_dna = 'd'; // @@@ why? (never is changed anywhere) |
|---|
| 77 | journal = no_content(); |
|---|
| 78 | title = no_content(); |
|---|
| 79 | author = no_content(); |
|---|
| 80 | strain = no_content(); |
|---|
| 81 | subspecies = no_content(); |
|---|
| 82 | |
|---|
| 83 | numofrem = 0; |
|---|
| 84 | remarks = NULL; |
|---|
| 85 | allocated = 0; |
|---|
| 86 | } |
|---|
| 87 | virtual ~Macke() OVERRIDE { |
|---|
| 88 | freenull(seqabbr); |
|---|
| 89 | freenull(name); |
|---|
| 90 | freenull(atcc); |
|---|
| 91 | freenull(rna); |
|---|
| 92 | freenull(date); |
|---|
| 93 | freenull(nbk); |
|---|
| 94 | freenull(acs); |
|---|
| 95 | freenull(who); |
|---|
| 96 | for (int indi = 0; indi < numofrem; indi++) { |
|---|
| 97 | freenull(remarks[indi]); |
|---|
| 98 | } |
|---|
| 99 | freenull(remarks); |
|---|
| 100 | freenull(journal); |
|---|
| 101 | freenull(title); |
|---|
| 102 | freenull(author); |
|---|
| 103 | freenull(strain); |
|---|
| 104 | freenull(subspecies); |
|---|
| 105 | } |
|---|
| 106 | |
|---|
| 107 | void add_remark(const char *rem) { add_remark_nocopy(nulldup(rem)); } |
|---|
| 108 | void add_remark(const char *key, const char *Str) { |
|---|
| 109 | char *rem = nulldup(key); |
|---|
| 110 | Append(rem, Str); |
|---|
| 111 | add_remark_nocopy(rem); |
|---|
| 112 | } |
|---|
| 113 | |
|---|
| 114 | int get_rem_count() const { return numofrem; } |
|---|
| 115 | const char *get_rem(int idx) const { |
|---|
| 116 | ca_assert(idx<numofrem); |
|---|
| 117 | return remarks[idx]; |
|---|
| 118 | } |
|---|
| 119 | char *copy_multi_rem(int& idx, int offset) const { |
|---|
| 120 | // create a heapcopy of a multiline-remark. |
|---|
| 121 | // increments 'idx' to the last line. |
|---|
| 122 | char *rem = nulldup(remarks[idx]+offset); |
|---|
| 123 | while (++idx<numofrem && macke_is_continued_remark(remarks[idx])) { |
|---|
| 124 | skip_eolnl_and_append_spaced(rem, remarks[idx]+3); |
|---|
| 125 | } |
|---|
| 126 | --idx; |
|---|
| 127 | return rem; |
|---|
| 128 | } |
|---|
| 129 | void add_remarks_from(const GenBank& gbk); |
|---|
| 130 | |
|---|
| 131 | // InputFormat interface |
|---|
| 132 | void reinit() OVERRIDE { INPLACE_RECONSTRUCT(Macke, this); } |
|---|
| 133 | const char *get_id() const { return seqabbr; } |
|---|
| 134 | Format format() const OVERRIDE { return MACKE; } |
|---|
| 135 | }; |
|---|
| 136 | |
|---|
| 137 | // -------------------- |
|---|
| 138 | // MackeReader |
|---|
| 139 | |
|---|
| 140 | class MackeReader : public FormatReader, virtual Noncopyable { |
|---|
| 141 | Macke data; |
|---|
| 142 | |
|---|
| 143 | char *inName; |
|---|
| 144 | char*& seqabbr; // = Macke.seqabbr |
|---|
| 145 | char *dummy; |
|---|
| 146 | |
|---|
| 147 | Reader *r1, *r2, *r3; |
|---|
| 148 | Reader **using_reader; // r1, r2 or r3 |
|---|
| 149 | |
|---|
| 150 | void usingReader(Reader*& r) { |
|---|
| 151 | using_reader = &r; |
|---|
| 152 | } |
|---|
| 153 | |
|---|
| 154 | bool macke_in(Macke& macke); |
|---|
| 155 | |
|---|
| 156 | void abort() { |
|---|
| 157 | r1->abort(); |
|---|
| 158 | r2->abort(); |
|---|
| 159 | r3->abort(); |
|---|
| 160 | } |
|---|
| 161 | bool ok() { |
|---|
| 162 | return r1->ok() && r2->ok() && r3->ok(); |
|---|
| 163 | } |
|---|
| 164 | |
|---|
| 165 | bool read_seq_data(Seq& seq) { |
|---|
| 166 | ca_assert(seqabbr); |
|---|
| 167 | usingReader(r2); |
|---|
| 168 | macke_origin(seq, seqabbr, *r2); |
|---|
| 169 | if (seq.is_empty()) abort(); |
|---|
| 170 | return r2->ok(); |
|---|
| 171 | } |
|---|
| 172 | |
|---|
| 173 | void read_to_start(); |
|---|
| 174 | |
|---|
| 175 | public: |
|---|
| 176 | |
|---|
| 177 | MackeReader(const char *inName_); |
|---|
| 178 | ~MackeReader() OVERRIDE; |
|---|
| 179 | |
|---|
| 180 | bool read_one_entry(Seq& seq) OVERRIDE __ATTR__USERESULT; |
|---|
| 181 | bool failed() const OVERRIDE { return r1->failed() || r2->failed() || r3->failed(); } |
|---|
| 182 | void ignore_rest_of_file() OVERRIDE { r1->ignore_rest_of_file(); r2->ignore_rest_of_file(); r3->ignore_rest_of_file(); } |
|---|
| 183 | InputFormat& get_data() OVERRIDE { return data; } |
|---|
| 184 | void rewind() OVERRIDE { |
|---|
| 185 | r1->rewind(); |
|---|
| 186 | r2->rewind(); |
|---|
| 187 | r3->rewind(); |
|---|
| 188 | read_to_start(); |
|---|
| 189 | } |
|---|
| 190 | }; |
|---|
| 191 | |
|---|
// True if 'line' starts with '#'.
inline bool isMackeHeader(const char *line) {
    return *line == '#';
}
|---|
// True if 'line' starts with "#=".
inline bool isMackeSeqHeader(const char *line) {
    if (line[0] != '#') return false; // also covers the empty line; line[1] is not read then
    return line[1] == '=';
}
|---|
// True if 'line' starts with "#:".
inline bool isMackeSeqInfo(const char *line) {
    if (line[0] != '#') return false; // also covers the empty line; line[1] is not read then
    return line[1] == ':';
}
|---|
| 195 | |
|---|
// True if the first character of 'line' is '#', a newline or a blank.
inline bool isMackeNonSeq(const char *line) {
    switch (line[0]) {
        case '#':
        case '\n':
        case ' ':
            return true;
        default:
            return false;
    }
}
|---|
| 197 | |
|---|
// Function object that negates a line predicate:
// Not(pred)(line) is true exactly when pred(line) is false.
class Not {
    typedef bool (*LinePredicate)(const char *line);

    LinePredicate p; // the wrapped predicate

public:
    Not(LinePredicate p_)
        : p(p_)
    {}

    bool operator()(const char *line) const {
        const bool matched = p(line);
        return !matched;
    }
};
|---|
| 205 | |
|---|
| 206 | #else |
|---|
| 207 | #error macke.h included twice |
|---|
| 208 | #endif // MACKE_H |
|---|