1 | // ================================================================ // |
---|
2 | // // |
---|
3 | // File : Importer.h // |
---|
4 | // Purpose : // |
---|
5 | // // |
---|
6 | // Coded by Ralf Westram (coder@reallysoft.de) in November 2006 // |
---|
7 | // Institute of Microbiology (Technical University Munich) // |
---|
8 | // http://www.arb-home.de/ // |
---|
9 | // // |
---|
10 | // ================================================================ // |
---|
11 | #ifndef IMPORTER_H |
---|
12 | #define IMPORTER_H |
---|
13 | |
---|
14 | #ifndef BUFFEREDFILEREADER_H |
---|
15 | #include <BufferedFileReader.h> |
---|
16 | #endif |
---|
17 | #ifndef SMARTPTR_H |
---|
18 | #include <smartptr.h> |
---|
19 | #endif |
---|
20 | #ifndef METATAG_H |
---|
21 | #include "MetaTag.h" |
---|
22 | #endif |
---|
23 | |
---|
24 | class DBwriter; |
---|
25 | class Feature; |
---|
26 | |
---|
// Classification of a single feature-table line.
// Every plain type occupies exactly one bit, so several types can be OR-ed
// into a mask (see the meta types at the end of the list).
enum FeatureLineType {
    FL_START                  = 1 << 0, // start of feature (e.g. 'CDS 352120..353193'). starts at offset 5

    // all types below start at offset 21 (or higher):
    FL_QUALIFIER              = 1 << 1, // start of qualifier (e.g. '/codon_start=1')
    FL_QUALIFIER_NODATA       = 1 << 2, // start of qualifier w/o data (e.g. '/pseudo')
    FL_QUALIFIER_QUOTED       = 1 << 3, // start of qualifier with quoted data (e.g. '/product="phosphate"')
    FL_QUALIFIER_QUOTE_OPENED = 1 << 4, // start of qualifier with quoted data, closing quote missing in the example (e.g. '/product="phosphate')
    FL_CONTINUED_QUOTE_CLOSED = 1 << 5, // something terminated by a quote ('"')
    FL_CONTINUED              = 1 << 6, // other

    // meta types (masks built from the single-bit types above):
    FL_META_QUALIFIER = FL_QUALIFIER | FL_QUALIFIER_NODATA | FL_QUALIFIER_QUOTED | FL_QUALIFIER_QUOTE_OPENED,
    FL_META_CONTINUED = FL_CONTINUED_QUOTE_CLOSED | FL_CONTINUED
};
---|
41 | |
---|
42 | class FeatureLine { |
---|
43 | void interpret_as_continued_line(); |
---|
44 | public: |
---|
45 | string name; // feature or qualifier name (only valid for FL_START, FL_QUALIFIER...) |
---|
46 | string rest; // rest of line (behind '=' for FL_QUALIFIER..., not for FL_QUALIFIER_NODATA) |
---|
47 | string orgLine; |
---|
48 | FeatureLineType type; |
---|
49 | |
---|
50 | FeatureLine(const string& line); |
---|
51 | bool reinterpret_as_continued_line(); |
---|
52 | }; |
---|
53 | |
---|
54 | typedef SmartPtr<Feature> FeaturePtr; |
---|
55 | typedef SmartPtr<FeatureLine> FeatureLinePtr; |
---|
56 | typedef std::vector<FeatureLinePtr> FeatureLines; |
---|
57 | |
---|
58 | class Importer : virtual Noncopyable { |
---|
59 | protected: |
---|
60 | DBwriter& db_writer; |
---|
61 | LineReader& flatfile; |
---|
62 | MetaTagTranslator tagTranslator; |
---|
63 | FeatureLines pushedFeatureLines; // pushed back feature lines |
---|
64 | stringVector warnings; |
---|
65 | long expectedSeqLength; // length read from LOCUS or ID line ( = 0 -> no length info found) |
---|
66 | |
---|
67 | void expectLine(string& line) { if (!flatfile.getLine(line)) throw flatfile.lineError("Unexpected EOF"); } |
---|
68 | const MetaTag *findTag(const string& tag) { return tagTranslator.get(tag); } |
---|
69 | |
---|
70 | virtual bool readFeatureTableLine(string& line) = 0; |
---|
71 | |
---|
72 | FeatureLinePtr getFeatureTableLine(); |
---|
73 | void backFeatureTableLine(FeatureLinePtr& fline) { pushedFeatureLines.push_back(fline); } |
---|
74 | |
---|
75 | FeatureLinePtr getUnwrappedFeatureTableLine(); |
---|
76 | |
---|
77 | FeaturePtr parseFeature(); |
---|
78 | void parseFeatureTable(); |
---|
79 | |
---|
80 | virtual void import_section() = 0; |
---|
81 | |
---|
82 | void show_warnings(const string& import_of_what); |
---|
83 | |
---|
84 | void check_base_counters(const SequenceBuffer& seqData, const BaseCounter *headerCount); |
---|
85 | |
---|
86 | public: |
---|
87 | Importer(LineReader& Flatfile, DBwriter& DB_writer, const MetaTag *meta_description); |
---|
88 | virtual ~Importer() {} |
---|
89 | |
---|
90 | void import(); |
---|
91 | void warning(const char *msg); // add a warning |
---|
92 | }; |
---|
93 | |
---|
94 | |
---|
95 | class GenebankImporter : public Importer { |
---|
96 | void import_section() OVERRIDE; |
---|
97 | bool readFeatureTableLine(string& line) OVERRIDE; |
---|
98 | void parseSequence(const string& tag, const string& headerline); |
---|
99 | |
---|
100 | public: |
---|
101 | GenebankImporter(LineReader& Flatfile, DBwriter& DB_writer); |
---|
102 | ~GenebankImporter() OVERRIDE {} |
---|
103 | |
---|
104 | }; |
---|
105 | |
---|
106 | |
---|
107 | class EmblImporter : public Importer { |
---|
108 | void import_section() OVERRIDE; |
---|
109 | bool readFeatureTableLine(string& line) OVERRIDE; |
---|
110 | void parseSequence(const string& headerline); |
---|
111 | |
---|
112 | public: |
---|
113 | EmblImporter(LineReader& Flatfile, DBwriter& DB_writer); |
---|
114 | ~EmblImporter() OVERRIDE {} |
---|
115 | }; |
---|
116 | |
---|
117 | |
---|
118 | #else |
---|
119 | #error Importer.h included twice |
---|
120 | #endif // IMPORTER_H |
---|
121 | |
---|