source: branches/help/GENOM_IMPORT/Importer.h

Last change on this file was 18475, checked in by westram, 4 years ago
  • DRY base counter test (and empty data warning).
File size: 4.2 KB
Line 
// ================================================================ //
//                                                                  //
//   File      : Importer.h                                         //
//   Purpose   :                                                    //
//                                                                  //
//   Coded by Ralf Westram (coder@reallysoft.de) in November 2006   //
//   Institute of Microbiology (Technical University Munich)        //
//   http://www.arb-home.de/                                        //
//                                                                  //
// ================================================================ //
11#ifndef IMPORTER_H
12#define IMPORTER_H
13
14#ifndef BUFFEREDFILEREADER_H
15#include <BufferedFileReader.h>
16#endif
17#ifndef SMARTPTR_H
18#include <smartptr.h>
19#endif
20#ifndef METATAG_H
21#include "MetaTag.h"
22#endif
23
24class DBwriter;
25class Feature;
26
// Classification of a single line of a feature table.
//
// Each basic type occupies its own bit, so several types can be OR-ed
// together (see the FL_META_... masks at the end) and tested with '&'.
enum FeatureLineType {
    FL_START                  = 1,  // start of feature (e.g. 'CDS             352120..353193'). starts at offset 5
    // all types below start at offset 21 (or higher):
    FL_QUALIFIER              = 2,  // start of qualifier (e.g. '/codon_start=1')
    FL_QUALIFIER_NODATA       = 4,  // start of qualifier w/o data (e.g. '/pseudo')
    FL_QUALIFIER_QUOTED       = 8,  // start of qualifier with quoted data (e.g. '/product="phosphate"')
    FL_QUALIFIER_QUOTE_OPENED = 16, // start of qualifier with quoted data, quote not closed on this line (e.g. '/product="phosphate')
    FL_CONTINUED_QUOTE_CLOSED = 32, // something terminated by a quote ('"')
    FL_CONTINUED              = 64, // other

    // meta types (bit masks combining the basic types above):
    FL_META_QUALIFIER = (FL_QUALIFIER|FL_QUALIFIER_NODATA|FL_QUALIFIER_QUOTED|FL_QUALIFIER_QUOTE_OPENED), // any line starting a qualifier
    FL_META_CONTINUED = (FL_CONTINUED_QUOTE_CLOSED|FL_CONTINUED),                                         // any continuation line
};
41
42class FeatureLine {
43    void interpret_as_continued_line();
44public:
45    string          name;       // feature or qualifier name (only valid for FL_START, FL_QUALIFIER...)
46    string          rest;       // rest of line (behind '=' for FL_QUALIFIER..., not for FL_QUALIFIER_NODATA)
47    string          orgLine;
48    FeatureLineType type;
49
50    FeatureLine(const string& line);
51    bool reinterpret_as_continued_line();
52};
53
54typedef SmartPtr<Feature>           FeaturePtr;
55typedef SmartPtr<FeatureLine>       FeatureLinePtr;
56typedef std::vector<FeatureLinePtr> FeatureLines;
57
58class Importer : virtual Noncopyable {
59protected:
60    DBwriter&         db_writer;
61    LineReader&       flatfile;
62    MetaTagTranslator tagTranslator;
63    FeatureLines      pushedFeatureLines; // pushed back feature lines
64    stringVector      warnings;
65    long              expectedSeqLength; // length read from LOCUS or ID line ( = 0 -> no length info found)
66
67    void expectLine(string& line) { if (!flatfile.getLine(line)) throw flatfile.lineError("Unexpected EOF"); }
68    const MetaTag *findTag(const string& tag) { return tagTranslator.get(tag); }
69
70    virtual bool readFeatureTableLine(string& line) = 0;
71
72    FeatureLinePtr getFeatureTableLine();
73    void           backFeatureTableLine(FeatureLinePtr& fline) { pushedFeatureLines.push_back(fline); }
74
75    FeatureLinePtr getUnwrappedFeatureTableLine();
76
77    FeaturePtr parseFeature();
78    void       parseFeatureTable();
79
80    virtual void import_section() = 0;
81
82    void show_warnings(const string& import_of_what);
83
84    void check_base_counters(const SequenceBuffer& seqData, const BaseCounter *headerCount);
85
86public:
87    Importer(LineReader& Flatfile, DBwriter& DB_writer, const MetaTag *meta_description);
88    virtual ~Importer() {}
89
90    void import();
91    void warning(const char *msg); // add a warning
92};
93
94
95class GenebankImporter : public Importer {
96    void import_section() OVERRIDE;
97    bool readFeatureTableLine(string& line) OVERRIDE;
98    void parseSequence(const string& tag, const string& headerline);
99
100public:
101    GenebankImporter(LineReader& Flatfile, DBwriter& DB_writer);
102    ~GenebankImporter() OVERRIDE {}
103
104};
105
106
107class EmblImporter : public Importer {
108    void import_section() OVERRIDE;
109    bool readFeatureTableLine(string& line) OVERRIDE;
110    void parseSequence(const string& headerline);
111
112public:
113    EmblImporter(LineReader& Flatfile, DBwriter& DB_writer);
114    ~EmblImporter() OVERRIDE {}
115};
116
117
118#else
119#error Importer.h included twice
120#endif // IMPORTER_H
121
Note: See TracBrowser for help on using the repository browser.