source: branches/profile/GENOM/GEN_gene.cxx

Last change on this file was 12815, checked in by epruesse, 5 years ago

fix common typos reported by lintian

  • splitted → split (irregular verb)
  • ressources → resources
  • explicitely → explicitly
  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 8.8 KB
Line 
1// =============================================================== //
2//                                                                 //
3//   File      : GEN_gene.cxx                                      //
4//   Purpose   :                                                   //
5//                                                                 //
6//   Coded by Ralf Westram (coder@reallysoft.de) in 2001           //
7//   Institute of Microbiology (Technical University Munich)       //
8//   http://www.arb-home.de/                                       //
9//                                                                 //
10// =============================================================== //
11
12#include "GEN_gene.hxx"
13#include "GEN_local.hxx"
14#include "GEN_nds.hxx"
15
16#include <aw_awar.hxx>
17#include <aw_msg.hxx>
18#include <aw_root.hxx>
19#include <arbdbt.h>
20#include <adGene.h>
21
22// Standard fields of a gb_gene entry:
23// -----------------------------------
24// name                  = short name of gene (unique in one species)
25// type                  = type of gene (e.g. 'gene', 'CDS', 'tRNA', 'misc_feature')
26// pos_start             = start-position(s) of gene(-parts); range is 1...genomeLength
27// pos_stop              = end-position(s)   of gene(-parts); range is 1...genomeLength
28// pos_certain           = contains pairs of chars (1. for start-pos, 2. for end-pos)
29//
30//                         '=' means 'pos is exact'
31//                         '<' means 'pos may be lower'
32//                         '>' means 'pos may be higher'
33//                         '+' means 'pos is directly behind'
34//                         '-' means 'pos is directly before'
35//
36//                         if pos_certain is missing -> like '=='
37//
38// pos_complement        = 1 -> CDS is on opposite strand
39
40// fields for split genes:
41// --------------------------
42// pos_joined         = xxx -> gene consists of abs(xxx) parts (if missing xxx == 1 is assumed)
43//
44// if abs(xxx)>1, the gene consists of several parts.
45// In that case the fields 'pos_start', 'pos_stop',  'pos_certain' and 'pos_complement'
46// contain multiple comma-separated values - one for each joined part.
47//
48// if xxx is < -1, then joining the parts does not make sense (or nothing is known about it)
49//
50// Note: Please do not access these fields manually - use GEN_read_position!
51
52// other fields added by importer:
53// -------------------------------
54//
55// During import ARB tries to reproduce existing translations.
56// If it succeeds, it removes the translation.
57//
58// ARB_translation      = written if ARB translation differs from original translation
59//                        (original translation is not deleted in this case)
60// ARB_translation_note = additional info about failed translation
61// ARB_translation_rm   = 1 -> translation was reproduced and deleted
62//
63// if a gene with type 'gene' exists and another gene with different type, but
64// identical location exists as well, ARB sets ARB_display_hidden to 1 for
65// the 'gene'. For the other gene with diff. type ARB sets a reference to the
66// hidden 'gene':
67//
68// ARB_is_gene          = shortname of related hidden gene
69
70
71// fields used for display:
72// ------------------------
73// ARB_display_hidden = 1 -> do not display this gene (depends on AWAR_GENMAP_SHOW_HIDDEN too)
74// ARB_color          = color group
75
76
77
78// Old format standard fields of a gb_gene entry:
79// ----------------------------------------------
80// name          = short name of gene (unique in one species)
81// pos_begin     = start-position of gene
82// pos_end       = end-position of gene
83// pos_uncertain = contains 2 chars (1. for start-pos, 2. for end-pos); = means 'pos is exact'; < means 'pos may be lower'; > means 'pos may be higher'; missing -> like ==
84// complement    = 1 -> encoding from right to left
85//
86// fields for split genes:
87// --------------------------
88// pos_joined               = xxx -> gene consists of xxx parts (may not exist if xxx == 1)
89// pos_beginxxx, pos_endxxx = start-/end-positions for parts 2...n
90// pos_uncertainxxx         = like above for parts 2...n
91//
92
93using namespace std;
94
95static const GEN_position *loadPositions4gene(GBDATA *gb_gene) {
96    static GEN_position *loaded_position     = 0;
97    static GBDATA       *positionLoaded4gene = 0;
98
99    if (positionLoaded4gene != gb_gene) {
100        if (loaded_position) {
101            GEN_free_position(loaded_position);
102            loaded_position     = 0;
103            positionLoaded4gene = 0;
104        }
105
106        if (gb_gene) {
107            loaded_position = GEN_read_position(gb_gene);
108            if (loaded_position) positionLoaded4gene = gb_gene;
109        }
110    }
111    return loaded_position;
112}
113
114void GEN_gene::init(GBDATA *gb_gene_, GEN_root *root_) {
115    gb_gene = gb_gene_;
116    root    = root_;
117    name    = GBT_read_name(gb_gene);
118
119    GBDATA *gbd = GB_entry(gb_gene, "complement");
120    complement  = gbd ? GB_read_byte(gbd) == 1 : false;
121}
122
123void GEN_gene::load_location(int part, const GEN_position *location) {
124    gen_assert(part >= 1);
125    gen_assert(part <= location->parts);
126
127    pos1       = location->start_pos[part-1];
128    pos2       = location->stop_pos[part-1];
129    complement = location->complement[part-1];
130
131    gen_assert(pos1 <= pos2);
132}
133
134GEN_gene::GEN_gene(GBDATA *gb_gene_, GEN_root *root_, const GEN_position *location) {
135    init(gb_gene_, root_);
136    load_location(1, location);
137    nodeInfo = GEN_make_node_text_nds(root->GbMain(), gb_gene, 0);
138}
139
140GEN_gene::GEN_gene(GBDATA *gb_gene_, GEN_root *root_, const GEN_position *location, int partNumber) {
141    //  partNumber 1..n which part of a split gene
142    //  maxParts   1..n of how many parts consists this gene?
143
144    init(gb_gene_, root_);
145    load_location(partNumber, location);
146
147    {
148        char buffer[30];
149        sprintf(buffer, " (%i/%i)", partNumber, location->parts);
150        nodeInfo = name+buffer;
151    }
152}
153
154void GEN_gene::reinit_NDS() const {
155    nodeInfo = GEN_make_node_text_nds(root->GbMain(), gb_gene, 0);
156}
157
158// ------------------
159//      GEN_root
160
161GEN_root::GEN_root(const char *organism_name_, const char *gene_name_, GBDATA *gb_main_, AW_root *aw_root, GEN_graphic *gen_graphic_)
162    : gb_main(gb_main_)
163    , gen_graphic(gen_graphic_)
164    , organism_name(organism_name_)
165    , gene_name(gene_name_)
166    , error_reason("")
167    , length(-1)
168    , gb_gene_data(0)
169{
170    GB_transaction  ta(gb_main);
171    GBDATA         *gb_organism = GBT_find_species(gb_main, organism_name.c_str());
172
173    if (!gb_organism) {
174        error_reason = strdup("Please select a species.");
175    }
176    else {
177        GBDATA *gb_data = GBT_find_sequence(gb_organism, GENOM_ALIGNMENT);
178        if (!gb_data) {
179            error_reason = GBS_global_string_copy("'%s' has no data in '%s'", organism_name.c_str(), GENOM_ALIGNMENT);
180        }
181        else {
182            length = GB_read_count(gb_data);
183
184            gb_gene_data    = GEN_find_gene_data(gb_organism);
185            GBDATA *gb_gene = gb_gene_data ? GEN_first_gene_rel_gene_data(gb_gene_data) : 0;
186
187            if (!gb_gene) {
188                error_reason = GBS_global_string("Species '%s' has no gene-information", organism_name.c_str());
189            }
190            else {
191                bool show_hidden = aw_root->awar(AWAR_GENMAP_SHOW_HIDDEN)->read_int() != 0;
192
193                while (gb_gene) {
194                    bool show_this = show_hidden;
195
196                    if (!show_this) {
197                        GBDATA *gbd = GB_entry(gb_gene, ARB_HIDDEN);
198
199                        if (!gbd || !GB_read_byte(gbd)) { // gene is not hidden
200                            show_this = true;
201                        }
202                    }
203
204                    if (show_this) {
205                        const GEN_position *location = loadPositions4gene(gb_gene);
206
207                        if (!location) {
208                            GB_ERROR  warning = GB_await_error();
209                            char     *id      = GEN_global_gene_identifier(gb_gene, gb_organism);
210                            aw_message(GBS_global_string("Can't load gene '%s':\nReason: %s", id, warning));
211                            free(id);
212                        }
213                        else {
214                            int parts = location->parts;
215                            if (parts == 1) {
216                                gene_set.insert(GEN_gene(gb_gene, this, location));
217                            }
218                            else { // joined gene
219                                for (int p = 1; p <= parts; ++p) {
220                                    gene_set.insert(GEN_gene(gb_gene, this, location, p));
221                                }
222                            }
223                        }
224                    }
225                    gb_gene = GEN_next_gene(gb_gene);
226                }
227            }
228        }
229    }
230}
231
232void GEN_root::reinit_NDS() const {
233    GEN_iterator end  = gene_set.end();
234    for (GEN_iterator gene = gene_set.begin(); gene != end; ++gene) {
235        gene->reinit_NDS();
236    }
237}
Note: See TracBrowser for help on using the repository browser.