source: branches/help/GENOM/GEN_gene.cxx

Last change on this file was 18159, checked in by westram, 5 years ago
  • full update from child 'fix' into 'trunk'
    • fix item name accessors (GBT_get_name + GBT_get_name_or_description)
    • add null2empty
  • adds: log:branches/fix@18140:18158
  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 8.8 KB
Line 
1// =============================================================== //
2//                                                                 //
3//   File      : GEN_gene.cxx                                      //
4//   Purpose   :                                                   //
5//                                                                 //
6//   Coded by Ralf Westram (coder@reallysoft.de) in 2001           //
7//   Institute of Microbiology (Technical University Munich)       //
8//   http://www.arb-home.de/                                       //
9//                                                                 //
10// =============================================================== //
11
12#include "GEN_gene.hxx"
13#include "GEN_local.hxx"
14#include "GEN_nds.hxx"
15
16#include <aw_awar.hxx>
17#include <aw_msg.hxx>
18#include <aw_root.hxx>
19#include <arbdbt.h>
20#include <adGene.h>
21
22// Standard fields of a gb_gene entry:
23// -----------------------------------
24// name                  = short name of gene (unique in one species)
25// type                  = type of gene (e.g. 'gene', 'CDS', 'tRNA', 'misc_feature')
26// pos_start             = start-position(s) of gene(-parts); range is 1...genomeLength
27// pos_stop              = end-position(s)   of gene(-parts); range is 1...genomeLength
28// pos_certain           = contains pairs of chars (1. for start-pos, 2. for end-pos)
29//
30//                         '=' means 'pos is exact'
31//                         '<' means 'pos may be lower'
32//                         '>' means 'pos may be higher'
33//                         '+' means 'pos is directly behind'
34//                         '-' means 'pos is directly before'
35//
36//                         if pos_certain is missing -> like '=='
37//
38// pos_complement        = 1 -> CDS is on opposite strand
39
40// fields for split genes:
41// --------------------------
42// pos_joined         = xxx -> gene consists of abs(xxx) parts (if missing xxx == 1 is assumed)
43//
44// if abs(xxx)>1, the gene consists of several parts.
45// In that case the fields 'pos_start', 'pos_stop',  'pos_certain' and 'pos_complement'
46// contain multiple comma-separated values - one for each joined part.
47//
48// if xxx is < -1, then joining the parts does not make sense (or nothing is known about it)
49//
50// Note: Please do not access these fields manually - use GEN_read_position!
51
52// other fields added by importer:
53// -------------------------------
54//
55// During import ARB tries to reproduce existing translations.
56// If it succeeds, it removes the translation.
57//
58// ARB_translation      = written if ARB translation differs from original translation
59//                        (original translation is not deleted in this case)
60// ARB_translation_note = additional info about failed translation
61// ARB_translation_rm   = 1 -> translation was reproduced and deleted
62//
63// if a gene with type 'gene' exists and another gene with different type, but
64// identical location exists as well, ARB sets ARB_display_hidden to 1 for
65// the 'gene'. For the other gene with diff. type ARB sets a reference to the
66// hidden 'gene':
67//
68// ARB_is_gene          = shortname of related hidden gene
69
70
71// fields used for display:
72// ------------------------
73// ARB_display_hidden = 1 -> do not display this gene (depends on AWAR_GENMAP_SHOW_HIDDEN too)
74// ARB_color          = color group
75
76
77
78// Old format standard fields of a gb_gene entry:
79// ----------------------------------------------
80// name          = short name of gene (unique in one species)
81// pos_begin     = start-position of gene
82// pos_end       = end-position of gene
83// pos_uncertain = contains 2 chars (1. for start-pos, 2. for end-pos); = means 'pos is exact'; < means 'pos may be lower'; > means 'pos may be higher'; missing -> like ==
84// complement    = 1 -> encoding from right to left
85//
86// fields for split genes:
87// --------------------------
88// pos_joined               = xxx -> gene consists of xxx parts (may not exist if xxx == 1)
89// pos_beginxxx, pos_endxxx = start-/end-positions for parts 2...n
90// pos_uncertainxxx         = like above for parts 2...n
91//
92
93using namespace std;
94
95static const GEN_position *loadPositions4gene(GBDATA *gb_gene) {
96    static GEN_position *loaded_position     = NULp;
97    static GBDATA       *positionLoaded4gene = NULp;
98
99    if (positionLoaded4gene != gb_gene) {
100        if (loaded_position) {
101            GEN_free_position(loaded_position);
102            loaded_position     = NULp;
103            positionLoaded4gene = NULp;
104        }
105
106        if (gb_gene) {
107            loaded_position = GEN_read_position(gb_gene);
108            if (loaded_position) positionLoaded4gene = gb_gene;
109        }
110    }
111    return loaded_position;
112}
113
114void GEN_gene::init() {
115    name = GBT_get_name_or_description(gb_gene);
116
117    GBDATA *gbd = GB_entry(gb_gene, "complement");
118    complement  = gbd ? GB_read_byte(gbd) == 1 : false;
119}
120
121void GEN_gene::load_location(int part, const GEN_position *location) {
122    gen_assert(part >= 1);
123    gen_assert(part <= location->parts);
124
125    pos1       = location->start_pos[part-1];
126    pos2       = location->stop_pos[part-1];
127    complement = location->complement[part-1];
128
129    gen_assert(pos1 <= pos2);
130}
131
132GEN_gene::GEN_gene(GBDATA *gb_gene_, GEN_root *root_, const GEN_position *location) :
133    gb_gene(gb_gene_),
134    root(root_)
135{
136    init();
137    load_location(1, location);
138    nodeInfo = GEN_make_node_text_nds(gb_gene, 0);
139}
140
141GEN_gene::GEN_gene(GBDATA *gb_gene_, GEN_root *root_, const GEN_position *location, int partNumber) :
142    gb_gene(gb_gene_),
143    root(root_)
144{
145    //  partNumber 1..n which part of a split gene
146    //  maxParts   1..n of how many parts consists this gene?
147
148    init();
149    load_location(partNumber, location);
150
151    {
152        char buffer[30];
153        sprintf(buffer, " (%i/%i)", partNumber, location->parts);
154        nodeInfo = name+buffer;
155    }
156}
157
158void GEN_gene::reinit_NDS() const {
159    nodeInfo = GEN_make_node_text_nds(gb_gene, 0);
160}
161
162// ------------------
163//      GEN_root
164
165GEN_root::GEN_root(const char *organism_name_, const char *gene_name_, GBDATA *gb_main_, AW_root *aw_root, GEN_graphic *gen_graphic_) :
166    gb_main(gb_main_),
167    gen_graphic(gen_graphic_),
168    organism_name(organism_name_),
169    gene_name(gene_name_),
170    error_reason(""),
171    length(-1),
172    gb_gene_data(NULp)
173{
174    GB_transaction  ta(gb_main);
175    GBDATA         *gb_organism = GBT_find_species(gb_main, organism_name.c_str());
176
177    if (!gb_organism) {
178        error_reason = ARB_strdup("Please select a species.");
179    }
180    else {
181        GBDATA *gb_data = GBT_find_sequence(gb_organism, GENOM_ALIGNMENT);
182        if (!gb_data) {
183            error_reason = GBS_global_string_copy("'%s' has no data in '%s'", organism_name.c_str(), GENOM_ALIGNMENT);
184        }
185        else {
186            length = GB_read_count(gb_data);
187
188            gb_gene_data    = GEN_find_gene_data(gb_organism);
189            GBDATA *gb_gene = gb_gene_data ? GEN_first_gene_rel_gene_data(gb_gene_data) : NULp;
190
191            if (!gb_gene) {
192                error_reason = GBS_global_string("Species '%s' has no gene-information", organism_name.c_str());
193            }
194            else {
195                bool show_hidden = aw_root->awar(AWAR_GENMAP_SHOW_HIDDEN)->read_int() != 0;
196
197                while (gb_gene) {
198                    bool show_this = show_hidden;
199
200                    if (!show_this) {
201                        GBDATA *gbd = GB_entry(gb_gene, ARB_HIDDEN);
202
203                        if (!gbd || !GB_read_byte(gbd)) { // gene is not hidden
204                            show_this = true;
205                        }
206                    }
207
208                    if (show_this) {
209                        const GEN_position *location = loadPositions4gene(gb_gene);
210
211                        if (!location) {
212                            GB_ERROR  warning = GB_await_error();
213                            char     *id      = GEN_global_gene_identifier(gb_gene, gb_organism);
214                            aw_message(GBS_global_string("Can't load gene '%s':\nReason: %s", id, warning));
215                            free(id);
216                        }
217                        else {
218                            int parts = location->parts;
219                            if (parts == 1) {
220                                gene_set.insert(GEN_gene(gb_gene, this, location));
221                            }
222                            else { // joined gene
223                                for (int p = 1; p <= parts; ++p) {
224                                    gene_set.insert(GEN_gene(gb_gene, this, location, p));
225                                }
226                            }
227                        }
228                    }
229                    gb_gene = GEN_next_gene(gb_gene);
230                }
231            }
232        }
233    }
234}
235
236void GEN_root::reinit_NDS() const {
237    GEN_iterator end  = gene_set.end();
238    for (GEN_iterator gene = gene_set.begin(); gene != end; ++gene) {
239        gene->reinit_NDS();
240    }
241}
Note: See TracBrowser for help on using the repository browser.