1 | #Please insert up references in the next lines (line starts with keyword UP) |
---|
2 | UP arb.hlp |
---|
3 | UP glossary.hlp |
---|
4 | |
---|
5 | #Please insert subtopic references (line starts with keyword SUB) |
---|
6 | #SUB subtopic.hlp |
---|
7 | |
---|
8 | # Hypertext links in helptext can be added like this: LINK{ref.hlp|http://add|bla@domain} |
---|
9 | |
---|
10 | #************* Title of helpfile !! and start of real helpfile ******** |
---|
11 | TITLE ARB: Database |
---|
12 | |
---|
13 | OCCURRENCE ARB_NT |
---|
14 | |
---|
15 | DESCRIPTION |
---|
16 | A central database of sequences and |
---|
17 | additional information (taken from public databases or supplied |
---|
18 | by the user) is stored in a binary or ASCII file (*.arb). |
---|
19 | (Future releases will also support archive and delta files.) |
---|
20 | The database reader auto-detects binary or ASCII mode. |
---|
21 | Brief summary of the pros and cons of the different file types: |
---|
22 | |
---|
23 | binary with fast load file: |
---|
24 | |
---|
25 | (+) very fast |
---|
26 | (+) runs on slow and old computers |
---|
27 | (-) needs a lot of hard disk space |
---|
28 | => for normal operation on old machines |
---|
29 | |
---|
30 | binary: |
---|
31 | |
---|
32 | (+) very fast |
---|
33 | (+) small (compression rate: 60%-95%) |
---|
34 | => for normal operation |
---|
35 | |
---|
36 | ASCII: |
---|
37 | |
---|
38 | (+) editable by standard text editors |
---|
39 | (+) information can be extracted by hand |
---|
40 | (-) needs an extreme amount of hard disk space |
---|
41 | => to check and correct a database |
---|
42 | |
---|
43 | |
---|
44 | All ARB tools for database handling and most of the ARB tools |
---|
45 | for data analysis act directly upon the database. The database |
---|
46 | is kept consistent at all times. Any local modifications by |
---|
47 | individual ARB tools are immediately exported to the database |
---|
48 | and all other active tools. |
---|
49 | |
---|
50 | |
---|
51 | |
---|
52 | NOTES ASCII format |
---|
53 | |
---|
54 | DATA FORMAT |
---|
55 | |
---|
56 | [xxx] means xxx is optional |
---|
57 | [xxx]* means xxx is optional and can occur many times |
---|
58 | xxx|yyy means xxx or yyy |
---|
59 | // means comment |
---|
60 | |
---|
61 | ARBDB HIERARCHY |
---|
62 | |
---|
63 | ARB DB is a hierarchical database system, so here's a short description |
---|
64 | of the hierarchy: |
---|
65 | |
---|
66 | ARBDB ::= species_data // container containing all species |
---|
67 | presets // global alignment and db field information |
---|
68 | [extended_data] // all SAIs |
---|
69 | [tmp] // temporary data |
---|
70 | [tree_data] // all trees |
---|
71 | ... // user defined entries (programmers) |
---|
72 | |
---|
73 | species_data::= [species]* |
---|
74 | |
---|
75 | extended_data::= [extended]* |
---|
76 | |
---|
77 | gene_data::= [gene]* // container for genes (species local) |
---|
78 | |
---|
79 | |
---|
80 | species::= 'name' // species identifier |
---|
81 | ['full_name'] |
---|
82 | ... // (end) user defined fields |
---|
83 | [ali_xxx] // the alignment container(s) |
---|
84 | [gene_data] // container containing genes |
---|
85 | |
---|
86 | extended::= // analogous to species |
---|
87 | |
---|
88 | gene::= // analogous to species |
---|
89 | |
---|
90 | ali_xxx::= 'data' // the sequence |
---|
91 | ... // additional sequence information |
---|
92 | |
---|
93 | presets::= 'use' // default alignment |
---|
94 | [alignment]* |
---|
95 | [key_data] // description of the user defined keys |
---|
96 | |
---|
97 | alignment::= 'alignment_name' // name of the alignment (prefix 'ali_') |
---|
98 | 'alignment_len' // length of longest sequence |
---|
99 | 'alignment_write_security' // default write security |
---|
100 | 'alignment_type' // dna, rna or pro |
---|
101 | 'aligned' // ==1 when all sequences have the same |
---|
102 | // length else 0 |
---|
103 | key_data::= [key]* |
---|
104 | |
---|
105 | key::= 'key_name' // name of a user defined field |
---|
106 | 'key_type' // type (12=string, 3=int, 6=bits) |
---|
107 | |
---|
108 | ******************************************* |
---|
109 | *************** ASCII BASIC ************** |
---|
110 | ******************************************* |
---|
111 | |
---|
112 | Note: |
---|
113 | |
---|
114 | - /* xxx */ is used for comments and is not read |
---|
115 | |
---|
116 | - A grammar is used to describe the data format. |
---|
117 | All terminal symbols are surrounded by "'". |
---|
118 | |
---|
119 | ASCII::= ['/*ARBDB ASCII*/'] |
---|
120 | [FIELD]* |
---|
121 | |
---|
122 | FIELD::= KEY [PROTECTION] [TYPE] VALUE |
---|
123 | | |
---|
124 | KEY [PROTECTION] '%%' (% |
---|
125 | [FIELD]* |
---|
126 | %) /* Comment */ |
---|
127 | |
---|
128 | |
---|
129 | KEY::= 'Any string of a-z|A-Z|0-9|"_"' |
---|
130 | 2 < |KEY| < 256 // allowed length of KEY |
---|
131 | |
---|
132 | PROTECTION::= ':''delete protection level''write p.l.''00' |
---|
133 | // 00 are reserved for future use |
---|
134 | |
---|
135 | TYPE::= '%s' // STRING |
---|
136 | '%i' // INTEGER |
---|
137 | '%f' // FLOAT |
---|
138 | '%N' // BYTES |
---|
139 | '%I' // BITS |
---|
140 | '%F' // FLOATS |
---|
141 | |
---|
142 | |
---|
143 | VALUE::= '"string"' | '"^Astring^A"' | 'string' //type = STRING |
---|
144 | | 'int_number' //type = INT |
---|
145 | | 'real_number' //type = FLOAT |
---|
146 | | 'coded bytestring' //type = BYTES,FLOATS, |
---|
147 | // BITS |
---|
148 | |
---|
149 | |
---|
150 | EXAMPLES None |
---|
151 | |
---|
152 | ******************************************* |
---|
153 | ************** ASCII EXAMPLE ************* |
---|
154 | ******************************************* |
---|
155 | |
---|
156 | /*ARBDB ASCII*/ |
---|
157 | species_data %% (% |
---|
158 | species :5000 %% (% |
---|
159 | name :7600 "EscCol10" |
---|
160 | file "ecrna3.empro" |
---|
161 | full_name "Escherichia coli" |
---|
162 | acc "V00331;" |
---|
163 | ali_23all :5000 %% (% |
---|
164 | data :7500 "...........ACGTUUU........... |
---|
165 | mark %I "---------------++++--------- |
---|
166 | %) /*ali_23all*/ |
---|
167 | %) /*species*/ |
---|
168 | species :5000 %% (% |
---|
169 | name :7600 "EscCol11" |
---|
170 | file "ecrr23s.empro" |
---|
171 | full_name "Escherichia coli" |
---|
172 | ali_23all :5000 %% (% |
---|
173 | data :7500 "...........ACGTUUUGGG....... |
---|
174 | mark %I "---------------++++--------- |
---|
175 | %) /*ali_23all*/ |
---|
176 | %) /*species*/ |
---|
177 | %) /*species_data*/ |
---|
178 | presets %% (% |
---|
179 | use "ali_23all" |
---|
180 | max_alignment_len %i 2000 |
---|
181 | alignment_len %i 0 |
---|
182 | max_name_len %i 9 |
---|
183 | alignment %% (% |
---|
184 | alignment_name "ali_23all" |
---|
185 | alignment_len %i 4205 |
---|
186 | aligned %i 1 |
---|
187 | alignment_write_security %i 5 |
---|
188 | alignment_type "rna" |
---|
189 | %) /*alignment*/ |
---|
190 | key_data %% (% |
---|
191 | key %% (% |
---|
192 | key_name "name" |
---|
193 | key_type %i 12 |
---|
194 | %) /*key*/ |
---|
195 | key %% (% |
---|
196 | key_name "group_name" |
---|
197 | key_type %i 12 |
---|
198 | %) /*key*/ |
---|
199 | key %% (% |
---|
200 | key_name "acc" |
---|
201 | key_type %i 12 |
---|
202 | %) /*key*/ |
---|
203 | key %% (% |
---|
204 | key_name "ali_23all/data" |
---|
205 | key_type %i 12 |
---|
206 | %) /*key*/ |
---|
207 | key %% (% |
---|
208 | key_name "ali_23all/mark" |
---|
209 | key_type %i 6 |
---|
210 | %) /*key*/ |
---|
211 | key %% (% |
---|
212 | key_name "aligned" |
---|
213 | key_type %i 12 |
---|
214 | %) /*key*/ |
---|
215 | key %% (% |
---|
216 | key_name "author" |
---|
217 | key_type %i 12 |
---|
218 | %) /*key*/ |
---|
219 | %) /*key_data*/ |
---|
220 | %) /*presets*/ |
---|
221 | tree_data %% (% |
---|
222 | tree_main :4400 %% (% |
---|
223 | nnodes %i 2 |
---|
224 | tree "N0.014808,0.015168;N0.000360,0.000360;LEscCol10^ALEscColi^ALEscCol11^A" |
---|
225 | ruler %% (% |
---|
226 | size %f 0.100000 |
---|
227 | RADIAL %% (% |
---|
228 | ruler_y %f 0.341577 |
---|
229 | ruler_x %f 0.000000 |
---|
230 | %) /*RADIAL*/ |
---|
231 | text_x %f 0.000000 |
---|
232 | text_y %f 0.000000 |
---|
233 | ruler_width %i 0 |
---|
234 | LIST %% (% |
---|
235 | ruler_y %f 0.000000 |
---|
236 | ruler_x %f 0.000000 |
---|
237 | %) /*LIST*/ |
---|
238 | %) /*ruler*/ |
---|
239 | %) /*tree_main*/ |
---|
240 | %) /*tree_data*/ |
---|
241 | extended_data :7000 %% (% |
---|
242 | extended %% (% |
---|
243 | name "HELIX_PAIRS" |
---|
244 | ali_23all %% (% |
---|
245 | data "............................1a.. |
---|
246 | %) /*ali_23all*/ |
---|
247 | %) /*extended*/ |
---|
248 | extended %% (% |
---|
249 | name "gpl5rr" |
---|
250 | ali_23all %% (% |
---|
251 | phyl_options %N 10000106D02:0C03.0D02-07.87.DB6 |
---|
252 | bits %I "-----------------------+++++++++-+-+++ |
---|
253 | floats %F 10000106D04:0A.C816.425C03.5D.802F.BF03 |
---|
254 | %) /*ali_23all*/ |
---|
255 | %) /*extended*/ |
---|
256 | %) /*extended_data*/ |
---|
257 | tmp %% (% |
---|
258 | focus %% (% |
---|
259 | species_name "EscColi" |
---|
260 | cursor_position %i 323 |
---|
261 | %) /*focus*/ |
---|
262 | message "" |
---|
263 | %) /*tmp*/ |
---|
264 | |
---|
265 | |
---|
266 | WARNINGS The ASCII version of an ARB database needs a lot of virtual memory when |
---|
267 | loaded. |
---|
268 | |
---|