source: tags/arb_5.1/HELP_SOURCE/oldhelp/commands.hlp

Last change on this file was 6142, checked in by westram, 15 years ago
  • backport [6141] (parts not affecting code at all, i.e. helpfiles, figs, ..)
  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 21.0 KB
Line 
1#Please insert up references in the next lines (line starts with keyword UP)
2UP      arb.hlp
3UP      glossary.hlp
4
5#Please insert subtopic references  (line starts with keyword SUB)
6#SUB    subtopic.hlp
7#SUB    parser.hlp
8#SUB    regexpr.hlp
9SUB     exec_bug.hlp
10
11# Hypertext links in helptext can be added like this: LINK{ref.hlp|http://add|bla@domain}
12
13#************* Title of helpfile !! and start of real helpfile ********
14
15TITLE           ARB Command Interpreter (ACI)
16
17OCCURRENCE      NDS
18                [ export db ]
19                [ ARB_NT/Species/search/parse_fields ]
20
21DESCRIPTION     The command interpreter is a simple interpreter.
22                All commands take the data from the input streams,
23                modify it and write it to the output
24                (which may be the input of the next command). The first
25                input stream is normally the value of a database field
26                (see NDS for more information).
27
28                     e.g. count("a") counts every 'a' in each input stream and
29                     generates an output stream (== the sum of 'a') for every
30                     input.
31
32                Many commands have command modifiers which are appended to
33                the command.
34
35                Different commands can be separated by:
36
37                        ';'     all !!! commands take all !!!  the input streams and
38                                each command generates its own output streams
39                        '|'     the output of the left commands is used as the input
40                                of the right.
41
42                e.g.
43
44                        count("A");count("AG")
45
46                                creates two streams: 1. how many A's
47                                                     2. and how many A's and G's
48
49                        count("A");count("G")|per_cent
50
51                                per_cent is a command that divides two numbers
52                                (number of 'A's / number of 'G's) and returns the result
53                                as percent.
54
55                Finally all output streams are concatenated and
56
57                        - NDS:                     printed at the tips of the tree.
58                        - MODIFY DATABASE FIELD:   stored in the destination field.
59
60DESCRIPTION
61
62        eg: count("A");count("G")|"a/g = "; per_cent
63
64        input --> count("A") -->| ----->  "a/g = "  --> | \
65        "AGG" \                 | \ /                   |  --> 'a/g = 50'
66               \                |  \                    |  -->
67                \               | /  -->  per_cent  --> | /
68        .        ->count("G")-->| ----->                |
69
70
71
72
73SECTION COMMANDLIST
74
75                If not otherwise mentioned every command creates one
76                output stream for each input stream.
77
78        STREAM HANDLING
79
80                echo(x1;x2;x3...)       creates an output stream for each
81                                        parameter 'x' and writes 'x' onto it.
82
83                "text"                  == echo("text")
84
85                dd                      copies all input streams to output streams
86
87                cut(N1,N2,N3)           copies the Nth input stream(s)
88
89                drop(N1,N2)             copies all but the Nth input stream(s)
90
91                dropempty               drops all empty input streams
92
93                dropzero                drops all non-numeric or zero input streams
94
95                swap(N1,N2)             swaps two input streams
96                                        (w/o parameters: swaps last two streams)
97
98                toback(X)               moves the Xth input stream
99                                        to the end of output streams
100
101                tofront(X)              moves the Xth input stream
102                                        to the start of output streams
103
104                merge([sep])            merges all input streams into one output stream.
105                                        If 'sep' is specified, it's inserted between them.
106                                        If no input streams are given, it returns 1 empty
107                                        output stream.
108
109                split([sep[,mode]])     splits all input streams at separator string 'sep'
110                                        (default: split at linefeed).
111
112                                        Modes:
113
114                                        0               remove found separators (default)
115                                        1               split before separator
116                                        2               split after separator
117
118                streams                 returns the number of input streams
119
120        STRING
121
122                head(n)                 the first n characters
123                left(n)                 the first n characters
124
125                tail(n)                 the last n characters
126                right(n)                the last n characters
127
128                len                     the length of the input
129
130                len("chr")              the length of the input excluding the
131                                        characters in 'chr'
132
133                mid(x,y)                the string from x to y
134                                        y < 0 means a position relative to the
135                                        end
136
137                crop("str")             removes characters of 'str' from
138                                        both ends of the input
139
140                remove("str");          removes all characters of 'str'
141                                        e.g. remove(" ") removes all blanks
142
143                keep("str");            the opposite of remove:
144                                        remove all chars that are not a member
145                                        of 'str'
146
147                srt("orig=dest",...)    replace command, invokes SRT
148                                        (see LINK{parser.hlp})
149
150                translate("old","new"[,"other"])
151
152                        translates all characters from input that occur in the
153                        first argument ("old") by the corresponding character of the
154                        second argument ("new").
155
156                        An optional third argument (one character only) means:
157                        replace all other characters with the third argument.
158
159                        Example:
160
161                                Input:                        "--AabBCcxXy--"
162                                translate("abc-","xyz-")      "--AxyBCzxXy--"
163                                translate("abc-","xyz-",".")  "--.xy..z...--"
164
165                        This can be used to replace illegal characters from sequence data
166                        (see predefined expressions in 'Modify fields of listed species').
167
168
169                tab(n)                  append n-len(input) spaces
170
171                pretab(n)               prepend n-len(input) spaces
172
173                upper                   converts string to upper case
174                lower                   converts string to lower case
175                caps                    capitalizes string
176
177                format(options)
178
179                    takes a long string and breaks it into several lines
180
181                        option       (default)     description
182                        ==========================================================
183                        width=#      (50)          line width
184                        firsttab=#   (10)          first line left indent
185                        tab=#        (10)          left indent (not first line)
186                        "nl=chrs"    (" ")         list of characters that specify
187                                                   a possible point of a line break;
188                                                   This character is deleted !
189                        "forcenl=chrs" ("\n")      Force a newline at these characters.
190
191                extract_words("chars",val)
192
193                    Search for all words (separated by ',' ';' ':' ' ' or 'tab') that
194                    contain more characters of type chars than val, sort them
195                    alphabetically and write them separated by ' ' to the output
196
197        STRING COMPARISON
198
199               compare(a,b)             return -1 if a<b, 0 if a=b, 1 if a>b
200               equals(a,b)              return 1 if a=b, 0 otherwise
201               contains(a,b)            if a contains b, this returns the position of
202                                        b inside a (1..N) and 0 otherwise.
203               partof(a,b)              if a is part of b, this returns the position of
204                                        a inside b (1..N) and 0 otherwise.
205
206               The above functions are binary operators (see below).
207               For each of them a case-insensitive alternative exists (icompare, iequals, ...).
208
209        CALCULATOR
210
211                plus                    add arguments
212                minus                   subtract arguments
213                mult                    multiply arguments
214                div                     divide arguments
215                per_cent                divide arguments * 100 (rounded)
216                rest                    divide arguments, take rest
217
218                The above functions work as binary operators (see below).
219
220                To avoid 'division by zero'-errors, the operators 'div', 'per_cent' and 'rest'
221                return 0 if the second argument is zero.
222
223                Calculation is performed with integer numbers.
224
225        BINARY OPERATORS
226
227               Several operators work as so called 'binary operators'.
228               These operators may be used in various ways, which are
229               shown using the operator 'plus':
230
231                     ACI                OUTPUT                  STREAMS
232                     plus(a,b)          a+b                     input:0 output:1
233                     a;b|plus           a+b                     input:2 output:1
234                     a;b;c;d|plus       a+b;c+d                 input:4 output:2
235                     a;b;c|plus(x)      a+x;b+x;c+x             input:3 output:3
236
237               That means, if the binary operator
238
239                    - has no arguments, it expects an even number of input streams. The operator is
240                      applied to the first 2 streams, then to the second 2 streams and so on.
241                      The number of output streams is half the number of input streams.
242                    - has 1 argument, it accepts one to many input streams. The operator
243                      is applied to each input stream together with the argument.
244                      For each input stream one output stream is generated.
245                    - has 2 arguments, it is applied to these. The arguments are interpreted as
246                      escaped ACI commands and are applied for each input stream. The results of
247                      the commands are passed as arguments to the binary operator. For each input
248                      stream one output stream is generated.
249
250        CONDITIONAL
251
252                select(a,b,c,...)       each input stream is converted into a number
253                                        (non-numeric text converts to zero). That number is
254                                        used to select one of the given arguments:
255                                             0 selects 'a',
256                                             1 selects 'b',
257                                             2 selects 'c' and so on.
258                                        The selected argument is interpreted as ACI command
259                                        and is applied to an empty input stream.
260
261        DEBUGGING
262
263                trace(onoff)            toggle tracing of ACI actions to standard output.
264                                        Start arb from a terminal to see the output.
265                                        Parameter: 0 or 1 (switch off or on)
266
267                                        All streams are copied (like 'dd').
268
269        DATABASE AND SEQUENCE
270
271                readdb(field_name)      the contents of the field 'field_name'
272
273                sequence                the sequence in the current alignment.
274
275                                        Note: older ARB versions returned 'no sequence'
276                                        if the current alignment contained no sequence.
277                                        Now it returns an empty string.
278
279                                        For genes it returns only the corresponding part
280                                        of the sequence. If the field complement = 1 then the
281                                        result is the reverse-complement.
282
283                sequence_type           the default sequence's type(rna/dna..)
284                sequence_name           the default sequence name(ali_16s,..)
285
286                Note: The commands above only work at the beginning of the ACI expression.
287
288                checksum(options)       calculates a CRC checksum
289                                        options:
290                                        "exclude=chrs"    remove 'chrs' before calculation
291                                        "toupper"         make everything uppercase first
292
293                gcgchecksum             a gcg compatible checksum
294
295                format_sequence(options)
296
297                        takes a long string ( sequence ) and breaks it into several lines
298
299                        option       (default)  description
300                        =============================================================
301                        width=#      (50)       sequence line width
302                        firsttab=#   (10)       first line left indent
303                        tab=#        (10)       left indent (not first line)
304                        numleft      (NO)       numbers on the left side
305                        gap=#        (10)       insert a gap every # seq. characters.
306
307                extract_sequence("chars",rel_len)
308
309                        like extract_words, but the words are not sorted, and rel_len is the minimum
310                        percentage of characters of a word that must match a character in 'chars'
311                        before the word is taken. All words will be separated by white space.
312
313                taxonomy([treename,] depth)
314
315                        Returns the taxonomy of the current species or group as defined by a tree.
316
317                        If 'treename' is specified, it's used as tree, otherwise the 'default tree'
318                        is used (which in most cases is the tree displayed in the ARB_NT main window).
319
320                        'depth' specifies how many "levels" of the taxonomy are used.
321
322        FILTERING
323
324                There are several functions to filter sequential data:
325
326                      - filter
327                      - diff
328                      - change
329
330                All these functions use the following COMMON OPTIONS to define
331                what is used as filter sequence:
332
333                    - species=name
334
335                      Use species 'name' as filter.
336
337                    - SAI=name
338
339                      Use SAI 'name' as filter.
340
341                    - first=1
342
343                      Use 1st input stream as filter for all other input streams.
344
345                    - pairwise=1
346
347                      Use 1st input stream as filter for 2nd stream,
348                      3rd stream as filter for 4th stream, and so on.
349
350                    - align=ali_name
351
352                      Use alignment 'ali_name' instead of current default
353                      alignment (only meaningful together with 'species' or 'SAI').
354
355                    Note: Only one of the parameters 'species', 'SAI', 'first' or 'pairwise' may be used.
356
357                diff(options)
358
359                        Calculates the difference between the filter (see common options above) and the input stream(s) and
360                        writes the result to output stream(s).
361
362                        Additional options:
363
364                        - equal=x
365
366                          Character written to output if filter and stream are equal at
367                          a position (defaults to '.'). To copy the stream contents for
368                          equal columns, specify 'equal=' (directly followed by ',' or ')')
369
370                        - differ=y
371
372                          Character written to output if filter and stream don't match at one column position.
373                          Default is to copy the character from the stream.
374
375                filter(options)
376
377                        Filters only specified columns out of the input stream(s). You need to
378                        specify either
379
380                        - exclude=xyz
381
382                          to use all columns, where the filter (see common options above) has none
383                          of the characters 'xyz'
384
385                        or
386
387                        - include=xyz
388
389                          to use only columns, where the filter has one of the characters 'xyz'
390
391                        All used columns are concatenated and written to the output stream(s).
392
393
394                change(options)
395
396                        Randomly modifies the content of columns selected
397                        by the filter (see common options above).
398
399                        The options 'include=xyz' and 'exclude=xyz' work like
400                        with 'filter()', but here they select the columns to modify - all other
401                        columns get copied unmodified.
402
403                        How the selected columns are modified, is specified by the following
404                        parameters:
405
406                                - change=percent
407
408                                  percentage of changed columns (default: silently change nothing, to make
409                                  it more difficult for you to ignore this helpfile)
410
411                                - to=xy
412
413                                  randomly change to one of the characters 'xy'.
414
415                                  Hints:
416
417                                        - Use 'xyy' to produce 33% 'x' and 66% 'y'
418                                        - Use 'xxxxxxxxxy' to produce 90% 'x' and 10% 'y'
419                                        - Use 'x' to replace all matching columns by 'x'
420
421                        I think the intention for this (long undocumented) command is to easily generate
422                        artificial sequences with different GC-content, in order to test treeing-software.
423
424        SPECIALS
425
426                exec(command,var1,...)
427
428                    Execute external (unix) command
429
430                        WARNING !!!!!! You should not use this command for NDS !!!
431                        because any slow command will disable all editing -> You can
432                        never remove this command from the NDS. Even arb_panic will not
433                        easily help you.
434
435                command(escapedCommand)
436
437                        applies 'escapedCommand' to all input streams using
438
439                                 - ACI,
440                                 - SRT (if starts with ':') (see LINK{parser.hlp})
441                                 - or as REG (if starts with '/') (see LINK{regexpr.hlp}).
442
443                        In escapedCommand you have to escape '\' and '"' by
444                        preceding a '\'. If you nest calls you have to use multiple escapes
445                        (e.g. inside an export filter - which is in fact an
446                        SRT expression - you'll have to use double escapes).
447
448                eval(escapedCommand)
449
450                        the 'escapedCommand' is evaluated (using an empty string as input)
451                        and the result is interpreted as command and gets applied to all
452                        input streams.
453
454                        Example: Say you have two numeric positions stored in database fields
455                                 'pos1' and 'pos2' for each species. Then the following command
456                                 extracts the sequence data from pos1 to pos2:
457
458                        sequence|eval(" \"mid(\";readdb(pos1);\";\";readdb(pos2);\")\" ")
459
460                        How the example works:
461
462                                The argument to eval is the escaped version of the command
463                                 '"mid(" ; readdb(pos1) ; ";" ; readdb(pos2) ; ")"'.
464                                 If pos1 contains '10' and pos2 contains '20' that command
465                                 evaluates to 'mid(10;20)'.
466
467                                 The resulting ACI for the example species
468                                 is 'sequence|eval("mid(10;20)")' which is equivalent
469                                 to 'sequence|mid(10;20)'.
470
471                define(name,escapedCommand)
472
473                        defines an ACI-macro 'name'. 'escapedCommand' contains an escaped
474                        ACI command sequence. This command sequence can be executed with
475                        do(name).
476
477                do(name)
478
479                        applies a previously defined ACI-macro to all input streams (see 'define').
480
481                        'define' followed by 'do' works similar to 'command'.
482                        See embl.eft for an example using define and 'do'
483
484                origin_organism(escapedCommand)
485                origin_gene(escapedCommand)
486
487                        like command() but readdb() etc. reads all data from the
488                        origin organism/gene of a gene-species (not from the gene-species itself).
489
490                        This function applies only to gene-species!
491
492SECTION         Future features
493
494                statistic
495
496                        creates a character statistic of the sequence
497                        (not implemented yet)
498
499EXAMPLES        sequence|format_sequence(firsttab=0;tab=10)|"SEQUENCE_";dd
500
501                                fetches the default sequence, formats it,
502                                and prepends 'SEQUENCE_'.
503
504                sequence|remove(".-")|format_sequence
505
506                                get the default sequence, remove all '.-' and
507                                format it
508
509                sequence|remove(".-")|len
510
511                                the number of non '.-' symbols (sequence length )
512
513                "[";taxonomy(tree_other,3);" -> ";taxonomy(3);"]"
514
515                                shows for each species how their taxonomy
516                                changed between "tree_other" and current tree
517
518                equals(readdb(tmp),readdb(acc))|select(echo("tmp and acc differ"),)
519
520                                returns 'tmp and acc differ' if the content of
521                                the database fields 'tmp' and 'acc' differs. empty result
522                                otherwise.
523
524                readdb(full_name)|icontains(bacillus)|compare(0)|select(echo(..),readdb(full_name))
525
526                                returns the content of the 'full_name' database entry if it contains
527                                the substring 'bacillus'. Otherwise returns '..'
528
529
530BUGS            The output of taxonomy() is not always instantly refreshed.
Note: See TracBrowser for help on using the repository browser.