source: branches/profile/HELP_SOURCE/oldhelp/aci.hlp

Last change on this file was 11080, checked in by westram, 12 years ago
  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 23.2 KB
Line 
1#Please insert up references in the next lines (line starts with keyword UP)
2UP      arb.hlp
3UP      glossary.hlp
4
5#Please insert subtopic references  (line starts with keyword SUB)
6SUB     exec_bug.hlp
7
8# Hypertext links in helptext can be added like this: LINK{ref.hlp|http://add|bla@domain}
9
10#************* Title of helpfile !! and start of real helpfile ********
11
12TITLE           ARB Command Interpreter (ACI)
13
14OCCURRENCE      NDS
15                [ export db ]
16                [ ARB_NT/Species/search/parse_fields ]
17
18DESCRIPTION     The command interpreter is a simple interpreter.
19
20                All commands take the data from the input streams,
21                modify it and write it to the output
22                (which may be the input of the next command).
23
24                The first input stream often is the value of a database field
25                (see NDS for more information).
26
27                     e.g. count("a") counts every 'a' in each input stream and
28                     generates an output stream (== the sum of 'a') for every
29                     input.
30
31                Many commands have parameters which are specified behind
32                the command in parentheses.
33
34                Different commands can be separated by:
35
36                        ';'     all !!! commands take all !!!  the input streams and
37                                each command generates its own output streams
38                        '|'     the output of the left commands is used as the input
39                                of the right.
40
41                e.g.
42
43                        count("A");count("AG")
44
45                                creates two streams:
46
47                                        1. how many A's
48                                        2. and how many A's and G's
49
50                        count("A");count("G")|per_cent
51
52                                per_cent is a command that divides two numbers
53                                (number of 'A's / number of 'G's) and returns the result
54                                as percent.
55
56                Finally all output streams are concatenated and
57
58                        - NDS:printed at the tips of the tree.
59                        - MODIFY DATABASE FIELD: stored in the destination field.
60
61SECTION Example data flow
62
63        eg: count("A");count("G")|"a/g = "; per_cent
64
65        input                                                         concatenate output
66        "AGG" ----> count("A") -->| -----> "a/g = " --> | --> "a/g = " ---> 'a/g = 50'
67              \                   | \ /                 |               /
68               \                  |  \                  |              /
69                \                 | / \                 |             /
70                 -> count("G") -->| -----> per_cent --> | --> "50" ---
71
72
73SECTION PARAMETERS
74
75        Several commands expect or accept additional parameters in
76        parentheses (e.g. 'remove(aA)').
77
78        Multiple parameters have to be separated by ',' or ';'.
79
80        There are two distinct ways to specify such a parameter:
81        - unquoted
82
83          Unquoted parameters are taken as specified, but may not contain any of
84          the following characters: ',;"|)'
85
86        - quoted
87
88          Quoted parameters begin and end with a '"'. You can use any character,
89          but you need to escape '\' and '"' by preceding them with a '\'.
90
91          Example: 'remove("\"")' will remove all double quotes from input.
92                   'remove("\\")' will remove all backslashes from input.
93
94        [@@@ behavior currently not strictly implemented]
95
96SECTION COMMANDLIST
97
98                If not otherwise mentioned every command creates one
99                output stream for each input stream.
100
101        STREAM HANDLING
102
103                echo(x1;x2;x3...)       creates an output stream for each
104                                        parameter 'x' and writes 'x' onto it.
105
106                "text"                  same as 'echo("text")'
107
108                dd                      copies all input streams to output streams
109
110                cut(N1,N2,N3)           copies the Nth input stream(s)
111
112                drop(N1,N2)             copies all but the Nth input stream(s)
113
114                dropempty               drops all empty input streams
115
116                dropzero                drops all non-numeric or zero input streams
117
118                swap(N1,N2)             swaps two input streams
119                                        (w/o parameters: swaps last two streams)
120
121                toback(X)               moves the Xth input stream
122                                        to the end of output streams
123
124                tofront(X)              moves the Xth input stream
125                                        to the start of output streams
126
127                merge([sep])            merges all input streams into one output stream.
128                                        If 'sep' is specified, it's inserted between them.
129                                        If no input streams are given, it returns 1 empty
130                                        output stream.
131
132                split([sep[,mode]])     splits all input streams at separator string 'sep'
133                                        (default: split at linefeed).
134
135                                        Modes:
136
137                                        0               remove found separators (default)
138                                        1               split before separator
139                                        2               split after separator
140
141                streams                 returns the number of input streams
142
143        STRING
144
145                head(n)                 the first n characters
146                left(n)                 the first n characters
147
148                tail(n)                 the last n characters
149                right(n)                the last n characters
150
151                                        the above functions return an empty string for n<=0
152
153                len                     the length of the input
154
155                len("chr")              the length of the input excluding the
156                                        characters in 'chr'
157
158                mid(x,y)                the substring from position x to y
159
160                                        Allowed positions are
161                                        - [1..N] for mid()
162                                        - [0..N-1] for mid0()
163
164                                        A position below that range is relative to the end of the string,
165                                        i.e. mid(-2,0) and mid0(-3,-1) are equivalent to tail(3)
166
167                crop("str")             removes characters of 'str' from
168                                        both ends of the input
169
170                remove("str");          removes all characters of 'str'
171                                        e.g. remove(" ") removes all blanks
172
173                keep("str");            the opposite of remove:
174                                        remove all chars that are not a member
175                                        of 'str'
176
177                srt("orig=dest",...)    replace command, invokes SRT
178                                        (see LINK{srt.hlp})
179
180                translate("old","new"[,"other"])
181
182                        translates all characters from input that occur in the
183                        first argument ("old") by the corresponding character of the
184                        second argument ("new").
185
186                        An optional third argument (one character only) means:
187                        replace all other characters with the third argument.
188
189                        Example:
190
191                                Input:                        "--AabBCcxXy--"
192                                translate("abc-","xyz-")      "--AxyBCzxXy--"
193                                translate("abc-","xyz-",".")  "--.xy..z...--"
194
195                        This can be used to replace illegal characters from sequence data
196                        (see predefined expressions in 'Modify fields of listed species').
197
198
199                tab(n)                  append n-len(input) spaces
200
201                pretab(n)               prepend n-len(input) spaces
202
203                upper                   converts string to upper case
204                lower                   converts string to lower case
205                caps                    capitalizes string
206
207                format(options)
208
209                    takes a long string and breaks it into several lines
210
211                        option       (default)     description
212                        ==========================================================
213                        width=#      (50)          line width
214                        firsttab=#   (10)          first line left indent
215                        tab=#        (10)          left indent (not first line)
216                        "nl=chrs"    (" ")         list of characters that specify
217                                                   a possible point for a line break;
218                                                   This character is deleted !
219                        "forcenl=chrs" ("\n")      Force a newline at these characters.
220
221                    (see also format_sequence below)
222
223                extract_words("chars",val)
224
225                    Search for all words (separated by ',' ';' ':' ' ' or 'tab') that
226                    contain more characters of type chars than val, sort them
227                    alphabetically and write them separated by ' ' to the output
228
229        ESCAPING AND QUOTING
230
231                 escape         escapes all occurrences of '\' and '"' by preceding them with a '\'
232                 quote          quotes the input in '"'
233
234                 unescape       inverse of escape
235                 unquote        removes quotes (if present). otherwise return input
236
237
238        STRING COMPARISON
239
240               compare(a,b)             return -1 if a<b, 0 if a=b, 1 if a>b
241               equals(a,b)              return 1 if a=b, 0 otherwise
242               contains(a,b)            if a contains b, this returns the position of
243                                        b inside a (1..N) and 0 otherwise.
244               partof(a,b)              if a is part of b, this returns the position of
245                                        a inside b (1..N) and 0 otherwise.
246
247               The above functions are binary operators (see below).
248               For each of them a case-insensitive alternative exists (icompare, iequals, ...).
249
250        CALCULATOR
251
252                plus                    add arguments
253                minus                   subtract arguments
254                mult                    multiply arguments
255                div                     divide arguments
256                per_cent                divide arguments * 100 (rounded)
257                rest                    divide arguments, take rest
258
259                The above functions work as binary operators (see below).
260
261                To avoid 'division by zero'-errors, the operators 'div', 'per_cent' and 'rest'
262                return 0 if the second argument is zero.
263
264                Calculation is performed with integer numbers.
265
266        BINARY OPERATORS
267
268               Several operators work as so called 'binary operators'.
269               These operators may be used in various ways, which are
270               shown using the operator 'plus':
271
272                     ACI                OUTPUT                  STREAMS
273                     plus(a,b)          a+b                     input:0 output:1
274                     a;b|plus           a+b                     input:2 output:1
275                     a;b;c;d|plus       a+b;c+d                 input:4 output:2
276                     a;b;c|plus(x)      a+x;b+x;c+x             input:3 output:3
277
278               That means, if the binary operator
279
280                    - has no arguments, it expects an even number of input streams. The operator is
281                      applied to the first 2 streams, then to the second 2 streams and so on.
282                      The number of output streams is half the number of input streams.
283                    - has 1 argument, it accepts one to many input streams. The operator
284                      is applied to each input stream together with the argument.
285                      For each input stream one output stream is generated.
286                    - has 2 arguments, it is applied to these. The arguments are interpreted as
287                      ACI commands and are applied for each input stream. The results of
288                      the commands are passed as arguments to the binary operator. For each input
289                      stream one output stream is generated.
290
291        CONDITIONAL
292
293                select(a,b,c,...)       each input stream is converted into a number
294                                        (non-numeric text converts to zero). That number is
295                                        used to select one of the given arguments:
296                                             0 selects 'a',
297                                             1 selects 'b',
298                                             2 selects 'c' and so on.
299                                        The selected argument is interpreted as ACI command
300                                        and is applied to an empty input stream.
301
302        DEBUGGING
303
304                trace(onoff)            toggle tracing of ACI actions to standard output.
305                                        Start arb from a terminal to see the output.
306                                        Parameter: 0 or 1 (switch off or on)
307
308                                        All streams are copied (like 'dd').
309
310        DATABASE AND SEQUENCE
311
312                readdb(field_name)      the contents of the field 'field_name'
313
314                sequence                the sequence in the current alignment.
315
316                                        Note: older ARB versions returned 'no sequence'
317                                        if the current alignment contained no sequence.
318                                        Now it returns an empty string.
319
320                                        For genes it returns only the corresponding part
321                                        of the sequence. If the field complement = 1 then the
322                                        result is the reverse-complement.
323
324                sequence_type           the sequence type of the selected alignment ('rna','dna',..)
325                ali_name                the name of the selected alignment (e.g. 'ali_16s')
326
327                Note: The commands above only work at the beginning of the ACI expression.
328
329                checksum(options)       calculates a CRC checksum
330                                        options:
331                                        "exclude=chrs"    remove 'chrs' before calculation
332                                        "toupper"         make everything uppercase first
333
334                gcgchecksum             a gcg compatible checksum
335
336                format_sequence(options)
337
338                        takes a long string ( sequence ) and breaks it into several lines
339
340                        option       (default)  description
341                        =============================================================
342                        width=#      (50)       sequence line width
343                        firsttab=#   (10)       first line left indent
344                        tab=#        (10)       left indent (not first line)
345                        numleft      (NO)       numbers on the left side
346                        gap=#        (10)       insert a gap every # seq. characters.
347
348                    (see also 'format' above)
349
350                extract_sequence("chars",rel_len)
351
352                        like extract_words, but does not sort the words; rel_len is the minimum
353                        percentage of characters of a word that must match a character in 'chars'
354                        before word is taken. All words will be separated by white space.
355
356                taxonomy([treename,] depth)
357
358                        Returns the taxonomy of the current species or group as defined by a tree.
359
360                        If 'treename' is specified, it's used as tree, otherwise the 'default tree'
361                        is used (which in most cases is the tree displayed in the ARB_NT main window).
362
363                        'depth' specifies how many "levels" of the taxonomy are used.
364
365        FILTERING
366
367                There are several functions to filter sequential data:
368
369                      - filter
370                      - diff
371                      - change
372
373                All these functions use the following COMMON OPTIONS to define
374                what is used as filter sequence:
375
376                    - species=name
377
378                      Use species 'name' as filter.
379
380                    - SAI=name
381
382                      Use SAI 'name' as filter.
383
384                    - first=1
385
386                      Use 1st input stream as filter for all other input streams.
387
388                    - pairwise=1
389
390                      Use 1st input stream as filter for 2nd stream,
391                      3rd stream as filter for 4th stream, and so on.
392
393                    - align=ali_name
394
395                      Use alignment 'ali_name' instead of current default
396                      alignment (only meaningful together with 'species' or 'SAI').
397
398                    Note: Only one of the parameters 'species', 'SAI', 'first' or 'pairwise' may be used.
399
400                diff(options)
401
402                        Calculates the difference between the filter (see common options above) and the input stream(s) and
403                        writes the result to output stream(s).
404
405                        Additional options:
406
407                        - equal=x
408
409                          Character written to output if filter and stream are equal at
410                          a position (defaults to '.'). To copy the stream contents for
411                          equal columns, specify 'equal=' (directly followed by ',' or ')')
412
413                        - differ=y
414
415                          Character written to output if filter and stream don't match at one column position.
416                          Default is to copy the character from the stream.
417
418                filter(options)
419
420                        Filters only specified columns out of the input stream(s). You need to
421                        specify either
422
423                        - exclude=xyz
424
425                          to use all columns, where the filter (see common options above) has none
426                          of the characters 'xyz'
427
428                        or
429
430                        - include=xyz
431
432                          to use only columns, where the filter has one of the characters 'xyz'
433
434                        All used columns are concatenated and written to the output stream(s).
435
436
437                change(options)
438
439                        Randomly modifies the content of columns selected
440                        by the filter (see common options above).
441                        Only columns containing letters will be modified.
442
443                        The options 'include=xyz' and 'exclude=xyz' work like
444                        with 'filter()', but here they select the columns to modify - all other
445                        columns get copied unmodified.
446
447                        How the selected columns are modified, is specified by the following
448                        parameters:
449
450                                - change=percent
451
452                                  percentage of changed columns (default: silently change nothing, to make
453                                  it more difficult for you to ignore this helpfile)
454
455                                - to=xy
456
457                                  randomly change to one of the characters 'xy'.
458
459                                  Hints:
460
461                                        - Use 'xyy' to produce 33% 'x' and 66% 'y'
462                                        - Use 'xxxxxxxxxy' to produce 90% 'x' and 10% 'y'
463                                        - Use 'x' to replace all matching columns by 'x'
464
465                        I think the intention for this (long undocumented) command is to easily generate
466                        artificial sequences with different GC-content, in order to test treeing-software.
467
468        SPECIALS
469
470                exec(command[,param1,param2,...])
471
472                    Execute external (unix) command.
473
474                    Given params will be single-quoted and passed to the command.
475
476                    All input streams will be concatenated and piped into the command.
477
478                    When the command itself is a pipe, put it in parentheses (e.g. "(sort|uniq)").
479                    Note: This won't work together with params.
480
481                    The result is the output of the command.
482
483                    WARNING!!!
484
485                        You better not use this command for NDS,
486                        because any slow command will disable all editing -> You never
487                        can remove this command from the NDS. Even arb_panic will not
488                        easily help you.
489
490                command(action)
491
492                        applies 'action' to all input streams using
493
494                                 - ACI,
495                                 - SRT (if starts with ':') (see LINK{srt.hlp})
496                                 - or as REG (if starts with '/') (see LINK{reg.hlp}).
497
498                        If you nest calls (i.e. if 'action' contains further calls to 'command') you have to apply
499                        escaping multiple times (e.g. inside an export filter - which is in fact an
500                        SRT expression - you'll have to use double escapes).
501
502                eval(exprEvalToAction)
503
504                        the 'exprEvalToAction' is evaluated (using an empty string as input)
505                        and the result is interpreted as action and gets applied to all
506                        input streams (as in 'command' above).
507
508                        Example: Say you have two numeric positions stored in database fields
509                                 'pos1' and 'pos2' for each species. Then the following command
510                                 extracts the sequence data from pos1 to pos2:
511
512                                 'sequence|eval(" \"mid(\";readdb(pos1);\";\";readdb(pos2);\")\" ")'
513
514                        How the example works:
515
516                            The argument is the escaped version of the
517                            command '"mid(" ; readdb(pos1) ; ";" ; readdb(pos2) ; ")"'.
518
519                            If pos1 contains '10' and pos2 contains '20' that command will
520                            evaluate to 'mid(10;20)'.
521
522                            For these positions the executed ACI behaves like 'sequence|mid(10;20)'.
523
524                define(name,escapedCommand)
525
526                        defines an ACI-macro 'name'. 'escapedCommand' contains an escaped
527                        ACI command sequence. This command sequence can be executed with
528                        do(name).
529
530                do(name)
531
532                        applies a previously defined ACI-macro to all input streams (see 'define').
533
534                        'define(a,action)' followed by 'do(a)' works similar to 'command(action)'.
535
536                        See embl.eft for an example using define and 'do'
537
538                origin_organism(action)
539                origin_gene(action)
540
541                        like command() but readdb() etc. reads all data from the
542                        origin organism/gene of a gene-species (not from the gene-species itself).
543
544                        This function applies only to gene-species!
545
546SECTION         Future features
547
548                statistic
549
550                        creates a character statistic of the sequence
551                        (not implemented yet)
552
553EXAMPLES        sequence|format_sequence(firsttab=0;tab=10)|"SEQUENCE_";dd
554
555                                fetches the default sequence, formats it,
556                                and prepends 'SEQUENCE_'.
557
558                sequence|remove(".-")|format_sequence
559
560                                get the default sequence, remove all '.-' and
561                                format it
562
563                sequence|remove(".-")|len
564
565                                the number of non '.-' symbols (sequence length )
566
567                "[";taxonomy(tree_other,3);" -> ";taxonomy(3);"]"
568
569                                shows for each species how their taxonomy
570                                changed between "tree_other" and current tree
571
572                equals(readdb(tmp),readdb(acc))|select(echo("tmp and acc differ"),)
573
574                                returns 'tmp and acc differ' if the content of
575                                the database fields 'tmp' and 'acc' differs. empty result
576                                otherwise.
577
578                readdb(full_name)|icontains(bacillus)|compare(0)|select(echo(..),readdb(full_name))
579
580                                returns the content of the 'full_name' database entry if it contains
581                                the substring 'bacillus'. Otherwise returns '..'
582
583
584BUGS            The output of taxonomy() is not always instantly refreshed.
Note: See TracBrowser for help on using the repository browser.