source: branches/tree/HELP_SOURCE/source/importift.hlp

Last change on this file was 18769, checked in by westram, 3 years ago
  • move all helpfiles to new source location
  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 7.3 KB
Line 
1#Please insert up references in the next lines (line starts with keyword UP)
2UP      arb.hlp
3UP      glossary.hlp
4
5#Please insert subtopic references  (line starts with keyword SUB)
6SUB     srt.hlp
7SUB     aci.hlp
8
9# Hypertext links in helptext can be added like this: LINK{ref.hlp|http://add|bla@domain}
10
11#************* Title of helpfile !! and start of real helpfile ********
12TITLE           How to define new import formats
13
14OCCURRENCE      ARB_NT
15
16SECTION         BRIEF DESCRIPTION
17
18                Import filters delivered together with ARB are located in
19                the directory '$ARBHOME/lib/import'. Their file extension
20                has to be '.ift'!
21
22                To customize your own import filters, click on
23                the 'Test'-button in the import window (see LINK{import_test.hlp}).
24
25                    Note: If multiple users should be able to use a customized filter, you need
26                    to copy that filter from '~/.arb_prop/filter' into '$ARBHOME/lib/import'.
27
28                Each of the import filters describes how to analyze and read files
29                of one specific format.
30
31                A basic import description file (.ift) looks like this:
32
33                        [AUTODETECT     "Matchpattern"]
34                        BEGIN           "Matchpattern"
35                        [KEYWIDTH       #Columnnumber]
36                        [AUTOTAG        ["TAGNAME"]]
37                        [IFNOTSET       x "Reason why x is not set"]+
38                        [SETGLOBAL      x "global value"]+
39                        [INCLUDE        "file"]+
40                        [DESCRIPTION    "text describing import filter"]+
41                        [MATCH          "Matchpattern"
42                                [SRT            "SRT_STRING"]
43                                [ACI            "ACI_STRING"]
44                                [TAG            "TAGNAME"]
45                                [WRITE          "DB_FIELD_NAME"]
46                                [WRITE_INT      "DB_FIELD_NAME"]
47                                [WRITE_FLOAT    "DB_FIELD_NAME"]
48                                [APPEND         "DB_FIELD_NAME"]
49                                [SETVAR         x]
50                        ]*
51                        SEQUENCESTART   "Matchpattern"
52                        SEQUENCECOLUMN  #Columnnumber
53                        [SEQUENCESRT    "SRT_STRING"]
54                        [SEQUENCEACI    "ACI_STRING"]
55                        SEQUENCEEND     "STRING"
56                        [CREATE_ACC_FROM_SEQUENCE]
57                        [DONT_GEN_NAMES]
58                        END             "STRING"
59
60                or it can pipe the data through any external program PROGRAM to
61                convert it to an already existing format 'exformat'
62                using the following basic design:
63
64                [AUTODETECT     "Matchpattern"]
65                SYSTEM          "PROGRAM $< $>"
66                NEW_FORMAT      "lib/import/exformat.ift"
67
68                $< will be replaced by the input file name
69                $> will be replaced by the intermediate file name
70
71DESCRIPTION     First of all, the converter appends all import files matching
72                the filepattern into one file. The files are separated by the
73                string defined with the keyword  SEQUENCEEND.
74
75                1. Search for the first line matching the pattern defined by BEGIN
76
77                2. Try to match all MATCH_patterns.
78
79                   For all lines that match do:
80
81                        - append all following lines, which start after
82                          column KEYWIDTH
83
84                        - run commands with the concatenated lines
85
86                   Known commands are (they are executed in the order listed here):
87
88                         - SRT "SRT_STRING"
89
90                               start the string replace tool on the current result and
91                               set the output as current result (see LINK{srt.hlp}).
92
93                         - ACI "ACI_STRING"
94
95                               run the arb command interpreter to change the current result (see LINK{aci.hlp}).
96
97                         - TAG "TAGNAME"
98
99                               tag information (e.g. "[EBI] 1997 [RDP] 1998")
100
101                         - WRITE "DB_FIELD_NAME"
102
103                               write the current result into DB_FIELD_NAME
104
105                         - WRITE_INT "DB_FIELD_NAME"
106
107                               like WRITE, but expect integer target field
108
109                         - WRITE_FLOAT "DB_FIELD_NAME"
110
111                               like WRITE, but expect floating-point target field
112
113                         - APPEND "DB_FIELD_NAME"
114
115                               append the current result to DB_FIELD_NAME
116
117                         - SETVAR x
118
119                               store the current result in the variable x, where x may be any character.
120                               Once it has been set, this variable can be referenced by using $x in any
121                               command expression (SRT_STRING,ACI_STRING,TAGNAME,DB_FIELD_NAME).
122
123                               For each variable used, an error reason has to be defined
124                               that describes what's wrong if the variable has NOT been set.
125                               Define error reasons using
126
127                                      IFNOTSET x "Reason why x is not set"
128
129                               Note: use '$$' to insert a single '$'.
130
131                               Allowed variable names are 'a' to 'z'.
132
133                   Note: Each of these commands may occur only once per MATCH rule.
134                         To run a command multiple times, create multiple MATCH rules.
135
136                3. If the line matches SEQUENCESTART_pattern, assume that
137                   all following lines up to, but not including, the line
138                   matching SEQUENCEEND_pattern contain the sequence data.
139
140                4. GOTO 1
141
142                Postprocesses:
143
144                        CREATE_ACC_FROM_SEQUENCE:
145
146                                Generate a checksum for all sequences with no accession
147                                entry ('acc' -field) and write it as the accession number
148
149                        DONT_GEN_NAMES:
150
151                                Do not try to generate unique identifiers (shortnames) for
152                                the species using the full_name field.
153
154                General commands:
155
156                        INCLUDE "filename"
157
158                                Simply inserts the contents of "filename" at the current position.
159
160                                It's possible to declare variables in the file where the INCLUDE
161                                happens and to use them in the included file. (Example:
162                                longebi.ift, longgenbank.ift and feature_table.ift in subdir nonformats)
163
164                        SETGLOBAL x "value"
165
166                                Sets global variable 'x' to 'value'.
167
168                        AUTOTAG ["TAGNAME"]
169
170                                If set, act like each MATCH rule has a
171                                   TAG "TAGNAME"
172                                entry. Use AUTOTAG w/o parameter to reset
173                                to default behavior.
174
175EXAMPLES        Look at the files in '$ARBHOME/lib/import'
176
177WARNINGS        Format detection does not always work
Note: See TracBrowser for help on using the repository browser.