#!/usr/bin/perl
# ================================================================ #
#                                                                  #
#   File      : genbank_gen_long_features.pl                       #
#   Purpose   : modifies genbank feature table for easy scanning   #
#               with ARBs ift                                      #
#                                                                  #
#   Coded by Ralf Westram (coder@reallysoft.de) in November 2007   #
#   Institute of Microbiology (Technical University Munich)        #
#   http://www.arb-home.de/                                        #
#                                                                  #
# ================================================================ #
#
# Reads a GenBank flat file from STDIN and writes it to STDOUT.
# Everything outside the feature table is copied verbatim.  Inside the
# feature table (from the line after 'FEATURES' up to 'ORIGIN'):
#
#   * each feature line becomes one 'FT' line; location continuation
#     lines are appended to it,
#   * each qualifier becomes one 'FTx' line that repeats the key of the
#     feature it belongs to; qualifier continuation lines are appended
#     to it (without any separator).
#
# NOTE(review): the copy of this script under review had runs of blanks
# collapsed to a single blank.  The column widths used below (key in
# column 6, location/qualifier in column 22) were reconstructed from the
# INSDC flat-file layout -- confirm against a pristine copy.

use strict;
use warnings;

# Reformat one GenBank stream.
#
# Parameters:
#   $in  - filehandle to read the GenBank flat file from
#   $out - filehandle to write the reformatted file to
#
# Dies with "<reason> in line <n> of inputfile\n" when the feature table
# does not have the expected structure, or when the input ends while a
# feature/qualifier line is still buffered (i.e. no 'ORIGIN' was seen).
sub reformat_genbank {
    my ($in, $out) = @_;

    my $state = 'header';    # 'header' -> 'features' -> 'sequence' -> 'header' ...
    my $last_feature;        # key of the feature currently being processed
    my $line_to_print;       # pending output line (may still grow by continuation lines)
    my $line_number = 0;

    # Emit the pending output line, if there is one.
    my $flush = sub {
        return unless defined $line_to_print;
        print {$out} $line_to_print, "\n";
        $line_to_print = undef;
    };

    my $ok = eval {
        # Read line by line: a list-context read would pull the whole
        # (potentially very large) input into memory first.
        while (defined(my $line = <$in>)) {
            $line_number++;

            if ($state eq 'header') {    # copy everything up to the feature table
                if ($line =~ /^FEATURES/) {
                    $state        = 'features';
                    $last_feature = undef;    # never inherit a key from a previous record
                }
                print {$out} $line;
            }
            elsif ($state eq 'features') {
                if ($line =~ /^ {21}/) {      # qualifier or continuation line
                    my $rest = substr($line, 21);
                    chomp $rest;

                    if ($rest =~ m{^/}) {     # start of a new qualifier
                        $flush->();
                        die "Found qualifier line before any feature"
                            unless defined $last_feature;
                        # 'FTx  ' + key padded to 16 columns puts the
                        # qualifier text at column 22 again.
                        $line_to_print = sprintf('FTx  %-16s%s', $last_feature, $rest);
                    }
                    else {                    # continuation of location or qualifier
                        if (not defined $line_to_print) {
                            die "Found continued qualifier line (expected start of qualifier or new feature)";
                        }
                        $line_to_print .= $rest;
                    }
                }
                elsif ($line =~ /^ORIGIN/) {  # end of feature table
                    $flush->();
                    print {$out} $line;
                    $state = 'sequence';
                }
                else {                        # has to be a new feature
                    $flush->();
                    # \S+ instead of [a-z_]+: legal keys such as 5'UTR,
                    # 3'UTR or D-loop contain apostrophes, digits, hyphens.
                    if ($line =~ /^ {5}(\S+)( .*)$/) {
                        my ($feature, $rest) = ($1, $2);
                        $last_feature  = $feature;
                        $line_to_print = "FT   " . $feature . $rest;
                    }
                    else {
                        die "Unexpected case (expected new feature)";
                    }
                }
            }
            else {                            # 'sequence': copy until end of record
                print {$out} $line;
                if ($line eq "//\n") {
                    $state = 'header';
                }
            }
        }
        if (defined $line_to_print) { die "Unexpected content in internal feature-buffer"; }
        1;
    };

    if (!$ok) {
        my $error = $@;
        $error = 'Unknown error' unless defined $error and length $error;
        chomp $error;
        die "$error in line $line_number of inputfile\n";
    }
    return;
}

reformat_genbank(\*STDIN, \*STDOUT);