#!/usr/bin/perl
# ================================================================ #
#                                                                  #
#   File      : genbank_gen_long_features.pl                       #
#   Purpose   : modifies genbank feature table for easy scanning   #
#               with ARBs ift                                      #
#                                                                  #
#   Coded by Ralf Westram (coder@reallysoft.de) in November 2007   #
#   Institute of Microbiology (Technical University Munich)        #
#   http://www.arb-home.de/                                        #
#                                                                  #
# ================================================================ #

use strict;
use warnings;

# reformat_genbank($in, $out)
#
# Copies GenBank flat-file records from filehandle $in to filehandle $out.
# Everything outside the feature table is copied unchanged. Inside the
# feature table (between a 'FEATURES' line and the 'ORIGIN' line)
#   - each feature (key + location) becomes ONE line prefixed with 'FT',
#   - each qualifier becomes ONE line prefixed with 'FTx' and repeats the
#     key of the feature it belongs to,
#   - continuation lines are appended (without separator) to the line they
#     continue.
#
# Dies with "<reason> in line <n> of inputfile\n" if the feature table does
# not have the expected structure. Returns nothing.
sub reformat_genbank {
    my ($in, $out) = @_;

    # Parser states: before/after a feature table, inside it, inside sequence.
    my ($HEADER, $FEATURES, $SEQUENCE) = (0, 1, 2);

    # GenBank feature table layout: feature keys start in column 6 (5 blanks
    # of indent), locations and qualifiers start in column 22 (21 blanks).
    # The widths are written as counts so they stay visible and cannot be
    # damaged by whitespace-mangling editors or viewers.
    my $key_field_width = 21 - 5;

    my $mode         = $HEADER;
    my $line_number  = 0;
    my $last_feature = undef;    # key of the feature currently being read
    my $pending      = undef;    # output line still open for continuations

    # Emit the buffered output line (if any) and clear the buffer.
    my $flush = sub {
        return unless defined $pending;
        print {$out} $pending, "\n";
        $pending = undef;
        return;
    };

    my $ok = eval {
        # Read line by line; the input may be a whole genome.
        while (defined(my $line = <$in>)) {
            $line_number++;

            if ($mode == $HEADER) {    # copy all till feature table
                if ($line =~ /^FEATURES/) {
                    $mode         = $FEATURES;
                    $last_feature = undef;    # never inherit a key from a previous record
                }
                print {$out} $line;
            }
            elsif ($mode == $FEATURES) {    # reformat feature table
                if ($line =~ /^ {21}(.*)$/) {    # qualifier or continuation line
                    my $rest = $1;               # text without indent and newline

                    if ($rest =~ m{^/}) {        # start of qualifier
                        $flush->();
                        if (not defined $last_feature) {
                            die "Found qualifier line before first feature";
                        }

                        # Pad the key to the width of the key field, so the
                        # qualifier stays in the column it had in the input.
                        my $pad = $key_field_width - length $last_feature;
                        $pad = 0 if $pad < 0;

                        # NOTE(review): the prefix is 5 characters wide (same as
                        # the key indent it replaces) - confirm that this is
                        # what the ARB import filter scans for.
                        $pending = 'FTx  ' . $last_feature . (' ' x $pad) . $rest;
                    }
                    else {                       # continuation of previous line
                        if (not defined $pending) {
                            die "Found continued qualifier line (expected start of qualifier or new feature)";
                        }
                        $pending .= $rest;
                    }
                }
                elsif ($line =~ /^ORIGIN/) {     # end of feature table
                    $flush->();
                    print {$out} $line;
                    $mode = $SEQUENCE;
                }
                else {                           # new feature
                    $flush->();

                    # Feature keys may contain letters, digits and any of _ ' * -
                    # (e.g. 5'UTR, D-loop, -35_signal).
                    if ($line =~ /^ {5}([A-Za-z0-9_'*-]+)( .*)$/) {
                        my ($feature, $rest) = ($1, $2);
                        $last_feature = $feature;
                        $pending      = 'FT   ' . $feature . $rest;
                    }
                    else {
                        die "Unexpected case (expected new feature)";
                    }
                }
            }
            else {    # $SEQUENCE -> copy sequence
                print {$out} $line;
                $mode = $HEADER if $line eq "//\n";    # end of record
            }
        }

        # Input ended inside a feature table (no ORIGIN line seen).
        if (defined $pending) { die "Unexpected content in internal feature-buffer"; }
        1;
    };
    return if $ok;

    # Re-throw with the position in the input appended.
    my $error = $@;
    chomp $error;
    die "$error in line $line_number of inputfile\n";
}

# Work as filter (STDIN -> STDOUT) when executed as a script; do nothing
# when the file is loaded from other code (e.g. from a test).
reformat_genbank(\*STDIN, \*STDOUT) unless caller;