| 1 | #!/usr/bin/perl -w |
|---|
| 2 | use lib "$ENV{'ARBHOME'}/lib/"; |
|---|
| 3 | use ARB; |
|---|
| 4 | |
|---|
| 5 | $gb_main = ARB::open(":","r"); |
|---|
| 6 | if (! $gb_main ) { |
|---|
| 7 | $error = ARB::await_error(); |
|---|
| 8 | print ("Error: $error\n"); |
|---|
| 9 | exit 0; |
|---|
| 10 | } |
|---|
| 11 | |
|---|
| 12 | sub message($) { |
|---|
| 13 | my ($msg) = @_; |
|---|
| 14 | BIO::message($gb_main, $msg); |
|---|
| 15 | } |
|---|
| 16 | |
|---|
| 17 | sub error($) { |
|---|
| 18 | my ($msg) = @_; |
|---|
| 19 | $msg = "Error: ".$msg; |
|---|
| 20 | ARB::abort_transaction($gb_main); # this undoes all changes made by this script |
|---|
| 21 | |
|---|
| 22 | ARB::begin_transaction($gb_main); |
|---|
| 23 | BIO::message($gb_main, $msg); |
|---|
| 24 | BIO::message($gb_main, "Script aborted!"); |
|---|
| 25 | ARB::commit_transaction($gb_main); |
|---|
| 26 | die $msg; |
|---|
| 27 | } |
|---|
| 28 | |
|---|
| 29 | |
|---|
| 30 | ARB::begin_transaction($gb_main); |
|---|
| 31 | |
|---|
| 32 | my $gb_species = BIO::first_marked_species($gb_main); |
|---|
| 33 | my $current_alignment = BIO::get_default_alignment($gb_main); |
|---|
| 34 | my $marked = 0; |
|---|
| 35 | my $corr_ends = 0; |
|---|
| 36 | my $corr_species = 0; |
|---|
| 37 | |
|---|
| 38 | message("Checking '$current_alignment' sequence ends of marked species.."); |
|---|
| 39 | |
|---|
| 40 | SPECIES: while ($gb_species) { |
|---|
| 41 | $marked++; |
|---|
| 42 | $gb_species = BIO::next_marked_species($gb_species); |
|---|
| 43 | last SPECIES if not $gb_species; |
|---|
| 44 | |
|---|
| 45 | my $name = BIO::read_name($gb_species); |
|---|
| 46 | my $gb_sequence = BIO::find_sequence($gb_species, $current_alignment); |
|---|
| 47 | |
|---|
| 48 | if ($gb_sequence) { |
|---|
| 49 | my $sequence = ARB::read_string($gb_sequence); |
|---|
| 50 | |
|---|
| 51 | if ($sequence =~ /^([\.-]*)(.*[^\.-])([\.-]*)$/ig) { |
|---|
| 52 | my $start = $1; |
|---|
| 53 | my $mid = $2; |
|---|
| 54 | my $end = $3; |
|---|
| 55 | my $changed = 0; |
|---|
| 56 | |
|---|
| 57 | # translate '-' to '.' at sequence start |
|---|
| 58 | if ($start =~ /-/) { |
|---|
| 59 | $start =~ y/-/./; |
|---|
| 60 | $changed = 1; |
|---|
| 61 | $corr_ends++; |
|---|
| 62 | } |
|---|
| 63 | # translate '-' to '.' at sequence end |
|---|
| 64 | if ($end =~ /-/) { |
|---|
| 65 | $end =~ y/-/./; |
|---|
| 66 | $changed = 1; |
|---|
| 67 | $corr_ends++; |
|---|
| 68 | } |
|---|
| 69 | |
|---|
| 70 | my $newSequence = $start.$mid.$end; |
|---|
| 71 | |
|---|
| 72 | # some safety belts: |
|---|
| 73 | # |
|---|
| 74 | # 1. check length |
|---|
| 75 | my $oldLen = length($sequence); |
|---|
| 76 | my $newLen = length($newSequence); |
|---|
| 77 | if ($oldLen != $newLen) { error("sequence length of '$name' has changed"); } |
|---|
| 78 | |
|---|
| 79 | # 2. compare checksums |
|---|
| 80 | my $old_crc = ARB::checksum($sequence,$oldLen,1,".-"); |
|---|
| 81 | my $new_crc = ARB::checksum($newSequence,$newLen,1,".-"); |
|---|
| 82 | if ($old_crc != $new_crc) { error("sequence checksum of '$name' changed"); } |
|---|
| 83 | |
|---|
| 84 | if ($changed == 1) { |
|---|
| 85 | # store result in database |
|---|
| 86 | ARB::write_string($gb_sequence, $newSequence); |
|---|
| 87 | $corr_species++; |
|---|
| 88 | } |
|---|
| 89 | } |
|---|
| 90 | else { |
|---|
| 91 | error("Error in regexp - cannot analyze sequence"); |
|---|
| 92 | } |
|---|
| 93 | } else { |
|---|
| 94 | message("Sequence '$name' has no data in alignment '$current_alignment' - skipped."); |
|---|
| 95 | } |
|---|
| 96 | } |
|---|
| 97 | |
|---|
| 98 | if ($marked) { |
|---|
| 99 | if ($corr_ends == 0) { |
|---|
| 100 | message("No sequence ends of your $marked marked species needed correction."); |
|---|
| 101 | } |
|---|
| 102 | else { |
|---|
| 103 | message("Corrected $corr_ends sequence ends of $corr_species sequences (of $marked marked species)."); |
|---|
| 104 | } |
|---|
| 105 | } |
|---|
| 106 | else { |
|---|
| 107 | message("Please mark all species where sequence ends should be corrected."); |
|---|
| 108 | } |
|---|
| 109 | |
|---|
| 110 | ARB::commit_transaction($gb_main); |
|---|
| 111 | ARB::close($gb_main); |
|---|