1 | #!/usr/bin/perl -w |
---|
2 | use lib "$ENV{'ARBHOME'}/lib/"; |
---|
3 | use ARB; |
---|
4 | |
---|
5 | $gb_main = ARB::open(":","r"); |
---|
6 | if (! $gb_main ) { |
---|
7 | $error = ARB::await_error(); |
---|
8 | print ("Error: $error\n"); |
---|
9 | exit 0; |
---|
10 | } |
---|
11 | |
---|
12 | sub message($) { |
---|
13 | my ($msg) = @_; |
---|
14 | BIO::message($gb_main, $msg); |
---|
15 | } |
---|
16 | |
---|
17 | sub error($) { |
---|
18 | my ($msg) = @_; |
---|
19 | $msg = "Error: ".$msg; |
---|
20 | ARB::abort_transaction($gb_main); # this undoes all changes made by this script |
---|
21 | |
---|
22 | ARB::begin_transaction($gb_main); |
---|
23 | BIO::message($gb_main, $msg); |
---|
24 | BIO::message($gb_main, "Script aborted!"); |
---|
25 | ARB::commit_transaction($gb_main); |
---|
26 | die $msg; |
---|
27 | } |
---|
28 | |
---|
29 | |
---|
30 | ARB::begin_transaction($gb_main); |
---|
31 | |
---|
32 | my $gb_species = BIO::first_marked_species($gb_main); |
---|
33 | my $current_alignment = BIO::get_default_alignment($gb_main); |
---|
34 | my $marked = 0; |
---|
35 | my $corr_ends = 0; |
---|
36 | my $corr_species = 0; |
---|
37 | |
---|
38 | message("Checking '$current_alignment' sequence ends of marked species.."); |
---|
39 | |
---|
40 | SPECIES: while ($gb_species) { |
---|
41 | $marked++; |
---|
42 | $gb_species = BIO::next_marked_species($gb_species); |
---|
43 | last SPECIES if not $gb_species; |
---|
44 | |
---|
45 | my $name = BIO::read_name($gb_species); |
---|
46 | my $gb_sequence = BIO::find_sequence($gb_species, $current_alignment); |
---|
47 | |
---|
48 | if ($gb_sequence) { |
---|
49 | my $sequence = ARB::read_string($gb_sequence); |
---|
50 | |
---|
51 | if ($sequence =~ /^([\.-]*)(.*[^\.-])([\.-]*)$/ig) { |
---|
52 | my $start = $1; |
---|
53 | my $mid = $2; |
---|
54 | my $end = $3; |
---|
55 | my $changed = 0; |
---|
56 | |
---|
57 | # translate '-' to '.' at sequence start |
---|
58 | if ($start =~ /-/) { |
---|
59 | $start =~ y/-/./; |
---|
60 | $changed = 1; |
---|
61 | $corr_ends++; |
---|
62 | } |
---|
63 | # translate '-' to '.' at sequence end |
---|
64 | if ($end =~ /-/) { |
---|
65 | $end =~ y/-/./; |
---|
66 | $changed = 1; |
---|
67 | $corr_ends++; |
---|
68 | } |
---|
69 | |
---|
70 | my $newSequence = $start.$mid.$end; |
---|
71 | |
---|
72 | # some safety belts: |
---|
73 | # |
---|
74 | # 1. check length |
---|
75 | my $oldLen = length($sequence); |
---|
76 | my $newLen = length($newSequence); |
---|
77 | if ($oldLen != $newLen) { error("sequence length of '$name' has changed"); } |
---|
78 | |
---|
79 | # 2. compare checksums |
---|
80 | my $old_crc = ARB::checksum($sequence,$oldLen,1,".-"); |
---|
81 | my $new_crc = ARB::checksum($newSequence,$newLen,1,".-"); |
---|
82 | if ($old_crc != $new_crc) { error("sequence checksum of '$name' changed"); } |
---|
83 | |
---|
84 | if ($changed == 1) { |
---|
85 | # store result in database |
---|
86 | ARB::write_string($gb_sequence, $newSequence); |
---|
87 | $corr_species++; |
---|
88 | } |
---|
89 | } |
---|
90 | else { |
---|
91 | error("Error in regexp - cannot analyze sequence"); |
---|
92 | } |
---|
93 | } else { |
---|
94 | message("Sequence '$name' has no data in alignment '$current_alignment' - skipped."); |
---|
95 | } |
---|
96 | } |
---|
97 | |
---|
98 | if ($marked) { |
---|
99 | if ($corr_ends == 0) { |
---|
100 | message("No sequence ends of your $marked marked species needed correction."); |
---|
101 | } |
---|
102 | else { |
---|
103 | message("Corrected $corr_ends sequence ends of $corr_species sequences (of $marked marked species)."); |
---|
104 | } |
---|
105 | } |
---|
106 | else { |
---|
107 | message("Please mark all species where sequence ends should be corrected."); |
---|
108 | } |
---|
109 | |
---|
110 | ARB::commit_transaction($gb_main); |
---|
111 | ARB::close($gb_main); |
---|