source: trunk/lib/macros/ARB/SEQ_DATA/dashes2dots_at_sequenceends_of_marked.amc

Last change on this file was 12803, checked in by westram, 10 years ago
  • Property svn:eol-style set to native
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 2.8 KB
Line 
1#!/usr/bin/perl -w
2use lib "$ENV{'ARBHOME'}/lib/";
3use ARB;
4
5$gb_main = ARB::open(":","r");
6if (! $gb_main ) {
7    $error = ARB::await_error();
8    print ("Error: $error\n");
9    exit 0;
10}
11
12sub message($) {
13  my ($msg) = @_;
14  BIO::message($gb_main, $msg);
15}
16
17sub error($) {
18  my ($msg) = @_;
19  $msg = "Error: ".$msg;
20  ARB::abort_transaction($gb_main); # this undoes all changes made by this script
21
22  ARB::begin_transaction($gb_main);
23  BIO::message($gb_main, $msg);
24  BIO::message($gb_main, "Script aborted!");
25  ARB::commit_transaction($gb_main);
26  die $msg;
27}
28
29
30ARB::begin_transaction($gb_main);
31
32my $gb_species        = BIO::first_marked_species($gb_main);
33my $current_alignment = BIO::get_default_alignment($gb_main);
34my $marked            = 0;
35my $corr_ends         = 0;
36my $corr_species      = 0;
37
38message("Checking '$current_alignment' sequence ends of marked species..");
39
40SPECIES: while ($gb_species) {
41  $marked++;
42  $gb_species = BIO::next_marked_species($gb_species);
43  last SPECIES if not $gb_species;
44
45  my $name = BIO::read_name($gb_species);
46  my $gb_sequence = BIO::find_sequence($gb_species, $current_alignment);
47
48  if ($gb_sequence) {
49    my $sequence = ARB::read_string($gb_sequence);
50
51    if ($sequence =~ /^([\.-]*)(.*[^\.-])([\.-]*)$/ig) {
52      my $start   = $1;
53      my $mid     = $2;
54      my $end     = $3;
55      my $changed = 0;
56
57      # translate '-' to '.' at sequence start
58      if ($start =~ /-/) {
59        $start =~ y/-/./;
60        $changed = 1;
61        $corr_ends++;
62      }
63      # translate '-' to '.' at sequence end
64      if ($end =~ /-/) {
65        $end =~ y/-/./;
66        $changed = 1;
67        $corr_ends++;
68      }
69
70      my $newSequence = $start.$mid.$end;
71
72      # some safety belts:
73      #
74      # 1. check length
75      my $oldLen = length($sequence);
76      my $newLen = length($newSequence);
77      if ($oldLen != $newLen) { error("sequence length of '$name' has changed"); }
78
79      # 2. compare checksums
80      my $old_crc = ARB::checksum($sequence,$oldLen,1,".-");
81      my $new_crc = ARB::checksum($newSequence,$newLen,1,".-");
82      if ($old_crc != $new_crc) { error("sequence checksum of '$name' changed"); }
83
84      if ($changed == 1) {
85        # store result in database
86        ARB::write_string($gb_sequence, $newSequence);
87        $corr_species++;
88      }
89    }
90    else {
91      error("Error in regexp - cannot analyze sequence");
92    }
93  } else {
94    message("Sequence '$name' has no data in alignment '$current_alignment' - skipped.");
95  }
96}
97
98if ($marked) {
99  if ($corr_ends == 0) {
100    message("No sequence ends of your $marked marked species needed correction.");
101  }
102  else {
103    message("Corrected $corr_ends sequence ends of $corr_species sequences (of $marked marked species).");
104  }
105}
106else {
107  message("Please mark all species where sequence ends should be corrected.");
108}
109
110ARB::commit_transaction($gb_main);
111ARB::close($gb_main);
Note: See TracBrowser for help on using the repository browser.