Changeset 7738 for branches/stable_5.0

Show
Ignore:
Timestamp:
22/07/11 21:18:41 (10 months ago)
Author:
westram
Message:
Files:
1 modified

Legend:

Unmodified
Added
Removed
  • branches/stable_5.0/PERL_SCRIPTS/SPECIES/markSpecies.pl

    r5991 r7738  
    2222# ------------------------------------------------------------ 
    2323 
    24 sub markSpecies(\%$$) { 
    25   my ($marklist_r, $wanted_mark, $clearRest) = @_; 
     24sub markSpecies(\%$$$$$) { 
     25  my ($marklist_r, $wanted_mark, $clearRest,$field,$ambiguous,$partial) = @_; 
    2626 
    2727  my $gb_main = ARB::open(":","rw"); 
     
    3232  my @count = (0,0); 
    3333 
     34  my %seen = (); 
     35  my $had_field = 0; 
     36 
    3437  for (my $gb_species = BIO::first_species($gb_main); 
    3538       $gb_species; 
     
    3740    { 
    3841      my $marked = ARB::read_flag($gb_species); 
    39       my $species_name = BIO::read_string($gb_species, "name"); 
    40       $species_name || expectError('read_string'); 
     42      my $field_content = BIO::read_string($gb_species, $field); 
    4143 
    42       if (defined $$marklist_r{$species_name}) { 
    43         if ($marked!=$wanted_mark) { 
    44           ARB::write_flag($gb_species,$wanted_mark); 
    45           $count[$wanted_mark]++; 
     44      if ($field_content) { 
     45        $had_field++; 
     46 
     47        my $matching_entry = undef; 
     48 
     49        if (defined $$marklist_r{$field_content}) { 
     50          $matching_entry = $field_content; 
    4651        } 
    47         delete $$marklist_r{$species_name}; 
    48       } 
    49       else { 
    50         if ($marked==$wanted_mark and $clearRest==1) { 
    51           ARB::write_flag($gb_species,1-$wanted_mark); 
    52           $count[1-$wanted_mark]++; 
     52        else { 
     53        MATCH: foreach (keys %$marklist_r) { 
     54            if ($field_content =~ /$_/) { 
     55              $matching_entry = $_; 
     56              last MATCH; 
     57            } 
     58          } 
     59        } 
     60 
     61        if (defined $matching_entry) { 
     62          if ($marked!=$wanted_mark) { 
     63            ARB::write_flag($gb_species,$wanted_mark); 
     64            $count[$wanted_mark]++; 
     65          } 
     66          if ($ambiguous==1) { 
     67            $seen{$matching_entry} = 1; 
     68          } 
     69          else { 
     70            # expect field content to be unique 
     71            # -> delete after use 
     72            delete $$marklist_r{$matching_entry}; 
     73          } 
     74        } 
     75        else { 
     76          if ($marked==$wanted_mark and $clearRest==1) { 
     77            ARB::write_flag($gb_species,1-$wanted_mark); 
     78            $count[1-$wanted_mark]++; 
     79          } 
    5380        } 
    5481      } 
    5582    } 
    5683 
     84  if ($ambiguous==1) { 
     85    # correct marklist 
     86    foreach (keys %seen) { delete $$marklist_r{$_}; } 
     87  } 
     88 
    5789  ARB::commit_transaction($gb_main); 
    5890  ARB::close($gb_main); 
     91 
     92  if ($had_field==0) { die "No species has a field named '$field'\n"; } 
    5993 
    6094  return ($count[1],$count[0]); 
     
    85119  my ($err) = @_; 
    86120  print "Purpose: Mark/unmark species in running ARB\n"; 
    87   print "Usage: markSpecies.pl [-unmark] [-keep] specieslist\n"; 
    88   print "       -unmark   Unmark species (default is to mark)\n"; 
    89   print "       -keep     Do not change rest (default is to unmark/mark rest)\n"; 
     121  print "Usage: markSpecies.pl [-unmark] [-keep] specieslist [field]\n"; 
     122  print "       -unmark     Unmark species (default is to mark)\n"; 
     123  print "       -keep       Do not change rest (default is to unmark/mark rest)\n"; 
     124  print "       -ambiguous  Allow field to be ambiguous (otherwise it has to be unique)\n"; 
     125  print "       -partial    Allow partial match for field content (slow!)\n"; 
    90126  print "\n"; 
    91   print "specieslist is a file containing one species per line\n"; 
     127  print "specieslist is a file containing one entry per line\n"; 
     128  print "   normally the entry will be the short name as used in your DB\n"; 
     129  print "   when you specify 'field' you may use other entries (e.g. 'acc')\n"; 
     130  print "\n"; 
    92131  print "Use '-' as filename to read from STDIN\n"; 
    93132  print "\n"; 
     
    101140  my $mark      = 1; 
    102141  my $clearRest = 1; 
     142  my $ambiguous = 0; 
     143  my $partial   = 0; 
    103144 
    104   while ($args>1) { 
     145  while (substr($ARGV[0],0,1) eq '-') { 
    105146    my $arg = shift @ARGV; 
    106147    if ($arg eq '-unmark') { $mark = 0; } 
    107148    elsif ($arg eq '-keep') { $clearRest = 0; } 
     149    elsif ($arg eq '-ambiguous') { $ambiguous = 1; } 
     150    elsif ($arg eq '-partial') { $partial = 1; } 
    108151    else { die_usage("Unknown switch '$arg'"); } 
    109152    $args--; 
    110153  } 
    111154 
    112   my $file = shift @ARGV; 
     155  my $file  = shift @ARGV; 
     156  my $field = shift @ARGV; 
     157  $field = 'name' if (not defined $field); 
     158 
    113159  my %marklist; 
    114160  buildMarklist($file,%marklist); 
    115   my ($marked,$unmarked) = markSpecies(%marklist,$mark,$clearRest); 
     161  my ($marked,$unmarked) = markSpecies(%marklist,$mark,$clearRest,$field,$ambiguous,$partial); 
    116162 
    117163  if ($marked>0) { print "Marked $marked species\n"; } 
     
    120166  my @notFound = keys %marklist; 
    121167  if (scalar(@notFound)) { 
    122     print "Some species were not found in database:\n"; 
     168    if ($field eq 'name') { 
     169      print "Some species were not found in database:\n"; 
     170    } 
     171    else { 
     172      print "Some entries did not match any species:\n"; 
     173    } 
    123174    foreach (@notFound) { print "- '$_'\n"; } 
    124175  }