Changeset 7737
- Timestamp:
- 22/07/11 21:16:18 (10 months ago)
- Files:
-
- 1 modified
-
trunk/PERL_SCRIPTS/SPECIES/markSpecies.pl (modified) (6 diffs)
Legend:
- Unmodified
- Added
- Removed
-
trunk/PERL_SCRIPTS/SPECIES/markSpecies.pl
r5991 r7737 22 22 # ------------------------------------------------------------ 23 23 24 sub markSpecies(\%$$ ) {25 my ($marklist_r, $wanted_mark, $clearRest ) = @_;24 sub markSpecies(\%$$$$$) { 25 my ($marklist_r, $wanted_mark, $clearRest,$field,$ambiguous,$partial) = @_; 26 26 27 27 my $gb_main = ARB::open(":","rw"); … … 32 32 my @count = (0,0); 33 33 34 my %seen = (); 35 my $had_field = 0; 36 34 37 for (my $gb_species = BIO::first_species($gb_main); 35 38 $gb_species; … … 37 40 { 38 41 my $marked = ARB::read_flag($gb_species); 39 my $species_name = BIO::read_string($gb_species, "name"); 40 $species_name || expectError('read_string'); 42 my $field_content = BIO::read_string($gb_species, $field); 41 43 42 if (defined $$marklist_r{$species_name}) { 43 if ($marked!=$wanted_mark) { 44 ARB::write_flag($gb_species,$wanted_mark); 45 $count[$wanted_mark]++; 44 if ($field_content) { 45 $had_field++; 46 47 my $matching_entry = undef; 48 49 if (defined $$marklist_r{$field_content}) { 50 $matching_entry = $field_content; 46 51 } 47 delete $$marklist_r{$species_name}; 48 } 49 else { 50 if ($marked==$wanted_mark and $clearRest==1) { 51 ARB::write_flag($gb_species,1-$wanted_mark); 52 $count[1-$wanted_mark]++; 52 else { 53 MATCH: foreach (keys %$marklist_r) { 54 if ($field_content =~ /$_/) { 55 $matching_entry = $_; 56 last MATCH; 57 } 58 } 59 } 60 61 if (defined $matching_entry) { 62 if ($marked!=$wanted_mark) { 63 ARB::write_flag($gb_species,$wanted_mark); 64 $count[$wanted_mark]++; 65 } 66 if ($ambiguous==1) { 67 $seen{$matching_entry} = 1; 68 } 69 else { 70 # expect field content to be unique 71 # -> delete after use 72 delete $$marklist_r{$matching_entry}; 73 } 74 } 75 else { 76 if ($marked==$wanted_mark and $clearRest==1) { 77 ARB::write_flag($gb_species,1-$wanted_mark); 78 $count[1-$wanted_mark]++; 79 } 53 80 } 54 81 } 55 82 } 56 83 84 if ($ambiguous==1) { 85 # correct marklist 86 foreach (keys %seen) { delete $$marklist_r{$_}; } 87 } 88 57 89 ARB::commit_transaction($gb_main); 58 90 ARB::close($gb_main); 91 92 if ($had_field==0) { die "No species has a field named '$field'\n"; } 59 93 60 94 return ($count[1],$count[0]); … … 85 119 my ($err) = @_; 86 120 print "Purpose: Mark/unmark species in running ARB\n"; 87 print "Usage: markSpecies.pl [-unmark] [-keep] specieslist\n"; 88 print " -unmark Unmark species (default is to mark)\n"; 89 print " -keep Do not change rest (default is to unmark/mark rest)\n"; 121 print "Usage: markSpecies.pl [-unmark] [-keep] specieslist [field]\n"; 122 print " -unmark Unmark species (default is to mark)\n"; 123 print " -keep Do not change rest (default is to unmark/mark rest)\n"; 124 print " -ambiguous Allow field to be ambiguous (otherwise it has to be unique)\n"; 125 print " -partial Allow partial match for field content (slow!)\n"; 90 126 print "\n"; 91 print "specieslist is a file containing one species per line\n"; 127 print "specieslist is a file containing one entry per line\n"; 128 print " normally the entry will be the short name as used in your DB\n"; 129 print " when you specify 'field' you may use other entries (e.g. 'acc')\n"; 130 print "\n"; 92 131 print "Use '-' as filename to read from STDIN\n"; 93 132 print "\n"; … … 101 140 my $mark = 1; 102 141 my $clearRest = 1; 142 my $ambiguous = 0; 143 my $partial = 0; 103 144 104 while ( $args>1) {145 while (substr($ARGV[0],0,1) eq '-') { 105 146 my $arg = shift @ARGV; 106 147 if ($arg eq '-unmark') { $mark = 0; } 107 148 elsif ($arg eq '-keep') { $clearRest = 0; } 149 elsif ($arg eq '-ambiguous') { $ambiguous = 1; } 150 elsif ($arg eq '-partial') { $partial = 1; } 108 151 else { die_usage("Unknown switch '$arg'"); } 109 152 $args--; 110 153 } 111 154 112 my $file = shift @ARGV; 155 my $file = shift @ARGV; 156 my $field = shift @ARGV; 157 $field = 'name' if (not defined $field); 158 113 159 my %marklist; 114 160 buildMarklist($file,%marklist); 115 my ($marked,$unmarked) = markSpecies(%marklist,$mark,$clearRest );161 my ($marked,$unmarked) = markSpecies(%marklist,$mark,$clearRest,$field,$ambiguous,$partial); 116 162 117 163 if ($marked>0) { print "Marked $marked species\n"; } … … 120 166 my @notFound = keys %marklist; 121 167 if (scalar(@notFound)) { 122 print "Some species were not found in database:\n"; 168 if ($field eq 'name') { 169 print "Some species were not found in database:\n"; 170 } 171 else { 172 print "Some entries did not match any species:\n"; 173 } 123 174 foreach (@notFound) { print "- '$_'\n"; } 124 175 }
