1 | #!/usr/bin/perl |
---|
2 | # ================================================================ # |
---|
3 | # # |
---|
4 | # File : useFieldAsID.pl # |
---|
5 | # Purpose : hack IDs in ARB database # |
---|
6 | # # |
---|
7 | # Coded by Ralf Westram (coder@reallysoft.de) in February 2018 # |
---|
8 | # http://www.arb-home.de/ # |
---|
9 | # # |
---|
10 | # ================================================================ # |
---|
11 | |
---|
12 | use strict; |
---|
13 | use warnings; |
---|
14 | |
---|
15 | BEGIN { |
---|
16 | if (not exists $ENV{'ARBHOME'}) { die "Environment variable \$ARBHOME has to be defined"; } |
---|
17 | my $arbhome = $ENV{'ARBHOME'}; |
---|
18 | push @INC, "$arbhome/lib"; |
---|
19 | push @INC, "$arbhome/PERL_SCRIPTS/lib"; |
---|
20 | 1; |
---|
21 | } |
---|
22 | |
---|
23 | use ARB; |
---|
24 | use tools; |
---|
25 | |
---|
26 | sub hackIDs($) { |
---|
27 | my ($srcField) = @_; |
---|
28 | |
---|
29 | print "Overwriting field 'name' with content of '$srcField' ..\n"; |
---|
30 | |
---|
31 | my $gb_main = ARB::open(":","r"); |
---|
32 | $gb_main || expectError('db connect (no running ARB?)'); |
---|
33 | |
---|
34 | dieOnError(ARB::begin_transaction($gb_main), 'begin_transaction'); |
---|
35 | |
---|
36 | print " - scanning contents of '$srcField'\n"; |
---|
37 | |
---|
38 | my $unique = 1; |
---|
39 | { |
---|
40 | my %content = (); |
---|
41 | for (my $gb_species = BIO::first_species($gb_main); |
---|
42 | $gb_species and $unique; |
---|
43 | $gb_species = BIO::next_species($gb_species)) { |
---|
44 | |
---|
45 | my $field_content = BIO::read_string($gb_species, $srcField); |
---|
46 | $field_content || expectError('read_string'); |
---|
47 | |
---|
48 | if (exists $content{$field_content}) { |
---|
49 | $unique = 0; |
---|
50 | print " - field content '$field_content' occurs more than once\n"; |
---|
51 | print " => not usable as ID ('name')\n"; |
---|
52 | } |
---|
53 | $content{$field_content} = 1; |
---|
54 | } |
---|
55 | } |
---|
56 | |
---|
57 | if ($unique) { |
---|
58 | print " - modifying IDs\n"; |
---|
59 | my $count = 0; |
---|
60 | ARB::push_my_security($gb_main); |
---|
61 | for (my $gb_species = BIO::first_species($gb_main); |
---|
62 | $gb_species and $unique; |
---|
63 | $gb_species = BIO::next_species($gb_species)) { |
---|
64 | |
---|
65 | my $field_content = BIO::read_string($gb_species, $srcField); |
---|
66 | my $error = BIO::write_string($gb_species,"name",$field_content); |
---|
67 | if (defined $error) { |
---|
68 | die "Error writing to 'name': ".$error."\n"; |
---|
69 | } |
---|
70 | $count++; |
---|
71 | } |
---|
72 | ARB::pop_my_security($gb_main); |
---|
73 | print "IDs of $count species were overwritten with content of $srcField\n"; |
---|
74 | } |
---|
75 | else { |
---|
76 | print "Please choose a SOURCEFIELDNAME which has unique content for all species.\n"; |
---|
77 | } |
---|
78 | |
---|
79 | ARB::commit_transaction($gb_main); |
---|
80 | ARB::close($gb_main); |
---|
81 | } |
---|
82 | |
---|
83 | sub main() { |
---|
84 | my $args = scalar(@ARGV); |
---|
85 | if ($args==0) { |
---|
86 | print "Usage: useFieldAsID.pl SOURCEFIELDNAME\n"; |
---|
87 | print "Copies the content of field SOURCEFIELDNAME into the field 'name' (which is used as ID)\n"; |
---|
88 | print "The contents of SOURCEFIELDNAME need to be unique\n"; |
---|
89 | print "\n"; |
---|
90 | print "Warning: Only use this script in special cases!\n"; |
---|
91 | print " Afterwards regenerate species IDs as soon as possible.\n"; |
---|
92 | } |
---|
93 | else { |
---|
94 | my $srcField = shift @ARGV; |
---|
95 | hackIDs($srcField); |
---|
96 | } |
---|
97 | } |
---|
98 | |
---|
99 | main(); |
---|