source: branches/help/PERL_SCRIPTS/SPECIES/useFieldAsID.pl

Last change on this file was 16866, checked in by westram, 7 years ago
  • adds a perl script that allows to overwrite field 'name' with the content of another database field
    • helpful e.g. to import trees that use 'acc' or 'full_name' as leafname
  • Property svn:executable set to *
File size: 3.0 KB
Line 
1#!/usr/bin/perl
2# ================================================================ #
3#                                                                  #
4#   File      : useFieldAsID.pl                                    #
5#   Purpose   : hack IDs in ARB database                           #
6#                                                                  #
7#   Coded by Ralf Westram (coder@reallysoft.de) in February 2018   #
8#   http://www.arb-home.de/                                        #
9#                                                                  #
10# ================================================================ #
11
12use strict;
13use warnings;
14
15BEGIN {
16  if (not exists $ENV{'ARBHOME'}) { die "Environment variable \$ARBHOME has to be defined"; }
17  my $arbhome = $ENV{'ARBHOME'};
18  push @INC, "$arbhome/lib";
19  push @INC, "$arbhome/PERL_SCRIPTS/lib";
20  1;
21}
22
23use ARB;
24use tools;
25
26sub hackIDs($) {
27  my ($srcField) = @_;
28
29  print "Overwriting field 'name' with content of '$srcField' ..\n";
30
31  my $gb_main = ARB::open(":","r");
32  $gb_main || expectError('db connect (no running ARB?)');
33
34  dieOnError(ARB::begin_transaction($gb_main), 'begin_transaction');
35
36  print " - scanning contents of '$srcField'\n";
37
38  my $unique = 1;
39  {
40    my %content = ();
41    for (my $gb_species = BIO::first_species($gb_main);
42         $gb_species and $unique;
43         $gb_species = BIO::next_species($gb_species)) {
44
45      my $field_content = BIO::read_string($gb_species, $srcField);
46      $field_content || expectError('read_string');
47
48      if (exists $content{$field_content}) {
49        $unique = 0;
50        print " - field content '$field_content' occurs more than once\n";
51        print "   => not usable as ID ('name')\n";
52      }
53      $content{$field_content} = 1;
54    }
55  }
56
57  if ($unique) {
58    print " - modifying IDs\n";
59    my $count = 0;
60    ARB::push_my_security($gb_main);
61    for (my $gb_species = BIO::first_species($gb_main);
62         $gb_species and $unique;
63         $gb_species = BIO::next_species($gb_species)) {
64
65      my $field_content = BIO::read_string($gb_species, $srcField);
66      my $error = BIO::write_string($gb_species,"name",$field_content);
67      if (defined $error) {
68        die "Error writing to 'name': ".$error."\n";
69      }
70      $count++;
71    }
72    ARB::pop_my_security($gb_main);
73    print "IDs of $count species were overwritten with content of $srcField\n";
74  }
75  else {
76    print "Please choose a SOURCEFIELDNAME which has unique content for all species.\n";
77  }
78
79  ARB::commit_transaction($gb_main);
80  ARB::close($gb_main);
81}
82
83sub main() {
84  my $args = scalar(@ARGV);
85  if ($args==0) {
86    print "Usage: useFieldAsID.pl SOURCEFIELDNAME\n";
87    print "Copies the content of field SOURCEFIELDNAME into the field 'name' (which is used as ID)\n";
88    print "The contents of SOURCEFIELDNAME need to be unique\n";
89    print "\n";
90    print "Warning: Only use this script in special cases!\n";
91    print "         Afterwards regenerate species IDs as soon as possible.\n";
92  }
93  else {
94    my $srcField = shift @ARGV;
95    hackIDs($srcField);
96  }
97}
98
99main();
Note: See TracBrowser for help on using the repository browser.