source: branches/stable/PERL_SCRIPTS/GENOME/collect_gene_info.pl

Last change on this file was 7971, checked in by westram, 9 years ago
  • perl script to collect information from gene-field and store it into organism-field
  • Property svn:executable set to *
File size: 2.8 KB
Line 
1#!/usr/bin/perl
2
3use strict;
4use warnings;
5
6# -----------------------
7# For all (marked) organisms, finds all (marked) genes and
8# concatenates their $gene_source_field into $organism_dest_field
9# -----------------------
10# configure here:
11
12my $organism_dest_field       = 'my_genes'; # or 'db_xref'
13my $fail_if_dest_field_exists = 1;
14my $organism_marked_only      = 1;
15
16my $gene_source_field            = 'name'; # 'tax_xref_embl'
17my $fail_if_source_field_missing = 1;
18my $genes_marked_only            = 1;
19
20my $field_seperator = ';';
21
22# -----------------------
23
24BEGIN {
25  if (not exists $ENV{'ARBHOME'}) { die "Environment variable \$ARBHOME has to be defined"; }
26  my $arbhome = $ENV{'ARBHOME'};
27  push @INC, "$arbhome/lib";
28  push @INC, "$arbhome/PERL_SCRIPTS/GENOME";
29  1;
30}
31
32use ARB;
33use GI;
34
35my $source_fields_used  = 0;
36my $dest_fields_written = 0;
37
38my $collected;
39
40sub collect_gene_info($) {
41  my ($gb_gene) = @_;
42  eval {
43    my $gb_field = ARB::search($gb_gene,$gene_source_field,"NONE");
44    my $content;
45    if ($gb_field) {
46      $content = ARB::read_as_string($gb_field);
47    }
48    else {
49      if ($fail_if_source_field_missing==1) {
50        die "field '$gene_source_field' missing\n";
51      }
52      $content = '';
53    }
54    if ($content ne '') {
55      $source_fields_used++;
56      if ($collected eq '') {
57        $collected = $content;
58      }
59      else {
60        $collected .= $field_seperator.$content;
61      }
62    }
63  };
64  if ($@) {
65    my $name = BIO::read_name($gb_gene);
66    die "@ gene '$name':\n$@";
67  }
68}
69
70sub collect_to_dest_field($) {
71  my ($gb_orga) = @_;
72  eval {
73    if ($fail_if_dest_field_exists==1) {
74      if (ARB::search($gb_orga,$organism_dest_field,"NONE")) {
75        die "field '$organism_dest_field' already exists\n";
76      }
77    }
78    my $gb_dest = ARB::search($gb_orga,$organism_dest_field,"STRING");
79    if (not defined $gb_dest) { die ARB::await_error(); }
80
81    $collected = '';
82    if ($genes_marked_only==1) {
83      GI::with_marked_genes($gb_orga, &collect_gene_info);
84    }
85    else {
86      GI::with_all_genes($gb_orga, &collect_gene_info);
87    }
88    ARB::write_string($gb_dest,$collected);
89    $dest_fields_written++;
90  };
91  if ($@) {
92    my $name = BIO::read_name($gb_orga);
93    die "@ organism '$name':\n$@";
94  }
95}
96
97sub handleOrganisms() {
98  if ($organism_marked_only==1) {
99    GI::with_marked_genomes(&collect_to_dest_field);
100  }
101  else {
102    GI::with_all_genomes(&collect_to_dest_field);
103  }
104}
105
106sub main() {
107  GI::connectDB();
108  eval {
109    GI::message("Extracting '$gene_source_field' into '$organism_dest_field'"); 
110    handleOrganisms();
111    GI::message("Collected $source_fields_used '$gene_source_field' into $dest_fields_written '$organism_dest_field'");
112  };
113  my $err = $@;
114  if ($err) { GI::message("Error in collect_gene_info.pl:\n$err"); }
115  GI::disconnectDB();
116}
117
118main();
Note: See TracBrowser for help on using the repository browser.