1 | #!/usr/bin/perl |
---|
2 | |
---|
3 | use strict; |
---|
4 | use warnings; |
---|
5 | |
---|
6 | # ----------------------- |
---|
7 | # For all (marked) organisms, finds all (marked) genes and |
---|
8 | # concatenates their $gene_source_field into $organism_dest_field |
---|
9 | # ----------------------- |
---|
10 | # configure here: |
---|
11 | |
---|
12 | my $organism_dest_field = 'my_genes'; # or 'db_xref' |
---|
13 | my $fail_if_dest_field_exists = 1; |
---|
14 | my $organism_marked_only = 1; |
---|
15 | |
---|
16 | my $gene_source_field = 'name'; # 'tax_xref_embl' |
---|
17 | my $fail_if_source_field_missing = 1; |
---|
18 | my $genes_marked_only = 1; |
---|
19 | |
---|
20 | my $field_seperator = ';'; |
---|
21 | |
---|
22 | # ----------------------- |
---|
23 | |
---|
24 | BEGIN { |
---|
25 | if (not exists $ENV{'ARBHOME'}) { die "Environment variable \$ARBHOME has to be defined"; } |
---|
26 | my $arbhome = $ENV{'ARBHOME'}; |
---|
27 | push @INC, "$arbhome/lib"; |
---|
28 | push @INC, "$arbhome/PERL_SCRIPTS/GENOME"; |
---|
29 | 1; |
---|
30 | } |
---|
31 | |
---|
32 | use ARB; |
---|
33 | use GI; |
---|
34 | |
---|
35 | my $source_fields_used = 0; |
---|
36 | my $dest_fields_written = 0; |
---|
37 | |
---|
38 | my $collected; |
---|
39 | |
---|
40 | sub collect_gene_info($) { |
---|
41 | my ($gb_gene) = @_; |
---|
42 | eval { |
---|
43 | my $gb_field = ARB::search($gb_gene,$gene_source_field,"NONE"); |
---|
44 | my $content; |
---|
45 | if ($gb_field) { |
---|
46 | $content = ARB::read_as_string($gb_field); |
---|
47 | } |
---|
48 | else { |
---|
49 | if ($fail_if_source_field_missing==1) { |
---|
50 | die "field '$gene_source_field' missing\n"; |
---|
51 | } |
---|
52 | $content = ''; |
---|
53 | } |
---|
54 | if ($content ne '') { |
---|
55 | $source_fields_used++; |
---|
56 | if ($collected eq '') { |
---|
57 | $collected = $content; |
---|
58 | } |
---|
59 | else { |
---|
60 | $collected .= $field_seperator.$content; |
---|
61 | } |
---|
62 | } |
---|
63 | }; |
---|
64 | if ($@) { |
---|
65 | my $name = BIO::read_name($gb_gene); |
---|
66 | die "@ gene '$name':\n$@"; |
---|
67 | } |
---|
68 | } |
---|
69 | |
---|
70 | sub collect_to_dest_field($) { |
---|
71 | my ($gb_orga) = @_; |
---|
72 | eval { |
---|
73 | if ($fail_if_dest_field_exists==1) { |
---|
74 | if (ARB::search($gb_orga,$organism_dest_field,"NONE")) { |
---|
75 | die "field '$organism_dest_field' already exists\n"; |
---|
76 | } |
---|
77 | } |
---|
78 | my $gb_dest = ARB::search($gb_orga,$organism_dest_field,"STRING"); |
---|
79 | if (not defined $gb_dest) { die ARB::await_error(); } |
---|
80 | |
---|
81 | $collected = ''; |
---|
82 | if ($genes_marked_only==1) { |
---|
83 | GI::with_marked_genes($gb_orga, &collect_gene_info); |
---|
84 | } |
---|
85 | else { |
---|
86 | GI::with_all_genes($gb_orga, &collect_gene_info); |
---|
87 | } |
---|
88 | ARB::write_string($gb_dest,$collected); |
---|
89 | $dest_fields_written++; |
---|
90 | }; |
---|
91 | if ($@) { |
---|
92 | my $name = BIO::read_name($gb_orga); |
---|
93 | die "@ organism '$name':\n$@"; |
---|
94 | } |
---|
95 | } |
---|
96 | |
---|
97 | sub handleOrganisms() { |
---|
98 | if ($organism_marked_only==1) { |
---|
99 | GI::with_marked_genomes(&collect_to_dest_field); |
---|
100 | } |
---|
101 | else { |
---|
102 | GI::with_all_genomes(&collect_to_dest_field); |
---|
103 | } |
---|
104 | } |
---|
105 | |
---|
106 | sub main() { |
---|
107 | GI::connectDB(); |
---|
108 | eval { # @@@ eval is broken (need to use set_inGlobalEvalState) |
---|
109 | GI::message("Extracting '$gene_source_field' into '$organism_dest_field'"); |
---|
110 | handleOrganisms(); |
---|
111 | GI::message("Collected $source_fields_used '$gene_source_field' into $dest_fields_written '$organism_dest_field'"); |
---|
112 | }; |
---|
113 | my $err = $@; |
---|
114 | if ($err) { GI::message("Error in collect_gene_info.pl:\n$err"); } |
---|
115 | GI::disconnectDB(); |
---|
116 | } |
---|
117 | |
---|
118 | main(); |
---|