| 1 | #!/usr/bin/perl |
|---|
| 2 | |
|---|
| 3 | use strict; |
|---|
| 4 | use warnings; |
|---|
| 5 | |
|---|
| 6 | # ----------------------- |
|---|
| 7 | # For all (marked) organisms, finds all (marked) genes and |
|---|
| 8 | # concatenates their $gene_source_field into $organism_dest_field |
|---|
| 9 | # ----------------------- |
|---|
| 10 | # configure here: |
|---|
| 11 | |
|---|
| 12 | my $organism_dest_field = 'my_genes'; # or 'db_xref' |
|---|
| 13 | my $fail_if_dest_field_exists = 1; |
|---|
| 14 | my $organism_marked_only = 1; |
|---|
| 15 | |
|---|
| 16 | my $gene_source_field = 'name'; # 'tax_xref_embl' |
|---|
| 17 | my $fail_if_source_field_missing = 1; |
|---|
| 18 | my $genes_marked_only = 1; |
|---|
| 19 | |
|---|
| 20 | my $field_seperator = ';'; |
|---|
| 21 | |
|---|
| 22 | # ----------------------- |
|---|
| 23 | |
|---|
| 24 | BEGIN { |
|---|
| 25 | if (not exists $ENV{'ARBHOME'}) { die "Environment variable \$ARBHOME has to be defined"; } |
|---|
| 26 | my $arbhome = $ENV{'ARBHOME'}; |
|---|
| 27 | push @INC, "$arbhome/lib"; |
|---|
| 28 | push @INC, "$arbhome/PERL_SCRIPTS/GENOME"; |
|---|
| 29 | 1; |
|---|
| 30 | } |
|---|
| 31 | |
|---|
| 32 | use ARB; |
|---|
| 33 | use GI; |
|---|
| 34 | |
|---|
| 35 | my $source_fields_used = 0; |
|---|
| 36 | my $dest_fields_written = 0; |
|---|
| 37 | |
|---|
| 38 | my $collected; |
|---|
| 39 | |
|---|
| 40 | sub collect_gene_info($) { |
|---|
| 41 | my ($gb_gene) = @_; |
|---|
| 42 | eval { |
|---|
| 43 | my $gb_field = ARB::search($gb_gene,$gene_source_field,"NONE"); |
|---|
| 44 | my $content; |
|---|
| 45 | if ($gb_field) { |
|---|
| 46 | $content = ARB::read_as_string($gb_field); |
|---|
| 47 | } |
|---|
| 48 | else { |
|---|
| 49 | if ($fail_if_source_field_missing==1) { |
|---|
| 50 | die "field '$gene_source_field' missing\n"; |
|---|
| 51 | } |
|---|
| 52 | $content = ''; |
|---|
| 53 | } |
|---|
| 54 | if ($content ne '') { |
|---|
| 55 | $source_fields_used++; |
|---|
| 56 | if ($collected eq '') { |
|---|
| 57 | $collected = $content; |
|---|
| 58 | } |
|---|
| 59 | else { |
|---|
| 60 | $collected .= $field_seperator.$content; |
|---|
| 61 | } |
|---|
| 62 | } |
|---|
| 63 | }; |
|---|
| 64 | if ($@) { |
|---|
| 65 | my $name = BIO::read_name($gb_gene); |
|---|
| 66 | die "@ gene '$name':\n$@"; |
|---|
| 67 | } |
|---|
| 68 | } |
|---|
| 69 | |
|---|
| 70 | sub collect_to_dest_field($) { |
|---|
| 71 | my ($gb_orga) = @_; |
|---|
| 72 | eval { |
|---|
| 73 | if ($fail_if_dest_field_exists==1) { |
|---|
| 74 | if (ARB::search($gb_orga,$organism_dest_field,"NONE")) { |
|---|
| 75 | die "field '$organism_dest_field' already exists\n"; |
|---|
| 76 | } |
|---|
| 77 | } |
|---|
| 78 | my $gb_dest = ARB::search($gb_orga,$organism_dest_field,"STRING"); |
|---|
| 79 | if (not defined $gb_dest) { die ARB::await_error(); } |
|---|
| 80 | |
|---|
| 81 | $collected = ''; |
|---|
| 82 | if ($genes_marked_only==1) { |
|---|
| 83 | GI::with_marked_genes($gb_orga, &collect_gene_info); |
|---|
| 84 | } |
|---|
| 85 | else { |
|---|
| 86 | GI::with_all_genes($gb_orga, &collect_gene_info); |
|---|
| 87 | } |
|---|
| 88 | ARB::write_string($gb_dest,$collected); |
|---|
| 89 | $dest_fields_written++; |
|---|
| 90 | }; |
|---|
| 91 | if ($@) { |
|---|
| 92 | my $name = BIO::read_name($gb_orga); |
|---|
| 93 | die "@ organism '$name':\n$@"; |
|---|
| 94 | } |
|---|
| 95 | } |
|---|
| 96 | |
|---|
| 97 | sub handleOrganisms() { |
|---|
| 98 | if ($organism_marked_only==1) { |
|---|
| 99 | GI::with_marked_genomes(&collect_to_dest_field); |
|---|
| 100 | } |
|---|
| 101 | else { |
|---|
| 102 | GI::with_all_genomes(&collect_to_dest_field); |
|---|
| 103 | } |
|---|
| 104 | } |
|---|
| 105 | |
|---|
| 106 | sub main() { |
|---|
| 107 | GI::connectDB(); |
|---|
| 108 | eval { # @@@ eval is broken (need to use set_inGlobalEvalState) |
|---|
| 109 | GI::message("Extracting '$gene_source_field' into '$organism_dest_field'"); |
|---|
| 110 | handleOrganisms(); |
|---|
| 111 | GI::message("Collected $source_fields_used '$gene_source_field' into $dest_fields_written '$organism_dest_field'"); |
|---|
| 112 | }; |
|---|
| 113 | my $err = $@; |
|---|
| 114 | if ($err) { GI::message("Error in collect_gene_info.pl:\n$err"); } |
|---|
| 115 | GI::disconnectDB(); |
|---|
| 116 | } |
|---|
| 117 | |
|---|
| 118 | main(); |
|---|