#!/usr/bin/perl
#
# import_proteomdata.pl datafile
#
# Imports a proteome data file in CSV format into the currently selected
# genome of the connected ARB database.
#
# The data file must contain a header line followed by data lines with
# exactly 8 columns:
#
#   "ORF-Name","Substrat","Ausstiche","Mittelw Score","Stabw Score",
#   "best hit","x","y"
#
# For every data line the ORF gene is looked up (or created, depending on
# $create_ORFs) and the values are stored below the gene in the container
# 'proteome/<Substrat>' as the entries
# 'spots', 'score', 'sd', 'id', 'coordx' and 'coordy'.
# Every gene that was written successfully gets marked.
#
# Requires the environment variable ARBHOME.

use strict;
use warnings;

BEGIN {
  if (not exists $ENV{'ARBHOME'}) { die "Environment variable \$ARBHOME has to be defined"; }
  my $arbhome = $ENV{'ARBHOME'};
  # appended (not prepended), so already configured library paths keep priority
  push @INC, "$arbhome/lib";
  push @INC, "$arbhome/PERL_SCRIPTS/GENOME";
}

use ARB;
use GI;

# -----------------------
# configure here:

my $create_ORFs = 0; # 0 -> error if ORF not found;    1 -> auto-create gene
my $verbose     = 0; # 0 -> be quiet;                  1 -> be noisy
my $overwrite   = 0; # 0 -> if entry exists -> error;  1 -> silently overwrite entry

# -----------------------


my $importfilename;

{
  # exactly one argument (the data file) is expected; otherwise show usage
  if (scalar(@ARGV) == 1) {
    $importfilename = $ARGV[0];
  }
  else {
    print "\nUsage: import_proteomdata.pl datafile\n";
    print "Imports a proteom datafile in CSV format.\n";
    GI::show_csv_info();
    print "The columns in datafile should contain:\n";
    print '"ORF-Name","Substrat","Ausstiche","Mittelw Score","Stabw Score","best hit","x","y"'.
      "\n\n";
    exit(1);
  }
}

GI::connectDB();
GI::define_tokenizer_columns(8); # datafile is expected to contain 8 columns

GI::message("Reading '$importfilename'..");

# Three-argument open with a lexical handle: the file name comes from the
# command line and must never be interpreted as an open mode or a pipe.
# NOTE(review): the code below presumes GI::error() aborts the script - confirm in GI.pm
my $import_fh;
open($import_fh, '<', $importfilename)
  or GI::error("Can't read '$importfilename' ($!)");

my $lineno = 1;

# Read the header line in scalar context. Using <FH> directly inside the
# argument list would (without a prototype on the callee) read ALL lines
# and leave nothing for the import loop.
my $header_line = <$import_fh>;
if (!defined $header_line) {
  GI::error("'$importfilename' is empty (expected a header line)");
}
# The header tokens are not used any further in this script; the header line is
# still passed through the tokenizer exactly like a data line.
my @head = GI::tokenize_columns($header_line, "$lineno of $importfilename");

my ($gb_genome,$genome_name) = GI::findCurrentGenome();
my $gb_gene_data             = ARB::search($gb_genome, "gene_data", "CONTAINER");
if (!$gb_gene_data) {
  my $reason = ARB::await_error();
  GI::error("Couldn't find or create container 'gene_data' for organism '$genome_name' ($reason)");
}
GI::unmarkGenesOfGenome($gb_genome);

GI::message("Importing data to organism '$genome_name' ..");
my $gene_count = 0; # number of genes marked by this import

# Process the data lines one at a time (no need to hold the whole file in memory)
while (my $line = <$import_fh>) {
  $lineno++;
  my @elems = GI::tokenize_columns($line, "$lineno of $importfilename");
  my ($orf,$substrate,$spots,$mean_score,$sd_score,$best_hit,$coordx,$coordy) = @elems;

  # find (or create) the orf gene:
  my ($gb_orf, $error) = GI::findORF($gb_gene_data,$genome_name,$orf,$create_ORFs,$verbose);
  if (!$error) {
    my $substrate_field = "proteome/$substrate";

    # look for an existing container first, create it only if missing
    my $gb_substrate = ARB::search($gb_orf, $substrate_field, "NONE");
    if (!$gb_substrate) {
      $gb_substrate = ARB::search($gb_orf, $substrate_field, "CONTAINER");
    }

    if (!$gb_substrate) {
      my $reason = ARB::await_error();
      $error     = "Could not create container '$substrate_field' ($reason)";
    }
    else {
      # entry name, ARB type and value - written in this order,
      # stopping at the first entry that reports an error.
      # NOTE(review): 'score' and 'sd' hold a mean and a standard deviation
      # but are stored as INT - confirm that this is intended.
      my @entries = (
                     [ "spots",  "STRING", $spots      ],
                     [ "score",  "INT",    $mean_score ],
                     [ "sd",     "INT",    $sd_score   ],
                     [ "id",     "STRING", $best_hit   ],
                     [ "coordx", "INT",    $coordx     ],
                     [ "coordy", "INT",    $coordy     ],
                    );

      foreach my $entry (@entries) {
        my ($name, $type, $value) = @$entry;
        $error = GI::write_entry($gb_substrate, $name, $type, $value, $overwrite, $verbose);
        last if $error;
      }

      if (!$error) {
        # count each gene only once, even if several lines refer to it
        my $marked = ARB::read_flag($gb_orf);
        if ($marked == 0) {
          ARB::write_flag($gb_orf,1); # mark changed genes
          $gene_count++;
        }
      }
    }
  }

  if ($error) { GI::error("$error (while parsing $lineno of $importfilename)"); }
}
close $import_fh;
GI::message("$gene_count genes modified and marked.");

GI::disconnectDB();