#!/usr/bin/perl
#
# import_proteomdata.pl -- import a proteome data file (CSV) into the genes
# of the current genome of an ARB database.
#
# Usage: import_proteomdata.pl datafile
#
# The data file is expected to have one header line followed by data lines
# with 8 columns each:
#   "ORF-Name","Substrat","Ausstiche","Mittelw Score","Stabw Score",
#   "best hit","x","y"
#
# For every data line the gene named by the ORF column is looked up (or
# created, see $create_ORFs below), a container 'proteome/<substrate>' is
# looked up or created below that gene, and the remaining columns are
# written into it as the entries 'spots', 'score', 'sd', 'id', 'coordx' and
# 'coordy'.  Every gene that was written successfully gets marked.

use strict;
use warnings;

BEGIN {
    # ARB.pm and GI.pm are loaded from below $ARBHOME, so the search path
    # has to be extended at compile time, before the 'use' lines below.
    if (not exists $ENV{'ARBHOME'}) { die "Environment variable \$ARBHOME has to be defined"; }
    my $arbhome = $ENV{'ARBHOME'};
    push @INC, "$arbhome/lib";
    push @INC, "$arbhome/PERL_SCRIPTS/GENOME";
}

use ARB;
use GI;

# -----------------------
# configure here:

my $create_ORFs = 0; # 0 -> error if ORF not found; 1 -> auto-create gene
my $verbose     = 0; # 0 -> be quiet; 1 -> be noisy
my $overwrite   = 0; # 0 -> if entry exists -> error; 1 -> silently overwrite entry

# -----------------------

# Exactly one command line argument (the data file) is accepted; anything
# else prints the usage text and exits with status 1.
my $importfilename;

if (scalar(@ARGV) == 1) {
    $importfilename = $ARGV[0];
}
else {
    print "\nUsage: import_proteomdata.pl datafile\n";
    print "Imports a proteom datafile in CSV format.\n";
    GI::show_csv_info();
    print "The columns in datafile should contain:\n";
    print '"ORF-Name","Substrat","Ausstiche","Mittelw Score","Stabw Score","best hit","x","y"'.
        "\n\n";
    exit(1);
}

GI::connectDB();
GI::define_tokenizer_columns(8); # datafile is expected to contain 8 columns

GI::message("Reading '$importfilename'..");

# Three-argument open with a lexical handle: the file name comes straight
# from the command line and must never be interpreted as a mode or a pipe.
# NOTE(review): GI::error() is presumably fatal (the original code relies on
# that as well) -- confirm in GI.pm.
open(my $import_fh, '<', $importfilename) or GI::error("Can't read '$importfilename'");

my $lineno = 1;

# Read the header line explicitly in scalar context.  Placing the readline
# operator directly into the argument list would read ALL lines of the file
# unless GI::tokenize_columns() happens to be declared with a scalar
# prototype, leaving nothing for the data loop below.
my $headline = <$import_fh>;
if (!defined $headline) {
    GI::error("'$importfilename' is empty (while parsing $lineno of $importfilename)");
}

# The header columns themselves are not used; the line is still tokenized
# like the original code did (presumably so that a malformed header is
# reported by the tokenizer -- TODO confirm in GI.pm).
my @head = GI::tokenize_columns($headline, "$lineno of $importfilename");

my ($gb_genome, $genome_name) = GI::findCurrentGenome();
my $gb_gene_data = ARB::search($gb_genome, "gene_data", "CONTAINER");
if (!$gb_gene_data) {
    my $reason = ARB::await_error();
    GI::error("Couldn't find or create container 'gene_data' for organism '$genome_name' ($reason)");
}

# Start from a clean slate, so that afterwards exactly the genes touched by
# this import are marked.
GI::unmarkGenesOfGenome($gb_genome);

GI::message("Importing data to organism '$genome_name' ..");
my $gene_count = 0; # number of genes newly marked by this run

# Process the data lines one at a time (no need to slurp the whole file).
while (my $line = <$import_fh>) {
    $lineno++;
    my @elems = GI::tokenize_columns($line, "$lineno of $importfilename");
    my ($orf, $substrate, $spots, $mean_score, $sd_score, $best_hit, $coordx, $coordy) = @elems;

    # find (or create) the orf gene:
    my ($gb_orf, $error) = GI::findORF($gb_gene_data, $genome_name, $orf, $create_ORFs, $verbose);

    if (!$error) {
        my $substrate_field = "proteome/$substrate";

        # Look for an existing container first; only create one if there
        # is none yet.
        my $gb_substrate = ARB::search($gb_orf, $substrate_field, "NONE");
        if (!$gb_substrate) {
            $gb_substrate = ARB::search($gb_orf, $substrate_field, "CONTAINER");
        }

        if (!$gb_substrate) {
            my $reason = ARB::await_error();
            $error = "Could not create container '$substrate_field' ($reason)";
        }
        else {
            # Entries are written in this fixed order; the first failing
            # write stops the sequence and its error is reported below.
            my @entries = (
                [ "spots",  "STRING", $spots      ],
                [ "score",  "INT",    $mean_score ],
                [ "sd",     "INT",    $sd_score   ],
                [ "id",     "STRING", $best_hit   ],
                [ "coordx", "INT",    $coordx     ],
                [ "coordy", "INT",    $coordy     ],
            );

            foreach my $entry (@entries) {
                my ($field, $type, $value) = @$entry;
                $error = GI::write_entry($gb_substrate, $field, $type, $value, $overwrite, $verbose);
                last if $error;
            }

            if (!$error) {
                # Count each gene only once, even if several lines
                # (e.g. different substrates) refer to the same ORF.
                my $marked = ARB::read_flag($gb_orf);
                if ($marked == 0) {
                    ARB::write_flag($gb_orf, 1); # mark changed genes
                    $gene_count++;
                }
            }
        }
    }

    if ($error) { GI::error("$error (while parsing $lineno of $importfilename)"); }
}
close $import_fh;
GI::message("$gene_count genes modified and marked.");

GI::disconnectDB();