1 | #!/usr/bin/perl -w |
---|
2 | # switches for stringent coding and debugging |
---|
3 | |
---|
4 | # This program is an adapter between the ARB program package and the DSZM website. |
---|
5 | # It allows to run a taxonomic query using the web form offered by the DSZM via |
---|
6 | # the http-POST method implemented by the LWP package of perl. |
---|
7 | # Subsequent modification of relative URLs into absolute one allows allows the browser |
---|
8 | # started by ARB to connect with the DSZM website. |
---|
9 | # (c) Lothar Richter Oct. 2003 |
---|
10 | |
---|
11 | use strict; |
---|
12 | use diagnostics; |
---|
13 | |
---|
14 | |
---|
15 | # script for automated information retrieval from DSZM |
---|
16 | |
---|
17 | |
---|
18 | # moduls in use |
---|
19 | use LWP::Simple; |
---|
20 | use HTTP::Request::Common qw(POST); |
---|
21 | use LWP::UserAgent; |
---|
22 | |
---|
23 | use File::Temp qw/ tempfile tempdir /; |
---|
24 | |
---|
25 | |
---|
26 | |
---|
27 | |
---|
28 | #die"code successful parsed and compiled\n"; |
---|
29 | |
---|
30 | |
---|
31 | |
---|
32 | |
---|
33 | my $errordocument = |
---|
34 | "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\"> |
---|
35 | <html> |
---|
36 | <head> |
---|
37 | <title>DSMZ Failure</title> |
---|
38 | </head> |
---|
39 | |
---|
40 | <body> |
---|
41 | <h1>DSZM Access Error</h1> |
---|
42 | <br/> |
---|
43 | You have given no search items! Please give at least on search item to access the taxonomic search tool at the DSMZ! |
---|
44 | |
---|
45 | <hr> |
---|
46 | <address><a href=\"mailto:\">Lothar Richter</a></address> |
---|
47 | <!-- Created: Mon Sep 8 14:23:58 CEST 2003 --> |
---|
48 | <!-- hhmts start --> |
---|
49 | Last modified: Mon Sep 8 14:25:42 CEST 2003 |
---|
50 | <!-- hhmts end --> |
---|
51 | </body> |
---|
52 | </html>"; |
---|
53 | |
---|
54 | my $TMPOUT; |
---|
55 | my $template = 'arbdsmz_XXXXXX'; |
---|
56 | (undef, $TMPOUT) = tempfile($template, OPEN => 0); |
---|
57 | $TMPOUT = $TMPOUT . ".html"; |
---|
58 | |
---|
59 | open (OUTPUT , "> $TMPOUT") or die "cannot open temporary input file $TMPOUT"; |
---|
60 | |
---|
61 | if (scalar(@ARGV) == 0) { |
---|
62 | print OUTPUT $errordocument; |
---|
63 | die("no search items given ! Give at least one item!"); |
---|
64 | } |
---|
65 | ##print length(@ARGV)."\n"; |
---|
66 | my $item1 = shift() || ""; |
---|
67 | ##print $item1."\n"; |
---|
68 | my $item2 = shift() || ""; |
---|
69 | |
---|
70 | print STDERR "Searching for '$item1'\n"; |
---|
71 | print STDERR "Searching for '$item2'\n"; |
---|
72 | |
---|
73 | #-------------------------------------------------------------------------------- |
---|
74 | # begin of post-method emulations |
---|
75 | #-------------------------------------------------------------------------------- |
---|
76 | |
---|
77 | |
---|
78 | my $ua_selection = LWP::UserAgent ->new ; |
---|
79 | |
---|
80 | #$ua_selection -> agent ("UpdateAgent/0.1" . $ua_selection -> agent); |
---|
81 | |
---|
82 | ##my $req_selection = new HTTP::Request POST => $baseURL; |
---|
83 | my $req_selection = HTTP::Request -> new( POST => 'http://www.dsmz.de/cgi-bin/dsmzfind.pl'); |
---|
84 | ##my $req_selection = HTTP::Request -> new( POST => $baseURL); |
---|
85 | |
---|
86 | $req_selection->content_type('application/x-www-form-urlencoded'); |
---|
87 | # my $selection_content = 'VAR_DATABASE=bact&VAR_HITS=25&VAR_DSMZITEM=Escherichia&VAR_DSMZITEM2=coli&B1=Search'; |
---|
88 | my $selection_content = 'VAR_DATABASE=bact&VAR_HITS=25&VAR_DSMZITEM='."$item1".'&VAR_DSMZITEM2='."$item2".'&B1=Search'; |
---|
89 | |
---|
90 | $req_selection->content($selection_content); |
---|
91 | |
---|
92 | # Pass request to the user agent and get a response back |
---|
93 | my $TMPUSEROUT; |
---|
94 | (undef, $TMPUSEROUT) = tempfile($template, OPEN => 0); |
---|
95 | $TMPUSEROUT = $TMPUSEROUT . ".htm"; |
---|
96 | my $res_selection = $ua_selection -> request($req_selection, $TMPUSEROUT); |
---|
97 | |
---|
98 | |
---|
99 | |
---|
100 | # Check the outcome of the response |
---|
101 | if ($res_selection->is_success) {print $res_selection->content;} |
---|
102 | else {die "Bad luck this time, request failed\n";}; |
---|
103 | |
---|
104 | |
---|
105 | open (INPUT , "< $TMPUSEROUT") or die "cannot open input file $TMPUSEROUT"; |
---|
106 | |
---|
107 | |
---|
108 | my $htmlcontent; |
---|
109 | { |
---|
110 | local $/; |
---|
111 | $htmlcontent = <INPUT>; |
---|
112 | } |
---|
113 | ##print "$htmlcontent\n"; |
---|
114 | |
---|
115 | ##$htmlcontent =~ s{(.*HREF=")(\/w+)}{$1http:\/\/www.dszm.de$1}igm; ##" |
---|
116 | $htmlcontent =~ s{HREF="}{HREF="http://www.dsmz.de}igm; |
---|
117 | $htmlcontent =~ s{HREF=[^"]}{HREF=http://www.dsmz.de/}igm; ##" |
---|
118 | |
---|
119 | print OUTPUT $htmlcontent ; |
---|
120 | |
---|
121 | #exec ('netscape', $TMPOUT); |
---|
122 | print "file://" . $TMPOUT; |
---|
123 | |
---|
124 | ##print "$htmlcontent\n"; |
---|