| 1 | #!/usr/bin/perl -w |
|---|
| 2 | # switches for stringent coding and debugging |
|---|
| 3 | |
|---|
| 4 | # This program is an adapter between the ARB program package and the DSZM website. |
|---|
| 5 | # It allows to run a taxonomic query using the web form offered by the DSZM via |
|---|
| 6 | # the http-POST method implemented by the LWP package of perl. |
|---|
| 7 | # Subsequent modification of relative URLs into absolute one allows allows the browser |
|---|
| 8 | # started by ARB to connect with the DSZM website. |
|---|
| 9 | # (c) Lothar Richter Oct. 2003 |
|---|
| 10 | |
|---|
| 11 | use strict; |
|---|
| 12 | use diagnostics; |
|---|
| 13 | |
|---|
| 14 | |
|---|
| 15 | # script for automated information retrieval from DSZM |
|---|
| 16 | |
|---|
| 17 | |
|---|
| 18 | # moduls in use |
|---|
| 19 | use LWP::Simple; |
|---|
| 20 | use HTTP::Request::Common qw(POST); |
|---|
| 21 | use LWP::UserAgent; |
|---|
| 22 | |
|---|
| 23 | use File::Temp qw/ tempfile tempdir /; |
|---|
| 24 | |
|---|
| 25 | |
|---|
| 26 | |
|---|
| 27 | |
|---|
| 28 | #die"code successful parsed and compiled\n"; |
|---|
| 29 | |
|---|
| 30 | |
|---|
| 31 | |
|---|
| 32 | |
|---|
| 33 | my $errordocument = |
|---|
| 34 | "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\"> |
|---|
| 35 | <html> |
|---|
| 36 | <head> |
|---|
| 37 | <title>DSMZ Failure</title> |
|---|
| 38 | </head> |
|---|
| 39 | |
|---|
| 40 | <body> |
|---|
| 41 | <h1>DSZM Access Error</h1> |
|---|
| 42 | <br/> |
|---|
| 43 | You have given no search items! Please give at least on search item to access the taxonomic search tool at the DSMZ! |
|---|
| 44 | |
|---|
| 45 | <hr> |
|---|
| 46 | <address><a href=\"mailto:\">Lothar Richter</a></address> |
|---|
| 47 | <!-- Created: Mon Sep 8 14:23:58 CEST 2003 --> |
|---|
| 48 | <!-- hhmts start --> |
|---|
| 49 | Last modified: Mon Sep 8 14:25:42 CEST 2003 |
|---|
| 50 | <!-- hhmts end --> |
|---|
| 51 | </body> |
|---|
| 52 | </html>"; |
|---|
| 53 | |
|---|
| 54 | my $TMPOUT; |
|---|
| 55 | my $template = 'arbdsmz_XXXXXX'; |
|---|
| 56 | (undef, $TMPOUT) = tempfile($template, OPEN => 0); |
|---|
| 57 | $TMPOUT = $TMPOUT . ".html"; |
|---|
| 58 | |
|---|
| 59 | open (OUTPUT , "> $TMPOUT") or die "cannot open temporary input file $TMPOUT"; |
|---|
| 60 | |
|---|
| 61 | if (scalar(@ARGV) == 0) |
|---|
| 62 | {print OUTPUT $errordocument; |
|---|
| 63 | die("no search items given ! Give at least one item!");} |
|---|
| 64 | ##print length(@ARGV)."\n"; |
|---|
| 65 | my $item1 = shift() || ""; |
|---|
| 66 | ##print $item1."\n"; |
|---|
| 67 | my $item2 = shift() || ""; |
|---|
| 68 | |
|---|
| 69 | print STDERR "Searching for '$item1'\n"; |
|---|
| 70 | print STDERR "Searching for '$item2'\n"; |
|---|
| 71 | |
|---|
| 72 | #-------------------------------------------------------------------------------- |
|---|
| 73 | # begin of post-method emulations |
|---|
| 74 | #-------------------------------------------------------------------------------- |
|---|
| 75 | |
|---|
| 76 | |
|---|
| 77 | my $ua_selection = LWP::UserAgent ->new ; |
|---|
| 78 | |
|---|
| 79 | #$ua_selection -> agent ("UpdateAgent/0.1" . $ua_selection -> agent); |
|---|
| 80 | |
|---|
| 81 | ##my $req_selection = new HTTP::Request POST => $baseURL; |
|---|
| 82 | my $req_selection = HTTP::Request -> new( POST => 'http://www.dsmz.de/cgi-bin/dsmzfind.pl'); |
|---|
| 83 | ##my $req_selection = HTTP::Request -> new( POST => $baseURL); |
|---|
| 84 | |
|---|
| 85 | $req_selection->content_type('application/x-www-form-urlencoded'); |
|---|
| 86 | # my $selection_content = 'VAR_DATABASE=bact&VAR_HITS=25&VAR_DSMZITEM=Escherichia&VAR_DSMZITEM2=coli&B1=Search'; |
|---|
| 87 | my $selection_content = 'VAR_DATABASE=bact&VAR_HITS=25&VAR_DSMZITEM='."$item1".'&VAR_DSMZITEM2='."$item2".'&B1=Search'; |
|---|
| 88 | |
|---|
| 89 | $req_selection->content($selection_content); |
|---|
| 90 | |
|---|
| 91 | # Pass request to the user agent and get a response back |
|---|
| 92 | my $TMPUSEROUT; |
|---|
| 93 | (undef, $TMPUSEROUT) = tempfile($template, OPEN => 0); |
|---|
| 94 | $TMPUSEROUT = $TMPUSEROUT . ".htm"; |
|---|
| 95 | my $res_selection = $ua_selection -> request($req_selection, $TMPUSEROUT); |
|---|
| 96 | |
|---|
| 97 | |
|---|
| 98 | |
|---|
| 99 | # Check the outcome of the response |
|---|
| 100 | if ($res_selection->is_success) {print $res_selection->content;} |
|---|
| 101 | else {die "Bad luck this time, request failed\n";}; |
|---|
| 102 | |
|---|
| 103 | |
|---|
| 104 | open (INPUT , "< $TMPUSEROUT") or die "cannot open input file $TMPUSEROUT"; |
|---|
| 105 | |
|---|
| 106 | |
|---|
| 107 | my $htmlcontent; |
|---|
| 108 | { |
|---|
| 109 | local $/; |
|---|
| 110 | $htmlcontent = <INPUT>; |
|---|
| 111 | } |
|---|
| 112 | ##print "$htmlcontent\n"; |
|---|
| 113 | |
|---|
| 114 | ##$htmlcontent =~ s{(.*HREF=")(\/w+)}{$1http:\/\/www.dszm.de$1}igm; ##" |
|---|
| 115 | $htmlcontent =~ s{HREF="}{HREF="http://www.dsmz.de}igm; |
|---|
| 116 | $htmlcontent =~ s{HREF=[^"]}{HREF=http://www.dsmz.de/}igm; ##" |
|---|
| 117 | |
|---|
| 118 | print OUTPUT $htmlcontent ; |
|---|
| 119 | |
|---|
| 120 | #exec ('netscape', $TMPOUT); |
|---|
| 121 | print "file://" . $TMPOUT; |
|---|
| 122 | |
|---|
| 123 | ##print "$htmlcontent\n"; |
|---|