scrape COBISS
authordpavlin <dpavlin@ae73d1a6-5fa4-44a9-8f13-f281fb455051>
Fri, 19 Jun 2009 17:50:34 +0000 (17:50 +0000)
committerdpavlin <dpavlin@ae73d1a6-5fa4-44a9-8f13-f281fb455051>
Fri, 19 Jun 2009 17:50:34 +0000 (17:50 +0000)
git-svn-id: svn+ssh://llin.lib/home/dpavlin/private/svn/Z3950-HTML-Scraper@1 ae73d1a6-5fa4-44a9-8f13-f281fb455051

COBISS.pm [new file with mode: 0644]
t/1-COBISS.t [new file with mode: 0755]

diff --git a/COBISS.pm b/COBISS.pm
new file mode 100644 (file)
index 0000000..f9ec719
--- /dev/null
+++ b/COBISS.pm
@@ -0,0 +1,60 @@
+package COBISS;
+
+use warnings;
+use strict;
+
+use WWW::Mechanize;
+
+sub search {
+
+       my $url = 'http://cobiss.izum.si/scripts/cobiss?ukaz=GETID&lani=en';
+
+warn "# get $url\n";
+
+       my $mech = WWW::Mechanize->new();
+       $mech->get( $url );
+
+warn "# got session\n";
+
+       $mech->follow_link( text_regex => qr/union/ );
+
+warn "# submit search\n";
+
+       $mech->submit_form(
+               fields => {
+                       'SS1' => 'Krleza',
+               },
+       );
+
+       my $hits = 1;
+       if ( $mech->content =~ m{hits:\s*<b>\s*(\d+)\s*</b>}s ) {
+               $hits = $1;
+       } else {
+               warn "get't find results in ", $mech->content;
+       }
+
+warn "# got $hits results, get first one\n";
+
+       $mech->follow_link( url_regex => qr/ukaz=DISP/ );
+
+warn "# in COMARC format\n";
+
+       $mech->follow_link( url_regex => qr/fmt=13/ );
+
+       my $comarc;
+
+       if ( $mech->content =~ m{<pre>\s*(.+1\..+?)\s*</pre>}s ) {
+               my $comarc = $1;
+               $comarc =~ s{</?b>}{}gs;
+               $comarc =~ s{<(/?font)[^>]*>}{<sf>}gs;
+
+               print $comarc;
+
+               return $comarc;
+       } else {
+               die "can't fetch COMARC format from ", $mech->content;
+       }
+
+}
+
+1;
diff --git a/t/1-COBISS.t b/t/1-COBISS.t
new file mode 100755 (executable)
index 0000000..782c6a0
--- /dev/null
@@ -0,0 +1,11 @@
+#!/usr/bin/perl
+
+use warnings;
+use strict;
+
+use Test::More tests => 2;
+
+use_ok 'COBISS';
+
+ok( my $results = COBISS->search() );
+