From 6fe3f8a7ff856864af8366854b152ad15fb4506c Mon Sep 17 00:00:00 2001 From: Dobrica Pavlinusic Date: Sat, 23 Oct 2010 01:09:55 +0200 Subject: [PATCH] extract common code into Scraper package --- Aleph.pm | 16 +++++++--------- COBISS.pm | 14 +++----------- Scraper.pm | 18 ++++++++++++++++++ 3 files changed, 28 insertions(+), 20 deletions(-) create mode 100644 Scraper.pm diff --git a/Aleph.pm b/Aleph.pm index f89aaf7..71d67a7 100644 --- a/Aleph.pm +++ b/Aleph.pm @@ -3,11 +3,10 @@ package Aleph; use warnings; use strict; -use WWW::Mechanize; use MARC::Record; use Data::Dump qw/dump/; -binmode STDOUT, ':utf8'; +use base 'Scraper'; our $mech = WWW::Mechanize->new(); our $hits; @@ -53,9 +52,7 @@ sub diag { # WGA - Riječi u geografskim odrednicama # WYR - Godina izdavanja -our $usemap = { -# 8 => '', -# 7 => '', +sub usemap {{ 4 => 'WTI', 1003 => 'WTI', 16 => 'CU', @@ -66,10 +63,6 @@ our $usemap = { }; -sub usemap { - my $f = shift || die; - $usemap->{$f}; -} sub search { my ( $self, $query ) = @_; @@ -81,6 +74,7 @@ sub search { diag "get $url"; + my $mech = $self->{mech} || die "no mech?"; $mech->get( $url ); diag "advanced search"; @@ -111,12 +105,16 @@ diag "got $hits results, get first one"; diag "in MARC format"; $mech->follow_link( url_regex => qr/format=001/ ); + + return $hits; } sub next_marc { my ($self,$format) = @_; + my $mech = $self->{mech} || die "no mech?"; + print $mech->content; if ( $mech->content =~ m{Zapis\s+(\d+)}s ) { diff --git a/COBISS.pm b/COBISS.pm index ce1eea0..5611007 100644 --- a/COBISS.pm +++ b/COBISS.pm @@ -3,19 +3,10 @@ package COBISS; use warnings; use strict; -use WWW::Mechanize; use MARC::Record; use Data::Dump qw/dump/; -binmode STDOUT, ':utf8'; - -sub new { - my ( $class ) = @_; - my $self = {}; - bless $self, $class; - return $self; -} - +use base 'Scraper'; my $cobiss_marc21 = { '010' => { a => [ '020', 'a' ] }, @@ -78,7 +69,8 @@ sub search { diag "get $url"; - my $mech = $self->{mech} = WWW::Mechanize->new(); + my $mech = $self->{mech} || die "no mech?"; + my $hits; $mech->get( $url ); diff --git a/Scraper.pm b/Scraper.pm new file mode 100644 index 0000000..e1b13f4 --- /dev/null +++ b/Scraper.pm @@ -0,0 +1,18 @@ +package Scraper; + +use warnings; +use strict; + +use WWW::Mechanize; + +binmode STDOUT, ':utf8'; + +sub new { + my ( $class ) = @_; + my $self = { + mech => WWW::Mechanize->new(), + }; + bless $self, $class; + return $self; +} + -- 2.20.1