X-Git-Url: http://git.rot13.org/?a=blobdiff_plain;ds=sidebyside;f=Scraper.pm;h=d3e1c84a4b6376abca45f8c4f7a910214213802d;hb=HEAD;hp=89209789f007624002334b6cfcb7441b6dbb8fb1;hpb=ab265403df6d57311a7a5839e0f664ecd97bf8a7;p=Biblio-Z3950.git

diff --git a/Scraper.pm b/Scraper.pm
index 8920978..d3e1c84 100644
--- a/Scraper.pm
+++ b/Scraper.pm
@@ -3,21 +3,34 @@ package Scraper;
 use warnings;
 use strict;
 
+use IO::Socket::SSL qw();
 use WWW::Mechanize;
 
+
 sub new {
-	my ( $class ) = @_;
+	my ( $class, $database ) = @_;
+
+	$database ||= $class;
+
 	my $self = {
-		mech => WWW::Mechanize->new(),
+		mech => WWW::Mechanize->new(
+			ssl_opts => {
+				SSL_verify_mode => IO::Socket::SSL::SSL_VERIFY_NONE,
+				verify_hostname => 0, # this key is likely going to be removed in future LWP >6.04
+			}
+		),
+		database => $database,
 	};
 	bless $self, $class;
 	return $self;
 }
 
+sub mech { $_[0]->{mech} }
+
 sub save_marc {
 	my ( $self, $id, $marc ) = @_;
 
-	my $database = ref $self;
+	my $database = $self->{database};
 
 	mkdir 'marc' unless -e 'marc';
 	mkdir "marc/$database" unless -e "marc/$database";
@@ -31,4 +44,16 @@ sub save_marc {
 
 }
 
+our $dump_nr = 1;
+
+sub save_content {
+	my $self = shift;
+	my $path = "/tmp/$dump_nr.html";
+	open(my $html, '>', $path);
+	print $html $self->{mech}->content;
+	close($html);
+	warn "# save_content $path ", -s $path, " bytes";
+	$dump_nr++;
+}
+
 1;