X-Git-Url: http://git.rot13.org/?a=blobdiff_plain;f=Scraper.pm;h=692a4c6f4496c2a639a6f84852a73ed6ebff0744;hb=f26d4b82ad95b3975d8315a77928d3a1897090e4;hp=e1b13f497408aae5b0a8290a56b318ec7f0e9c46;hpb=6fe3f8a7ff856864af8366854b152ad15fb4506c;p=Biblio-Z3950.git diff --git a/Scraper.pm b/Scraper.pm index e1b13f4..692a4c6 100644 --- a/Scraper.pm +++ b/Scraper.pm @@ -5,14 +5,48 @@ use strict; use WWW::Mechanize; -binmode STDOUT, ':utf8'; - sub new { - my ( $class ) = @_; + my ( $class, $database ) = @_; + + $database ||= $class; + my $self = { mech => WWW::Mechanize->new(), + database => $database, }; bless $self, $class; return $self; } +sub mech { $_[0]->{mech} } + +sub save_marc { + my ( $self, $id, $marc ) = @_; + + my $database = $self->{database}; + mkdir 'marc' unless -e 'marc'; + mkdir "marc/$database" unless -e "marc/$database"; + + my $path = "marc/$database/$id"; + + open(my $out, '>:utf8', $path) || die "$path: $!"; + print $out $marc; + close($out); + + warn "# created $path ", -s $path, " bytes"; + +} + +our $dump_nr = 1; + +sub save_content { + my $self = shift; + my $path = "/tmp/$dump_nr.html"; + open(my $html, '>', $path); + print $html $self->{mech}->content; + close($html); + warn "# save_content $path ", -s $path, " bytes"; + $dump_nr++; +} + +1;