use warnings;
use strict;
-use WWW::Mechanize;
use MARC::Record;
use Data::Dump qw/dump/;
-binmode STDOUT, ':utf8';
-
-sub new {
- my ( $class ) = @_;
- my $self = {};
- bless $self, $class;
- return $self;
-}
-
+use base 'Scraper';
my $cobiss_marc21 = {
'010' => { a => [ '020', 'a' ] },
diag "get $url";
- my $mech = $self->{mech} = WWW::Mechanize->new();
+ my $mech = $self->{mech} || die "no mech?";
+
my $hits;
$mech->get( $url );
$hits = 0;
if ( $mech->content =~ m{hits:\s*<b>\s*(\d+)\s*</b>}s ) {
- $hits = $1;
+ $self->{hits} = $hits = $1;
} else {
diag "get't find results in ", $mech->content;
return;
my $marc = MARC::Record->new;
+ $comarc =~ s/[\r\n]+\s{5}//gs; # join continuation lines
+warn "## comarc join: $comarc\n";
+
foreach my $line ( split(/[\r\n]+/, $comarc) ) {
if ( $line !~ s{^(\d\d\d)([01 ])([01 ])}{} ) {
}
}
- my $path = "marc/$id.$format";
-
- open($out, '>:utf8', $path);
- print $out $marc->as_usmarc;
- close($out);
-
- diag "created $path ", -s $path, " bytes";
-
+ $self->save_marc( $id, $marc->as_usmarc );
diag $marc->as_formatted;
- $nr++;
- $mech->follow_link( url_regex => qr/rec=$nr/ );
+ if ( $nr < $self->{hits} ) { # {hits} is the total stored when the result page was parsed; only advance while records remain
+ warn "# fetch next result";
+ $nr++;
+ $mech->follow_link( url_regex => qr/rec=$nr/ );
+ } else {
+ warn "# no more results";
+ }
return $marc->as_usmarc;
} else {