use warnings;
use strict;
-use WWW::Mechanize;
use MARC::Record;
use Data::Dump qw/dump/;
-binmode STDOUT, ':utf8';
-
-our $mech = WWW::Mechanize->new();
-our $hits;
+use base 'Scraper';
# Emit one diagnostic line: the literal "# ", then every argument, then $/
# (the input record separator).
# The "-"/"+" prefixes below are patch markers: the removed variant used
# print (the currently selected output handle), the added variant uses warn,
# so diagnostics no longer mix with whatever the program prints as output.
sub diag {
- print "# ", @_, $/;
+ warn "# ", @_, $/;
}
# Koha Z39.50 query:
# WGA - Riječi u geografskim odrednicama (words in geographic headings)
# WYR - Godina izdavanja (year of publication)
# Table mapping numeric keys to the search-field codes used by the target
# catalogue ('WTI', 'CU'); the commented-out keys have no code assigned.
# Presumably the keys are Z39.50 use attributes -- TODO confirm.
#
# Patch markers ("-" removed, "+" added):
#   - removed: the table lived in the package variable $usemap, and
#     usemap($f) returned the single entry for $f, dying when $f was false;
#   - added: usemap takes over the table itself ("sub usemap {{ ... }}").
# NOTE(review): the added form depends on Perl treating the inner "{ ... }"
# as an anonymous hash constructor and not as a bare block -- confirm, or
# make it explicit with "return { ... }" / "+{ ... }".
-our $usemap = {
-# 8 => '',
-# 7 => '',
+sub usemap {{
4 => 'WTI',
1003 => 'WTI',
16 => 'CU',
# 12 => '',
# 1007 => '',
# 1016 => '',
+}};
-};
-
-sub usemap {
- my $f = shift || die;
- $usemap->{$f};
-}
# search( $self, $query ): fetch $url, run the search and work out the
# number of hits.
#
# NOTE(review): this view is a patch excerpt -- the lines between
# 'diag "advanced search"' and the hit counting, and around the else
# branch, are not all visible, so only the visible steps are described.
#
# Patch markers ("-" removed, "+" added):
#   - added: the user agent is taken from $self->{mech} (dies with
#     "no mech?" when it is missing) instead of a package-level $mech;
#   - $hits becomes a lexical and is returned to the caller;
#   - the hit-count pattern drops the outer group around "do\s+(\d+)".
#     In the removed pattern $2 captured the whole "do <n>" text rather
#     than the number, so the "$2 < $1" comparison below did not compare
#     two numbers; in the added pattern $2 is the number itself.
# Presumably "ukupno <n>" is the total and "do <n>" the upper bound of the
# result page text -- TODO confirm against the catalogue's HTML.
sub search {
my ( $self, $query ) = @_;
diag "get $url";
+ my $mech = $self->{mech} || die "no mech?";
$mech->get( $url );
diag "advanced search";
},
);
- $hits = 0;
- if ( $mech->content =~ m{ukupno\s+(\d+).*(do\s+(\d+))}s ) {
+ my $hits = 0;
+ if ( $mech->content =~ m{ukupno\s+(\d+).*do\s+(\d+)}s ) {
$hits = $1;
$hits = $2 if $2 && $2 < $1; # correct for max. results
} else {
diag "in MARC format";
$mech->follow_link( url_regex => qr/format=001/ );
+
+ return $hits;
}
# Package variables shared between next_marc and the named sub field()
# declared inside it: next_marc resets both for every record, field() fills
# $hash and adds fields to $marc.
# NOTE(review): presumably these replace the former "my" lexicals because a
# named nested sub only binds to the first instance of an outer lexical
# ("Variable will not stay shared") -- confirm.
+our ( $hash, $marc );
+
sub next_marc {
my ($self,$format) = @_;
-print $mech->content;
+ $format ||= 'marc';
+
+ my $mech = $self->{mech} || die "no mech?";
+
+#warn "## ", $mech->content;
if ( $mech->content =~ m{Zapis\s+(\d+)}s ) {
my $nr = $1;
-diag "parse $nr";
+warn "parse $nr";
- my $marc = MARC::Record->new;
+ $marc = MARC::Record->new;
+ $hash = {};
my $html = $mech->content;
- my $hash;
sub field {
my ( $f, $v ) = @_;
$v =~ s/\Q \E/ /gs;
-warn "# $f\t$v\n";
+#warn "## $f\t$v\n";
$hash->{$f} = $v;
my ($i1,$i2) = (' ',' ');
($i1,$i2) = ($2,$3) if $f =~ s/^(...)(.)?(.)?/$1/;
my @sf = split(/\|/, $v);
shift @sf;
@sf = map { s/^(\w)\s+//; { $1 => $_ } } @sf;
-diag "sf = ", dump(@sf);
+#warn "## sf = ", dump(@sf);
$marc->add_fields( $f, $i1, $i2, @sf ) if $f =~ m/^\d+$/;
}
$html =~ s|<tr>\s*<td class=td1 id=bold[^>]*>(.+?)</td>\s*<td class=td1>(.+?)</td>|field($1,$2)|ges;
- diag dump($hash);
+# diag "# hash ",dump($hash);
my $id = $hash->{SYS} || die "no SYS";
-
-
- my $path = "marc/$id.$format";
-
- open(my $out, '>:utf8', $path);
- print $out $marc->as_usmarc;
- close($out);
-
- diag "created $path ", -s $path, " bytes";
-
- diag $marc->as_formatted;
+ $self->save_marc( $id, $marc->as_usmarc );
$nr++;
- die if $nr == 3; # FIXME
-
$mech->follow_link( url_regex => qr/set_entry=0*$nr/ );
return $marc->as_usmarc;