projects
/
Biblio-Z3950.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
allocate session just once
[Biblio-Z3950.git]
/
Aleph.pm
diff --git
a/Aleph.pm
b/Aleph.pm
index
71d67a7
..
6a3cf3c
100644
(file)
--- a/
Aleph.pm
+++ b/
Aleph.pm
@@
-8,11
+8,8
@@
use Data::Dump qw/dump/;
use base 'Scraper';
use base 'Scraper';
-our $mech = WWW::Mechanize->new();
-our $hits;
-
sub diag {
sub diag {
-
print
"# ", @_, $/;
+
warn
"# ", @_, $/;
}
# Koha Z39.50 query:
}
# Koha Z39.50 query:
@@
-60,16
+57,18
@@
sub usemap {{
# 12 => '',
# 1007 => '',
# 1016 => '',
# 12 => '',
# 1007 => '',
# 1016 => '',
+}};
-};
-
+our $session_id;
sub search {
my ( $self, $query ) = @_;
die "need query" unless defined $query;
sub search {
my ( $self, $query ) = @_;
die "need query" unless defined $query;
- my $url = 'http://161.53.240.197:8991/F?RN=' . rand(1000000000);
+ $session_id ||= int rand(1000000000);
+ # FIXME allocate session just once
+ my $url = 'http://161.53.240.197:8991/F?RN=' . $session_id;
# fake JavaScript code on page which creates random session
diag "get $url";
# fake JavaScript code on page which creates random session
diag "get $url";
@@
-81,16
+80,19
@@
diag "advanced search";
$mech->follow_link( url_regex => qr/find-c/ );
$mech->follow_link( url_regex => qr/find-c/ );
-diag "submit search $query";
+diag "submit search [$query] on ", $self->{database};
+
+ $self->save_content;
$mech->submit_form(
fields => {
'ccl_term' => $query,
$mech->submit_form(
fields => {
'ccl_term' => $query,
+ 'local_base' => $self->{database},
},
);
},
);
- $hits = 0;
- if ( $mech->content =~ m{ukupno\s+(\d+).*
(do\s+(\d+)
)}s ) {
+
my
$hits = 0;
+ if ( $mech->content =~ m{ukupno\s+(\d+).*
do\s+(\d+
)}s ) {
$hits = $1;
$hits = $2 if $2 && $2 < $1; # correct for max. results
} else {
$hits = $1;
$hits = $2 if $2 && $2 < $1; # correct for max. results
} else {
@@
-110,59
+112,69
@@
diag "in MARC format";
}
}
+our ( $hash, $marc );
+
sub next_marc {
my ($self,$format) = @_;
sub next_marc {
my ($self,$format) = @_;
+ $format ||= 'marc';
+
my $mech = $self->{mech} || die "no mech?";
my $mech = $self->{mech} || die "no mech?";
-
print
$mech->content;
+
#warn "## ",
$mech->content;
if ( $mech->content =~ m{Zapis\s+(\d+)}s ) {
my $nr = $1;
if ( $mech->content =~ m{Zapis\s+(\d+)}s ) {
my $nr = $1;
-
diag
"parse $nr";
+
warn
"parse $nr";
- my $marc = MARC::Record->new;
+ $marc = MARC::Record->new;
+ $hash = {};
my $html = $mech->content;
my $html = $mech->content;
- my $hash;
sub field {
my ( $f, $v ) = @_;
$v =~ s/\Q \E/ /gs;
sub field {
my ( $f, $v ) = @_;
$v =~ s/\Q \E/ /gs;
-warn "# $f\t$v\n";
+warn "#
#
$f\t$v\n";
$hash->{$f} = $v;
$hash->{$f} = $v;
+
+ if ( $f eq 'LDR' ) {
+ $marc->leader( $v );
+ return;
+ }
+
+ if ( $f =~ m/\D/ ) {
+ warn "$f not numeric!";
+ return;
+ }
+
+ if ( $v !~ s/^\|// ) { # no subfields
+ $marc->add_fields( $f, $v );
+warn "## ++ ", dump( $f, $v );
+ return;
+ }
+
my ($i1,$i2) = (' ',' ');
($i1,$i2) = ($2,$3) if $f =~ s/^(...)(.)?(.)?/$1/;
my @sf = split(/\|/, $v);
my ($i1,$i2) = (' ',' ');
($i1,$i2) = ($2,$3) if $f =~ s/^(...)(.)?(.)?/$1/;
my @sf = split(/\|/, $v);
- shift @sf;
@sf = map { s/^(\w)\s+//; { $1 => $_ } } @sf;
@sf = map { s/^(\w)\s+//; { $1 => $_ } } @sf;
-diag "sf = ", dump(@sf);
- $marc->add_fields( $f, $i1, $i2, @sf ) if $f =~ m/^\d+$/;
+#warn "## sf = ", dump(@sf);
+ $marc->add_fields( $f, $i1, $i2, @sf );
+warn "## ++ ", dump( $f, $i1, $i2, @sf );
}
$html =~ s|<tr>\s*<td class=td1 id=bold[^>]*>(.+?)</td>\s*<td class=td1>(.+?)</td>|field($1,$2)|ges;
}
$html =~ s|<tr>\s*<td class=td1 id=bold[^>]*>(.+?)</td>\s*<td class=td1>(.+?)</td>|field($1,$2)|ges;
- diag dump($hash);
+ diag "# hash ",dump($hash);
+ diag "# marc ", $marc->as_formatted;
my $id = $hash->{SYS} || die "no SYS";
my $id = $hash->{SYS} || die "no SYS";
-
-
- my $path = "marc/$id.$format";
-
- open(my $out, '>:utf8', $path);
- print $out $marc->as_usmarc;
- close($out);
-
- diag "created $path ", -s $path, " bytes";
-
- diag $marc->as_formatted;
+ $self->save_marc( $id, $marc->as_usmarc );
$nr++;
$nr++;
- die if $nr == 3; # FIXME
-
$mech->follow_link( url_regex => qr/set_entry=0*$nr/ );
return $marc->as_usmarc;
$mech->follow_link( url_regex => qr/set_entry=0*$nr/ );
return $marc->as_usmarc;