From 3cb274d8ed7a89201ff08fcf941b4958670cb4b9 Mon Sep 17 00:00:00 2001 From: Dobrica Pavlinusic Date: Tue, 24 Apr 2012 19:59:28 +0200 Subject: [PATCH] correctly fetch next page of results from vuFind --- t/4-vuFind.t | 4 ++-- t/yaz/vuFind | 2 +- vuFind.pm | 27 ++++++++++++++++++++++++--- 3 files changed, 27 insertions(+), 6 deletions(-) diff --git a/t/4-vuFind.t b/t/4-vuFind.t index b5d5809..e00e320 100755 --- a/t/4-vuFind.t +++ b/t/4-vuFind.t @@ -3,7 +3,7 @@ use warnings; use strict; -use Test::More tests => 14; +use Test::More tests => 29; my $search = join(' ', @ARGV) || 'croatia'; @@ -14,7 +14,7 @@ ok( my $o = vuFind->new(), 'new' ); ok( my $hits = $o->search( $search ), "search: $search" ); like $hits, qr/^\d+$/, "hits: $hits"; -foreach ( 1 .. 10 ) { +foreach ( 1 .. 25 ) { # > 20 to hit next page ok( my $marc = $o->next_marc, "next_marc $_" ); diag $marc; diff --git a/t/yaz/vuFind b/t/yaz/vuFind index 4aeccfa..60cbfad 100644 --- a/t/yaz/vuFind +++ b/t/yaz/vuFind @@ -1,5 +1,5 @@ open localhost:9999/HATHITRUST find "Krleža" -show 1+3 +show 1+25 quit diff --git a/vuFind.pm b/vuFind.pm index d3e95e5..ffdd987 100644 --- a/vuFind.pm +++ b/vuFind.pm @@ -64,13 +64,20 @@ diag "get $url"; diag "got $hits results"; - foreach my $link ( $self->mech->find_all_links( url_regex => qr{/Record/\d+} ) ) { - push @{ $self->{records} }, $link->url; - } + $self->populate_records; return $self->{hits} = $hits; } +sub populate_records { + my ($self) = @_; + + foreach my $link ( $self->mech->find_all_links( url_regex => qr{/Record/\d+} ) ) { + my $url = $link->url; + push @{ $self->{records} }, $url; + warn "## ++ $url\n"; + } +} sub next_marc { my ($self,$format) = @_; @@ -79,6 +86,18 @@ sub next_marc { my $url = shift @{ $self->{records} }; + if ( ! $url ) { + diag "fetch next page"; + $self->save_content; + $self->mech->follow_link( text_regex => qr/Next/ ); + $self->populate_records; + $url = shift @{ $self->{records} }; + if ( ! $url ) { + warn "ERROR no more results\n"; + return; + } + } + my $id = $1 if $url =~ m{Record/(\d+)}; $self->mech->get( $url . '.mrc' ); @@ -87,6 +106,8 @@ sub next_marc { $self->save_marc( "$id.marc", $marc ); + $self->mech->back; # return to search results for next page + return $id; } -- 2.20.1