From 0f26b3031d2ac007f7340a185af3ae733591daab Mon Sep 17 00:00:00 2001
From: Dobrica Pavlinusic
Date: Tue, 29 Jun 2010 18:52:19 +0000
Subject: [PATCH] extract next_page out and use it

git-svn-id: svn+ssh://mjesec/home/dpavlin/svn/webpac2/trunk@1337 07558da8-63fa-0310-ba24-9fe276d99e06
---
 bin/isi-download-results.pl | 43 ++++++++++++++++++++++++++-----------
 1 file changed, 30 insertions(+), 13 deletions(-)

diff --git a/bin/isi-download-results.pl b/bin/isi-download-results.pl
index 9504185..b768fe0 100755
--- a/bin/isi-download-results.pl
+++ b/bin/isi-download-results.pl
@@ -217,6 +217,27 @@ sub years {
 	return $years;
 }
 
+
+our $page = 1; # number of the result page currently loaded; shared by next_page and the loops that call it
+sub next_page { # request the following result page; returns true when the form then reports that page number
+	$page++; # page number to request
+	warn "next_page $page\n";
+
+	$mech->submit_form(
+		form_name => 'summary_navigation',
+		fields => {
+			'page' => $page,
+		},
+	);
+
+	save_mech; # no-argument call; save_mech is defined outside this patch
+
+	$mech->form_name( 'summary_navigation' ); # presumably selects the navigation form so value() below reads its 'page' field -- confirm against the $mech class
+	my $is_next_page = $mech->value('page') == $page; # numeric compare: did the form's page field reach the requested number?
+	warn "no next_page" unless $is_next_page; # no trailing "\n", so perl appends the script name and line to this warning
+	return $is_next_page;
+}
+
 if ( $results ) {
 	search;
 	years;
@@ -241,6 +262,8 @@ if ( $citations ) {
 
 }
 
+
+
 if ( $q =~ m{CA=(.+)} && $cited_reference ) {
 
 	my $CA = $1;
@@ -265,17 +288,7 @@ if ( $q =~ m{CA=(.+)} && $cited_reference ) {
 
 	while (1) {
 		save_mech "/tmp/isi.$q.citedref.$page";
-		$page++;
-
-		$mech->submit_form(
-			form_name => 'summary_navigation',
-			fields => {
-				'page' => $page,
-			},
-		);
-
-		$mech->form_name( 'summary_navigation' );
-		last if $mech->value('page') < $page;
+		last unless next_page(); # leave the paging loop as soon as next_page returns false
 
 		if ( $mech->content =~ m/(\d+\s*-\s*(\d+))/ ) {
 			warn "span: $1\n";
@@ -298,6 +311,8 @@ if ( $q =~ m{CA=(.+)} && $citing_articles ) {
 	my $orig_q = $q;
 	my $nr = 0;
 
+	do { # the link loop runs for the already-loaded page first, then once per page next_page reaches
+
 	foreach my $link ( $mech->find_all_links( url_regex => qr/CitingArticles.do/ ) ) {
 		$nr++;
 		warn "link $nr\n";
@@ -306,11 +321,13 @@ if ( $q =~ m{CA=(.+)} && $citing_articles ) {
 		$q = $orig_q . '.citing_article.' . $nr;
 		get_results;
 		$mech->back;
-		save_mech;
 		$mech->back;
-		save_mech;
+
+		#last if $nr > 3; # FIXME only for development
 	}
 
+	} while next_page; # bareword call without parens; parses as a call because next_page is defined earlier in the file
+
 	$q = $orig_q;
 
 }
-- 
2.20.1