my $results = 0;
my $citations = 0;
+my $cited_reference = 0; # html tables
+my $citing_articles = 1; # as many files as cited articles
my $cites_by_year = 0;
last;
}
+ if ( $mech->content !~ m{Please wait while your request is processed} ) {
+ warn "WARNING: expecting processing request";
+ }
+
my $path = "/tmp/isi.$q.$from-$to";
$path .= '.' . $desc if $desc;
return $years;
}
+
+our $page = 1; # current results page number; package variable used by next_page and by the top-level paging loops
+sub next_page { # advance to the following results page; returns true when the form reports that page, false otherwise
+ $page++;
+ warn "next_page $page\n";
+
+ $mech->submit_form( # request page $page through the 'summary_navigation' form
+ form_name => 'summary_navigation',
+ fields => {
+ 'page' => $page,
+ },
+ );
+
+ save_mech; # called without arguments here, unlike the cited-reference loop which passes an explicit path
+
+ $mech->form_name( 'summary_navigation' ); # select the form on the response so its 'page' field can be read back
+ my $is_next_page = $mech->value('page') == $page; # true only if the form now reports the page that was requested
+ warn "no next_page" unless $is_next_page; # NOTE(review): $page stays incremented even when no further page was reached
+ return $is_next_page;
+}
+
if ( $results ) {
search;
years;
}
-if ( $q =~ m{CA=(.+)} ) {
+
+
+if ( $q =~ m{CA=(.+)} && $cited_reference ) {
my $CA = $1;
while (1) {
save_mech "/tmp/isi.$q.citedref.$page";
- $page++;
-
- $mech->submit_form(
- form_name => 'summary_navigation',
- fields => {
- 'page' => $page,
- },
- );
-
- $mech->form_name( 'summary_navigation' );
- last if $mech->value('page') < $page;
+ last unless next_page();
if ( $mech->content =~ m/(\d+\s*-\s*(\d+))/ ) {
warn "span: $1\n";
}
+if ( $q =~ m{CA=(.+)} && $citing_articles ) { # runs only for CA= queries when $citing_articles is enabled; the capture is not used in this block
+
+ search; # NOTE(review): $page is not reset here; if the cited-reference loop ran first, next_page continues from its last value
+
+ my $orig_q = $q; # $q is rewritten for every link below and restored after the loop
+ my $nr = 0; # running count of CitingArticles links followed, across all pages
+
+ do {
+
+ foreach my $link ( $mech->find_all_links( url_regex => qr/CitingArticles.do/ ) ) { # NOTE(review): the '.' in the pattern is unescaped, so it matches any character
+ $nr++;
+ warn "link $nr\n";
+ $mech->get( $link->url );
+ save_mech;
+ $q = $orig_q . '.citing_article.' . $nr; # presumably get_results names its output after $q -- TODO confirm
+ get_results;
+ $mech->back; # two steps back; presumably returns to the summary page holding the links -- TODO confirm
+ $mech->back;
+
+ #last if $nr > 3; # FIXME only for development
+ }
+
+ } while next_page; # do/while: links on the current page are handled first, then paging continues until next_page returns false
+
+ $q = $orig_q;
+}
+
warn "OVER\n";