From: Dobrica Pavlinusic
Date: Tue, 29 Jun 2010 18:32:27 +0000 (+0000)
Subject: download first page of citing articles
X-Git-Url: http://git.rot13.org/?p=webpac2;a=commitdiff_plain;h=8f8575598b93c1d730e66b81379ae170b25cb1d4

download first page of citing articles

git-svn-id: svn+ssh://mjesec/home/dpavlin/svn/webpac2/trunk@1336 07558da8-63fa-0310-ba24-9fe276d99e06
---

diff --git a/bin/isi-download-results.pl b/bin/isi-download-results.pl
index 598ac2c..9504185 100755
--- a/bin/isi-download-results.pl
+++ b/bin/isi-download-results.pl
@@ -19,6 +19,8 @@ my $overlap = 3; # between previous and this range
 
 my $results = 0;
 my $citations = 0;
+my $cited_reference = 0; # html tables
+my $citing_articles = 1; # as many files as cited articles
 my $cites_by_year = 0;
 
 
@@ -118,6 +120,10 @@ sub get_results {
 		last;
 	}
 
+	if ( $mech->content =~ m{Please wait while your request is processed} ) {
+		warn "WARNING: processing request";
+	}
+
 	my $path = "/tmp/isi.$q.$from-$to";
 	$path .= '.' . $desc if $desc;
 
@@ -235,7 +241,7 @@ if ( $citations ) {
 
 }
 
-if ( $q =~ m{CA=(.+)} ) {
+if ( $q =~ m{CA=(.+)} && $cited_reference ) {
 
 	my $CA = $1;
 
@@ -285,4 +291,27 @@ if ( $q =~ m{CA=(.+)} ) {
 
 }
 
+if ( $q =~ m{CA=(.+)} && $citing_articles ) {
+
+	search;
+
+	my $orig_q = $q;
+	my $nr = 0;
+
+	foreach my $link ( $mech->find_all_links( url_regex => qr/CitingArticles.do/ ) ) {
+		$nr++;
+		warn "link $nr\n";
+		$mech->get( $link->url );
+		save_mech;
+		$q = $orig_q . '.citing_article.' . $nr;
+		get_results;
+		$mech->back;
+		save_mech;
+		$mech->back;
+		save_mech;
+	}
+
+	$q = $orig_q;
+}
+
 warn "OVER\n";