From cac2e509edf8973c6a87a598aa897535d62cfbb6 Mon Sep 17 00:00:00 2001 From: Dobrica Pavlinusic Date: Sat, 5 Jun 2010 15:49:59 +0000 Subject: [PATCH] use cited reference search for CA= query git-svn-id: svn+ssh://mjesec/home/dpavlin/svn/webpac2/trunk@1335 07558da8-63fa-0310-ba24-9fe276d99e06 --- bin/isi-download-results.pl | 74 ++++++++++++++++++++++++++++++++----- 1 file changed, 64 insertions(+), 10 deletions(-) diff --git a/bin/isi-download-results.pl b/bin/isi-download-results.pl index dfab364..598ac2c 100755 --- a/bin/isi-download-results.pl +++ b/bin/isi-download-results.pl @@ -12,23 +12,22 @@ use Text::Unaccent; # http://images.isiknowledge.com/WOK46/help/WOS/h_advanced_examples.html our $q = 'AD=Croatia'; +$q = 'CA=BRATKO, D'; + my $range_size = 500; my $overlap = 3; # between previous and this range + my $results = 0; my $citations = 0; + my $cites_by_year = 0; my $max_cites = 5000; # ISI limit to get cites -if ( 0 ) { - $q = 'TS=psychology AND AD=Croatia'; - $range_size = 50; - $overlap = 0; - $max_cites = 50; -} - $q = unac_string( 'utf-8', join(' ', @ARGV) ) if @ARGV; +warn "QUERY: $q\n"; + our $mech = WWW::Mechanize->new( autocheck => 0, # it dies in reference download with it! cookie_jar => undef, @@ -212,13 +211,16 @@ sub years { return $years; } -search; -years; -get_results 'results' if $results; +if ( $results ) { + search; + years; + get_results; +} if ( $citations ) { citations; + years unless @ranges; do { my $part; @@ -232,3 +234,55 @@ if ( $citations ) { } while ( @ranges ); } + +if ( $q =~ m{CA=(.+)} ) { + + my $CA = $1; + + warn "# citated reference search"; + $mech->follow_link( url_regex => qr/CitedReferenceSearch/ ); + save_mech; + + + $mech->submit_form( + form_name => 'WOS_CitedReferenceSearch_input_form', + fields => { + 'value(input1)' => $CA, + }, + ); + + my $page = 1; + my $records = $1 if $mech->content =~ m/(\d+)\s+records/; + warn "# found $records records\n"; + my $last_span = 'fake'; + + while (1) { + save_mech "/tmp/isi.$q.citedref.$page"; + + $page++; + + $mech->submit_form( + form_name => 'summary_navigation', + fields => { + 'page' => $page, + }, + ); + + $mech->form_name( 'summary_navigation' ); + last if $mech->value('page') < $page; + + if ( $mech->content =~ m/(\d+\s*-\s*(\d+))/ ) { + warn "span: $1\n"; + last if $2 == $records; + last if $1 == $last_span; + $last_span = $1; + } elsif ( $page > 5 ) { + warn "ARTIFICALLY LIMITED TO 5 PAGES WITHOUT VALID SPAN!"; + last; + } + + } + +} + +warn "OVER\n"; -- 2.20.1