# http://images.isiknowledge.com/WOK46/help/WOS/h_advanced_examples.html
# Query and stage switches for this run; the query presumably uses the advanced search syntax documented at the URL above.
our $q = 'AD=Croatia'; # default query
+$q = 'CA=BRATKO, D'; # overrides the default; CA= queries are what the cited reference / citing articles stages match on
+
my $range_size = 500;
my $overlap = 3; # between previous and this range
-my $skip_results = 0;
+
+my $results = 0; # 1 = run search, years and get_results for the query
+my $citations = 0; # 1 = run the citations stage
+my $cited_reference = 0; # 1 = cited reference search (needs CA= query); html tables
+my $citing_articles = 1; # 1 = follow every CitingArticles link (needs CA= query); as many files as cited articles
+
my $cites_by_year = 0;
my $max_cites = 5000; # ISI limit to get cites
-if ( 0 ) {
- $q = 'TS=psychology AND AD=Croatia';
- $range_size = 50;
- $overlap = 0;
- $max_cites = 50;
-}
-
$q = unac_string( 'utf-8', join(' ', @ARGV) ) if @ARGV; # command line arguments, if any, replace the query
+warn "QUERY: $q\n";
+
our $mech = WWW::Mechanize->new(
- autocheck => 1,
+ autocheck => 0, # autocheck off: it dies in reference download with it!
cookie_jar => undef,
);
last;
}
+ if ( $mech->content !~ m{Please wait while your request is processed} ) {
+ warn "WARNING: expecting processing request";
+ }
+
my $path = "/tmp/isi.$q.$from-$to";
$path .= '.' . $desc if $desc;
return $years;
}
-search;
-years;
-get_results unless $skip_results;
+# Number of the result page currently shown; advanced by next_page.
+our $page = 1;
+
+# Ask the 'summary_navigation' form for the following page, save the
+# response and report whether the site really moved to that page.
+# Returns true when the form's 'page' field equals the requested number.
+sub next_page {
+	$page += 1;
+	warn "next_page $page\n";
-citations;
+
+	my %navigation = ( 'page' => $page );
+	$mech->submit_form(
+		form_name => 'summary_navigation',
+		fields    => \%navigation,
+	);
+
+	save_mech;
+
+	# select the navigation form of the freshly loaded page and compare
+	# the page number it reports with the one we asked for
+	$mech->form_name( 'summary_navigation' );
+	my $moved = ( $mech->value('page') == $page );
+	unless ( $moved ) {
+		warn "no next_page";
+	}
+	return $moved;
+}
+
+# Stage: plain download of the search results for the query.
+if ( $results ) {
+	search();
+	years();
+	get_results();
+}
+
+# Stage: download of citing records. The loop body runs at least once and
+# repeats for as long as @ranges is non-empty (presumably search/citations
+# consume entries from @ranges -- confirm against their definitions).
+if ( $citations ) {
+
+	citations();
+	unless ( @ranges ) {
+		years();
+	}
+
+	while (1) {
+		my $part;
+		if ( @ranges ) {
+			# prefix the file name part with the start of the current range
+			$part .= $ranges[0]->[0] . '.';
+			search();
+			citations();
+		}
+		$part .= 'citing';
+		get_results( $part );
+		last unless @ranges;
+	}
+
+}
+
+
+
+# Stage: cited reference search. Runs only for CA= queries when
+# $cited_reference is set. Submits the captured CA value to the cited
+# reference form and saves one file per result page until the last span
+# is reached, the span stops changing, or next_page reports no further page.
+if ( $q =~ m{CA=(.+)} && $cited_reference ) {
+
+	my $CA = $1;
+
+	warn "# citated reference search";
+	$mech->follow_link( url_regex => qr/CitedReferenceSearch/ );
+	save_mech;
+
+	$mech->submit_form(
+		form_name => 'WOS_CitedReferenceSearch_input_form',
+		fields => {
+			'value(input1)' => $CA,
+		},
+	);
+
+	# Restart the package-level counter that next_page increments. A
+	# lexical "my $page" here would shadow it: the file name below would
+	# never change and the 5 page guard could never trigger.
+	$page = 1;
+
+	# Total number of records, if the page reports it. Declared apart
+	# from the match because "my ... if ..." has undefined behaviour.
+	my $records;
+	if ( $mech->content =~ m/(\d+)\s+records/ ) {
+		$records = $1;
+		warn "# found $records records\n";
+	} else {
+		warn "# number of records not found\n";
+	}
+	my $last_span = 'fake';
+
+	while (1) {
+		save_mech "/tmp/isi.$q.citedref.$page";
+
+		last unless next_page();
+
+		# NOTE(review): when the span check below ends the loop, the page
+		# just loaded has only been saved by next_page's own save_mech,
+		# not under the citedref name -- confirm this is intended.
+		if ( $mech->content =~ m/(\d+\s*-\s*(\d+))/ ) {
+			my ( $span, $span_end ) = ( $1, $2 );
+			warn "span: $span\n";
+			last if defined($records) && $span_end == $records;
+			# spans are strings like "11 - 20": compare as strings
+			last if $span eq $last_span;
+			$last_span = $span;
+		} elsif ( $page > 5 ) {
+			warn "ARTIFICALLY LIMITED TO 5 PAGES WITHOUT VALID SPAN!";
+			last;
+		}
-do {
- my $part;
- if ( @ranges ) {
- $part .= $ranges[0]->[0] . '.';
- search;
- citations;
}
- $part .= 'citing';
- get_results $part;
-} while ( @ranges );
+}
+
+# Stage: citing articles. Runs only for CA= queries when $citing_articles
+# is set. For every CitingArticles link on every result page it opens the
+# link and calls get_results with $q temporarily extended by
+# ".citing_article.<nr>"; $q is restored once all pages are done.
+if ( $q =~ m{CA=(.+)} && $citing_articles ) {
+
+	search;
+
+	# A fresh search starts at the first page: restart the package-level
+	# counter next_page increments, in case an earlier stage advanced it.
+	$page = 1;
+
+	my $orig_q = $q;
+	my $nr = 0;
+
+	do {
+
+		# dot escaped so only a literal "CitingArticles.do" matches
+		foreach my $link ( $mech->find_all_links( url_regex => qr/CitingArticles\.do/ ) ) {
+			$nr++;
+			warn "link $nr\n";
+			$mech->get( $link->url );
+			save_mech;
+			$q = $orig_q . '.citing_article.' . $nr;
+			get_results;
+			# presumably one step back for get_results and one for the
+			# link itself, returning to the result list -- confirm
+			$mech->back;
+			$mech->back;
+
+			#last if $nr > 3; # FIXME only for development
+		}
+
+	} while next_page;
+
+	$q = $orig_q;
+}
+
+warn "OVER\n";