use warnings;
use strict;
# Search query. The change below turns $q from a file lexical (my) into a
# package variable (our); the calls at the bottom of the file stop passing it
# as an argument and instead append '.citing' to it in place.
-my $q = 'AD=Croatia';
+our $q = 'AD=Croatia';
# NOTE(review): $range_size is not used in the visible part of the file;
# presumably the number of records fetched per batch in get_results -- confirm.
my $range_size = 500;
# $dump is 1 when any command-line argument is given, 0 otherwise.
my $dump = @ARGV ? 1 : 0;
# Unconditionally overrides the initial query assigned above.
$q = 'TS=psychology AND AD=Croatia';
use WWW::Mechanize;
-use Data::Dump qw/dump/;
+use Data::Dump qw(dump);
+use File::Path;
# Shared WWW::Mechanize browser object used by every sub in this file.
# autocheck => 1 asks WWW::Mechanize to die on failed requests (see the
# WWW::Mechanize constructor documentation).
our $mech = WWW::Mechanize->new(
autocheck => 1,
# NOTE(review): the remaining constructor arguments and the closing paren are
# not visible in this excerpt.
# $step counts saved pages; save_mech increments it and uses it in file names.
our $step = 0;
# Dump directory. Each run starts from an empty directory: an existing tree
# is removed (File::Path::rmtree) and the directory is recreated.
# NOTE(review): the result of mkdir is not checked.
+my $dir = '/tmp/isi/';
+rmtree $dir if -e $dir;
+mkdir $dir;
+
# save_mech( $mech [, $path] )
#
# Writes the current content of $mech to a file and logs the result.
# Increments the global $step on every call. When $path is not supplied the
# file name is built from the dump directory and the zero-padded 4-digit step
# number, with the extension '.html' if the object's {ct} field matches
# /html/i and '.txt' otherwise ({ct} is presumably the same content type that
# $mech->ct reports -- confirm).
sub save_mech {
my ( $mech, $path ) = @_;
$step++;
# Old version: created /tmp/isi on demand with a hard-coded path.
- mkdir '/tmp/isi/' unless -e '/tmp/isi';
- my $base_path = sprintf('/tmp/isi/%04d', $step);
# New version: uses $dir, which is created at file level. Because $dir
# already ends in '/', the resulting path contains a double slash.
+ my $base_path = sprintf('%s/%04d', $dir,$step);
$path ||= $base_path . ( $mech->{ct} =~ m{html}i ? '.html' : '.txt' );
$mech->save_content( $path );
# Log: step number, path, size of the written file (-s) and content type.
warn "# [$step] $path ", -s $path, " ", $mech->ct, "\n";
# NOTE(review): the closing brace of save_mech is not visible in this excerpt;
# the call below presumably sits at file level after elided lines -- confirm.
save_mech $mech;
# search()
#
# Follows the first link whose URL matches /AdvancedSearch/ on the current
# page and saves the resulting page with save_mech.
# The removed line took the query as an argument; after the change the sub
# takes no arguments.
# NOTE(review): no use of the query is visible here; any remaining body
# (presumably submitting the global $q) may be elided from this excerpt.
sub search {
- my $q = shift;
-
warn "# advanced serach";
$mech->follow_link( url_regex => qr/AdvancedSearch/ );
save_mech $mech;
}
# get_results()
#
# The removed line took the query as an argument; after the change the sub
# takes no arguments.
# NOTE(review): the loop body is not visible in this excerpt, so what is
# fetched and how the loop terminates cannot be documented from here.
sub get_results {
- my $q = shift;
# $from starts at 1; presumably the first record of the current range -- confirm.
my $from = 1;
while ( 1 ) {
}
# years()
#
# Locates the "more options" link on the current page, rewrites its ra_name
# query parameter to PublicationYear, fetches that URL, saves the page and
# scrapes "YYYY (count)" labels out of the HTML. Navigates back to the
# previous page before returning.
#
# Returns (new version): a hash reference mapping year => count; the value is
# undef if no label matched. Returns nothing (bare return) after a warning
# when the link or the ra_name parameter is missing.
# Returns (old version): a list of [ year, count ] array references; it died
# when ra_name was missing.
sub years {
# Old version called ->url_abs directly on the find_link result, which
# fails when no link is found.
- my $years_url = $mech->find_link( text_regex => qr/more options/ )->url_abs;
# New version checks the find_link result before dereferencing it.
+ my $years_url = $mech->find_link( text_regex => qr/more options/ );
+ if ( ! $years_url ) {
+ warn "W: can't find years\n";
+ return;
+ }
+ $years_url = $years_url->url_abs;
warn "## $years_url";
- $years_url =~ s{ra_name=\w+}{ra_name=PublicationYear} || die "ra_name";
# !~ with s{}{} still performs the substitution in place; the condition is
# true only when nothing was replaced.
+ if ( $years_url !~ s{ra_name=\w+}{ra_name=PublicationYear} ) {
+ warn "W: no ra_name\n";
+ return;
+ }
warn "# refine years (hidden by javascript)";
# Debug output: a hard-coded sample URL followed by the computed one,
# presumably so the two can be compared by eye.
warn "http://apps.isiknowledge.com/RAMore.do?product=WOS&search_mode=TotalCitingArticles&SID=T1o6bChdN9PGP1LN1Nh&qid=3&ra_mode=more&ra_name=PublicationYear&db_id=WOS&viewType=raMore\n$years_url\n";
$mech->get( $years_url );
save_mech $mech;
# Work on a copy of the page content; the loop below consumes it.
my $html = $mech->content;
- my @years;
+ my $years;
# Each iteration deletes one matched label from $html, so the loop ends when
# no label is left. $1 is the four-digit year, $2 the count in parentheses.
while ( $html =~ s{>(\d\d\d\d)\s\((\d+)\)</label.+?value="PublicationYear_}{} ) {
- push @years, [ $1 => $2 ];
+ $years->{$1} = $2;
}
- warn "# years ",dump @years;
+ warn "# years ",dump $years;
# Return the browser to the page that was current before the get above.
$mech->back;
- return @years;
+ return $years;
}
# Main flow, first pass: run the search, collect the year facets, fetch the
# results. After the change the subs are called without arguments.
-search $q;
+search;
years;
-get_results $q;
+get_results;
+
# Second pass: after citations, collect years and results again.
# NOTE(review): citations is defined outside this excerpt.
citations;
years;
# Old version passed the suffixed query as an argument; the new version
# appends '.citing' to the global $q in place before calling get_results.
-get_results $q . '.citing';
+$q .= '.citing';
+get_results;