summary |
shortlog |
log |
commit | commitdiff |
tree
raw |
patch |
inline | side by side (from parent 1: e359c0b)
git-svn-id: svn+ssh://mjesec/home/dpavlin/svn/webpac2/trunk@1291 07558da8-63fa-0310-ba24-9fe276d99e06
use warnings;
use strict;
use warnings;
use strict;
my $range_size = 500;
my $dump = @ARGV ? 1 : 0;
my $range_size = 500;
my $dump = @ARGV ? 1 : 0;
$q = 'TS=psychology AND AD=Croatia';
use WWW::Mechanize;
$q = 'TS=psychology AND AD=Croatia';
use WWW::Mechanize;
-use Data::Dump qw/dump/;
+use Data::Dump qw(dump);
+use File::Path;
our $mech = WWW::Mechanize->new(
autocheck => 1,
our $mech = WWW::Mechanize->new(
autocheck => 1,
+my $dir = '/tmp/isi/';
+rmtree $dir if -e $dir;
+mkdir $dir;
+
sub save_mech {
my ( $mech, $path ) = @_;
$step++;
sub save_mech {
my ( $mech, $path ) = @_;
$step++;
- mkdir '/tmp/isi/' unless -e '/tmp/isi';
- my $base_path = sprintf('/tmp/isi/%04d', $step);
+ my $base_path = sprintf('%s/%04d', $dir,$step);
$path ||= $base_path . ( $mech->{ct} =~ m{html}i ? '.html' : '.txt' );
$mech->save_content( $path );
warn "# [$step] $path ", -s $path, " ", $mech->ct, "\n";
$path ||= $base_path . ( $mech->{ct} =~ m{html}i ? '.html' : '.txt' );
$mech->save_content( $path );
warn "# [$step] $path ", -s $path, " ", $mech->ct, "\n";
save_mech $mech;
sub search {
save_mech $mech;
sub search {
warn "# advanced serach";
$mech->follow_link( url_regex => qr/AdvancedSearch/ );
save_mech $mech;
warn "# advanced serach";
$mech->follow_link( url_regex => qr/AdvancedSearch/ );
save_mech $mech;
my $from = 1;
while ( 1 ) {
my $from = 1;
while ( 1 ) {
- my $years_url = $mech->find_link( text_regex => qr/more options/ )->url_abs;
+ my $years_url = $mech->find_link( text_regex => qr/more options/ );
+ if ( ! $years_url ) {
+ warn "W: can't find years\n";
+ return;
+ }
+ $years_url = $years_url->url_abs;
- $years_url =~ s{ra_name=\w+}{ra_name=PublicationYear} || die "ra_name";
+ if ( $years_url !~ s{ra_name=\w+}{ra_name=PublicationYear} ) {
+ warn "W: no ra_name\n";
+ return;
+ }
warn "# refine years (hidden by javascript)";
warn "http://apps.isiknowledge.com/RAMore.do?product=WOS&search_mode=TotalCitingArticles&SID=T1o6bChdN9PGP1LN1Nh&qid=3&ra_mode=more&ra_name=PublicationYear&db_id=WOS&viewType=raMore\n$years_url\n";
$mech->get( $years_url );
save_mech $mech;
my $html = $mech->content;
warn "# refine years (hidden by javascript)";
warn "http://apps.isiknowledge.com/RAMore.do?product=WOS&search_mode=TotalCitingArticles&SID=T1o6bChdN9PGP1LN1Nh&qid=3&ra_mode=more&ra_name=PublicationYear&db_id=WOS&viewType=raMore\n$years_url\n";
$mech->get( $years_url );
save_mech $mech;
my $html = $mech->content;
while ( $html =~ s{>(\d\d\d\d)\s\((\d+)\)</label.+?value="PublicationYear_}{} ) {
while ( $html =~ s{>(\d\d\d\d)\s\((\d+)\)</label.+?value="PublicationYear_}{} ) {
- push @years, [ $1 => $2 ];
- warn "# years ",dump @years;
+ warn "# years ",dump $years;
-get_results $q . '.citing';
+$q .= '.citing';
+get_results;