sub save_mech {
my ( $mech, $path ) = @_;
$step++;
- $path ||= sprintf('/tmp/isi.%02d.%s', $step, $mech->{ct} =~ m{html}i ? 'html' : 'txt' );
+ mkdir '/tmp/isi/' unless -e '/tmp/isi';
+ my $base_path = sprintf('/tmp/isi/%04d', $step);
+ $path ||= $base_path . ( $mech->{ct} =~ m{html}i ? '.html' : '.txt' );
$mech->save_content( $path );
- warn "# [$step] $path ", -s $path, " ", $mech->ct;
- $mech->dump_all if $dump;
+ warn "# [$step] $path ", -s $path, " ", $mech->ct, "\n";
+ open(my $dump, '>', "$base_path.dump.txt");
+ $mech->dump_all($dump);
}
warn "# get session";
$mech->get( 'http://isiknowledge.com/?DestApp=WOS' );
save_mech $mech;
-warn "# advanced serach";
-$mech->follow_link( url_regex => qr/AdvancedSearch/ );
-save_mech $mech;
+sub search {
+ my $q = shift;
-warn "# cookie_jar ", dump $mech->cookie_jar;
+ warn "# advanced serach";
+ $mech->follow_link( url_regex => qr/AdvancedSearch/ );
+ save_mech $mech;
-$mech->submit_form(
- fields => {
- 'value(input1)' => $q,
- }
-);
-save_mech $mech;
+ warn "# cookie_jar ", dump $mech->cookie_jar;
-warn "# summary";
-$mech->follow_link( url_regex => qr/summary/ );
-save_mech $mech;
+ $mech->submit_form(
+ fields => {
+ 'value(input1)' => $q,
+ }
+ );
+ save_mech $mech;
+
+ warn "# summary";
+ $mech->follow_link( url_regex => qr/summary/ );
+ save_mech $mech;
+}
sub get_results {
my $q = shift;
if ( $mech->content =~ m{invalid API call} ) {
$mech->back;
- return;
+ last;
}
- warn "# save_file $from - $to [$q]";
+ warn "range $from - $to [$q]\n";
$mech->follow_link( url_regex => qr/save_file/ );
save_mech $mech => "/tmp/isi.$q.$from-$to.txt";
$mech->back;
#save_mech $mech;
- } # while
+ }
}
-get_results $q;
-save_mech $mech;
-warn "# citations";
-$mech->follow_link( url_regex => qr/search_mode=CitationReport/ );
-save_mech $mech;
+sub citations {
+ save_mech $mech;
+ warn "# citation report";
+ $mech->follow_link( url_regex => qr/search_mode=CitationReport/ );
+ save_mech $mech;
-$mech->follow_link( url_regex => qr/search_mode=TotalCitingArticles/ );
-save_mech $mech;
+ warn "view citing articles";
+ $mech->follow_link( url_regex => qr/search_mode=TotalCitingArticles/ );
+ save_mech $mech;
+}
+
+sub years {
+ my $years_url = $mech->find_link( text_regex => qr/more options/ )->url_abs;
+ warn "## $years_url";
+ $years_url =~ s{ra_name=\w+}{ra_name=PublicationYear} || die "ra_name";
+ warn "# refine years (hidden by javascript)";
+ warn "http://apps.isiknowledge.com/RAMore.do?product=WOS&search_mode=TotalCitingArticles&SID=T1o6bChdN9PGP1LN1Nh&qid=3&ra_mode=more&ra_name=PublicationYear&db_id=WOS&viewType=raMore\n$years_url\n";
+ $mech->get( $years_url );
+ save_mech $mech;
+
+ my $html = $mech->content;
+ my @years;
+ while ( $html =~ s{>(\d\d\d\d)\s\((\d+)\)</label.+?value="PublicationYear_}{} ) {
+ push @years, [ $1 => $2 ];
+ }
+ warn "# years ",dump @years;
+ $mech->back;
+ return @years;
+}
+
+search $q;
+years;
+get_results $q;
+citations;
+years;
get_results $q . '.citing';