# http://images.isiknowledge.com/WOK46/help/WOS/h_advanced_examples.html
our $q = 'AD=Croatia';
-$q = 'CA=BRATKO, D';
+$q = 'AU=BRATKO, D'; # overrides the 'AD=Croatia' value assigned when $q was declared
+#$q = 'AD=(croat* OR hrvat*)';
-my $range_size = 500;
+my $range_size = 100; # added to $from to get the upper bound ($to) of each submitted record range
my $overlap = 3; # between previous and this range
-my $results = 0;
+my $results = 1; # when true, the script calls search and then years
my $citations = 0;
my $cited_reference = 0; # html tables
-my $citing_articles = 1; # as many files as cited articles
+my $citing_articles = 0; # as many files as cited articles
my $cites_by_year = 0;
}
warn "# get session";
-$mech->get( 'http://isiknowledge.com/?DestApp=WOS' );
+$mech->get( 'http://www.webofknowledge.com/?DestApp=WOS' );
save_mech;
sub search {
my $to = $from + $range_size;
warn "# submit_form results $from - $to\n";
+ save_mech;
$mech->submit_form(
- form_name => 'summary_output_form',
+ form_name => 'output_form',
fields => {
- record_select_type => 'range',
+ 'value(record_select_type)' => 'range',
+ markFrom => $from,
+ markTo => $to,
+
mark_from => $from,
mark_to => $to,
mark_id => 'WOS',
- qo_fields => 'fullrecord',
- citedref => 'citedref',
+ fields_selection => 'ACCESSION_NUM FUNDING SUBJECT_CATEGORY JCR_CATEGORY LANG IDS PAGEC SABBR CITREFC ISSN PUBINFO KEYWORDS CITTIMES ADDRS CONFERENCE_SPONSORS DOCTYPE ABSTRACT CONFERENCE_INFO SOURCE TITLE AUTHORS',
+ filters => 'ACCESSION_NUM FUNDING SUBJECT_CATEGORY JCR_CATEGORY LANG IDS PAGEC SABBR CITREFC ISSN PUBINFO KEYWORDS CITTIMES ADDRS CONFERENCE_SPONSORS DOCTYPE ABSTRACT CONFERENCE_INFO SOURCE TITLE AUTHORS CITREF',
+ fullrec_fields_option => 'CITREF',
- save_options => 'plain_text',
+ save_options => 'fieldtagged',
+ format => 'saveToFile',
- fields => 'Full',
- format => 'save',
},
- button => 'save',
+ button => 'saveToFile',
);
save_mech;
last;
}
- if ( $mech->content =~ m{Please wait while your request is processed} ) {
- warn "WARNING: processing request";
+ if ( $mech->content !~ m{Please wait while your request is processed} ) {
+ warn "WARNING: expecting processing request";
}
$path .= '.' . $desc if $desc;
warn "save $from - $to into $path\n";
- $mech->follow_link( url_regex => qr/save_file/ );
+ $mech->submit_form(
+ form_name => 'etsForm',
+ );
save_mech $path;
$from += $range_size - $overlap;
$mech->back;
$mech->back;
- #save_mech;
+ save_mech;
}
}
return $years;
}
+
+our $page = 1; # current result page number; also used by the cited-reference loop when naming saved files
+sub next_page { # request the following result page; returns true only if the form then reports that page
+ $page++; # page number we are about to ask for (NOTE: stays incremented even when no such page exists)
+ warn "next_page $page\n";
+
+ $mech->submit_form( # submit the navigation form with the requested page number
+ form_name => 'summary_navigation',
+ fields => {
+ 'page' => $page,
+ },
+ );
+
+ save_mech; # save_mech is defined outside this excerpt; called here with no arguments
+
+ $mech->form_name( 'summary_navigation' ); # select the navigation form on the returned page so its fields can be read
+ my $is_next_page = $mech->value('page') == $page; # numeric compare: did the form's 'page' field reach the page we asked for?
+ warn "no next_page" unless $is_next_page;
+ return $is_next_page; # boolean result of the comparison above; callers use it as a loop condition
+}
+
if ( $results ) {
search;
years;
}
+
+
if ( $q =~ m{CA=(.+)} && $cited_reference ) {
my $CA = $1;
while (1) {
save_mech "/tmp/isi.$q.citedref.$page";
- $page++;
-
- $mech->submit_form(
- form_name => 'summary_navigation',
- fields => {
- 'page' => $page,
- },
- );
-
- $mech->form_name( 'summary_navigation' );
- last if $mech->value('page') < $page;
+ last unless next_page();
if ( $mech->content =~ m/(\d+\s*-\s*(\d+))/ ) {
warn "span: $1\n";
my $orig_q = $q;
my $nr = 0;
+ do {
+
foreach my $link ( $mech->find_all_links( url_regex => qr/CitingArticles.do/ ) ) {
$nr++;
warn "link $nr\n";
$q = $orig_q . '.citing_article.' . $nr;
get_results;
$mech->back;
- save_mech;
$mech->back;
- save_mech;
+
+ #last if $nr > 3; # FIXME only for development
}
+ } while next_page;
+
$q = $orig_q;
}