# http://images.isiknowledge.com/WOK46/help/WOS/h_advanced_examples.html
our $q = 'AD=Croatia';
+#$q = 'AU=BRATKO, D';
$q = 'CA=BRATKO, D';
+#$q = 'AD=(croat* OR hrvat*)';
-my $range_size = 500;
+my $range_size = 100;
my $overlap = 3; # between previous and this range
my $results = 0;
my $citations = 0;
+my $cited_reference = 1; # html tables
+my $citing_articles = 1; # as many files as cited articles
my $cites_by_year = 0;
}
warn "# get session";
-$mech->get( 'http://isiknowledge.com/?DestApp=WOS' );
+$mech->get( 'http://www.webofknowledge.com/?DestApp=WOS' );
save_mech;
sub search {
my $to = $from + $range_size;
warn "# submit_form results $from - $to\n";
+ save_mech;
$mech->submit_form(
- form_name => 'summary_output_form',
+ form_name => 'output_form',
fields => {
- record_select_type => 'range',
+ 'value(record_select_type)' => 'range',
+ markFrom => $from,
+ markTo => $to,
+
mark_from => $from,
mark_to => $to,
mark_id => 'WOS',
- qo_fields => 'fullrecord',
- citedref => 'citedref',
+ fields_selection => 'ACCESSION_NUM FUNDING SUBJECT_CATEGORY JCR_CATEGORY LANG IDS PAGEC SABBR CITREFC ISSN PUBINFO KEYWORDS CITTIMES ADDRS CONFERENCE_SPONSORS DOCTYPE ABSTRACT CONFERENCE_INFO SOURCE TITLE AUTHORS',
+ filters => 'ACCESSION_NUM FUNDING SUBJECT_CATEGORY JCR_CATEGORY LANG IDS PAGEC SABBR CITREFC ISSN PUBINFO KEYWORDS CITTIMES ADDRS CONFERENCE_SPONSORS DOCTYPE ABSTRACT CONFERENCE_INFO SOURCE TITLE AUTHORS CITREF',
+ fullrec_fields_option => 'CITREF',
- save_options => 'plain_text',
+ save_options => 'fieldtagged',
+ format => 'saveToFile',
- fields => 'Full',
- format => 'save',
},
- button => 'save',
+ button => 'saveToFile',
);
save_mech;
last;
}
+ if ( $mech->content !~ m{Please wait while your request is processed} ) {
+ warn "WARNING: expecting processing request";
+ }
+
my $path = "/tmp/isi.$q.$from-$to";
$path .= '.' . $desc if $desc;
warn "save $from - $to into $path\n";
- $mech->follow_link( url_regex => qr/save_file/ );
+ $mech->submit_form(
+ form_name => 'etsForm',
+ );
save_mech $path;
$from += $range_size - $overlap;
$mech->back;
$mech->back;
- #save_mech;
+ save_mech;
}
}
return $years;
}
+
+our $page = 1; # number of the result page currently being processed; also read by the top-level loops
+sub next_page { # request the following result page; returns true if the navigation form then reports that page
+ $page++; # NOTE(review): this sub bumps the global $page itself -- callers must not increment it again
+ warn "next_page $page\n";
+# submit the 'summary_navigation' form with its 'page' field set to the new page number
+ $mech->submit_form(
+ form_name => 'summary_navigation',
+ fields => {
+ 'page' => $page,
+ },
+ );
+# save_mech is not defined in this chunk; presumably it dumps the current response for inspection -- confirm
+ save_mech;
+# select the navigation form on the response we just received and read its 'page' field back
+ $mech->form_name( 'summary_navigation' );
+ my $is_next_page = $mech->value('page') == $page; # presumably the site keeps the old number when there is no further page -- confirm
+ warn "no next_page" unless $is_next_page;
+ return $is_next_page;
+}
+
if ( $results ) {
search;
years;
}
-if ( $q =~ m{CA=(.+)} ) {
+
+
+if ( $q =~ m{CA=(.+)} && $cited_reference ) {
my $CA = $1;
while (1) {
save_mech "/tmp/isi.$q.citedref.$page";
+ last unless next_page();
$page++;
- $mech->submit_form(
- form_name => 'summary_navigation',
- fields => {
- 'page' => $page,
- },
- );
-
- $mech->form_name( 'summary_navigation' );
- last if $mech->value('page') < $page;
-
if ( $mech->content =~ m/(\d+\s*-\s*(\d+))/ ) {
warn "span: $1\n";
last if $2 == $records;
}
+if ( $q =~ m{CA=(.+)} && $citing_articles ) { # only for CA= queries, and only when the $citing_articles switch is set
+# For every result page: follow each link whose URL matches CitingArticles.do and fetch its results under a per-link name.
+ search;
+# $q is overwritten for each link below, so remember the real query to restore it afterwards
+ my $orig_q = $q;
+ my $nr = 0; # running count of citing-article links followed, across all pages
+# do/while: process the page we are already on before asking next_page for another one
+ do {
+# NOTE(review): the dot in qr/CitingArticles.do/ is unescaped, so it matches any character, not only a literal '.'
+ foreach my $link ( $mech->find_all_links( url_regex => qr/CitingArticles.do/ ) ) {
+ $nr++;
+ warn "link $nr\n";
+ $mech->get( $link->url );
+ save_mech;
+ $q = $orig_q . '.citing_article.' . $nr; # presumably get_results builds its output names from $q -- confirm
+ get_results;
+ $mech->back; # NOTE(review): two steps back presumably return to the page holding the links;
+ $mech->back; # how many are needed depends on what get_results leaves in the history -- confirm
+
+ #last if $nr > 3; # FIXME only for development
+ }
+
+ } while next_page; # repeat until next_page returns a false value
+# put the original query string back for whatever code runs after this block
+ $q = $orig_q;
+}
+
warn "OVER\n";