save results range_size by range_size
[webpac2] / bin / isi-download-results.pl
1 #!/usr/bin/perl
2
3 use warnings;
4 use strict;
5
# Search query, written in the search service's advanced-search syntax.
# Command-line arguments, when given, are joined with single spaces and used
# as the query; without arguments the original built-in query is used.
my $q = @ARGV ? join( ' ', @ARGV ) : 'TS=psychology AND AD=Croatia';
7
8 use WWW::Mechanize;
9 use Data::Dump qw/dump/;
10
# Options for the single user agent that performs every request below.
my %mech_options = (
	# NOTE(review): autocheck is expected to turn failed requests into fatal
	# errors -- confirm against the installed WWW::Mechanize version.
	autocheck => 1,
	# NOTE(review): an explicit undef looks like it switches cookie handling
	# off; the session would then have to travel in the URLs -- confirm.
	cookie_jar => undef,
);

my $mech = WWW::Mechanize->new( %mech_options );

# Number of pages saved so far. save_mech increments it on every call and
# uses it in its default file name and in its log line.
our $step = 0;
17
# Save the page currently held by $mech to disk and log what was written.
#
# Parameters:
#   $mech - user agent object; must provide ct, save_content and dump_all
#   $path - optional destination file. When omitted (or false) the page goes
#           to /tmp/isi.NN.html if the content type mentions "html"
#           (case-insensitive), otherwise to /tmp/isi.NN.txt, where NN is
#           the zero-padded value of the global $step counter.
#
# Side effects: increments the global $step on every call, writes the file,
# emits one warn line (step, path, file size, content type) and finally
# calls $mech->dump_all.
#
# Returns whatever $mech->dump_all returns.
sub save_mech {
	my ( $mech, $path ) = @_;
	$step++;

	# Read the content type once through the public accessor instead of
	# reaching into the object's hash, and tolerate a response without one.
	my $ct = $mech->ct;
	$ct = '' unless defined $ct;

	$path ||= sprintf( '/tmp/isi.%02d.%s', $step, $ct =~ m{html}i ? 'html' : 'txt' );
	$mech->save_content( $path );
	warn "# [$step] $path ", -s $path, " ", $ct;
	$mech->dump_all;
}
26
# Walk from the entry page to the result summary, saving every page on the way.

# Entry page.
warn "# get session";
$mech->get( 'http://isiknowledge.com/?DestApp=WOS' );
save_mech( $mech );

# First link whose URL matches AdvancedSearch.
warn "# advanced serach";
$mech->follow_link( url_regex => qr/AdvancedSearch/ );
save_mech( $mech );

warn "# cookie_jar ", dump( $mech->cookie_jar );

# Put the query into the search form's input field and submit it.
my %search_fields = ( 'value(input1)' => $q );
$mech->submit_form( fields => \%search_fields );
save_mech( $mech );

# First link whose URL matches summary.
warn "# summary";
$mech->follow_link( url_regex => qr/summary/ );
save_mech( $mech );
47
# Download the result set in consecutive chunks of $range_size records.
# Each pass submits the summary output form for one record range, saves the
# produced file as /tmp/isi.<query>.<from>-<to>.txt and then steps back to
# the summary page. The loop ends when the response contains
# "invalid API call".
my $from = 1;
my $range_size = 50;

# The query is embedded in the output file name; replace path separators so
# it cannot point into another (possibly missing) directory. Queries without
# "/" produce exactly the same name as before.
( my $q_file = $q ) =~ s{/}{_}g;

while ( $from ) {

	# The range is requested as from..to, so a chunk of $range_size records
	# ends at $from + $range_size - 1. The previous "$from + $range_size"
	# asked for one record too many and fetched each boundary record twice
	# (1-51, 51-101, ...).
	# NOTE(review): assumes mark_from/mark_to are both inclusive -- confirm.
	my $to = $from + $range_size - 1;

	$mech->submit_form(
		form_name => 'summary_output_form',
		fields => {
			record_select_type => 'range',
			mark_from => $from,
			mark_to => $to,
			mark_id => 'WOS',

			qo_fields => 'fullrecord',
			citedref => 'citedref',

			save_options => 'plain_text',

			fields => 'Full',
			format => 'save',
		},
		button => 'save',
	);
	save_mech $mech;

	# Stop once the response reports an invalid call.
	last if $mech->content =~ m{invalid API call};

	warn "# save_file";
	$mech->follow_link( url_regex => qr/save_file/ );
	save_mech $mech => "/tmp/isi.$q_file.$from-$to.txt";

	$from += $range_size;

	# Two pages were loaded in this pass (form result and saved file);
	# step back over both before the next submit_form.
	$mech->back;
	$mech->back;
	#save_mech $mech;

}