my $debug=1;
+my $file;
+
+# Uncomment the following line to read the journal list from a local file instead of fetching it over HTTP.
+#$file="list.html";
+
# configure ScienceDirect CSV files location
my $csv_dir="/data/isis_data/sciencedirect";
my $j_holdings="sd_JournalHoldingsRpt.txt";
}
foreach my $i (4, 6, 8, 10) {
+ push @{$journal->{$key}},$data[$i] || "";
if ($data[$i]) {
- push @{$journal->{$key}},$data[$i];
$c_nr++;
}
}
$debug++ if (lc($ARGV[0]) eq "-d");
-my $ua = new LWP::UserAgent;
-$ua->agent("Mjesec educational harvester -- contact mglavica\@ffzg.hr 0.0");
-$ua->timeout(60);
-#$ua->env_proxy();
-#$ua->proxy(['http', 'ftp'], 'http://proxy.carnet.hr:8001/');
-print STDERR "getting '$url'...\n" if ($debug);
-my $req = HTTP::Request->new(GET => $url);
+my $res;
+if (! $file) {
+ my $ua = new LWP::UserAgent;
+ $ua->agent("Mjesec educational harvester -- contact mglavica\@ffzg.hr 0.0");
+ $ua->timeout(60);
+ #$ua->env_proxy();
+ #$ua->proxy(['http', 'ftp'], 'http://proxy.carnet.hr:8001/');
-my @out;
+ print STDERR "getting '$url'...\n" if ($debug);
+ my $req = HTTP::Request->new(GET => $url);
-my $res = $ua->request($req);
-if ($res->is_success) {
+ $res = $ua->request($req);
+} elsif (! -e $file) {
+ die "can't find feed file '$file'";
+}
+
+if ($file || $res->is_success) {
print STDERR "parsing html...\n" if ($debug);
my $tree = HTML::TreeBuilder->new;
-# $tree->parse_file("list.html"); # !
- $tree->parse($res->content);
+		# Feed the parser from the configured local file when $file is set,
+		# otherwise from the body of the HTTP response fetched above.
+		if ($file) {
+			# Use the configured path, not a hard-coded name, and fail loudly:
+			# parse_file returns undef when the file cannot be opened.
+			$tree->parse_file($file)
+				or die "can't parse feed file '$file': $!";
+		} else {
+			$tree->parse($res->content);
+		}
foreach my $tr ($tree->look_down('_tag', 'tr')) {
my $link;
- if ($link = $tr->look_down('_tag','a')) {
+ foreach my $link ($tr->look_down('_tag','a')) {
if ($link->attr('href') =~ m{/science\?_ob=JournalURL}) {
my $j=nuc($link->as_text);
if ($journal->{$j}) {
- print join("\n",@{$journal->{$j}});
+ my $i=0;
+ foreach my $line (@{$journal->{$j}}) {
+ print $i++,": $line\n";
+ }
$j_detailed++;
} else {
- print $link->attr('href')."\n";
- print $link->as_text."\n";
+ print "0: ",$link->as_text."\n";
+ print "7: http://www.sciencedirect.com",$link->attr('href')."\n";
$j_basic++;
print STDERR "can't find details for $j\n" if ($debug);
}