bugfix: check Isis database error in correct place
[webpac] / feeds / sciencedirect2.pl
index 9b9f1eb..0c8ba91 100755 (executable)
@@ -14,6 +14,11 @@ use strict;
 
 my $debug=1;
 
+my $file;
+
+# uncomment the following line to read the feed from a local file instead of an HTTP connection
+#$file="list.html";
+
 # configure ScienceDirect CSV files location
 my $csv_dir="/data/isis_data/sciencedirect";
 my $j_holdings="sd_JournalHoldingsRpt.txt";
@@ -69,8 +74,8 @@ while(<C>) {
        }
 
        foreach my $i (4, 6, 8, 10) {
+               push @{$journal->{$key}},$data[$i] || "";
                if ($data[$i]) {
-                       push @{$journal->{$key}},$data[$i];
                        $c_nr++;
                }
        }
@@ -81,35 +86,46 @@ print STDERR "$c_nr categories assigned, $c_wo_h categories with holdings\n";
 
 $debug++ if (lc($ARGV[0]) eq "-d");
 
-my $ua = new LWP::UserAgent;
-$ua->agent("Mjesec educational harvester -- contact mglavica\@ffzg.hr 0.0");
-$ua->timeout(60);
-#$ua->env_proxy();
-#$ua->proxy(['http', 'ftp'], 'http://proxy.carnet.hr:8001/');
 
-print STDERR "getting '$url'...\n" if ($debug);
-my $req = HTTP::Request->new(GET => $url);
+my $res;
+if (! $file) {
+       my $ua = new LWP::UserAgent;
+       $ua->agent("Mjesec educational harvester -- contact mglavica\@ffzg.hr 0.0");
+       $ua->timeout(60);
+       #$ua->env_proxy();
+       #$ua->proxy(['http', 'ftp'], 'http://proxy.carnet.hr:8001/');
 
-my @out;
+       print STDERR "getting '$url'...\n" if ($debug);
+       my $req = HTTP::Request->new(GET => $url);
 
-my $res = $ua->request($req);
-if ($res->is_success) {
+       $res = $ua->request($req);
+} elsif (! -e $file) {
+       die "can't find feed file '$file'";
+}
+
+if ($file || $res->is_success) {
        print STDERR "parsing html...\n" if ($debug);
        my $tree = HTML::TreeBuilder->new;
-#      $tree->parse_file("list.html");   # !
-       $tree->parse($res->content);
+       if ($file) {
+               $tree->parse_file("list.html");
+       } else {
+               $tree->parse($res->content);
+       }
 
        foreach my $tr ($tree->look_down('_tag', 'tr')) {
                my $link;
-               if ($link = $tr->look_down('_tag','a')) {
+               foreach my $link ($tr->look_down('_tag','a')) {
                        if ($link->attr('href') =~ m{/science\?_ob=JournalURL}) {
                                my $j=nuc($link->as_text);
                                if ($journal->{$j}) {
-                                       print join("\n",@{$journal->{$j}});
+                                       my $i=0;
+                                       foreach my $line (@{$journal->{$j}}) {
+                                               print $i++,": $line\n";
+                                       }
                                        $j_detailed++;
                                } else {
-                                       print $link->attr('href')."\n";
-                                       print $link->as_text."\n";
+                                       print "0: ",$link->as_text."\n";
+                                       print "7: http://www.sciencedirect.com",$link->attr('href')."\n";
                                        $j_basic++;
                                        print STDERR "can't find details for $j\n" if ($debug);
                                }